Example #1
import warnings
import datetime as dt

import datacube
# DEA helper functions; the module paths below assume the dea_tools package
# (older dea-notebooks versions expose these as dea_datahandling /
# dea_bandindices scripts instead)
from dea_tools.datahandling import load_ard
from dea_tools.bandindices import calculate_indices


def load_crophealth_data():
    """
    Loads the Sentinel-2 analysis-ready data (ARD) product for the crop health
    case-study area. The ARD product is loaded for the last year.
    Last modified: January 2020

    Outputs
    ds - dataset containing combined, masked data from Sentinel-2A and -2B.
    Masked values are set to 'nan'
    """

    # Suppress warnings
    warnings.filterwarnings('ignore')

    # Initialise the data cube. 'app' argument is used to identify this app
    dc = datacube.Datacube(app='Crophealth-app')

    # Specify latitude and longitude ranges
    latitude = (-24.974997, -24.995971)
    longitude = (152.429994, 152.395805)

    # Specify the date range
    # Calculated as today's date minus one year (365 days)
    # Dates are converted to strings as required by loading function below
    end_date = dt.date.today()
    start_date = end_date - dt.timedelta(days=365)

    time = (start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d"))

    # Construct the data cube query
    products = ["s2a_ard_granule", "s2b_ard_granule"]

    query = {
        'x': longitude,
        'y': latitude,
        'time': time,
        'measurements': ['nbar_red', 'nbar_green', 'nbar_blue',
                         'nbar_nir_1', 'nbar_swir_2', 'nbar_swir_3'],
        'output_crs': 'EPSG:3577',
        'resolution': (-10, 10)
    }

    # Load the data and mask out bad quality pixels
    ds_s2 = load_ard(dc, products=products, min_gooddata=0.5, **query)

    # Calculate the normalised difference vegetation index (NDVI) across
    # all pixels for each image.
    # The result is added to the dataset as a new 'NDVI' data variable
    ds_s2 = calculate_indices(ds_s2, index='NDVI', collection='ga_s2_1')

    # Return the data
    return ds_s2
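
A minimal usage sketch, assuming a Digital Earth Australia environment where the imports above resolve; the plotting call is illustrative only:

# Load the crop-health dataset, then inspect and plot the NDVI variable
ds = load_crophealth_data()
print(ds)                       # dimensions and data variables
ds.NDVI.isel(time=-1).plot()    # NDVI for the most recent timestep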
Example #2
prefire_end = '2020-01-06'
postfire_start = '2020-01-07'
postfire_end = '2020-05-01'

query_1 = {
    "x": (central_lon - buffer, central_lon + buffer),
    "y": (central_lat - buffer, central_lat + buffer),
    "time": (prefire_start, prefire_end),
    "output_crs": "EPSG:32755",
    "resolution": (-10, 10)
}

prefire_data = load_ard(
    dc=dc,
    products=['s2a_ard_granule', 's2b_ard_granule'],
    measurements=['nbart_nir_1', 'nbart_swir_3'],
    min_gooddata=0,
    # dask_chunks={'x': 'auto', 'y': 'auto'},
    group_by='solar_day',
    **query_1)

prefire_image = prefire_data.median(dim='time')
prefire_image = calculate_indices(prefire_image,
                                  index='NBR',
                                  collection='ga_s2_1',
                                  drop=False)
prefire_burnratio = prefire_image.NBR
prefire_burnratio.data

# Post-fire query (same spatial extent and grid as query_1)
query_2 = {
    "x": (central_lon - buffer, central_lon + buffer),
    "y": (central_lat - buffer, central_lat + buffer),
    "time": (postfire_start, postfire_end),
    "output_crs": "EPSG:32755",
    "resolution": (-10, 10)
}
Example #3
def get_training_data_for_shp(gdf,
                              index,
                              row,
                              out_arrs,
                              out_vars,
                              products,
                              dc_query,
                              custom_func=None,
                              field=None,
                              calc_indices=None,
                              reduce_func=None,
                              drop=True,
                              zonal_stats=None):
    """
    Function to extract data from the ODC for training a machine learning classifier 
    using a geopandas geodataframe of labelled geometries. 
    This function provides a number of pre-defined methods for producing training data, 
    including: calculating band indices, reducing time series using summary statistics, 
    and/or generating zonal statistics across polygons. The 'custom_func' parameter allows 
    the user to supply a custom function for generating features rather than using the
    pre-defined methods.

    Parameters
    ----------
    gdf : geopandas geodataframe
        geometry data in the form of a geopandas geodataframe
    index : integer
        The positional index of the polygon within 'gdf' from which to
        extract data.
    row : geopandas series
        The row of 'gdf' at 'index'; used to read the class label from
        the 'field' column.
    products : list
        a list of products to load from the datacube. 
        e.g. ['ga_ls7e_ard_3', 'ga_ls8c_ard_3']
    dc_query : dictionary
        Datacube query object, should not contain lat and long (x or y)
        variables as these are supplied by the 'gdf' variable
    field : string 
        A string containing the name of the column with class labels. 
        Field must contain numeric values.
    out_arrs : list 
        An empty list into which the training data arrays are stored.
    out_vars : list 
        An empty list into which the data variable names are stored.
    custom_func : function, optional 
        A custom function for generating feature layers. If this parameter
        is set, all other options (excluding 'zonal_stats'), will be ignored.
        The result of the 'custom_func' must be a single xarray dataset 
        containing 2D coordinates (i.e x, y - no time dimension). The custom function
        has access to the datacube dataset extracted using the 'dc_query' params.
        Example custom function to return multiple products:
        `def custom_function(ds):
            dc = datacube.Datacube(app='custom_function')
            mad = dc.load(product='ls8_nbart_tmad_annual', like=ds.geobox)
            output = xr.merge([ds, mad])
            return output`
    calc_indices: list, optional
        If not using a custom func, then this parameter provides a method for
        calculating a number of remote sensing indices (e.g. `['NDWI', 'NDVI']`).
    reduce_func : string, optional 
        Function to reduce the data from multiple time steps to
        a single timestep. Options are 'mean', 'median', 'std',
        'max', 'min', 'geomedian'.  Ignored if 'custom_func' is provided.
    drop : boolean, optional 
        If this variable is set to True, and 'calc_indices' are supplied, the
        spectral bands will be dropped from the dataset leaving only the
        band indices as data variables in the dataset. Default is True.
    zonal_stats : string, optional
        An optional string giving the names of zonal statistics to calculate 
        for each polygon. Default is None (all pixel values are returned). Supported 
        values are 'mean', 'median', 'max', 'min', and 'std'. Will work in 
        conjunction with a 'custom_func'.


    Returns
    --------
    Nothing is returned directly. The function appends a numpy.array containing
    the class label and the extracted data for each pixel or polygon to 'out_arrs',
    and appends the corresponding data variable names to 'out_vars'.

    """

    # prevent function altering dictionary kwargs
    dc_query = deepcopy(dc_query)

    # remove dask chunks if supplied, as we're using
    # multiprocessing for parallelization
    if 'dask_chunks' in dc_query.keys():
        dc_query.pop('dask_chunks', None)

    # connect to datacube
    dc = datacube.Datacube(app='training_data')

    # set up query based on the polygon (assumed to be in Australian Albers, EPSG:3577)
    geom = geometry.Geometry(gdf.geometry.values[index].__geo_interface__,
                             geometry.CRS('epsg:3577'))

    q = {"geopolygon": geom}

    # merge polygon query with user supplied query params
    dc_query.update(q)

    # load_ard doesn't handle derivative products, so check
    # products aren't one of those below
    others = [
        'ls5_nbart_geomedian_annual', 'ls7_nbart_geomedian_annual',
        'ls8_nbart_geomedian_annual', 'ls5_nbart_tmad_annual',
        'ls7_nbart_tmad_annual', 'ls8_nbart_tmad_annual',
        'landsat_barest_earth', 'ls8_barest_earth_albers'
    ]

    if products[0] in others:
        ds = dc.load(product=products[0], **dc_query)
        ds = ds.where(ds != 0, np.nan)

    else:
        # load data
        with HiddenPrints():
            ds = load_ard(dc=dc,
                          products=products,
                          output_crs='EPSG:3577',
                          **dc_query)

    # create polygon mask
    with HiddenPrints():
        mask = xr_rasterize(gdf.iloc[[index]], ds)

    # Use custom function for training data if it exists
    if custom_func is not None:
        with HiddenPrints():
            data = custom_func(ds)
            # Mask dataset
            data = data.where(mask)
    else:
        # Mask dataset
        ds = ds.where(mask)
        # first check enough variables are set to run functions
        if (len(ds.time.values) > 1) and (reduce_func is None):
            raise ValueError(
                "Your dataset has " + str(len(ds.time.values)) +
                " time-steps, please provide a reduction function," +
                " e.g. reduce_func='mean'")

        if calc_indices is not None:
            # determine which collection is being loaded
            if products[0] in others:
                collection = 'ga_ls_2'
            elif '3' in products[0]:
                collection = 'ga_ls_3'
            elif 's2' in products[0]:
                collection = 'ga_s2_1'

            if len(ds.time.values) > 1:

                if reduce_func in ['mean', 'median', 'std', 'max', 'min']:
                    with HiddenPrints():
                        data = calculate_indices(ds,
                                                 index=calc_indices,
                                                 drop=drop,
                                                 collection=collection)
                        # getattr is equivalent to calling data.reduce_func
                        method_to_call = getattr(data, reduce_func)
                        data = method_to_call(dim='time')

                elif reduce_func == 'geomedian':
                    data = GeoMedian().compute(ds)
                    with HiddenPrints():
                        data = calculate_indices(data,
                                                 index=calc_indices,
                                                 drop=drop,
                                                 collection=collection)

                else:
                    raise Exception(
                        reduce_func + " is not one of the supported" +
                        " reduce functions ('mean','median','std','max','min', 'geomedian')"
                    )

            else:
                with HiddenPrints():
                    data = calculate_indices(ds,
                                             index=calc_indices,
                                             drop=drop,
                                             collection=collection)

        # when band indices are not required, reduce the
        # dataset to a 2d array through reduce function
        if calc_indices is None:

            if len(ds.time.values) > 1:

                if reduce_func == 'geomedian':
                    data = GeoMedian().compute(ds)

                elif reduce_func in ['mean', 'median', 'std', 'max', 'min']:
                    method_to_call = getattr(ds, reduce_func)
                    data = method_to_call('time')
            else:
                data = ds.squeeze()

    if zonal_stats is None:
        # If no zonal stats were requested then extract all pixel values
        flat_train = sklearn_flatten(data)
        # Make a labelled array of identical size
        flat_val = np.repeat(row[field], flat_train.shape[0])
        stacked = np.hstack((np.expand_dims(flat_val, axis=1), flat_train))

    elif zonal_stats in ['mean', 'median', 'std', 'max', 'min']:
        method_to_call = getattr(data, zonal_stats)
        flat_train = method_to_call()
        flat_train = flat_train.to_array()
        stacked = np.hstack((row[field], flat_train))

    else:
        raise Exception(
            zonal_stats + " is not one of the supported" +
            " reduce functions ('mean','median','std','max','min')")

    # Append training data and labels to list
    out_arrs.append(stacked)
    out_vars.append([field] + list(data.data_vars))
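
A hedged usage sketch for the function above; the file name, the numeric 'class' column and the query values are illustrative assumptions, and in practice the function is usually driven in parallel rather than in a plain loop:

# Hypothetical inputs: a polygon file with a numeric 'class' column
import geopandas as gpd

training_points = (gpd.read_file('training_polygons.geojson')
                   .to_crs('EPSG:3577')
                   .reset_index(drop=True))

query = {'time': ('2019-01', '2019-12'),
         'measurements': ['nbart_red', 'nbart_nir'],
         'resolution': (-30, 30)}

results, column_names = [], []
for index, row in training_points.iterrows():
    get_training_data_for_shp(training_points, index, row,
                               results, column_names,
                               products=['ga_ls8c_ard_3'],
                               dc_query=query,
                               field='class',
                               calc_indices=['NDVI'],
                               reduce_func='median',
                               zonal_stats='mean')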
Example #4
def load_s2_nbart_ts_cor_dask(
    dc, lat_top, lat_bottom, lon_left, lon_right, start_of_epoch, end_of_epoch, chunks, cor_type
):
    """
    Load a Sentinel-2 NBART time series as a dask-backed xarray Dataset,
    in Australian Albers (cor_type=0) or the local UTM zone (cor_type=1).
    """

    allbands = [
        "nbart_blue",
        "nbart_green",
        "nbart_red",
        "nbart_nir_2",
        "nbart_swir_2",
        "nbart_swir_3",
        "fmask",
    ]
    
    # Define spatial and temporal coverage

    if cor_type == 0:
        # Australian Albers (EPSG:3577)
        newquery = {
            "crs": "EPSG:3577",
            "x": (lon_left, lon_right),
            "y": (lat_top, lat_bottom),
            "time": (start_of_epoch, end_of_epoch),
            "output_crs": "EPSG:3577",
            "resolution": (-20, 20),
            "measurements": allbands,
            "dask_chunks": chunks,
            "group_by": "solar_day",
        }
    elif cor_type == 1:
        # Local UTM projection zone code
        outcrs = utm_code(lon_left, lon_right)
        newquery = {
            "x": (lon_left, lon_right),
            "y": (lat_top, lat_bottom),
            "time": (start_of_epoch, end_of_epoch),
            "output_crs": outcrs,
            "resolution": (-20, 20),
            "measurements": allbands,
            "dask_chunks": chunks,
            "group_by": "solar_day",
        }
    else:
        raise ValueError("cor_type must be 0 (Albers) or 1 (UTM)")

    # Shorter band names to use within the dataset
    new_bandlabels = ["blue", "green", "red", "nir", "swir1", "swir2", "fmask"]

    # Load S2 data using the Datacube API
    s2_ds = ddh.load_ard(dc,
                         products=['s2a_ard_granule', 's2b_ard_granule'],
                         min_gooddata=0.0,
                         mask_pixel_quality=False,
                         **newquery)

    # Rename spectral bands to the new band labels
    rndic = dict(zip(allbands, new_bandlabels))
    s2_ds = s2_ds.rename(rndic)

    return s2_ds
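
A brief usage sketch, assuming a datacube connection and that the snippet's helpers (ddh, utm_code) are importable; the coordinates, dates and chunk sizes are illustrative:

dc = datacube.Datacube(app='s2_ts_load')
s2_ds = load_s2_nbart_ts_cor_dask(dc,
                                  lat_top=-35.30, lat_bottom=-35.40,
                                  lon_left=149.05, lon_right=149.15,
                                  start_of_epoch='2019-01-01',
                                  end_of_epoch='2019-12-31',
                                  chunks={'time': 1, 'x': 1000, 'y': 1000},
                                  cor_type=0)
print(s2_ds)  # dask-backed dataset; nothing is computed until .load()/.compute()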
Example #5
def run_filmstrip_app(output_name,
                      time_range,
                      time_step,
                      tide_range=(0.0, 1.0),
                      resolution=(-30, 30),
                      max_cloud=50,
                      ls7_slc_off=False,
                      size_limit=200):
    '''
    An interactive app that allows the user to select a region from a
    map, then load Digital Earth Australia Landsat data and combine it
    using the geometric median ("geomedian") statistic to reveal the 
    median or 'typical' appearance of the landscape for a series of 
    time periods.
    
    The results for each time period are combined into a 'filmstrip' 
    plot which visualises how the landscape has changed in appearance 
    across time, with a 'change heatmap' panel highlighting potential 
    areas of greatest change.
    
    For coastal applications, the analysis can be customised to select 
    only satellite images obtained during a specific tidal range 
    (e.g. low, average or high tide).
    
    Last modified: June 2020

    Parameters
    ----------  
    output_name : str
        A name that will be used to name the output filmstrip plot file.
    time_range : tuple
        A tuple giving the date range to analyse 
        (e.g. `time_range = ('1988-01-01', '2017-12-31')`).
    time_step : dict
        This parameter sets the length of the time periods to compare 
        (e.g. `time_step = {'years': 5}` will generate one filmstrip 
        plot for every five years of data; `time_step = {'months': 18}` 
        will generate one plot for each 18 month period, etc.). Time 
        periods are counted from the first value given in `time_range`.
    tide_range : tuple, optional
        An optional parameter that can be used to generate filmstrip 
        plots based on specific ocean tide conditions. This can be 
        valuable for analysing change consistently along the coast. 
        For example, `tide_range = (0.0, 0.2)` will select only 
        satellite images acquired at the lowest 20% of tides; 
        `tide_range = (0.8, 1.0)` will select images from the highest 
        20% of tides. The default is `tide_range = (0.0, 1.0)` which 
        will select all images regardless of tide.
    resolution : tuple, optional
        The spatial resolution to load data. The default is 
        `resolution = (-30, 30)`, which will load data at 30 m pixel 
        resolution. Increasing this (e.g. to `resolution = (-100, 100)`) 
        can be useful for loading large spatial extents.
    max_cloud : int, optional
        This parameter can be used to exclude satellite images with 
        excessive cloud. The default is `50`, which will keep all images 
        with less than 50% cloud.
    ls7_slc_off : bool, optional
        An optional boolean indicating whether to include data from 
        after the Landsat 7 SLC failure (i.e. SLC-off). Defaults to 
        False, which removes all Landsat 7 observations acquired 
        after May 31 2003.
    size_limit : int, optional
        An optional size limit for the area selection in sq km.
        Defaults to 200 sq km.
        
    Returns
    -------
    geomedian_ds : xarray.Dataset
        An xarray dataset containing geomedian composites for each 
        timestep in the analysis.
        
    '''    
    
    ########################
    # Select and load data #
    ########################
    
    # Define centre_coords as a global variable
    global centre_coords
        
    # Test if centre_coords is in the global namespace;
    # use default value if it isn't
    if 'centre_coords' not in globals():
        centre_coords = (-33.9719, 151.1934)
    
    # Plot interactive map to select area
    basemap = basemap_to_tiles(basemaps.Esri.WorldImagery)
    geopolygon = select_on_a_map(height='600px',
                                 layers=(basemap,),
                                 center=centre_coords,
                                 zoom=12)
        
    # Set centre coords based on most recent selection to re-focus
    # subsequent data selections
    centre_coords = geopolygon.centroid.points[0][::-1]

    # Test size of selected area
    area = geopolygon.to_crs(crs=CRS('epsg:3577')).area / 1000000
    if area > size_limit: 
        print(f'Warning: Your selected area is {area:.0f} sq km. '
              f'Please select an area of less than {size_limit} sq km.'
              f'\nTo select a smaller area, re-run the cell '
              f'above and draw a new polygon.')
        
    else:
        
        print('Starting analysis...')
        
        # Connect to datacube database
        dc = datacube.Datacube(app='Change_filmstrips')   
        
        # Configure local dask cluster
        create_local_dask_cluster()
        
        # Obtain native CRS 
        crs = mostcommon_crs(dc=dc, 
                             product='ga_ls5t_ard_3', 
                             query={'time': '1990', 
                                    'geopolygon': geopolygon})
        
        # Create query based on time range, area selected, custom params
        query = {'time': time_range,
                 'geopolygon': geopolygon,
                 'output_crs': crs,
                 'gqa_iterative_mean_xy': [0, 1],
                 'cloud_cover': [0, max_cloud],
                 'resolution': resolution,
                 'dask_chunks': {'time': 1, 'x': 2000, 'y': 2000},
                 'align': (resolution[1] / 2.0, resolution[1] / 2.0)}

        # Load data from all three Landsats
        ds = load_ard(dc=dc, 
                      measurements=['nbart_red', 
                                    'nbart_green', 
                                    'nbart_blue'],  
                      products=['ga_ls5t_ard_3', 
                                'ga_ls7e_ard_3', 
                                'ga_ls8c_ard_3'], 
                      min_gooddata=0.0,
                      ls7_slc_off=ls7_slc_off,
                      **query)
        
        # Optionally calculate tides for each timestep in the satellite 
        # dataset and drop any observations outside the specified tide range
        if tide_range != (0.0, 1.0):
            ds = tidal_tag(ds=ds, tidepost_lat=None, tidepost_lon=None)
            min_tide, max_tide = ds.tide_height.quantile(tide_range).values
            ds = ds.sel(time = (ds.tide_height >= min_tide) & 
                               (ds.tide_height <= max_tide))
            ds = ds.drop('tide_height')
            print(f'    Keeping {len(ds.time)} observations with tides '
                  f'between {min_tide:.2f} and {max_tide:.2f} m')
        
        # Create time step ranges to generate filmstrips from
        bins_dt = pd.date_range(start=time_range[0], 
                                end=time_range[1], 
                                freq=pd.DateOffset(**time_step))

        # Bin all satellite observations by timestep. If some observations
        # fall outside the upper bin, label these with the highest bin
        labels = bins_dt.astype('str')
        time_steps = (pd.cut(ds.time.values, bins_dt, labels = labels[:-1])
                      .add_categories(labels[-1])
                      .fillna(labels[-1])) 
        time_steps_var = xr.DataArray(time_steps, [('time', ds.time)], 
                                      name='timestep')

        # Resample data temporally into time steps, and compute geomedians
        geomedian_ds = (ds.groupby(time_steps_var)
                        .apply(lambda ds_subset:
                               xr_geomedian(ds_subset,
                                            num_threads=1,
                                            eps=0.2 * (1 / 10_000),
                                            nocheck=True)))
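
A hedged usage sketch; the output name, date range and time step are illustrative. The app opens an interactive map for area selection and, as the docstring describes, produces one geomedian composite per time step:

geomedian_ds = run_filmstrip_app(output_name='example_area',
                                 time_range=('1988-01-01', '2017-12-31'),
                                 time_step={'years': 5},
                                 tide_range=(0.0, 1.0),
                                 resolution=(-30, 30),
                                 max_cloud=50)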
Example #6
def dNBR_processing(coordinates):
    """
    Compute the delta Normalised Burn Ratio (dNBR) for a 0.2 x 0.2 degree
    tile centred on 'coordinates' and write the result to a GeoTIFF.
    """

    # Load all baseline-period data available from the s2a/b_ard_granule products
    prefire_ard = load_ard(
        dc=dc,
        products=['s2a_ard_granule', 's2b_ard_granule'],
        x=(coordinates.x - 0.1, coordinates.x + 0.1),
        y=(coordinates.y - 0.1, coordinates.y + 0.1),
        time=(prefire_start, prefire_end),
        measurements=['nbart_nir_1', 'nbart_swir_3'],
        min_gooddata=0.1,
        output_crs='EPSG:32755',  # UTM Zone 55S
        resolution=(-10, 10),
        group_by='solar_day')

    prefire_ard = calculate_indices(prefire_ard,
                                    index='NBR',
                                    collection='ga_s2_1',
                                    drop=False)

    # Compute median using all observations in the dataset along the time axis
    prefire_image = prefire_ard.median(dim='time')

    # Free memory: the full pre-fire time series is no longer needed
    del prefire_ard

    # Select NBR
    prefire_NBR = prefire_image.NBR

    del prefire_image

    # Load all data in post-fire period available from s2a/b_ard_granule datasets
    postfire_ard = load_ard(
        dc=dc,
        products=['s2a_ard_granule', 's2b_ard_granule'],
        x=(coordinates.x - 0.1, coordinates.x + 0.1),
        y=(coordinates.y - 0.1, coordinates.y + 0.1),
        time=(postfire_start, postfire_end),
        measurements=['nbart_nir_1', 'nbart_swir_3'],
        min_gooddata=0.1,
        output_crs='EPSG:32755',  # UTM Zone 55S
        resolution=(-10, 10),
        group_by='solar_day')

    # Calculate NBR on all post-fire images
    postfire_ard = calculate_indices(postfire_ard,
                                     index='NBR',
                                     collection='ga_s2_1',
                                     drop=False)

    # Calculate the median post-fire image
    postfire_image = postfire_ard.median(dim='time')

    del postfire_ard

    # Select NBR
    postfire_NBR = postfire_image.NBR

    del postfire_image

    # Calculate delta
    delta_NBR = prefire_NBR - postfire_NBR

    del prefire_NBR
    del postfire_NBR

    x = np.round(coordinates.x, decimals=4)
    y = np.round(coordinates.y, decimals=4)

    # Convert dNBR into an xarray Dataset for export to GeoTIFF
    dnbr_dataset = delta_NBR.to_dataset(name='delta_NBR')
    # cog.write_cog(dnbr_dataset, './NBR_geotiffs/{x}_{y}_dNBR.tif')
    write_geotiff(f'/scratch/wj97/ab4513/dNBR_geotiffs/{x}_{y}_dNBR.tif',
                  dnbr_dataset)

    del delta_NBR
    del dnbr_dataset
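
A hedged usage sketch; the datacube connection, fire dates and point locations are illustrative assumptions. 'coordinates' only needs .x and .y attributes (for example a shapely Point), matching how dNBR_processing reads it, and points should fall within UTM zone 55S given the hard-coded EPSG:32755:

from shapely.geometry import Point

dc = datacube.Datacube(app='dNBR_processing')
prefire_start, prefire_end = '2019-10-01', '2020-01-06'
postfire_start, postfire_end = '2020-01-07', '2020-05-01'

# Illustrative fire-affected locations within UTM zone 55S
fire_points = [Point(147.5, -36.5), Point(148.9, -35.7)]
for point in fire_points:
    dNBR_processing(point)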