def _load_pq(self, x, y, res, period, n_landsat):
    query = {
        'time': period,
        'x': x,
        'y': y,
        'crs': 'EPSG:3577',
        'measurements': ['pixelquality'],
        'resolution': res,
    }

    pq_stack = []
    for n in n_landsat:
        pq_stack.append(self.load(product='ls{}_pq_albers'.format(n),
                                  group_by='solar_day',
                                  fuse_func=ga_pq_fuser,
                                  resampling='nearest',
                                  **query))
    pq_stack = xr.concat(pq_stack, dim='time').sortby('time')

    # Land/sea mask isn't used at the moment. Possible alternatives are WOFS and ITEM.
    # pq_stack['land'] = masking.make_mask(pq_stack.pixelquality, land_sea='land')

    # Mask cloud, cloud shadow, band saturation and invalid (non-contiguous) data
    pq_stack['no_cloud'] = masking.make_mask(pq_stack.pixelquality,
                                             cloud_acca='no_cloud',
                                             cloud_fmask='no_cloud',
                                             cloud_shadow_acca='no_cloud_shadow',
                                             cloud_shadow_fmask='no_cloud_shadow',
                                             blue_saturated=False,
                                             green_saturated=False,
                                             red_saturated=False,
                                             nir_saturated=False,
                                             swir1_saturated=False,
                                             swir2_saturated=False,
                                             contiguous=True)
    pq_stack['valid'] = masking.make_mask(pq_stack.pixelquality, contiguous=True)
    return pq_stack
def transform_data(self, data, pq_data, extent_mask, *masks):
    # pylint: disable=too-many-locals
    # Extent-mask each band individually to preserve per-band nodata
    _LOG.debug("transform begin %s", datetime.now())
    if extent_mask is not None:
        for band in data.data_vars:
            try:
                data[band] = data[band].where(extent_mask,
                                              other=data[band].attrs['nodata'])
            except AttributeError:
                data[band] = data[band].where(extent_mask)
    _LOG.debug("extent mask complete %s", datetime.now())
    data = self.apply_masks(data, pq_data)
    _LOG.debug("mask complete %s", datetime.now())
    imgdata = Dataset()
    for band in self.value_map.keys():
        band_data = Dataset()
        for value in self.value_map[band]:
            target = Dataset()
            flags = value["flags"]
            rgb = Color(value["color"])
            dims = data[band].dims
            coords = data[band].coords
            bdata = data[band]
            colors = ["red", "green", "blue"]
            for color in colors:
                c = numpy.full(data[band].shape, getattr(rgb, color))
                target[color] = DataArray(c, dims=dims, coords=coords)
            if "or" in flags:
                # OR semantics: union the mask produced by each individual flag
                fs = flags["or"]
                mask = None
                for f in fs.items():
                    f = {f[0]: f[1]}
                    if mask is None:
                        mask = make_mask(bdata, **f)
                    else:
                        mask |= make_mask(bdata, **f)
            else:
                # AND semantics (the default): make_mask applies all flags at once
                fs = flags if "and" not in flags else flags["and"]
                mask = make_mask(bdata, **fs)
            masked = target.where(mask)
            if len(band_data.data_vars) == 0:
                band_data = masked
            else:
                band_data = band_data.combine_first(masked)
        if len(imgdata.data_vars) == 0:
            imgdata = band_data
        else:
            imgdata = merge([imgdata, band_data])
    imgdata *= 255
    return imgdata.astype('uint8')
def create_mask(data, flags):
    if "or" in flags:
        # OR semantics: a pixel passes if any single flag matches
        fs = flags["or"]
        mask = None
        for key, value in fs.items():
            if mask is None:
                mask = make_mask(data, **{key: value})
            else:
                mask |= make_mask(data, **{key: value})
    else:
        # AND semantics (the default): all flags must match simultaneously
        fs = flags if "and" not in flags else flags["and"]
        mask = make_mask(data, **fs)
    return mask
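
# Hedged usage sketch for create_mask. The `pq` handle is an assumption (a
# previously loaded GA Landsat PQ dataset); the flag names mirror those used
# throughout this module. An "or" spec unions the per-flag masks, while a
# plain (or "and") spec requires every flag to hold at once.
cloud_flags = {"or": {"cloud_acca": "cloud", "cloud_fmask": "cloud"}}
cloudy = create_mask(pq.pixelquality, cloud_flags)    # cloudy by either algorithm

clear_flags = {"cloud_acca": "no_cloud", "cloud_fmask": "no_cloud",
               "contiguous": True}
clear = create_mask(pq.pixelquality, clear_flags)     # clear by both, valid in all bands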
def get_data(query, mask_components, pnbars, pfcs, pqas):
    nbars = []
    fcs = []
    for pnbar, pfc, pqa in zip(pnbars, pfcs, pqas):
        # Load the NBAR, FC and corresponding PQ
        nbar = dc.load(product=pnbar, measurements=pnbar_measurements, **query)
        fc = dc.load(product=pfc, measurements=pfc_measurements, **query)
        pq = dc.load(product=pqa, fuse_func=ga_pq_fuser, **query)

        # Apply the PQ masks to the data
        try:
            cloud_free = masking.make_mask(pq, **mask_components)
            good_data = cloud_free.pixelquality.loc[query['time'][0]:query['time'][1]]
            nbar = nbar.where(good_data)
            fc = fc.where(good_data)
            del cloud_free, good_data
        except ValueError:
            continue
        nbars.append(nbar)
        fcs.append(fc)
        del nbar, fc, pq

    if not nbars:
        raise NoDataError

    # Concatenate data from the different sensors and sort so that observations
    # are ordered by time rather than by sensor
    nbar = sort_data(nbars)
    fc = sort_data(fcs)
    return nbar, fc
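
# Hedged sketch of the inputs get_data expects. The spatial/temporal bounds are
# illustrative only, and the `dc` handle, `sort_data` and `NoDataError` come
# from the surrounding module; the product and flag names mirror those used
# elsewhere in this file.
query = {'x': (148.0, 148.2), 'y': (-35.3, -35.1), 'crs': 'EPSG:4326',
         'time': ('2016-01-01', '2016-12-31')}
mask_components = {'cloud_acca': 'no_cloud', 'cloud_fmask': 'no_cloud',
                   'contiguous': True}
nbar, fc = get_data(query, mask_components,
                    pnbars=['ls8_nbar_albers'],
                    pfcs=['ls8_fc_albers'],
                    pqas=['ls8_pq_albers'])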
def return_good_pixels(data, sensor_pq, start_date, end_date):
    """
    This function uses pixel quality information to mask out and remove
    pixel quality artefacts from extracted data.
    """
    # Define which pixel quality artefacts you want removed from the datacube results
    mask_components = {'cloud_acca': 'no_cloud',
                       'cloud_shadow_acca': 'no_cloud_shadow',
                       'cloud_shadow_fmask': 'no_cloud_shadow',
                       'cloud_fmask': 'no_cloud',
                       'blue_saturated': False,
                       'green_saturated': False,
                       'red_saturated': False,
                       'nir_saturated': False,
                       'swir1_saturated': False,
                       'swir2_saturated': False,
                       'contiguous': True}

    # Grab the projection info before masking/sorting
    crs = data.crs
    crswkt = data.crs.wkt
    affine = data.affine

    # Apply the PQ masks to the data
    cloud_free = masking.make_mask(sensor_pq, **mask_components)
    good_data = cloud_free.pixelquality.loc[start_date:end_date]
    quality_data = data.where(good_data)
    return quality_data
def load_miningrehab_data():
    """
    Loads Fractional Cover and Water Observations from Space products for the
    mining case-study area.

    Last modified: January 2020

    outputs
    ds - dataset containing masked Fractional Cover data from Landsat 8.
    Masked values are set to 'nan'
    """
    # Suppress warnings
    warnings.filterwarnings("ignore")

    # Initialise the data cube. The 'app' argument is used to identify this app
    dc = datacube.Datacube(app="mining-app")

    # Specify latitude and longitude ranges
    latitude = (-34.426512, -34.434517)
    longitude = (116.648123, 116.630731)

    # Specify the date range
    time = ("2015-06-01", "2018-06-30")

    # Construct the data cube query
    query = {
        "x": longitude,
        "y": latitude,
        "time": time,
        "output_crs": "EPSG:3577",
        "resolution": (-25, 25),
    }

    print("Loading Fractional Cover for Landsat 8")
    dataset_fc = dc.load(product="ls8_fc_albers", **query)

    print("Loading WoFS for Landsat 8")
    dataset_wofs = dc.load(product="wofs_albers", like=dataset_fc)

    # Match the two time series on their shared timesteps
    shared_times = np.intersect1d(dataset_fc.time, dataset_wofs.time)
    ds_fc_matched = dataset_fc.sel(time=shared_times)
    ds_wofs_matched = dataset_wofs.sel(time=shared_times)

    # Mask FC to dry pixels only
    dry_mask = masking.make_mask(ds_wofs_matched, dry=True)

    # Get the dry-masked FC dataset (as a proportion of 1, rather than 100)
    ds_fc_masked = ds_fc_matched.where(dry_mask.water) / 100

    # Resample to monthly medians
    ds_resampled = ds_fc_masked.resample(time="1M").median()
    ds_resampled.attrs["crs"] = dataset_fc.crs

    # Return the data
    return ds_resampled
def apply_masks(self, data, pq_data):
    if pq_data is not None:
        # Apply each configured mask in turn; the masks are effectively ANDed
        # because each .where() call further masks the already-masked data.
        # (An unused `net_mask = None` leftover has been dropped.)
        for mask in self.masks:
            odc_mask = make_mask(pq_data, **mask.flags)
            mask_data = getattr(odc_mask, self.product.pq_band)
            if mask.invert:
                mask_data = ~mask_data
            data = data.where(mask_data)
    return data
def make_mask_from_spec(loaded_mask_data, mask_spec):
    if mask_spec.get('flags') is not None:
        mask = make_mask(loaded_mask_data, **mask_spec['flags'])
    elif mask_spec.get('less_than') is not None:
        less_than = float(mask_spec['less_than'])
        mask = loaded_mask_data < less_than
    elif mask_spec.get('greater_than') is not None:
        greater_than = float(mask_spec['greater_than'])
        mask = loaded_mask_data > greater_than
    else:
        # Guard against specs with none of the recognised keys, which would
        # otherwise leave `mask` undefined below
        raise ValueError("mask_spec must contain 'flags', 'less_than' "
                         "or 'greater_than'")

    if mask_spec.get('invert') is True:
        mask = np.logical_not(mask)

    return mask
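
# Hedged examples of the spec dicts make_mask_from_spec accepts. The `pq` and
# `nbar` handles and the threshold value are assumptions for illustration; the
# flag names mirror the GA PQ flags used elsewhere in this module.
flag_spec = {'flags': {'cloud_fmask': 'no_cloud', 'contiguous': True}}
threshold_spec = {'less_than': '4000', 'invert': True}   # mask = NOT (data < 4000)

clear = make_mask_from_spec(pq.pixelquality, flag_spec)
bright = make_mask_from_spec(nbar.red, threshold_spec)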
def run(tile, gwf, center_dt):
    """Basic datapreparation recipe 001

    Computes mean NDVI for a landsat collection over a given time frame

    Args:
        tile (tuple): Tuple of (tile indices, Tile object). Tile object can be
            loaded as xarray.Dataset using gwf.load()
        gwf (GridWorkflow): GridWorkflow object instantiated with the
            corresponding product
        center_dt (datetime): Date to be used in making the filename

    Return:
        str: The filename of the netcdf file created
    """
    try:
        center_dt = center_dt.strftime("%Y-%m-%d")
        # TODO: Need a more dynamic way to handle this filename
        # (e.g.: global variable for the path up to datacube_ingest)
        nc_filename = os.path.expanduser(
            '~/datacube_ingest/recipes/landsat_8_ndvi_mean/ndvi_mean_%d_%d_%s.nc'
            % (tile[0][0], tile[0][1], center_dt))
        if os.path.isfile(nc_filename):
            raise ValueError('%s already exists' % nc_filename)
        # Load Landsat sr
        sr = gwf.load(tile[1], dask_chunks={'x': 1667, 'y': 1667})
        # Compute ndvi
        sr['ndvi'] = (sr.nir - sr.red) / (sr.nir + sr.red) * 10000
        clear = masking.make_mask(sr.pixel_qa, clear=True)
        ndvi = sr.drop(['pixel_qa', 'blue', 'red', 'green', 'nir', 'swir1', 'swir2'])
        ndvi_clear = ndvi.where(clear)
        # Run temporal reductions and rename DataArrays
        ndvi_mean = ndvi_clear.mean('time', keep_attrs=True)
        ndvi_mean['ndvi'].attrs['nodata'] = -9999
        ndvi_mean_int = ndvi_mean.apply(to_int)
        ndvi_mean_int.attrs['crs'] = sr.attrs['crs']
        write_dataset_to_netcdf(ndvi_mean_int, nc_filename,
                                netcdfparams={'zlib': True})
        return nc_filename
    except Exception as e:
        print('Tile (%d, %d) not processed. %s' % (tile[0][0], tile[0][1], e))
        # Re-raise so callers see the failure; the trailing `return None`
        # that followed `raise` was unreachable and has been removed
        raise
def cloud_coverage(dataset):
    from datacube.storage import masking  # Import masking capabilities
    from .dc_mosaic import create_median_mosaic

    clean_pixel_mask = masking.make_mask(dataset.quality,
                                         cloud=False,
                                         radiometric_saturation='none',
                                         terrain_occlusion=False)
    masked_cloud = dataset.where((dataset != 0) & clean_pixel_mask)
    mosaic = create_median_mosaic(masked_cloud, clean_pixel_mask, no_data=0)
    mosaic = mosaic.expand_dims({'time': 1})
    return threshold_percentage(mosaic.red)
def return_good_pixels(nbar, pq):
    """
    This function uses pixel quality information to mask out and remove
    pixel quality artefacts from extracted data.
    """
    mask_components = {'cloud_acca': 'no_cloud',
                       'cloud_shadow_acca': 'no_cloud_shadow',
                       'cloud_shadow_fmask': 'no_cloud_shadow',
                       'cloud_fmask': 'no_cloud',
                       'blue_saturated': False,
                       'green_saturated': False,
                       'red_saturated': False,
                       'nir_saturated': False,
                       'swir1_saturated': False,
                       'swir2_saturated': False,
                       'contiguous': True}
    pqmask = masking.make_mask(pq.pixelquality, **mask_components)
    return nbar.where(pqmask)
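
# Hedged usage sketch: the product names and `query` are assumptions, but the
# NBAR/PQ pairing (and aligning NBAR to the PQ timesteps before masking)
# mirrors the loads used elsewhere in this module.
nbar = dc.load(product='ls8_nbar_albers', group_by='solar_day', **query)
pq = dc.load(product='ls8_pq_albers', group_by='solar_day',
             fuse_func=ga_pq_fuser, **query)
clean = return_good_pixels(nbar.sel(time=pq.time), pq)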
def run(tile, center_dt, path):
    """Basic datapreparation recipe 001

    Computes mean NDVI for a landsat collection over a given time frame

    Args:
        tile (tuple): Tuple of (tile indices, Tile object). Tile object can be
            loaded as xarray.Dataset using gwf.load()
        center_dt (datetime): Date to be used in making the filename
        path (str): Directory where files generated are to be written

    Return:
        str: The filename of the netcdf file created
    """
    try:
        center_dt = center_dt.strftime("%Y-%m-%d")
        nc_filename = os.path.join(
            path, 'ndvi_mean_%d_%d_%s.nc' % (tile[0][0], tile[0][1], center_dt))
        if os.path.isfile(nc_filename):
            logger.warning('%s already exists. Returning filename for database indexing',
                           nc_filename)
            return nc_filename
        # Load Landsat sr
        sr = GridWorkflow.load(tile[1], dask_chunks={'x': 1667, 'y': 1667})
        # Compute ndvi
        sr['ndvi'] = (sr.nir - sr.red) / (sr.nir + sr.red) * 10000
        clear = masking.make_mask(sr.pixel_qa, clear=True)
        ndvi = sr.drop(['pixel_qa', 'blue', 'red', 'green', 'nir', 'swir1', 'swir2'])
        ndvi_clear = ndvi.where(clear)
        # Run temporal reductions and rename DataArrays
        ndvi_mean = ndvi_clear.mean('time', keep_attrs=True)
        ndvi_mean['ndvi'].attrs['nodata'] = -9999
        ndvi_mean_int = ndvi_mean.apply(to_int)
        ndvi_mean_int.attrs['crs'] = sr.attrs['crs']
        write_dataset_to_netcdf(ndvi_mean_int, nc_filename,
                                netcdfparams={'zlib': True})
        return nc_filename
    except Exception as e:
        logger.info('Tile (%d, %d) not processed. %s' % (tile[0][0], tile[0][1], e))
        return None
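
# Hedged driver sketch for run(): the product name and query bounds are
# assumptions, but the (cell index, Tile) tuples yielded by
# GridWorkflow.list_cells(...).items() match the `tile` argument this
# recipe expects.
gwf = GridWorkflow(dc.index, product='ls8_espa_mexico')
tiles = gwf.list_cells(product='ls8_espa_mexico',
                       time=('2017-01-01', '2017-12-31'))
for tile in tiles.items():
    run(tile, center_dt=datetime(2017, 7, 1), path='/tmp/ndvi')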
def compute_mosaic(products, measurements, **parsed_expressions):
    with Datacube() as dc:
        acq_range = parsed_expressions['time']
        click.echo("Processing time range {}".format(acq_range))
        datasets = []

        for prodname in products:
            dataset = dc.load(product=prodname,
                              measurements=measurements,
                              group_by='solar_day',
                              **parsed_expressions)
            if len(dataset) == 0:
                continue
            else:
                click.echo("Found {} time slices of {} during {}.".format(
                    len(dataset['time']), prodname, acq_range))

            pq = dc.load(product=prodname.replace('nbar', 'pq'),
                         group_by='solar_day',
                         fuse_func=pq_fuser,
                         **parsed_expressions)
            if len(pq) == 0:
                click.echo('No PQ found, skipping')
                continue

            crs = dataset.attrs['crs']
            dataset = dataset.where(dataset != -999)
            dataset.attrs['product'] = prodname
            dataset.attrs['crs'] = crs

            cloud_free = make_mask(pq.pixelquality, ga_good_pixel=True)
            dataset = dataset.where(cloud_free)
            if len(dataset) == 0:
                click.echo("Nothing left after PQ masking")
                continue

            datasets.append(dataset)

        dataset = xr.concat(datasets, dim='time')
        return dataset.median(dim='time')
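
# Hedged invocation sketch for compute_mosaic; the product, measurements and
# query values are assumptions. The product name must contain 'nbar' so the
# matching PQ product can be derived via replace('nbar', 'pq').
mosaic = compute_mosaic(products=['ls8_nbar_albers'],
                        measurements=['red', 'green', 'blue'],
                        time=('2016-01-01', '2016-03-31'),
                        x=(148.0, 148.2), y=(-35.3, -35.1))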
def load_slice(i):
    loc = [slice(i, i + 1), slice(None), slice(None)]
    d = GridWorkflow.load(tile[loc], **kwargs)
    if mask_nodata:
        d = sensible_mask_invalid_data(d)

    # Load all masks and combine them into one
    mask = None
    for (m_tile, flags, load_args), invert in zip(masks, inverts):
        m = GridWorkflow.load(m_tile[loc], **load_args)
        m, *other = m.data_vars.values()
        # TODO make use of make_mask_from_spec here
        m = make_mask(m, **flags)
        if invert:
            m = np.logical_not(m)
        if mask is None:
            mask = m
        else:
            mask &= m

    if mask_inplace or not mask_nodata:
        where = sensible_where_inplace
    else:
        where = sensible_where

    if mask is not None:
        # Apply the mask in place if asked, or if we already performed
        # conversion to float32; this avoids reallocation of memory and
        # hence increases the largest dataset size one can load without
        # running out of memory
        d = where(d, mask)

    if geom is not None:
        d = where(d, geometry_mask([geom], d.geobox, invert=True))

    if src_idx is not None:
        d.coords['source'] = ('time', np.repeat(src_idx, d.time.size))

    return d
def run(tile, center_dt, path):
    """Basic datapreparation recipe 001

    Combines temporal statistics of surface reflectance and ndvi with
    terrain metrics

    Args:
        tile (tuple): Tuple of (tile indices, Tile object). Tile object can be
            loaded as xarray.Dataset using gwf.load()
        center_dt (datetime): Date to be used in making the filename
        path (str): Directory where files generated are to be written

    Return:
        str: The filename of the netcdf file created
    """
    try:
        center_dt = center_dt.strftime("%Y-%m-%d")
        nc_filename = os.path.join(
            path, 'madmex_001_%d_%d_%s.nc' % (tile[0][0], tile[0][1], center_dt))
        # Load Landsat sr
        if os.path.isfile(nc_filename):
            logger.warning('%s already exists. Returning filename for database indexing',
                           nc_filename)
            return nc_filename
        sr_0 = GridWorkflow.load(tile[1], dask_chunks={'x': 1667, 'y': 1667})

        # Load terrain metrics using the same spatial parameters as sr
        dc = datacube.Datacube(app='landsat_madmex_001_%s' % randomword(5))
        terrain = dc.load(product='srtm_cgiar_mexico',
                          like=sr_0,
                          time=(datetime(1970, 1, 1), datetime(2018, 1, 1)),
                          dask_chunks={'x': 1667, 'y': 1667})
        dc.close()

        # Mask clouds, shadow, water, ice, ... and drop the qa layer
        clear = masking.make_mask(sr_0.pixel_qa, cloud=False,
                                  cloud_shadow=False, snow=False)
        sr_1 = sr_0.where(clear)
        sr_2 = sr_1.drop('pixel_qa')

        # Convert Landsat data to float (nodata values are converted to np.nan)
        sr_3 = sr_2.apply(func=to_float, keep_attrs=True)

        # Compute ndvi
        sr_3['ndvi'] = ((sr_3.nir - sr_3.red) / (sr_3.nir + sr_3.red)) * 10000
        sr_3['ndvi'].attrs['nodata'] = -9999

        # Run temporal reductions and rename DataArrays
        sr_mean = sr_3.mean('time', keep_attrs=True, skipna=True)
        sr_mean.rename({'blue': 'blue_mean', 'green': 'green_mean',
                        'red': 'red_mean', 'nir': 'nir_mean',
                        'swir1': 'swir1_mean', 'swir2': 'swir2_mean',
                        'ndvi': 'ndvi_mean'}, inplace=True)
        sr_min = sr_3.min('time', keep_attrs=True, skipna=True)
        sr_min.rename({'blue': 'blue_min', 'green': 'green_min',
                       'red': 'red_min', 'nir': 'nir_min',
                       'swir1': 'swir1_min', 'swir2': 'swir2_min',
                       'ndvi': 'ndvi_min'}, inplace=True)
        sr_max = sr_3.max('time', keep_attrs=True, skipna=True)
        sr_max.rename({'blue': 'blue_max', 'green': 'green_max',
                       'red': 'red_max', 'nir': 'nir_max',
                       'swir1': 'swir1_max', 'swir2': 'swir2_max',
                       'ndvi': 'ndvi_max'}, inplace=True)
        sr_std = sr_3.std('time', keep_attrs=True, skipna=True)
        sr_std.rename({'blue': 'blue_std', 'green': 'green_std',
                       'red': 'red_std', 'nir': 'nir_std',
                       'swir1': 'swir1_std', 'swir2': 'swir2_std',
                       'ndvi': 'ndvi_std'}, inplace=True)

        # Merge dataarrays
        combined = xr.merge([sr_mean.apply(to_int),
                             sr_min.apply(to_int),
                             sr_max.apply(to_int),
                             sr_std.apply(to_int),
                             terrain])
        combined.attrs['crs'] = sr_0.attrs['crs']
        write_dataset_to_netcdf(combined, nc_filename)
        return nc_filename
    except Exception as e:
        logger.warning('Tile (%d, %d) not processed. %s' % (tile[0][0], tile[0][1], e))
        return None
def load_clearlandsat(dc, query, sensors=['ls5', 'ls7', 'ls8'], bands_of_interest=None,
                      product='nbart', masked_prop=0.99, mask_dict=None,
                      apply_mask=False, ls7_slc_off=False):
    """
    Loads Landsat NBAR, NBART or FC25 and PQ data for multiple sensors (i.e. ls5, ls7,
    ls8), and returns a single xarray dataset containing only observations that contain
    greater than a given proportion of clear pixels.

    This function was designed to extract visually appealing time series of observations
    that are not affected by cloud, for example as an input to the `animated_timeseries`
    function from `DEAPlotting`.

    The proportion of clear pixels is calculated by summing the pixels that are flagged
    as being problematic in the Landsat PQ25 layer. By default only cloudy pixels or
    pixels without valid data in every band are included in the calculation, but this
    can be customised using the `mask_dict` parameter.

    Last modified: August 2018
    Author: Robbi Bishop-Taylor, Bex Dunn

    :param dc:
        A specific Datacube to import from, i.e. `dc = datacube.Datacube(app='Clear Landsat')`.
        This allows you to also use development datacubes if they have been imported
        into the environment.

    :param query:
        A dict containing the query bounds. Can include lat/lon, time etc. If no `time`
        query is given, the function defaults to all timesteps available to all sensors
        (e.g. 1987-2018)

    :param sensors:
        An optional list of Landsat sensor names to load data for. Options are 'ls5',
        'ls7', 'ls8'; defaults to all.

    :param product:
        An optional string specifying 'nbar', 'nbart' or 'fc'. Defaults to 'nbart'. For
        information on the difference, see the 'GettingStartedWithLandsat' or
        'Introduction_to_Fractional_Cover' notebooks on DEA-notebooks.

    :param bands_of_interest:
        An optional list of strings containing the bands to be read in; options include
        'red', 'green', 'blue', 'nir', 'swir1', 'swir2'; defaults to all available bands
        if no bands are specified.

    :param masked_prop:
        An optional float giving the minimum percentage of clear pixels required for a
        Landsat observation to be loaded. Defaults to 0.99 (i.e. only return observations
        with less than 1% of unclear pixels).

    :param mask_dict:
        An optional dict of arguments to the `masking.make_mask` function that can be
        used to identify clear observations from the PQ layer using alternative masking
        criteria. The default value of None masks out pixels flagged as cloud by either
        the ACCA or Fmask algorithms, and that have values for every band (equivalent to:
        `mask_dict={'cloud_acca': 'no_cloud', 'cloud_fmask': 'no_cloud',
        'contiguous': True}`). See the `Landsat5-7-8-PQ` notebook on DEA Notebooks for a
        list of all possible options.

    :param apply_mask:
        An optional boolean indicating whether resulting observations should have the PQ
        mask applied to filter out any remaining unclear cells. For example, if
        `masked_prop=0.99`, the filtered images may still contain up to 1% unclear/cloudy
        pixels. The default of False simply returns the resulting observations without
        masking out these pixels; True removes them using the mask.

    :param ls7_slc_off:
        An optional boolean indicating whether to include data from after the Landsat 7
        SLC failure (i.e. SLC-off). Defaults to False, which removes all Landsat 7
        observations after May 31 2003.

    :returns:
        An xarray dataset containing only Landsat observations that contain greater than
        `masked_prop` proportion of clear pixels.

    :example:

    >>> # Import modules
    >>> import datacube
    >>> import sys
    >>>
    >>> # Import dea-notebooks functions using relative link to Scripts directory
    >>> sys.path.append('../10_Scripts')
    >>> import DEADataHandling
    >>>
    >>> # Define datacube to import from
    >>> dc = datacube.Datacube(app='Clear Landsat')
    >>>
    >>> # Set up spatial and temporal query
    >>> query = {'x': (-191400.0, -183400.0),
    >>>          'y': (-1423460.0, -1415460.0),
    >>>          'time': ('1998-01-01', '2003-01-01'),
    >>>          'crs': 'EPSG:3577'}
    >>>
    >>> # Load in red, green and blue bands for all clear Landsat observations
    >>> # with < 1% unclear values.
    >>> combined_ds = DEADataHandling.load_clearlandsat(dc=dc, query=query,
    >>>                                                 bands_of_interest=['red', 'green', 'blue'],
    >>>                                                 masked_prop=0.99)
    >>> combined_ds
    """

    # List to save results from each sensor
    filtered_sensors = []

    # Iterate through all sensors, returning only observations with > mask_prop clear pixels
    for sensor in sensors:
        try:
            # If bands of interest are given, assign measurements in dc.load call. This is
            # for compatibility with the existing dea-notebooks load_nbarx function.
            if bands_of_interest:
                # Lazily load Landsat data using dask
                data = dc.load(product='{}_{}_albers'.format(sensor, product),
                               measurements=bands_of_interest,
                               group_by='solar_day',
                               dask_chunks={'time': 1},
                               **query)
            # If no bands of interest given, run without specifying measurements, and
            # therefore return all available bands
            else:
                # Lazily load Landsat data using dask
                data = dc.load(product='{}_{}_albers'.format(sensor, product),
                               group_by='solar_day',
                               dask_chunks={'time': 1},
                               **query)

            # Load PQ data
            pq = dc.load(product='{}_pq_albers'.format(sensor),
                         group_by='solar_day',
                         fuse_func=ga_pq_fuser,
                         dask_chunks={'time': 1},
                         **query)

            # Remove Landsat 7 SLC-off from PQ layer if ls7_slc_off=False
            if not ls7_slc_off and sensor == 'ls7':
                print('Ignoring SLC-off observations for ls7')
                data = data.where(data.time < np.datetime64('2003-05-30'), drop=True)

            # Return only Landsat observations that have matching PQ data
            time = (data.time - pq.time).time
            data = data.sel(time=time)
            pq = pq.sel(time=time)

            # Load PQ data using dask
            print('Loading {} PQ'.format(sensor))
            pq = pq.compute()

            # If a custom dict is provided for mask_dict, use these values to make mask from PQ
            if mask_dict:
                # Mask PQ using custom values by unpacking mask_dict **kwarg
                good_quality = masking.make_mask(pq.pixelquality, **mask_dict)
            else:
                # Identify pixels with no clouds in either ACCA or Fmask
                good_quality = masking.make_mask(pq.pixelquality,
                                                 cloud_acca='no_cloud',
                                                 cloud_fmask='no_cloud',
                                                 contiguous=True)

            # Compute good data for each observation as a percentage of total array pixels
            data_perc = good_quality.sum(dim=['x', 'y']) / (good_quality.shape[1] *
                                                            good_quality.shape[2])

            # Add data_perc data to Landsat dataset as a new xarray variable
            data['data_perc'] = xr.DataArray(data_perc, [('time', data.time)])

            # Filter and finally import data using dask
            filtered = data.where(data.data_perc >= masked_prop, drop=True)
            print('    Loading {} filtered {} timesteps'.format(len(filtered.time), sensor))
            filtered = filtered.compute()

            # Optionally apply mask (instead of only filtering)
            if apply_mask:
                filtered = filtered.where(good_quality)

            # Append result to list
            filtered_sensors.append(filtered)

            # Close datasets
            filtered = None
            good_quality = None
            data = None
            pq = None

        except Exception:
            # If there is no data for the sensor or if another error occurs, skip it
            print('    Skipping {}'.format(sensor))

    # Concatenate all sensors into one big xarray dataset, and then sort by time
    print('Combining and sorting ls5, ls7 and ls8 data')
    combined_ds = xr.concat(filtered_sensors, dim='time')
    combined_ds = combined_ds.sortby('time')

    # Filter to replace no data values with nans
    combined_ds = masking.mask_invalid_data(combined_ds)

    # Return combined dataset
    return combined_ds
def load_nbarx(dc, sensor, query, product='nbart', bands_of_interest='', filter_pq=True):
    """
    Loads NBAR (Nadir BRDF Adjusted Reflectance) or NBAR-T (terrain corrected NBAR)
    data for a sensor, masks using pixel quality (PQ), then optionally filters out
    terrain -999s (for NBAR-T). Returns an xarray dataset and CRS and Affine objects
    defining map projection and geotransform.

    Last modified: May 2018
    Author: Bex Dunn
    Modified by: Claire Krause, Robbi Bishop-Taylor, Bex Dunn

    inputs
    dc - Handle for the Datacube to import from. This allows you to also use dev
        environments if they have been imported into the environment.
    sensor - Options are 'ls5', 'ls7', 'ls8'
    query - A dict containing the query bounds. Can include lat/lon, time etc.

    optional
    product - 'nbar' or 'nbart'. Defaults to nbart unless otherwise specified
    bands_of_interest - List of strings containing the bands to be read in;
        defaults to all bands, options include 'red', 'green', 'blue', 'nir',
        'swir1', 'swir2'
    filter_pq - boolean. Will filter clouds and saturated pixels using PQ unless
        set to False

    outputs
    ds - Extracted and optionally PQ filtered dataset
    crs - CRS object defining dataset coordinate reference system
    affine - Affine object defining dataset affine transformation
    """

    product_name = '{}_{}_albers'.format(sensor, product)
    mask_product = '{}_{}_albers'.format(sensor, 'pq')
    print('Loading {}'.format(product_name))

    # If bands of interest are given, assign measurements in dc.load call
    if bands_of_interest:
        ds = dc.load(product=product_name, measurements=bands_of_interest,
                     group_by='solar_day', **query)
    # If no bands of interest given, run without specifying measurements
    else:
        ds = dc.load(product=product_name, group_by='solar_day', **query)

    # Proceed if the resulting call returns data
    if ds.variables:
        crs = ds.crs
        affine = ds.affine
        print('Loaded {}'.format(product_name))

        # If pixel quality filtering is enabled, extract PQ data to use as mask
        if filter_pq:
            sensor_pq = dc.load(product=mask_product, fuse_func=ga_pq_fuser,
                                group_by='solar_day', **query)

            # If PQ call returns data, use to mask input data
            if sensor_pq.variables:
                print('Generating mask {}'.format(mask_product))
                good_quality = masking.make_mask(sensor_pq.pixelquality,
                                                 cloud_acca='no_cloud',
                                                 cloud_shadow_acca='no_cloud_shadow',
                                                 cloud_shadow_fmask='no_cloud_shadow',
                                                 cloud_fmask='no_cloud',
                                                 blue_saturated=False,
                                                 green_saturated=False,
                                                 red_saturated=False,
                                                 nir_saturated=False,
                                                 swir1_saturated=False,
                                                 swir2_saturated=False,
                                                 contiguous=True)

                # Apply mask to preserve only good data
                ds = ds.where(good_quality)

            ds.attrs['crs'] = crs
            ds.attrs['affine'] = affine

        # Replace nodata values with nans
        ds = masking.mask_invalid_data(ds)
        return ds, crs, affine

    else:
        print('Failed to load {}'.format(product_name))
        return None, None, None
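
# Hedged usage sketch for load_nbarx; the query bounds are illustrative only,
# and `dc` is assumed to be an existing Datacube handle.
query = {'x': (153.40, 153.46), 'y': (-27.45, -27.50),
         'time': ('2017-01-01', '2017-12-31')}
ds, crs, affine = load_nbarx(dc, sensor='ls8', query=query,
                             bands_of_interest=['red', 'green', 'blue'])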
def load_cloudmaskedlandsat(dc, query, platforms=['ls5', 'ls7', 'ls8'],
                            bands=['red', 'green', 'blue', 'nir', 'swir1', 'swir2']):
    '''
    This function returns cloud-masked Landsat `{platform}_usgs_sr_scene` data by
    loading Landsat data and masking out any pixels affected by cloud or cloud
    shadow, or any pixels missing data in any band.

    Last modified: August 2019
    Author: Robbi Bishop-Taylor

    Parameters
    ----------
    dc : datacube Datacube object
        A specific Datacube to import from, i.e. `dc = datacube.Datacube(app='Clear Landsat')`.
        This allows you to also use development datacubes if required.
    query : dict
        A dict containing the query bounds. Can include lat/lon, time etc. If no
        `time` query is given, the function defaults to all timesteps available to
        all sensors (e.g. 1987-2018)
    platforms : list, optional
        An optional list of Landsat platform names to load data from. Options are
        'ls5', 'ls7', 'ls8'.
    bands : list, optional
        An optional list of strings containing the bands to be read in; options
        default to 'red', 'green', 'blue', 'nir', 'swir1', 'swir2'.

    Returns
    -------
    landsat_ds : xarray Dataset
        An xarray dataset containing pixel-quality masked Landsat observations
    '''

    from datacube.storage import masking

    # If bands do not include the pixel quality band, add it
    new_bands = set(bands)
    new_bands.add('pixel_qa')

    platform_data = []
    for platform in platforms:

        # Load landsat data
        landsat_ds = dc.load(product=f'{platform}_usgs_sr_scene',
                             dask_chunks={'time': 1},
                             measurements=list(new_bands),
                             group_by='solar_day',
                             **query)

        # Mask out all pixels affected by cloud, cloud shadow, or other invalid data
        valid_data = masking.make_mask(landsat_ds.pixel_qa,
                                       cloud='no_cloud',
                                       cloud_shadow='no_cloud_shadow',
                                       # water='no_water',
                                       nodata=False)
        landsat_ds = landsat_ds.where(valid_data)
        platform_data.append(landsat_ds)

    return xr.concat(platform_data, dim='time').sortby('time')
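
# Hedged usage sketch for load_cloudmaskedlandsat; the spatial and temporal
# bounds are assumptions for illustration.
query = {'x': (31.0, 31.2), 'y': (-25.0, -25.2),
         'time': ('2018-01-01', '2018-06-30')}
masked_ds = load_cloudmaskedlandsat(dc, query, platforms=['ls8'],
                                    bands=['red', 'green', 'blue'])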
def extractNDVIFromCube(tileFile, minLat, maxLat, minLon, maxLon, year):
    dc = datacube.Datacube(app='ExtractAnnualNDVI')

    # Define wavelengths/bands of interest; remove this kwarg to retrieve all bands
    bands_of_interest = ['red', 'nir']

    # Define sensors of interest
    sensors = ['ls8', 'ls7', 'ls5']

    # Define temporal range
    start_of_epoch = year + '-01-01'
    # Latest observation
    end_of_epoch = year + '-12-31'

    query = {
        'time': (start_of_epoch, end_of_epoch),
    }
    query['x'] = (minLon, maxLon)
    query['y'] = (maxLat, minLat)
    query['crs'] = 'EPSG:4326'

    # Define which pixel quality artefacts you want removed from the results
    mask_components = {
        'cloud_acca': 'no_cloud',
        'cloud_shadow_acca': 'no_cloud_shadow',
        'cloud_shadow_fmask': 'no_cloud_shadow',
        'cloud_fmask': 'no_cloud',
        'blue_saturated': False,
        'green_saturated': False,
        'red_saturated': False,
        'nir_saturated': False,
        'swir1_saturated': False,
        'swir2_saturated': False,
        'contiguous': True
    }

    print("Read pixel image data into memory.")
    sensor_clean = {}
    for sensor in sensors:
        print(sensor)
        # Load the NBAR and corresponding PQ
        sensor_nbar = dc.load(product=sensor + '_nbar_albers',
                              group_by='solar_day',
                              measurements=bands_of_interest,
                              **query)
        if bool(sensor_nbar):
            sensor_pq = dc.load(product=sensor + '_pq_albers',
                                group_by='solar_day',
                                fuse_func=pq_fuser,
                                **query)

            # Grab the projection info before masking/sorting
            crs = sensor_nbar.crs
            crswkt = sensor_nbar.crs.wkt
            affine = sensor_nbar.affine

            # This line is to make sure there's PQ to go with the NBAR
            sensor_nbar = sensor_nbar.sel(time=sensor_pq.time)

            # Apply the PQ masks to the NBAR
            cloud_free = masking.make_mask(sensor_pq, **mask_components)
            good_data = cloud_free.pixelquality.loc[start_of_epoch:end_of_epoch]
            sensor_nbar = sensor_nbar.where(good_data)
            sensor_clean[sensor] = sensor_nbar

    if bool(sensor_clean):
        # Concatenate data from different sensors together and sort so that
        # observations are sorted by time rather than sensor
        print("Merge data from different sensors.")
        nbar_clean = xarray.concat(sensor_clean.values(), dim='time')
        time_sorted = nbar_clean.time.argsort()
        nbar_clean = nbar_clean.isel(time=time_sorted)
        nbar_clean.attrs['affine'] = affine
        nbar_clean.attrs['crs'] = crswkt

        print("Calculate NDVI.")
        ndvi = ((nbar_clean.nir - nbar_clean.red) /
                (nbar_clean.nir + nbar_clean.red))
        ndvi.attrs['affine'] = affine
        ndvi.attrs['crs'] = crswkt

        print("Create Composite")
        ndviMean = ndvi.mean(dim='time')
        ndviMean.attrs['affine'] = affine
        ndviMean.attrs['crs'] = crswkt

        print("Save Composite to netcdf")
        ndviMean.to_netcdf(path=tileFile, mode='w')
print("Read pixel image data into memory.") sensor_clean = {} for sensor in sensors: print(sensor) #Load the NBAR and corresponding PQ sensor_nbar = dc.load(product= sensor+'_nbar_albers', group_by='solar_day', measurements = bands_of_interest, **query) if bool(sensor_nbar): sensor_pq = dc.load(product=sensor+'_pq_albers', group_by='solar_day', fuse_func=pq_fuser, **query) #grab the projection info before masking/sorting crs = sensor_nbar.crs crswkt = sensor_nbar.crs.wkt affine = sensor_nbar.affine #this line is to make sure there's PQ to go with the NBAR sensor_nbar = sensor_nbar.sel(time = sensor_pq.time) #Apply the PQ masks to the NBAR cloud_free = masking.make_mask(sensor_pq, **mask_components) good_data = cloud_free.pixelquality.loc[start_of_epoch:end_of_epoch] sensor_nbar = sensor_nbar.where(good_data) sensor_clean[sensor] = sensor_nbar if bool(sensor_clean): #Concatenate data from different sensors together and sort so that observations are sorted by time rather than sensor print("Merge data from different sensors.") nbar_clean = xarray.concat(sensor_clean.values(), dim='time') time_sorted = nbar_clean.time.argsort() nbar_clean = nbar_clean.isel(time=time_sorted) nbar_clean.attrs['affine'] = affine nbar_clean.attrs['crs'] = crswkt print("\'Clean\' up the Red and NIR bands to remove any values less than zero.") nbar_clean['red'] = nbar_clean.red.where(nbar_clean.red>0)
def load_ard(dc,
             products=None,
             min_gooddata=0.0,
             pq_categories_s2=['vegetation', 'snow or ice', 'water',
                               'bare soils', 'unclassified',
                               'dark area pixels'],
             pq_categories_ls=None,
             mask_pixel_quality=True,
             ls7_slc_off=True,
             filter_func=None,
             **extras):
    '''
    Loads USGS Landsat Collection 1 and Collection 2 data for multiple
    satellites (i.e. Landsat 5, 7, 8), and returns a single masked xarray
    dataset containing only observations that contain greater than a given
    proportion of good quality pixels. This can be used to extract clean
    time series of observations that are not affected by cloud, for example
    as an input to the `animated_timeseries` function from
    `deafrica-sandbox-notebooks/deafrica_plotting`.

    The proportion of good quality pixels is calculated by summing the
    pixels flagged as good quality in the product's pixel quality band
    (i.e. 'pixel_qa' for USGS Collection 1, and 'quality_l2_aerosol' for
    USGS Collection 2). By default non-cloudy or non-shadowed pixels are
    considered as good data, but this can be customised using the
    `pq_categories_s2` and `pq_categories_ls` parameters.

    Last modified: February 2020

    Parameters
    ----------
    dc : datacube Datacube object
        The Datacube to connect to, i.e. `dc = datacube.Datacube()`. This
        allows you to also use development datacubes if required.
    products : list
        A list of product names to load data from. Valid options are
        ['ls5_usgs_sr_scene', 'ls7_usgs_sr_scene', 'ls8_usgs_sr_scene'] for
        Landsat C1, ['usgs_ls8c_level2_2'] for Landsat C2, and
        ['s2a_msil2a', 's2b_msil2a'] for Sentinel 2.
    min_gooddata : float, optional
        An optional float giving the minimum percentage of good quality
        pixels required for a satellite observation to be loaded. Defaults
        to 0.0 which will return all observations regardless of pixel
        quality (set to e.g. 0.99 to return only observations with more
        than 99% good quality pixels).
    pq_categories_s2 : list, optional
        An optional list of S2 Scene Classification Layer (SCL) names to
        treat as good quality observations in the above `min_gooddata`
        calculation. The default is ['vegetation', 'snow or ice', 'water',
        'bare soils', 'unclassified', 'dark area pixels'] which will return
        non-cloudy or shadowed land, snow, water, veg, and non-veg pixels.
    pq_categories_ls : dict, optional
        An optional dictionary that is used to generate a good quality
        pixel mask from the selected USGS product's pixel quality band
        (i.e. 'pixel_qa' for USGS Collection 1, and 'quality_l2_aerosol'
        for USGS Collection 2). This mask is used for both masking out low
        quality pixels (e.g. cloud or shadow), and for dropping
        observations entirely based on the above `min_gooddata`
        calculation. Default is None, which will apply the following mask
        for USGS Collection 1: `{'cloud': 'no_cloud',
        'cloud_shadow': 'no_cloud_shadow', 'nodata': False}`, and for USGS
        Collection 2: `{'cloud_shadow': 'not_cloud_shadow',
        'cloud_or_cirrus': 'not_cloud_or_cirrus', 'nodata': False}`.
    mask_pixel_quality : bool, optional
        An optional boolean indicating whether to apply the good data mask
        to all observations that were not filtered out for having less
        good quality pixels than `min_gooddata`. E.g. if
        `min_gooddata=0.99`, the filtered observations may still contain
        up to 1% poor quality pixels. False simply returns the resulting
        observations without masking out these pixels; True (the default)
        masks them and sets them to NaN using the good data mask. This
        will convert numeric values to floating point values which can
        cause memory issues; set to False to prevent this.
    ls7_slc_off : bool, optional
        An optional boolean indicating whether to include data from after
        the Landsat 7 SLC failure (i.e. SLC-off). Defaults to True, which
        keeps all Landsat 7 observations > May 31 2003.
    filter_func : function, optional
        An optional function that can be passed in to restrict the
        datasets that are loaded by the function. A filter function should
        take a `datacube.model.Dataset` object as an input (i.e. as
        returned from `dc.find_datasets`), and return a boolean. For
        example, a filter function could be used to return True on only
        datasets acquired in January:
        `dataset.time.begin.month == 1`
    **extras :
        A set of keyword arguments to `dc.load` that define the
        spatiotemporal query used to extract data. This typically includes
        `measurements`, `x`, `y`, `time`, `resolution`, `resampling`,
        `group_by` and `crs`. Keyword arguments can either be listed
        directly in the `load_ard` call like any other parameter (e.g.
        `measurements=['nbart_red']`), or by passing in a query kwarg
        dictionary (e.g. `**query`). For a list of possible options, see
        the `dc.load` documentation:
        https://datacube-core.readthedocs.io/en/latest/dev/api/generate/datacube.Datacube.load.html

    Returns
    -------
    combined_ds : xarray Dataset
        An xarray dataset containing only satellite observations that
        contains greater than `min_gooddata` proportion of good quality
        pixels.
    '''

    #########
    # Setup #
    #########

    extras = deepcopy(extras)
    query = _dc_query_only(**extras)

    # We deal with `dask_chunks` separately
    dask_chunks = extras.pop('dask_chunks', None)
    requested_measurements = extras.pop('measurements', None)

    # Warn user if they combine lazy load with min_gooddata
    if (min_gooddata > 0.0) and dask_chunks is not None:
        warnings.warn("Setting 'min_gooddata' percentage to > 0.0 "
                      "will cause dask arrays to compute when "
                      "loading pixel-quality data to calculate "
                      "'good pixel' percentage. This can "
                      "slow the return of your dataset.")

    # Verify that products were provided
    if not products:
        raise ValueError(f'Please provide a list of product names '
                         f'to load data from. Valid options include '
                         f'{c1_products}, {c2_products} and {s2_products}')
    elif all(['level2' in product for product in products]):
        product_type = 'c2'
    elif all(['sr' in product for product in products]):
        product_type = 'c1'
    elif all(['s2' in product for product in products]):
        product_type = 's2'

    # If `measurements` are specified but do not include pixel quality bands,
    # add these to `measurements` according to collection
    if product_type == 'c2':
        print('Using pixel quality parameters for USGS Collection 2')
        fmask_band = 'quality_l2_aerosol'
    elif product_type == 'c1':
        print('Using pixel quality parameters for USGS Collection 1')
        fmask_band = 'pixel_qa'
    elif product_type == 's2':
        print('Using pixel quality parameters for Sentinel 2')
        fmask_band = 'scl'

    measurements = requested_measurements.copy() if requested_measurements else None
    if measurements:
        if fmask_band not in measurements:
            measurements.append(fmask_band)

    #################
    # Find datasets #
    #################

    # Extract datasets for each product using subset of dcload_kwargs
    dataset_list = []

    # Get list of datasets for each product
    print('Finding datasets')
    for product in products:

        # Obtain list of datasets for product
        print(f'    {product}')
        datasets = dc.find_datasets(product=product, **query)

        # Remove Landsat 7 SLC-off observations if ls7_slc_off=False
        # !!! Update when we have C2 LS7 !!!
        if not ls7_slc_off and product in ['ls7_usgs_sr_scene',
                                           'usgs_ls7e_level2_2']:
            print('    Ignoring SLC-off observations for ls7')
            datasets = [i for i in datasets
                        if i.time.begin < datetime.datetime(2003, 5, 31)]

        # Add any returned datasets to list
        dataset_list.extend(datasets)

    # Raise exception if no datasets are returned
    if len(dataset_list) == 0:
        raise ValueError("No data available for query: ensure that "
                         "the products specified have data for the "
                         "time and location requested")

    # If filter_func is specified, use this function to filter the list
    # of datasets prior to load
    if filter_func:
        print(f'Filtering datasets using filter function')
        dataset_list = [ds for ds in dataset_list if filter_func(ds)]

    # Raise exception if filtering removes all datasets
    if len(dataset_list) == 0:
        raise ValueError("No data available after filtering with "
                         "filter function")

    #############
    # Load data #
    #############

    # Note we always load using dask here so that
    # we can lazy load data before filtering by good data
    ds = dc.load(datasets=dataset_list,
                 measurements=measurements,
                 dask_chunks={} if dask_chunks is None else dask_chunks,
                 **extras)

    ###############
    # Apply masks #
    ###############

    # We need to distinguish between products due to different
    # "fmask" band properties

    # Collection 2 USGS
    if product_type == 'c2':
        if pq_categories_ls is None:
            quality_flags_prod = {'cloud_shadow': 'not_cloud_shadow',
                                  'cloud_or_cirrus': 'not_cloud_or_cirrus',
                                  'nodata': False}
        else:
            quality_flags_prod = pq_categories_ls

        pq_mask = masking.make_mask(ds[fmask_band], **quality_flags_prod)

    # Collection 1 USGS
    if product_type == 'c1':
        if pq_categories_ls is None:
            quality_flags_prod = {'cloud': 'no_cloud',
                                  'cloud_shadow': 'no_cloud_shadow',
                                  'nodata': False}
        else:
            quality_flags_prod = pq_categories_ls

        pq_mask = masking.make_mask(ds[fmask_band], **quality_flags_prod)

    # Sentinel 2
    if product_type == 's2':
        pq_mask = odc.algo.fmask_to_bool(ds[fmask_band],
                                         categories=pq_categories_s2)

    # Generate good quality data mask
    mask = None
    if mask_pixel_quality:
        print('Applying pixel quality/cloud mask')
        mask = pq_mask

    # Mask data if either of the above masks were generated
    if mask is not None:
        ds = odc.algo.keep_good_only(ds, where=mask)

    ####################
    # Filter good data #
    ####################

    # The good data percentage calculation has to load in all `fmask`
    # data, which can be slow. If the user has chosen no filtering
    # by using the default `min_gooddata = 0`, we can skip this step
    # completely to save processing time
    if min_gooddata > 0.0:

        # Compute good data for each observation as % of total pixels
        print('Counting good quality pixels for each time step')
        data_perc = (pq_mask.sum(axis=[1, 2], dtype='int32') /
                     (pq_mask.shape[1] * pq_mask.shape[2]))

        # Filter by `min_gooddata` to drop low quality observations
        total_obs = len(ds.time)
        ds = ds.sel(time=data_perc >= min_gooddata)
        print(f'Filtering to {len(ds.time)} out of {total_obs} '
              f'time steps with at least {min_gooddata:.1%} '
              f'good quality pixels')

    # Drop bands not originally requested by user
    if requested_measurements:
        ds = ds[requested_measurements]

    ###############
    # Return data #
    ###############

    # Set nodata values using odc.algo tools to reduce peak memory
    # use when converting data to a float32 dtype
    ds = odc.algo.to_f32(ds)

    # If user supplied dask_chunks, return data as a dask array without
    # actually loading it in
    if dask_chunks is not None:
        print(f'Returning {len(ds.time)} time steps as a dask array')
        return ds
    else:
        print(f'Loading {len(ds.time)} time steps')
        return ds.compute()
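
# Hedged usage sketch for load_ard; the query values (bounds, CRS, resolution)
# are assumptions for illustration. The product name and keyword arguments
# match those documented in the docstring above.
ds = load_ard(dc,
              products=['ls8_usgs_sr_scene'],
              min_gooddata=0.90,
              measurements=['red', 'green', 'blue'],
              x=(31.0, 31.2), y=(-25.0, -25.2),
              time=('2018-01-01', '2018-12-31'),
              output_crs='EPSG:6933', resolution=(-30, 30))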
def load_masked_usgs(dc,
                     products=None,
                     min_gooddata=0.0,
                     quality_flags=None,
                     mask_pixel_quality=True,
                     mask_invalid_data=True,
                     ls7_slc_off=True,
                     product_metadata=False,
                     dask_chunks={'time': 1},
                     lazy_load=False,
                     **dcload_kwargs):
    '''
    Loads USGS Landsat Collection 1 and Collection 2 data for multiple
    satellites (i.e. Landsat 5, 7, 8), and returns a single masked xarray
    dataset containing only observations that contain greater than a given
    proportion of good quality pixels. This can be used to extract clean
    time series of observations that are not affected by cloud, for example
    as an input to the `animated_timeseries` function from
    `deafrica-sandbox-notebooks/deafrica_plotting`.

    The proportion of good quality pixels is calculated by summing the
    pixels flagged as good quality in the product's pixel quality band
    (i.e. 'pixel_qa' for USGS Collection 1, and 'quality_l2_aerosol' for
    USGS Collection 2). By default non-cloudy or non-shadowed pixels are
    considered as good data, but this can be customised using the
    `quality_flags` parameter.

    MEMORY ISSUES: For large data extractions, it can be advisable to set
    `mask_pixel_quality=False`. The masking step coerces all numeric values
    to float32 when NaN values are inserted into the array, potentially
    causing your data to use twice the memory. Be aware that the resulting
    arrays will contain invalid values which may affect future analyses.

    Last modified: October 2019

    Parameters
    ----------
    dc : datacube Datacube object
        The Datacube to connect to, i.e. `dc = datacube.Datacube()`. This
        allows you to also use development datacubes if required.
    products : list
        A list of product names to load data from. Valid options for USGS
        Collection 1 are ['ls5_usgs_sr_scene', 'ls7_usgs_sr_scene',
        'ls8_usgs_sr_scene'], and for USGS Collection 2 are
        ['usgs_ls5t_level2_2', 'usgs_ls7e_level2_2', 'usgs_ls8c_level2_2'].
    min_gooddata : float, optional
        An optional float giving the minimum percentage of good quality
        pixels required for a satellite observation to be loaded. Defaults
        to 0.0 which will return all observations regardless of pixel
        quality (set to e.g. 0.99 to return only observations with more
        than 99% good quality pixels).
    quality_flags : dict, optional
        An optional dictionary that is used to generate a good quality
        pixel mask from the selected product's pixel quality band (i.e.
        'pixel_qa' for USGS Collection 1, and 'quality_l2_aerosol' for
        USGS Collection 2). This mask is used for both masking out low
        quality pixels (e.g. cloud or shadow), and for dropping
        observations entirely based on the above `min_gooddata`
        calculation. Default is None, which will apply the following mask
        for USGS Collection 1: `{'cloud': 'no_cloud',
        'cloud_shadow': 'no_cloud_shadow', 'nodata': False}`, and for USGS
        Collection 2: `{'cloud_shadow': 'not_cloud_shadow',
        'cloud_or_cirrus': 'not_cloud_or_cirrus', 'nodata': False}`.
    mask_pixel_quality : bool, optional
        An optional boolean indicating whether to apply the good data mask
        to all observations that were not filtered out for having less
        good quality pixels than `min_gooddata`. E.g. if
        `min_gooddata=0.99`, the filtered observations may still contain
        up to 1% poor quality pixels. False simply returns the resulting
        observations without masking out these pixels; True (the default)
        masks them out and sets them to NaN using the good data mask. This
        will convert numeric values to float32 which can cause memory
        issues; set to False to prevent this.
    ls7_slc_off : bool, optional
        An optional boolean indicating whether to include data from after
        the Landsat 7 SLC failure (i.e. SLC-off). Defaults to True, which
        keeps all Landsat 7 observations > May 31 2003.
    product_metadata : bool, optional
        An optional boolean indicating whether to return the dataset with
        a `product` variable that gives the name of the product that each
        observation in the time series came from (e.g.
        'usgs_ls8c_level2_2'). Defaults to False.
    dask_chunks : dict, optional
        An optional dictionary containing the coords and sizes you wish to
        create dask chunks over. Usually used in combination with
        `lazy_load=True` (see below). For example:
        `dask_chunks = {'x': 500, 'y': 500}`
    lazy_load : boolean, optional
        Setting this variable to True will delay the computation of the
        function until you explicitly run `ds.compute()`. If used in
        conjunction with `dask.distributed.Client()` this will allow for
        automatic parallel computation. Be aware that computation will
        still occur if min_gooddata > 0, as the pixel quality will be
        loaded to compute the 'good data' percentage.
    **dcload_kwargs :
        A set of keyword arguments to `dc.load` that define the
        spatiotemporal query used to extract data. This can include `x`,
        `y`, `time`, `resolution`, `resampling`, `group_by`, `crs` etc,
        and can either be listed directly in the `load_ard` call (e.g.
        `x=(150.0, 151.0)`), or by passing in a query kwarg (e.g.
        `**query`). For a full list of possible options, see:
        https://datacube-core.readthedocs.io/en/latest/dev/api/generate/datacube.Datacube.load.html

    Returns
    -------
    combined_ds : xarray Dataset
        An xarray dataset containing only satellite observations that
        contains greater than `min_gooddata` proportion of good quality
        pixels.
    '''

    # Due to a possible bug in xarray 0.13.0, define a temporary function
    # which converts dtypes in a way that preserves attributes
    def astype_attrs(da, dtype=np.float32):
        '''
        Loop through all data variables in the dataset, record attributes,
        convert to float32, then reassign attributes. If the data variable
        cannot be converted to float32 (e.g. for a non-numeric dtype like
        strings), skip and return the variable unchanged.
        '''
        try:
            da_attr = da.attrs
            da = da.astype(dtype)
            da = da.assign_attrs(**da_attr)
            return da
        except ValueError:
            return da

    # List of valid USGS Collection 1 products
    c1_products = ['ls5_usgs_sr_scene', 'ls7_usgs_sr_scene', 'ls8_usgs_sr_scene']

    # List of valid USGS Collection 2 products
    c2_products = ['usgs_ls5t_level2_2', 'usgs_ls7e_level2_2', 'usgs_ls8c_level2_2']

    # Verify that products were provided
    if not products:
        raise ValueError(f'Please provide a list of product names '
                         f'to load data from. Valid options include '
                         f'{c1_products} and {c2_products}')

    # Verify that all provided products are valid
    not_in_list = [i for i in products if i not in c1_products + c2_products]
    if not_in_list:
        raise ValueError(f'The product(s) {not_in_list} are not '
                         f'supported by this function. Valid options '
                         f'include {c1_products} and {c2_products}')

    # Warn user if they combine lazy load with min_gooddata
    if (min_gooddata > 0.0) & lazy_load:
        warnings.warn("Setting 'min_gooddata' percentage to > 0.0 "
                      "will cause dask arrays to compute when "
                      "loading pixel-quality data to calculate "
                      "'good pixel' percentage. This will "
                      "significantly slow the return of your dataset.")

    # Create a list to hold data for each product
    product_data = []

    # Iterate through each requested product
    for product in products:

        try:
            print(f'Loading {product} data')

            # Set quality band according to collection
            if product in c2_products:
                print('    Using pixel quality parameters for USGS Collection 2')
                quality_band = 'quality_l2_aerosol'
            elif product in c1_products:
                print('    Using pixel quality parameters for USGS Collection 1')
                quality_band = 'pixel_qa'

            # Set quality flags according to collection
            if (product in c2_products) and not quality_flags:
                quality_flags_prod = {'cloud_shadow': 'not_cloud_shadow',
                                      'cloud_or_cirrus': 'not_cloud_or_cirrus',
                                      'nodata': False}
            elif (product in c1_products) and not quality_flags:
                quality_flags_prod = {'cloud': 'no_cloud',
                                      'cloud_shadow': 'no_cloud_shadow',
                                      'nodata': False}
            elif quality_flags:
                quality_flags_prod = quality_flags

            # If `measurements` are specified but do not include fmask, add it
            if (('measurements' in dcload_kwargs) and
                    (quality_band not in dcload_kwargs['measurements'])):
                dcload_kwargs['measurements'].append(quality_band)

            # Load data
            try:
                ds = dc.load(product=f'{product}',
                             dask_chunks=dask_chunks,
                             **dcload_kwargs)
            except KeyError as e:
                raise ValueError(f'Band {e} does not exist in this product. '
                                 f'Verify all requested `measurements` exist '
                                 f'in {products}')

            # Keep a record of the original number of observations
            total_obs = len(ds.time)

            # Remove Landsat 7 SLC-off observations if ls7_slc_off=False
            if not ls7_slc_off and product in ['ls7_usgs_sr_scene',
                                               'usgs_ls7e_level2_2']:
                print('    Ignoring SLC-off observations for ls7')
                ds = ds.sel(time=ds.time < np.datetime64('2003-05-30'))

            # Identify all pixels not affected by cloud/shadow/invalid
            good_quality = masking.make_mask(ds[quality_band], **quality_flags_prod)

            # The good data percentage calculation has to load in all `fmask`
            # data, which can be slow. If the user has chosen no filtering
            # by using the default `min_gooddata = 0`, we can skip this step
            # completely to save processing time
            if min_gooddata > 0.0:

                # Compute good data for each observation as % of total pixels
                data_perc = (good_quality.sum(axis=1).sum(axis=1) /
                             (good_quality.shape[1] * good_quality.shape[2]))

                # Filter by `min_gooddata` to drop low quality observations
                ds = ds.sel(time=data_perc >= min_gooddata)
                print(f'    Filtering to {len(ds.time)} '
                      f'out of {total_obs} observations')

            # Optionally apply pixel quality mask to observations remaining
            # after the filtering step above to mask out all remaining
            # bad quality pixels
            if mask_pixel_quality & (len(ds.time) > 0):
                print('    Applying pixel quality mask')

                # First change dtype to float32, then mask out values using
                # `.where()`. By casting to float32, we prevent `.where()`
                # from automatically casting to float64, using 2x the memory.
                # We need to do this by applying a custom function to every
                # variable in the dataset instead of using `.astype()`, due
                # to a possible bug in xarray 0.13.0 that drops attributes
                ds = ds.apply(astype_attrs, dtype=np.float32, keep_attrs=True)
                ds = ds.where(good_quality)

            # Optionally add satellite/product name as a new variable
            if product_metadata:
                ds['product'] = xr.DataArray([product] * len(ds.time),
                                             [('time', ds.time)])

            # If any data was returned, add result to list
            if len(ds.time) > 0:
                product_data.append(ds.drop(quality_band))

        # If AttributeError due to there being no pixel quality variable
        # in the dataset, skip this product and move on to the next
        except AttributeError:
            print(f'    No data for {product}')

    # If any data was returned above, combine into one xarray
    if len(product_data) > 0:

        # Concatenate results and sort by time
        try:
            print(f'Combining and sorting data')
            combined_ds = xr.concat(product_data, dim='time').sortby('time')
        except KeyError as e:
            raise ValueError(f'The requested products {products} contain '
                             f'bands with non-matching names (e.g. {e}). Please '
                             f'select products with identical band names.')

        # If `lazy_load` is True, return data as a dask array without
        # actually loading it in
        if lazy_load:
            print(f'    Returning {len(combined_ds.time)} observations'
                  ' as a dask array')
            return combined_ds
        else:
            print(f'    Returning {len(combined_ds.time)} observations ')
            return combined_ds.compute()

    # If no data was returned:
    else:
        print('No data returned for query')
        return None
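
# Hedged usage sketch for load_masked_usgs; the query values are assumptions.
# `measurements` is passed through **dcload_kwargs, and the pixel quality band
# is appended automatically if it is missing.
ds = load_masked_usgs(dc,
                      products=['usgs_ls8c_level2_2'],
                      min_gooddata=0.95,
                      measurements=['red', 'green', 'blue'],
                      x=(31.0, 31.2), y=(-25.0, -25.2),
                      time=('2018-01-01', '2018-12-31'))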
autumn_valid = autumn.where(autumn != autumn.attrs['nodata'])
autumn_valid.plot(col='time', col_wrap=3)
plt.savefig('plot-nodata.png')

pq = dc.load(product='ls5_pq_albers',
             x=(132.2, 132.4),
             y=(-12.45, -12.14),
             time=('2003-01', '2003-12'))

pq_autumn = pq.pixelquality.loc['2003-3':'2003-8']
pq_autumn.plot(col='time', col_wrap=3)
plt.savefig('plot-pxlqual.png')

pandas.DataFrame.from_dict(masking.get_flags_def(pq), orient='index')

good_data = masking.make_mask(pq,
                              cloud_acca='no_cloud',
                              cloud_fmask='no_cloud',
                              contiguous=True)
autumn_good_data = good_data.pixelquality.loc['2003-3':'2003-8']
autumn_good_data.plot(col='time', col_wrap=3)
plt.savefig('plot-clouds.png')

autumn_cloud_free = autumn_valid.where(autumn_good_data)
autumn_cloud_free.plot(col='time', col_wrap=3)
plt.savefig('plot-cloudfree.png')

red = nbar.red.where(nbar.red != nbar.red.attrs['nodata'])
nir = nbar.nir.where(nbar.nir != nbar.nir.attrs['nodata'])
cloud_free = masking.make_mask(pq,
                               cloud_acca='no_cloud',
                               cloud_fmask='no_cloud',
                               contiguous=True).pixelquality
             y=(22, 21),
             time=(datetime(2017, 6, 1), datetime(2018, 1, 1)),
             group_by='solar_day',
             dask_chunks={'x': 2000, 'y': 2000})

sr['ndvi'] = (sr.nir - sr.red) / (sr.nir + sr.red)

terrain = dc.load(product='srtm_cgiar_mexico',
                  like=sr,
                  time=(datetime(1970, 1, 1), datetime(2018, 1, 1)),
                  dask_chunks={'x': 2000, 'y': 2000})

clear = masking.make_mask(sr.pixel_qa, clear=True)
sr_clear = sr.where(clear)
sr_clear2 = sr_clear.drop('pixel_qa')

sr_mean = sr_clear2.mean('time', keep_attrs=True)
sr_mean.rename({'blue': 'blue_mean', 'green': 'green_mean',
                'red': 'red_mean', 'nir': 'nir_mean',
                'swir1': 'swir1_mean', 'swir2': 'swir2_mean',
                'ndvi': 'ndvi_mean'}, inplace=True)

sr_min = sr_clear2.min('time', keep_attrs=True)
def loadBands():
    global time_sorted
    global nbar_clean
    global all_nbr_sorted
    global query
    global start_of_epoch
    global end_of_epoch
    global valid_bit

    # Define temporal range
    #start_of_epoch = '2000-01-01'
    # need a variable here that defines a rolling 'latest observation'
    #end_of_epoch = '2016-07-31'

    # Define wavelengths/bands of interest; remove this kwarg to retrieve all bands
    bands_of_interest = [
        #'blue',
        'green',
        #'red',
        'nir',
        #'swir1',
        'swir2'
    ]

    # Define sensors of interest
    sensor1 = 'ls8'
    sensor2 = 'ls7'
    sensor3 = 'ls5'

    # Group PQ by solar day to avoid idiosyncrasies of N/S overlap differences
    # in PQ algorithm performance
    pq_albers_product = dc.index.products.get_by_name(sensor1 + '_pq_albers')
    valid_bit = pq_albers_product.measurements['pixelquality'][
        'flags_definition']['contiguous']['bits']

    # Load sensor-specific band adjustment tuples for TSS
    ls5_tss_constant = 3983
    ls5_tss_exponent = 1.6246
    ls7_tss_constant = 3983
    ls7_tss_exponent = 1.6246
    ls8_tss_constant = 3957
    ls8_tss_exponent = 1.6436

    # Retrieve the NBAR and PQ data for sensor 1
    sensor1_nbar = dc.load(product=sensor1 + '_nbar_albers',
                           group_by='solar_day',
                           measurements=bands_of_interest,
                           **query)
    sensor1_pq = dc.load(product=sensor1 + '_pq_albers',
                         group_by='solar_day',
                         fuse_func=pq_fuser,
                         **query)
    affine = sensor1_nbar.affine

    # This line exists to make sure that there's a 1:1 match between NBAR and PQ
    sensor1_nbar = sensor1_nbar.sel(time=sensor1_pq.time)

    # Generate PQ masks and apply those masks to remove cloud, cloud shadow
    # and saturated observations
    s1_cloud_free = masking.make_mask(sensor1_pq, ga_good_pixel=True)
    s1_good_data = s1_cloud_free.pixelquality.loc[start_of_epoch:end_of_epoch]
    sensor1_nbar = sensor1_nbar.where(s1_good_data)

    # Fix the TSS coefficients for each sensor
    all_indices = [
        #'BRIGHT', 'GREEN', 'WET',
        'NDVI',
        'NBR',
        'NDWI',
        'TSS'
    ]
    sensor1_rsindex = {}
    for i, name in enumerate(all_indices):
        #sensor1_rsindex['BRIGHT'] = pd.DataFrame((s1[0]*0.3037)+(s1[1]*0.2793)+(s1[2]*0.4343)+(s1[3]*0.5585)+(s1[4]*0.5082)+(s1[0]*0.1863))
        #sensor1_rsindex['GREEN'] = pd.DataFrame((s1[0]*-0.2848)+(s1[1]*-0.2435)+(s1[2]*-0.5436)+(s1[3]*0.7243)+(s1[4]*0.0840)+(s1[0]*-0.1800))
        #sensor1_rsindex['WET'] = pd.DataFrame((s1[0]*0.1509)+(s1[1]*0.1793)+(s1[2]*0.3299)+(s1[3]*0.3406)+(s1[4]*-0.7112)+(s1[0]*-0.4572))
        #sensor1_rsindex['NDVI'] = ((sensor1_nbar['nir']-sensor1_nbar['red'])/(sensor1_nbar['nir']+sensor1_nbar['red']))
        #sensor1_rsindex['NDWI'] = ((sensor1_nbar['swir1']-sensor1_nbar['green'])/(sensor1_nbar['swir1']+sensor1_nbar['green']))
        sensor1_rsindex['NBR'] = (
            (sensor1_nbar['nir'] - sensor1_nbar['swir2']) /
            (sensor1_nbar['nir'] + sensor1_nbar['swir2']))
        # Need this to reference into a tuple - check with Damien
        #sensor1_rsindex['TSS'] = (ls8_tss_constant*((sensor1_nbar['green']+sensor1_nbar['red'])/20000)**ls8_tss_exponent)

    # Retrieve the NBAR and PQ data for sensor 2
    sensor2_nbar = dc.load(product=sensor2 + '_nbar_albers',
                           group_by='solar_day',
                           measurements=bands_of_interest,
                           **query)
    sensor2_pq = dc.load(product=sensor2 + '_pq_albers',
                         group_by='solar_day',
                         fuse_func=pq_fuser,
                         **query)
    sensor2_nbar = sensor2_nbar.sel(time=sensor2_pq.time)
    s2_cloud_free = masking.make_mask(sensor2_pq, ga_good_pixel=True)
    s2_good_data = s2_cloud_free.pixelquality.loc[start_of_epoch:end_of_epoch]
    sensor2_nbar = sensor2_nbar.where(s2_good_data)

    all_indices = [
        #'BRIGHT', 'GREEN', 'WET',
        'NDVI',
        'NBR',
        'NDWI',
        'TSS'
    ]
    sensor2_rsindex = {}
    for i, name in enumerate(all_indices):
        #sensor2_rsindex['BRIGHT'] = pd.DataFrame((s1[0]*0.3037)+(s1[1]*0.2793)+(s1[2]*0.4343)+(s1[3]*0.5585)+(s1[4]*0.5082)+(s1[0]*0.1863))
        #sensor2_rsindex['GREEN'] = pd.DataFrame((s1[0]*-0.2848)+(s1[1]*-0.2435)+(s1[2]*-0.5436)+(s1[3]*0.7243)+(s1[4]*0.0840)+(s1[0]*-0.1800))
        #sensor2_rsindex['WET'] = pd.DataFrame((s1[0]*0.1509)+(s1[1]*0.1793)+(s1[2]*0.3299)+(s1[3]*0.3406)+(s1[4]*-0.7112)+(s1[0]*-0.4572))
        #sensor2_rsindex['NDVI'] = ((sensor2_nbar['nir']-sensor2_nbar['red'])/(sensor2_nbar['nir']+sensor2_nbar['red']))
        #sensor2_rsindex['NDWI'] = ((sensor2_nbar['swir1']-sensor2_nbar['green'])/(sensor2_nbar['swir1']+sensor2_nbar['green']))
        sensor2_rsindex['NBR'] = (
            (sensor2_nbar['nir'] - sensor2_nbar['swir2']) /
            (sensor2_nbar['nir'] + sensor2_nbar['swir2']))
        #sensor2_rsindex['TSS'] = (ls7_tss_constant*((sensor2_nbar['green']+sensor2_nbar['red'])/20000)**ls7_tss_exponent)

    # Retrieve the NBAR and PQ data for sensor 3
    sensor3_nbar = dc.load(product=sensor3 + '_nbar_albers',
                           group_by='solar_day',
                           measurements=bands_of_interest,
                           **query)
    sensor3_pq = dc.load(product=sensor3 + '_pq_albers',
                         group_by='solar_day',
                         fuse_func=pq_fuser,
                         **query)
    sensor3_nbar = sensor3_nbar.sel(time=sensor3_pq.time)
    s3_cloud_free = masking.make_mask(sensor3_pq, ga_good_pixel=True)
    s3_good_data = s3_cloud_free.pixelquality.loc[start_of_epoch:end_of_epoch]
    sensor3_nbar = sensor3_nbar.where(s3_good_data)

    all_indices = [
        #'BRIGHT', 'GREEN', 'WET',
        'NDVI',
        'NBR',
        'NDWI',
        'TSS'
    ]
    sensor3_rsindex = {}
    for i, name in enumerate(all_indices):
        #sensor2_rsindex['BRIGHT'] = pd.DataFrame((s1[0]*0.3037)+(s1[1]*0.2793)+(s1[2]*0.4343)+(s1[3]*0.5585)+(s1[4]*0.5082)+(s1[0]*0.1863))
        #sensor2_rsindex['GREEN'] = pd.DataFrame((s1[0]*-0.2848)+(s1[1]*-0.2435)+(s1[2]*-0.5436)+(s1[3]*0.7243)+(s1[4]*0.0840)+(s1[0]*-0.1800))
        #sensor2_rsindex['WET'] = pd.DataFrame((s1[0]*0.1509)+(s1[1]*0.1793)+(s1[2]*0.3299)+(s1[3]*0.3406)+(s1[4]*-0.7112)+(s1[0]*-0.4572))
        #sensor3_rsindex['NDVI'] = ((sensor3_nbar['nir']-sensor3_nbar['red'])/(sensor3_nbar['nir']+sensor3_nbar['red']))
        #sensor3_rsindex['NDWI'] = ((sensor3_nbar['swir1']-sensor3_nbar['green'])/(sensor3_nbar['swir1']+sensor3_nbar['green']))
        sensor3_rsindex['NBR'] = (
            (sensor3_nbar['nir'] - sensor3_nbar['swir2']) /
            (sensor3_nbar['nir'] + sensor3_nbar['swir2']))
        #sensor3_rsindex['TSS'] = ((sensor3_nbar['green']+sensor3_nbar['red'])/2)
        #sensor3_rsindex['TSS'] = (ls5_tss_constant*((sensor3_nbar['green']+sensor3_nbar['red'])/20000)**ls5_tss_exponent)

    # Concatenate and sort the different sensor xarrays into a single xarray
    nbar_clean = xr.concat([sensor1_nbar, sensor2_nbar, sensor3_nbar],
                           dim='time')
    time_sorted = nbar_clean.time.argsort()
    nbar_clean = nbar_clean.isel(time=time_sorted)
    nbar_clean.attrs['affine'] = affine

    '''
    all_tss_sorted = xr.concat([sensor1_rsindex['TSS'],
                                sensor2_rsindex['TSS'],
                                sensor3_rsindex['TSS']], dim='time')
    time_sorted = all_tss_sorted.time.argsort()
    all_tss_sorted = all_tss_sorted.isel(time=time_sorted)
    '''
    """
    all_ndvi_sorted = xr.concat([sensor1_rsindex['NDVI'],
                                 sensor2_rsindex['NDVI'],
                                 sensor3_rsindex['NDVI']], dim='time')
    time_sorted = all_ndvi_sorted.time.argsort()
    all_ndvi_sorted = all_ndvi_sorted.isel(time=time_sorted)
    """

    all_nbr_sorted = xr.concat([sensor1_rsindex['NBR'],
                                sensor2_rsindex['NBR'],
                                sensor3_rsindex['NBR']], dim='time')
    time_sorted = all_nbr_sorted.time.argsort()
    all_nbr_sorted = all_nbr_sorted.isel(time=time_sorted)
    all_nbr_sorted.attrs['affine'] = affine

    # Clean up per-sensor xarrays to free up some memory
    del sensor1_nbar
    del sensor2_nbar
    del sensor3_nbar
    del sensor1_rsindex
    del sensor2_rsindex
    del sensor3_rsindex

    print('The number of time slices at this location is')
    print(all_nbr_sorted.shape[0])
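
# Hedged usage sketch for loadBands(): the function reads its spatial query
# and epoch bounds from module-level globals, so these must be assigned
# before the call. All values below are illustrative placeholders.
query = {
    'x': (142.0, 142.5),    # hypothetical longitude range
    'y': (-32.5, -32.0),    # hypothetical latitude range
    'crs': 'EPSG:4326',
}
start_of_epoch = '2000-01-01'
end_of_epoch = '2016-07-31'
loadBands()
print(nbar_clean)       # cloud-masked NBAR stack, sorted by time
print(all_nbr_sorted)   # matching NBR index stack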
print(lon_range, lat_range)
print(crs)

for platform in platform_list:
    product_name = '{}_{}_albers'.format(platform, product_type)
    print('Loading product: {}'.format(product_name))
    output_file = '/g/data/u46/users/dra547/erf_07_09_2013_' + product_name + '.cdf'
    print(output_file)

    dataset = dc.load(product=product_name,
                      x=lon_range,
                      y=lat_range,
                      time=(acq_min, acq_max),
                      group_by='solar_day',
                      crs=crs,
                      measurements=measurements_list)

    # Load PQ Mask
    mask_product = '{}_{}_albers'.format(platform, 'pq')
    sensor_pq = dc.load(product=mask_product,
                        group_by='solar_day',
                        fuse_func=ga_pq_fuser,
                        like=dataset)
    cloud_free = make_mask(sensor_pq.pixelquality, ga_good_pixel=True)

    dataset = dataset.where(cloud_free).fillna(-999).astype('int16')
    # Temporarily required until xarray issue #1009 gets into a release
    dataset.attrs['crs'] = sensor_pq.crs

    print(dataset)
    write_dataset_to_netcdf(dataset, output_file)
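
# Hedged sketch: reading one of the NetCDF files written above back in and
# restoring the -999 int16 sentinel to NaN. The filename follows the output
# pattern above but is otherwise an illustrative placeholder.
import xarray as xr

ds = xr.open_dataset('/g/data/u46/users/dra547/erf_07_09_2013_ls8_nbar_albers.cdf')
ds = ds.where(ds != -999)   # masked pixels were filled with -999 before the int16 cast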
def get_map(args):
    # Version parameter
    # GetMap 1.1.1 must be supported for Terria
    version = get_arg(args, "version", "WMS version",
                      permitted_values=["1.1.1", "1.3.0"])

    # CRS parameter
    if version == "1.1.1":
        crs_arg = "srs"
    else:
        crs_arg = "crs"
    crsid = get_arg(args, crs_arg, "Coordinate Reference System",
                    errcode=WMSException.INVALID_CRS,
                    permitted_values=service_cfg["published_CRSs"].keys())
    crs = geometry.CRS(crsid)

    # Layers and Styles parameters
    product = get_product_from_arg(args)
    styles = args.get("styles", "").split(",")
    if len(styles) != 1:
        raise WMSException("Multi-layer GetMap requests not supported")
    style_r = styles[0]
    if not style_r:
        style_r = product.platform.default_style
    style = product.platform.style_index.get(style_r)
    if not style:
        raise WMSException("Style %s is not defined" % style_r,
                           WMSException.STYLE_NOT_DEFINED,
                           locator="Style parameter")

    # Format parameter
    fmt = get_arg(args, "format", "image format",
                  errcode=WMSException.INVALID_FORMAT,
                  lower=True,
                  permitted_values=["image/png"])

    # BBox, height and width parameters
    geobox = _get_geobox(args, crs)

    # Zoom Factor
    zf = zoom_factor(args, crs)

    # Time parameter
    time = get_time(args, product)

    # Tiling
    tiler = RGBTileGenerator(product, geobox, time, style=style)
    dc = get_cube()
    datasets = tiler.datasets(dc.index)
    if style.pq_mask_flags:
        pq_datasets = tiler.datasets(dc.index, mask=True)
    else:
        pq_datasets = None

    if not datasets:
        body = _write_empty(geobox)
    elif zf < product.min_zoom:
        # Zoomed out too far to properly render data.
        # Construct a polygon which is the union of the extents of the
        # matching datasets.
        extent = None
        for ds in datasets:
            if extent:
                extent = extent.union(ds.extent)
            else:
                extent = ds.extent
        extent = extent.to_crs(geobox.crs)
        body = _write_polygon(geobox, extent, product.zoom_fill)
    else:
        masks = []
        data = tiler.data(datasets)
        for band in style.needed_bands:
            extent_mask = (data[band] != data[band].attrs['nodata'])
        if pq_datasets:
            # ??????
            # sources = datacube.Datacube.group_datasets(datasets, datacube.api.query.query_group_by())
            # pq_sources = datacube.Datacube.group_datasets(pq_datasets, datacube.api.query.query_group_by())
            # sources, pq_sources = xarray.align(sources, pq_sources)
            pq_data = tiler.data(pq_datasets, mask=True)
            mask = make_mask(pq_data, **style.pq_mask_flags)
            mask_data = mask.pixelquality
            masks.append(mask_data)
        if data:
            body = _write_png(data, style, extent_mask, *masks)
        else:
            body = _write_empty(geobox)
    release_cube(dc)
    return body, 200, resp_headers({"Content-Type": "image/png"})
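
# Hedged example of a GetMap request this handler would service, expressed
# as the parsed `args` mapping the function receives. Layer name, CRS and
# bounding box are illustrative placeholders; `crs` must be one of the CRSs
# published in `service_cfg`.
example_args = {
    "version": "1.3.0",
    "request": "GetMap",
    "layers": "ls8_nbart_albers",
    "styles": "",
    "crs": "EPSG:3577",
    "bbox": "1500000,-4000000,1600000,-3900000",
    "width": "256",
    "height": "256",
    "format": "image/png",
    "time": "2017-06-01",
}
body, status, headers = get_map(example_args)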
def load_clearlandsat(dc, query, sensors=('ls5', 'ls7', 'ls8'), product='nbart',
                      bands_of_interest=None, masked_prop=0.99, mask_dict=None,
                      mask_pixel_quality=False, mask_invalid_data=True,
                      ls7_slc_off=False, satellite_metadata=False):
    """Load cloud-free data from multiple Landsat satellites as an xarray dataset

    Loads Landsat NBAR, NBART or FC25 and PQ data for multiple sensors (i.e.
    ls5, ls7, ls8) and returns a single xarray dataset containing only
    observations that contain greater than a given proportion of good quality
    pixels. This function can be used to extract visually appealing time
    series of observations that are not affected by cloud, for example as an
    input to the `animated_timeseries` function from `DEAPlotting`.

    The proportion of clear pixels is calculated by summing the pixels that
    are not flagged as being poor quality in the Landsat PQ25 layer. By
    default only cloudy pixels or pixels that are missing data in any band
    are used to calculate the number of poor quality pixels, but this can be
    customised using the `mask_dict` parameter.

    Last modified: October 2018
    Author: Robbi Bishop-Taylor, Bex Dunn

    Parameters
    ----------
    dc : datacube Datacube object
        A specific Datacube to import from, i.e.
        `dc = datacube.Datacube(app='Clear Landsat')`. This allows you to
        also use development datacubes if they have been imported into the
        environment.
    query : dict
        A dict containing the query bounds. Can include lat/lon, time etc.
        If no `time` query is given, the function defaults to all timesteps
        available to all sensors (e.g. 1987-2018).
    sensors : list, optional
        An optional list of Landsat sensor names to load data for. Options
        are 'ls5', 'ls7', 'ls8'; defaults to all.
    product : str, optional
        An optional string specifying 'nbar', 'nbart' or 'fc'. Defaults to
        'nbart'. For information on the difference, see the
        '02_DEA_datasets/Introduction_to_Landsat' or
        '02_DEA_datasets/Introduction_to_Fractional_Cover' notebooks from
        DEA-notebooks.
    bands_of_interest : list, optional
        An optional list of strings containing the bands to be read in;
        options include 'red', 'green', 'blue', 'nir', 'swir1', 'swir2';
        defaults to all available bands if no bands are specified.
    masked_prop : float, optional
        An optional float giving the minimum percentage of clear pixels
        required for a Landsat observation to be loaded. Defaults to 0.99
        (i.e. only return observations with less than 1% of poor quality
        pixels).
    mask_dict : dict, optional
        An optional dict of arguments to the `masking.make_mask` function
        that can be used to identify good/poor quality pixels from the PQ
        layer using alternative masking criteria. The default value of None
        masks out pixels flagged as cloud by either the ACCA or Fmask
        algorithms, or pixels that are missing data in any band (equivalent
        to: `mask_dict={'cloud_acca': 'no_cloud', 'cloud_fmask': 'no_cloud',
        'contiguous': True}`). See the
        `02_DEA_datasets/Introduction_to_LandsatPQ.ipynb` notebook on DEA
        Notebooks for a list of all possible options.
    mask_pixel_quality : bool, optional
        An optional boolean indicating whether to apply the pixel quality
        mask to all observations that were not filtered out for having fewer
        good quality pixels than `masked_prop`. For example, if
        `masked_prop=0.99`, the filtered images may still contain up to 1%
        poor quality pixels. The default of False simply returns the
        resulting observations without masking out these pixels; True masks
        them out and sets them to NaN using the pixel quality mask, but has
        the side effect of changing the data type of the output arrays from
        int16 to float64 which can cause memory issues. To reduce memory
        usage, set to False.
    mask_invalid_data : bool, optional
        An optional boolean indicating whether invalid -999 nodata values
        should be replaced with NaN. Defaults to True; this has the side
        effect of changing the data type of the output arrays from int16 to
        float64 which can cause memory issues. To reduce memory usage, set
        to False.
    ls7_slc_off : bool, optional
        An optional boolean indicating whether to include data from after
        the Landsat 7 SLC failure (i.e. SLC-off). Defaults to False, which
        removes all Landsat 7 observations after May 31 2003.
    satellite_metadata : bool, optional
        An optional boolean indicating whether to return the dataset with a
        `satellite` variable that gives the name of the satellite that made
        each observation in the timeseries (i.e. ls5, ls7, ls8). Defaults
        to False.

    Returns
    -------
    combined_ds : xarray Dataset
        An xarray dataset containing only Landsat observations that contain
        greater than `masked_prop` proportion of clear pixels.

    Notes
    -----
    Memory issues: For large data extractions, it is recommended that you
    set both `mask_pixel_quality=False` and `mask_invalid_data=False`.
    Otherwise, all output variables will be coerced to float64 when NaN
    values are inserted into the array, potentially causing your data to use
    4x as much memory. Be aware that the resulting arrays will contain
    invalid -999 values which should be considered in analyses.

    Example
    -------
    >>> # Import modules
    >>> import datacube
    >>> import sys
    >>> # Import dea-notebooks functions using relative link to 10_Scripts directory
    >>> sys.path.append('../10_Scripts')
    >>> import DEADataHandling
    >>> # Connect to a datacube containing Landsat data
    >>> dc = datacube.Datacube(app='load_clearlandsat')
    >>> # Set up spatial and temporal query
    >>> query = {'x': (954163, 972163),
    ...          'y': (-3573891, -3555891),
    ...          'time': ('2011-06-01', '2013-06-01'),
    ...          'crs': 'EPSG:3577'}
    >>> # Load observations with less than 25% cloud from ls5, ls7 and ls8 as a single combined dataset
    >>> landsat_ds = DEADataHandling.load_clearlandsat(dc=dc, query=query, sensors=['ls5', 'ls7', 'ls8'],
    ...                                                bands_of_interest=['red', 'green', 'blue'],
    ...                                                masked_prop=0.75, mask_pixel_quality=True, ls7_slc_off=True)
    Loading ls5 pixel quality
        Loading 4 filtered ls5 timesteps
    Loading ls7 pixel quality
        Loading 29 filtered ls7 timesteps
    Loading ls8 pixel quality
        Loading 3 filtered ls8 timesteps
    Combining and sorting ls5, ls7, ls8 data
        Replacing invalid -999 values with NaN (data will be coerced to float64)
    >>> # Test that function returned data
    >>> len(landsat_ds.time) > 0
    True
    """

    # List to save results from each sensor and list to keep names of
    # successfully processed sensors
    filtered_sensors = []
    successfully_returned = []

    # Iterate through all sensors, returning only observations with
    # > masked_prop clear pixels
    for sensor in sensors:
        try:
            # If bands of interest are given, assign measurements in the
            # dc.load call. This is for compatibility with the existing
            # dea-notebooks load_nbarx function.
            if bands_of_interest:
                # Lazily load Landsat data using dask
                data = dc.load(product=f'{sensor}_{product}_albers',
                               measurements=bands_of_interest,
                               group_by='solar_day',
                               dask_chunks={'time': 1},
                               **query)
            # If no bands of interest are given, run without specifying
            # measurements and therefore return all available bands
            else:
                # Lazily load Landsat data using dask
                data = dc.load(product=f'{sensor}_{product}_albers',
                               group_by='solar_day',
                               dask_chunks={'time': 1},
                               **query)

            # Load PQ data
            pq = dc.load(product=f'{sensor}_pq_albers',
                         group_by='solar_day',
                         fuse_func=ga_pq_fuser,
                         dask_chunks={'time': 1},
                         **query)

            # Remove Landsat 7 SLC-off from PQ layer if ls7_slc_off=False
            if not ls7_slc_off and sensor == 'ls7':
                print('Ignoring SLC-off observations for ls7')
                data = data.sel(time=data.time < np.datetime64('2003-05-30'))

            # Return only Landsat observations that have matching PQ data
            time = (data.time - pq.time).time
            data = data.sel(time=time)
            pq = pq.sel(time=time)

            # Load PQ data using dask
            print('Loading {} pixel quality'.format(sensor))
            pq = pq.compute()

            # If a custom dict is provided for mask_dict, use these values
            # to make a mask from PQ
            if mask_dict:
                # Mask PQ using custom values by unpacking mask_dict **kwargs
                good_quality = masking.make_mask(pq.pixelquality, **mask_dict)
            else:
                # Identify pixels with no clouds in either ACCA or Fmask
                good_quality = masking.make_mask(pq.pixelquality,
                                                 cloud_acca='no_cloud',
                                                 cloud_fmask='no_cloud',
                                                 contiguous=True)

            # Compute good data for each observation as a percentage of
            # total array pixels
            data_perc = good_quality.sum(dim=['x', 'y']) / (
                good_quality.shape[1] * good_quality.shape[2])

            # Add data_perc data to Landsat dataset as a new xarray variable
            data['data_perc'] = xr.DataArray(data_perc, [('time', data.time)])

            # Filter by data_perc to drop low quality observations and
            # finally import data using dask
            filtered = data.sel(time=data.data_perc >= masked_prop)
            print(f'    Loading {len(filtered.time)} filtered {sensor} timesteps')
            filtered = filtered.compute()

            # Optionally apply pixel quality mask to all observations that
            # were not dropped in the previous step
            if mask_pixel_quality:
                filtered = filtered.where(good_quality)

            # Optionally add satellite name variable
            if satellite_metadata:
                filtered['satellite'] = xr.DataArray(
                    [sensor] * len(filtered.time), [('time', filtered.time)])

            # Append result to list and add sensor name to the list of
            # successfully processed sensors
            filtered_sensors.append(filtered)
            successfully_returned.append(sensor)

            # Close datasets
            filtered = None
            good_quality = None
            data = None
            pq = None

        except:
            # If there is no data for the sensor or if another error occurs:
            print(f'Loading {sensor} pixel quality\n'
                  f'    Skipping {sensor}; no valid data for query')

    # Concatenate all sensors into one big xarray dataset, then sort by time
    sensor_string = ", ".join(successfully_returned)
    print(f'Combining and sorting {sensor_string} data')
    combined_ds = xr.concat(filtered_sensors, dim='time')
    combined_ds = combined_ds.sortby('time')

    # Optionally filter to replace nodata values with NaN
    if mask_invalid_data:
        print('    Replacing invalid -999 values with NaN '
              '(data will be coerced to float64)')
        combined_ds = masking.mask_invalid_data(combined_ds)

    # Return combined dataset
    return combined_ds
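
# Hedged example: a stricter custom `mask_dict` that also screens out cloud
# shadow and saturated pixels (the same PQ flags used elsewhere in this
# document), passed through to `masking.make_mask` via **kwargs. Assumes `dc`
# and `query` are defined as in the docstring example above.
strict_mask = {'cloud_acca': 'no_cloud',
               'cloud_fmask': 'no_cloud',
               'cloud_shadow_acca': 'no_cloud_shadow',
               'cloud_shadow_fmask': 'no_cloud_shadow',
               'blue_saturated': False,
               'green_saturated': False,
               'red_saturated': False,
               'contiguous': True}
landsat_ds = load_clearlandsat(dc=dc, query=query,
                               mask_dict=strict_mask,
                               masked_prop=0.9)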
def calcMangNDVIMangPxlFromCube(tileNCFile, tileNCAMCFile, tileNCCMCFile,
                                tileAFile, minLat, maxLat, minLon, maxLon,
                                year, mangShpExt, ndviThresLow, ndviThresHigh):
    dc = datacube.Datacube(app='CalcAnnualMangroveExtent')

    # Define wavelengths/bands of interest; remove this kwarg to retrieve all bands
    bands_of_interest = ['red', 'nir']

    # Define sensors of interest
    sensors = ['ls8', 'ls7', 'ls5']

    # Define temporal range
    start_of_epoch = year + '-01-01'
    # Latest observation
    end_of_epoch = year + '-12-31'

    query = {
        'time': (start_of_epoch, end_of_epoch),
    }
    query['x'] = (minLon, maxLon)
    query['y'] = (maxLat, minLat)
    query['crs'] = 'EPSG:4326'

    # Define which pixel quality artefacts you want removed from the results
    mask_components = {
        'cloud_acca': 'no_cloud',
        'cloud_shadow_acca': 'no_cloud_shadow',
        'cloud_shadow_fmask': 'no_cloud_shadow',
        'cloud_fmask': 'no_cloud',
        'blue_saturated': False,
        'green_saturated': False,
        'red_saturated': False,
        'nir_saturated': False,
        'swir1_saturated': False,
        'swir2_saturated': False,
        'contiguous': True
    }

    print("Read pixel image data into memory.")
    sensor_clean = {}
    for sensor in sensors:
        print(sensor)
        # Load the NBAR and corresponding PQ
        sensor_nbar = dc.load(product=sensor + '_nbar_albers',
                              group_by='solar_day',
                              measurements=bands_of_interest,
                              **query)
        if bool(sensor_nbar):
            sensor_pq = dc.load(product=sensor + '_pq_albers',
                                group_by='solar_day',
                                fuse_func=pq_fuser,
                                **query)
            # Get the projection info
            crswkt = sensor_nbar.crs.wkt
            affine = sensor_nbar.affine

            # Apply the PQ masks to the NBAR
            cloud_free = masking.make_mask(sensor_pq, **mask_components)
            good_data = cloud_free.pixelquality.loc[start_of_epoch:end_of_epoch]
            sensor_nbar = sensor_nbar.where(good_data)
            sensor_clean[sensor] = sensor_nbar

    if bool(sensor_clean):
        print("Merge data from different sensors.")
        nbar_clean = xarray.concat(sensor_clean.values(), dim='time')
        time_sorted = nbar_clean.time.argsort()
        nbar_clean = nbar_clean.isel(time=time_sorted)
        nbar_clean.attrs['affine'] = affine
        nbar_clean.attrs['crs'] = crswkt

        print("'Clean' up the Red and NIR bands to remove any values less than zero.")
        nbar_clean['red'] = nbar_clean.red.where(nbar_clean.red > 0)
        nbar_clean['nir'] = nbar_clean.nir.where(nbar_clean.nir > 0)

        print("Calculate NDVI.")
        ndvi = ((nbar_clean.nir - nbar_clean.red) /
                (nbar_clean.nir + nbar_clean.red))
        ndvi.attrs['affine'] = affine
        ndvi.attrs['crs'] = crswkt

        print("Create Composite")
        ndviMean = ndvi.mean(dim='time')
        ndviMean.attrs['affine'] = affine
        ndviMean.attrs['crs'] = crswkt

        print("Rasterise the GMW extent map for the area of interest.")
        # Define pixel size and NoData value of the new raster
        xres = nbar_clean.attrs['affine'][0]
        yres = nbar_clean.attrs['affine'][4]
        noDataVal = -9999

        # Set the geotransform properties
        xcoord = ndviMean.coords['x'].min()
        ycoord = ndviMean.coords['y'].max()
        geotransform = (xcoord - (xres * 0.5), xres, 0,
                        ycoord + (yres * 0.5), 0, yres)

        # Open the data source and read in the extent
        source_ds = ogr.Open(mangShpExt)
        source_layer = source_ds.GetLayer()
        source_srs = source_layer.GetSpatialRef()
        # This is the extent of Australia
        vx_min, vx_max, vy_min, vy_max = source_layer.GetExtent()

        # Create the destination extent
        yt, xt = ndviMean.shape

        # Set up an in-memory GDAL image (one byte band) to rasterise the
        # shapefile into
        target_ds = gdal.GetDriverByName('MEM').Create('', xt, yt, 1,
                                                       gdal.GDT_Byte)
        target_ds.SetGeoTransform(geotransform)
        albers = osr.SpatialReference()
        albers.ImportFromEPSG(3577)
        target_ds.SetProjection(albers.ExportToWkt())
        band = target_ds.GetRasterBand(1)
        band.SetNoDataValue(noDataVal)

        # Rasterise
        gdal.RasterizeLayer(target_ds, [1], source_layer, burn_values=[1])

        # Read the GMW mask as an array
        gmwMaskArr = band.ReadAsArray()

        print("Apply the GMW Mask to the NDVI values")
        mangroveNDVIMean = ndviMean.where(gmwMaskArr == 1)

        print("Apply thresholds to NDVI to find total mangrove mask and "
              "closed canopy mangrove mask.")
        mangroveAreaPxlC = mangroveNDVIMean > ndviThresLow
        clMangroveAreaPxlC = mangroveNDVIMean > ndviThresHigh

        print("Calculate the number of pixels within the mangrove mask and "
              "write to CSV file.")
        numMangPxls = numpy.sum(mangroveAreaPxlC.data)
        numClMangPxls = numpy.sum(clMangroveAreaPxlC.data)
        pxlCountSeries = pandas.Series([numMangPxls, numClMangPxls],
                                       index=['MangPxls', 'MangPxlsCl'])
        pxlCountSeries.to_csv(tileAFile)

        mangroveAreaPxlC.attrs['affine'] = affine
        mangroveAreaPxlC.attrs['crs'] = crswkt
        print("Save MangroveAreaPxlC to netcdf")
        mangroveAreaPxlC.to_netcdf(path=tileNCAMCFile, mode='w')

        clMangroveAreaPxlC.attrs['affine'] = affine
        clMangroveAreaPxlC.attrs['crs'] = crswkt
        print("Save clMangroveAreaPxlC to netcdf")
        clMangroveAreaPxlC.to_netcdf(path=tileNCCMCFile, mode='w')

        mangroveNDVIMean.attrs['affine'] = affine
        mangroveNDVIMean.attrs['crs'] = crswkt
        print("Save Composite to netcdf")
        mangroveNDVIMean.to_netcdf(path=tileNCFile, mode='w')
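
# Hedged usage sketch for calcMangNDVIMangPxlFromCube; every path, extent and
# threshold below is an illustrative placeholder. Note that `year` is used in
# string concatenation above and so is passed as a string.
calcMangNDVIMangPxlFromCube(tileNCFile='ndvi_composite.nc',
                            tileNCAMCFile='mangrove_mask.nc',
                            tileNCCMCFile='closed_canopy_mask.nc',
                            tileAFile='pixel_counts.csv',
                            minLat=-18.0, maxLat=-17.5,
                            minLon=139.0, maxLon=139.5,
                            year='2016',
                            mangShpExt='gmw_extent.shp',
                            ndviThresLow=0.4, ndviThresHigh=0.7)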
def load_ard(dc,
             products=None,
             min_gooddata=0.0,
             pq_categories_s2=['vegetation', 'snow or ice', 'water',
                               'bare soils', 'unclassified',
                               'dark area pixels'],
             pq_categories_ls=None,
             mask_pixel_quality=True,
             ls7_slc_off=True,
             predicate=None,
             dtype='auto',
             **kwargs):
    '''
    Loads and combines Landsat Collections 1 or 2, and Sentinel-2 for
    multiple sensors (i.e. ls5t, ls7e and ls8c for Landsat; s2a and s2b for
    Sentinel-2), optionally applies pixel quality masks, and keeps only time
    steps that contain greater than a minimum proportion of good quality
    (e.g. non-cloudy or non-shadowed) pixels.

    The function supports loading the following DEA Africa products:

        ls5_usgs_sr_scene
        ls7_usgs_sr_scene
        ls8_usgs_sr_scene
        usgs_ls8c_level2_2
        ga_ls8c_fractional_cover_2
        s2a_msil2a
        s2b_msil2a

    Last modified: March 2020

    Parameters
    ----------
    dc : datacube Datacube object
        The Datacube to connect to, i.e. `dc = datacube.Datacube()`. This
        allows you to also use development datacubes if required.
    products : list
        A list of product names to load data from. Valid options are
        Landsat C1: ['ls5_usgs_sr_scene', 'ls7_usgs_sr_scene',
        'ls8_usgs_sr_scene'],
        Landsat C2: ['usgs_ls8c_level2_2'],
        Sentinel-2: ['s2a_msil2a', 's2b_msil2a']
    min_gooddata : float, optional
        An optional float giving the minimum percentage of good quality
        pixels required for a satellite observation to be loaded. Defaults
        to 0.0 which will return all observations regardless of pixel
        quality (set to e.g. 0.99 to return only observations with more
        than 99% good quality pixels).
    pq_categories_s2 : list, optional
        An optional list of Sentinel-2 Scene Classification Layer (SCL)
        names to treat as good quality observations in the above
        `min_gooddata` calculation. The default is ['vegetation',
        'snow or ice', 'water', 'bare soils', 'unclassified',
        'dark area pixels'] which will return non-cloudy or non-shadowed
        land, snow, water, veg, and non-veg pixels.
    pq_categories_ls : dict, optional
        An optional dictionary that is used to generate a good quality
        pixel mask from the selected USGS product's pixel quality band
        (i.e. 'pixel_qa' for USGS Collection 1, and 'quality_l2_aerosol'
        for USGS Collection 2). This mask is used for both masking out low
        quality pixels (e.g. cloud or shadow), and for dropping
        observations entirely based on the above `min_gooddata`
        calculation. Default is None, which will apply the following mask
        for USGS Collection 1: `{'cloud': 'no_cloud',
        'cloud_shadow': 'no_cloud_shadow', 'nodata': False}`, and for USGS
        Collection 2: `{'cloud_shadow': 'not_cloud_shadow',
        'cloud_or_cirrus': 'not_cloud_or_cirrus', 'nodata': False}`.
    mask_pixel_quality : bool, optional
        An optional boolean indicating whether to apply the good data mask
        to all observations that were not filtered out for having fewer
        good quality pixels than `min_gooddata`. E.g. if
        `min_gooddata=0.99`, the filtered observations may still contain
        up to 1% poor quality pixels. False simply returns the resulting
        observations without masking out these pixels; True (the default)
        masks them and sets them to NaN using the good data mask. This
        will convert numeric values to floating point values which can
        cause memory issues; set to False to prevent this.
    ls7_slc_off : bool, optional
        An optional boolean indicating whether to include data from after
        the Landsat 7 SLC failure (i.e. SLC-off). Defaults to True, which
        keeps all Landsat 7 observations, including those acquired after
        May 31 2003.
    predicate : function, optional
        An optional function that can be passed in to restrict the
        datasets that are loaded by the function. A filter function should
        take a `datacube.model.Dataset` object as an input (i.e. as
        returned from `dc.find_datasets`), and return a boolean. For
        example, a filter function could be used to return True on only
        datasets acquired in January:
        `dataset.time.begin.month == 1`
    dtype : string, optional
        An optional parameter that controls the data type/dtype that
        layers are coerced to after loading. Valid values: 'native',
        'auto', 'float{16|32|64}'. When 'auto' is used, the data will be
        converted to `float32` if masking is used, otherwise data will be
        returned in the native data type of the data. Be aware that if
        data is loaded in its native dtype, nodata and masked pixels will
        be returned with the data's native nodata value (typically -999),
        not NaN.
    **kwargs :
        A set of keyword arguments to `dc.load` that define the
        spatiotemporal query used to extract data. This typically includes
        `measurements`, `x`, `y`, `time`, `resolution`, `resampling`,
        `group_by` and `crs`. Keyword arguments can either be listed
        directly in the `load_ard` call like any other parameter (e.g.
        `measurements=['nbart_red']`), or by passing in a query kwarg
        dictionary (e.g. `**query`). For a list of possible options, see
        the `dc.load` documentation:
        https://datacube-core.readthedocs.io/en/latest/dev/api/generate/datacube.Datacube.load.html

    Returns
    -------
    combined_ds : xarray Dataset
        An xarray dataset containing only satellite observations that
        contain greater than `min_gooddata` proportion of good quality
        pixels.
    '''

    #########
    # Setup #
    #########

    # Prevent function from altering the original query object
    kwargs = deepcopy(kwargs)

    # We deal with `dask_chunks` separately
    dask_chunks = kwargs.pop('dask_chunks', None)
    requested_measurements = kwargs.pop('measurements', None)

    # Warn user if they combine lazy load with min_gooddata
    if (min_gooddata > 0.0) and dask_chunks is not None:
        warnings.warn("Setting 'min_gooddata' percentage to > 0.0 "
                      "will cause dask arrays to compute when "
                      "loading pixel-quality data to calculate "
                      "'good pixel' percentage. This can "
                      "slow the return of your dataset.")

    # Verify that products were provided and determine if Sentinel-2
    # or Landsat data is being loaded
    if not products:
        raise ValueError('Please provide a list of product names '
                         'to load data from.')
    elif all(['level2' in product for product in products]):
        product_type = 'c2'
    elif all(['sr' in product for product in products]):
        product_type = 'c1'
    elif all(['s2' in product for product in products]):
        product_type = 's2'
    elif all(['fractional_cover' in product for product in products]):
        product_type = 'fc'
    else:
        # Mixed or unrecognised product lists cannot be masked consistently
        raise ValueError('All products must come from the same collection '
                         '(Landsat C1, Landsat C2, Sentinel-2 or '
                         'fractional cover)')

    # Set the pixel quality band according to collection
    if (product_type == 'c2') or (product_type == 'fc'):
        print('Using pixel quality parameters for USGS Collection 2')
        fmask_band = 'quality_l2_aerosol'
    elif product_type == 'c1':
        print('Using pixel quality parameters for USGS Collection 1')
        fmask_band = 'pixel_qa'
    elif product_type == 's2':
        print('Using pixel quality parameters for Sentinel 2')
        fmask_band = 'scl'

    measurements = (requested_measurements.copy()
                    if requested_measurements else None)

    # Deal with the "load all" case: pick a set of bands common across
    # all products
    if measurements is None:
        if product_type == 'fc':
            measurements = ['pv', 'npv', 'bs', 'ue']
        else:
            measurements = _common_bands(dc, products)

    # If `measurements` are specified but do not include the pixel quality
    # band, add it (skip for FC, which gets its PQ from a separate product)
    if measurements and product_type != 'fc':
        if fmask_band not in measurements:
            measurements.append(fmask_band)

    # Get lists of data and mask bands so that we can later exclude
    # mask bands from being masked themselves
    if product_type != 'fc':
        data_bands = [band for band in measurements
                      if band not in (fmask_band,)]
        mask_bands = [band for band in measurements
                      if band not in data_bands]

    #################
    # Find datasets #
    #################

    # Pull out query params only to pass to dc.find_datasets
    query = _dc_query_only(**kwargs)

    # Extract datasets for each product using a subset of dcload_kwargs
    dataset_list = []

    # Get list of datasets for each product
    print('Finding datasets')
    for product in products:

        # Obtain list of datasets for product
        print(f'    {product}')
        datasets = dc.find_datasets(product=product, **query)

        # Remove Landsat 7 SLC-off observations if ls7_slc_off=False
        if not ls7_slc_off and product in ['ls7_usgs_sr_scene',
                                           'usgs_ls7e_level2_2']:
            print('    Ignoring SLC-off observations for ls7')
            datasets = [i for i in datasets
                        if i.time.begin < datetime.datetime(2003, 5, 31)]

        # Add any returned datasets to list
        dataset_list.extend(datasets)

    # Raise exception if no datasets are returned
    if len(dataset_list) == 0:
        raise ValueError("No data available for query: ensure that "
                         "the products specified have data for the "
                         "time and location requested")

    # If predicate is specified, use this function to filter the list
    # of datasets prior to load
    if predicate:
        print('Filtering datasets using filter function')
        dataset_list = [ds for ds in dataset_list if predicate(ds)]

    # Raise exception if filtering removes all datasets
    if len(dataset_list) == 0:
        raise ValueError("No data available after filtering with "
                         "filter function")

    # Load fmask from USGS C2 for masking FC, and filter if required.
    # NOTE: This works because only one sensor (ls8) has FC; if/when FC is
    # calculated for LS7 and LS5, this section will need to move into the
    # for loop above.
    if product_type == 'fc':

        print('    PQ data from USGS C2')
        dataset_list_fc_pq = dc.find_datasets(product='usgs_ls8c_level2_2',
                                              **query)

        if predicate:
            print('Filtering datasets using filter function')
            dataset_list_fc_pq = [ds for ds in dataset_list_fc_pq
                                  if predicate(ds)]

    #############
    # Load data #
    #############

    # Note we always load using dask here so that we can lazily load data
    # before filtering by good data
    ds = dc.load(datasets=dataset_list,
                 measurements=measurements,
                 dask_chunks={} if dask_chunks is None else dask_chunks,
                 **kwargs)

    if product_type == 'fc':
        ds_fc_pq = dc.load(datasets=dataset_list_fc_pq,
                           dask_chunks={} if dask_chunks is None
                           else dask_chunks,
                           **kwargs)

    ####################
    # Filter good data #
    ####################

    # Need to distinguish between products due to different pixel quality
    # band properties

    # Collection 2 USGS or FC
    if (product_type == 'c2') or (product_type == 'fc'):
        if pq_categories_ls is None:
            quality_flags_prod = {'cloud_shadow': 'not_cloud_shadow',
                                  'cloud_or_cirrus': 'not_cloud_or_cirrus',
                                  'nodata': False}
        else:
            quality_flags_prod = pq_categories_ls

        if product_type == 'fc':
            pq_mask = masking.make_mask(ds_fc_pq[fmask_band],
                                        **quality_flags_prod)
        else:
            pq_mask = masking.make_mask(ds[fmask_band],
                                        **quality_flags_prod)

    # Collection 1 USGS
    if product_type == 'c1':
        if pq_categories_ls is None:
            quality_flags_prod = {'cloud': 'no_cloud',
                                  'cloud_shadow': 'no_cloud_shadow',
                                  'nodata': False}
        else:
            quality_flags_prod = pq_categories_ls

        pq_mask = masking.make_mask(ds[fmask_band], **quality_flags_prod)

    # Sentinel 2
    if product_type == 's2':
        pq_mask = odc.algo.fmask_to_bool(ds[fmask_band],
                                         categories=pq_categories_s2)

    # The good data percentage calculation has to load in all `fmask` data,
    # which can be slow. If the user has chosen no filtering by using the
    # default `min_gooddata = 0`, we can skip this step completely to save
    # processing time
    if min_gooddata > 0.0:

        # Compute good data for each observation as % of total pixels
        print('Counting good quality pixels for each time step')
        data_perc = (pq_mask.sum(axis=[1, 2], dtype='int32') /
                     (pq_mask.shape[1] * pq_mask.shape[2]))
        keep = data_perc >= min_gooddata

        # Filter by `min_gooddata` to drop low quality observations
        total_obs = len(ds.time)
        ds = ds.sel(time=keep)
        pq_mask = pq_mask.sel(time=keep)
        print(f'Filtering to {len(ds.time)} out of {total_obs} '
              f'time steps with at least {min_gooddata:.1%} '
              f'good quality pixels')

    ###############
    # Apply masks #
    ###############

    # Generate good quality data mask
    mask = None
    if mask_pixel_quality:
        print('Applying pixel quality/cloud mask')
        mask = pq_mask

    # Split into data/mask bands, as conversion to float and masking should
    # only be applied to data bands
    if product_type == 'fc':
        ds_data = ds
    else:
        ds_data = ds[data_bands]
        ds_masks = ds[mask_bands]

    # Mask data if either of the above masks were generated
    if mask is not None:
        ds_data = odc.algo.keep_good_only(ds_data, where=mask)

    # Automatically set dtype to either native or float32 depending on
    # whether masking was requested
    if dtype == 'auto':
        dtype = 'native' if mask is None else 'float32'

    # Set nodata values using odc.algo tools to reduce peak memory usage
    # when converting data dtype
    if dtype != 'native':
        ds_data = odc.algo.to_float(ds_data, dtype=dtype)

    # Put data and mask bands back together
    if product_type == 'fc':
        attrs = ds.attrs
        ds = ds_data
        ds.attrs.update(attrs)
    else:
        attrs = ds.attrs
        ds = xr.merge([ds_data, ds_masks])
        ds.attrs.update(attrs)

    ###############
    # Return data #
    ###############

    # Drop bands not originally requested by user
    if requested_measurements:
        ds = ds[requested_measurements]

    # If user supplied dask_chunks, return data as a dask array without
    # actually loading it in
    if dask_chunks is not None:
        print(f'Returning {len(ds.time)} time steps as a dask array')
        return ds
    else:
        print(f'Loading {len(ds.time)} time steps')
        return ds.compute()
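
# Hedged usage sketch for load_ard, based on the parameters documented above.
# Extents, resolution and output CRS are illustrative placeholders passed
# through to dc.load via **kwargs.
import datacube

dc = datacube.Datacube(app='load_ard_example')
ds = load_ard(dc,
              products=['ls8_usgs_sr_scene'],
              measurements=['red', 'green', 'blue'],
              x=(31.0, 31.5), y=(-25.0, -24.5),     # hypothetical extents
              time=('2018-01-01', '2018-12-31'),
              min_gooddata=0.90,
              output_crs='EPSG:6933',
              resolution=(-30, 30))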