def _load_pq(self, x, y, res, period, n_landsat):
    query = {
        'time': period,
        'x': x,
        'y': y,
        'crs': 'EPSG:3577',
        'measurements': ['pixelquality'],
        'resolution': res,
    }

    pq_stack = []
    for n in n_landsat:
        pq_stack.append(self.load(product='ls{}_pq_albers'.format(n),
                                  group_by='solar_day',
                                  fuse_func=ga_pq_fuser,
                                  resampling='nearest',
                                  **query))
    pq_stack = xr.concat(pq_stack, dim='time').sortby('time')

    # Land/sea mask isn't used at the moment. Possible alternatives are WOFS and ITEM.
    # pq_stack['land'] = masking.make_mask(pq_stack.pixelquality, land_sea='land')

    # Mask cloud, cloud shadow, band saturation and invalid (non-contiguous) data
    pq_stack['no_cloud'] = masking.make_mask(pq_stack.pixelquality,
                                             cloud_acca='no_cloud',
                                             cloud_fmask='no_cloud',
                                             cloud_shadow_acca='no_cloud_shadow',
                                             cloud_shadow_fmask='no_cloud_shadow',
                                             blue_saturated=False,
                                             green_saturated=False,
                                             red_saturated=False,
                                             nir_saturated=False,
                                             swir1_saturated=False,
                                             swir2_saturated=False,
                                             contiguous=True)
    pq_stack['valid'] = masking.make_mask(pq_stack.pixelquality, contiguous=True)
    return pq_stack
def transform_data(self, data, pq_data, extent_mask, *masks):
    # pylint: disable=too-many-locals
    # Extent-mask each band individually to preserve per-band nodata
    _LOG.debug("transform begin %s", datetime.now())
    if extent_mask is not None:
        for band in data.data_vars:
            try:
                data[band] = data[band].where(extent_mask,
                                              other=data[band].attrs['nodata'])
            except AttributeError:
                data[band] = data[band].where(extent_mask)
    _LOG.debug("extent mask complete %s", datetime.now())
    data = self.apply_masks(data, pq_data)
    _LOG.debug("mask complete %s", datetime.now())
    imgdata = Dataset()
    for band in self.value_map.keys():
        band_data = Dataset()
        for value in self.value_map[band]:
            target = Dataset()
            flags = value["flags"]
            rgb = Color(value["color"])
            dims = data[band].dims
            coords = data[band].coords
            bdata = data[band]
            colors = ["red", "green", "blue"]
            for color in colors:
                c = numpy.full(data[band].shape, getattr(rgb, color))
                target[color] = DataArray(c, dims=dims, coords=coords)
            if "or" in flags:
                # OR semantics: union the mask produced by each individual flag
                fs = flags["or"]
                mask = None
                for f in fs.items():
                    f = {f[0]: f[1]}
                    if mask is None:
                        mask = make_mask(bdata, **f)
                    else:
                        mask |= make_mask(bdata, **f)
            else:
                # AND semantics (the default): make_mask applies all flags at once
                fs = flags if "and" not in flags else flags["and"]
                mask = make_mask(bdata, **fs)
            masked = target.where(mask)
            if len(band_data.data_vars) == 0:
                band_data = masked
            else:
                band_data = band_data.combine_first(masked)
        if len(imgdata.data_vars) == 0:
            imgdata = band_data
        else:
            imgdata = merge([imgdata, band_data])
    imgdata *= 255
    return imgdata.astype('uint8')
def create_mask(data, flags):
    if "or" in flags:
        # OR semantics: a pixel passes if any single flag matches
        fs = flags["or"]
        mask = None
        for key, value in fs.items():
            if mask is None:
                mask = make_mask(data, **{key: value})
            else:
                mask |= make_mask(data, **{key: value})
    else:
        # AND semantics (the default): all flags must match simultaneously
        fs = flags if "and" not in flags else flags["and"]
        mask = make_mask(data, **fs)
    return mask
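
# Hedged usage sketch for create_mask. The `pq` handle is an assumption (a
# previously loaded GA Landsat PQ dataset); the flag names mirror those used
# throughout this module. An "or" spec unions the per-flag masks, while a
# plain (or "and") spec requires every flag to hold at once.
cloud_flags = {"or": {"cloud_acca": "cloud", "cloud_fmask": "cloud"}}
cloudy = create_mask(pq.pixelquality, cloud_flags)    # cloudy by either algorithm

clear_flags = {"cloud_acca": "no_cloud", "cloud_fmask": "no_cloud",
               "contiguous": True}
clear = create_mask(pq.pixelquality, clear_flags)     # clear by both, valid in all bands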
def get_data(query, mask_components, pnbars, pfcs, pqas):
    nbars = []
    fcs = []
    for pnbar, pfc, pqa in zip(pnbars, pfcs, pqas):
        # Load the NBAR, FC and corresponding PQ
        nbar = dc.load(product=pnbar, measurements=pnbar_measurements, **query)
        fc = dc.load(product=pfc, measurements=pfc_measurements, **query)
        pq = dc.load(product=pqa, fuse_func=ga_pq_fuser, **query)

        # Apply the PQ masks to the data
        try:
            cloud_free = masking.make_mask(pq, **mask_components)
            good_data = cloud_free.pixelquality.loc[query['time'][0]:query['time'][1]]
            nbar = nbar.where(good_data)
            fc = fc.where(good_data)
            del cloud_free, good_data
        except ValueError:
            continue
        nbars.append(nbar)
        fcs.append(fc)
        del nbar, fc, pq

    if not nbars:
        raise NoDataError

    # Concatenate data from the different sensors and sort so that observations
    # are ordered by time rather than by sensor
    nbar = sort_data(nbars)
    fc = sort_data(fcs)
    return nbar, fc
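
# Hedged sketch of the inputs get_data expects. The spatial/temporal bounds are
# illustrative only, and the `dc` handle, `sort_data` and `NoDataError` come
# from the surrounding module; the product and flag names mirror those used
# elsewhere in this file.
query = {'x': (148.0, 148.2), 'y': (-35.3, -35.1), 'crs': 'EPSG:4326',
         'time': ('2016-01-01', '2016-12-31')}
mask_components = {'cloud_acca': 'no_cloud', 'cloud_fmask': 'no_cloud',
                   'contiguous': True}
nbar, fc = get_data(query, mask_components,
                    pnbars=['ls8_nbar_albers'],
                    pfcs=['ls8_fc_albers'],
                    pqas=['ls8_pq_albers'])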
def return_good_pixels(data, sensor_pq, start_date, end_date):
    """
    This function uses pixel quality information to mask out and remove
    pixel quality artefacts from extracted data.
    """
    # Define which pixel quality artefacts you want removed from the datacube results
    mask_components = {'cloud_acca': 'no_cloud',
                       'cloud_shadow_acca': 'no_cloud_shadow',
                       'cloud_shadow_fmask': 'no_cloud_shadow',
                       'cloud_fmask': 'no_cloud',
                       'blue_saturated': False,
                       'green_saturated': False,
                       'red_saturated': False,
                       'nir_saturated': False,
                       'swir1_saturated': False,
                       'swir2_saturated': False,
                       'contiguous': True}

    # Grab the projection info before masking/sorting
    crs = data.crs
    crswkt = data.crs.wkt
    affine = data.affine

    # Apply the PQ masks to the data
    cloud_free = masking.make_mask(sensor_pq, **mask_components)
    good_data = cloud_free.pixelquality.loc[start_date:end_date]
    quality_data = data.where(good_data)
    return quality_data
def load_miningrehab_data():
    """
    Loads Fractional Cover and Water Observations from Space products for the
    mining case-study area.

    Last modified: January 2020

    outputs
    ds - dataset containing masked Fractional Cover data from Landsat 8.
    Masked values are set to 'nan'
    """
    # Suppress warnings
    warnings.filterwarnings("ignore")

    # Initialise the data cube. The 'app' argument is used to identify this app
    dc = datacube.Datacube(app="mining-app")

    # Specify latitude and longitude ranges
    latitude = (-34.426512, -34.434517)
    longitude = (116.648123, 116.630731)

    # Specify the date range
    time = ("2015-06-01", "2018-06-30")

    # Construct the data cube query
    query = {
        "x": longitude,
        "y": latitude,
        "time": time,
        "output_crs": "EPSG:3577",
        "resolution": (-25, 25),
    }

    print("Loading Fractional Cover for Landsat 8")
    dataset_fc = dc.load(product="ls8_fc_albers", **query)

    print("Loading WoFS for Landsat 8")
    dataset_wofs = dc.load(product="wofs_albers", like=dataset_fc)

    # Match the two time series on their shared timesteps
    shared_times = np.intersect1d(dataset_fc.time, dataset_wofs.time)
    ds_fc_matched = dataset_fc.sel(time=shared_times)
    ds_wofs_matched = dataset_wofs.sel(time=shared_times)

    # Mask FC to dry pixels only
    dry_mask = masking.make_mask(ds_wofs_matched, dry=True)

    # Get the dry-masked FC dataset (as a proportion of 1, rather than 100)
    ds_fc_masked = ds_fc_matched.where(dry_mask.water) / 100

    # Resample to monthly medians
    ds_resampled = ds_fc_masked.resample(time="1M").median()
    ds_resampled.attrs["crs"] = dataset_fc.crs

    # Return the data
    return ds_resampled
def apply_masks(self, data, pq_data):
    if pq_data is not None:
        # Apply each configured mask in turn; the masks are effectively ANDed
        # because each .where() call further masks the already-masked data.
        # (An unused `net_mask = None` leftover has been dropped.)
        for mask in self.masks:
            odc_mask = make_mask(pq_data, **mask.flags)
            mask_data = getattr(odc_mask, self.product.pq_band)
            if mask.invert:
                mask_data = ~mask_data
            data = data.where(mask_data)
    return data
def make_mask_from_spec(loaded_mask_data, mask_spec):
    if mask_spec.get('flags') is not None:
        mask = make_mask(loaded_mask_data, **mask_spec['flags'])
    elif mask_spec.get('less_than') is not None:
        less_than = float(mask_spec['less_than'])
        mask = loaded_mask_data < less_than
    elif mask_spec.get('greater_than') is not None:
        greater_than = float(mask_spec['greater_than'])
        mask = loaded_mask_data > greater_than
    else:
        # Guard against specs with none of the recognised keys, which would
        # otherwise leave `mask` undefined below
        raise ValueError("mask_spec must contain 'flags', 'less_than' "
                         "or 'greater_than'")

    if mask_spec.get('invert') is True:
        mask = np.logical_not(mask)

    return mask
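
# Hedged examples of the spec dicts make_mask_from_spec accepts. The `pq` and
# `nbar` handles and the threshold value are assumptions for illustration; the
# flag names mirror the GA PQ flags used elsewhere in this module.
flag_spec = {'flags': {'cloud_fmask': 'no_cloud', 'contiguous': True}}
threshold_spec = {'less_than': '4000', 'invert': True}   # mask = NOT (data < 4000)

clear = make_mask_from_spec(pq.pixelquality, flag_spec)
bright = make_mask_from_spec(nbar.red, threshold_spec)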
def run(tile, gwf, center_dt):
    """Basic datapreparation recipe 001

    Computes mean NDVI for a landsat collection over a given time frame

    Args:
        tile (tuple): Tuple of (tile indices, Tile object). Tile object can be
            loaded as xarray.Dataset using gwf.load()
        gwf (GridWorkflow): GridWorkflow object instantiated with the
            corresponding product
        center_dt (datetime): Date to be used in making the filename

    Return:
        str: The filename of the netcdf file created
    """
    try:
        center_dt = center_dt.strftime("%Y-%m-%d")
        # TODO: Need a more dynamic way to handle this filename
        # (e.g.: global variable for the path up to datacube_ingest)
        nc_filename = os.path.expanduser(
            '~/datacube_ingest/recipes/landsat_8_ndvi_mean/ndvi_mean_%d_%d_%s.nc'
            % (tile[0][0], tile[0][1], center_dt))
        if os.path.isfile(nc_filename):
            raise ValueError('%s already exists' % nc_filename)
        # Load Landsat sr
        sr = gwf.load(tile[1], dask_chunks={'x': 1667, 'y': 1667})
        # Compute ndvi
        sr['ndvi'] = (sr.nir - sr.red) / (sr.nir + sr.red) * 10000
        clear = masking.make_mask(sr.pixel_qa, clear=True)
        ndvi = sr.drop(['pixel_qa', 'blue', 'red', 'green', 'nir', 'swir1', 'swir2'])
        ndvi_clear = ndvi.where(clear)
        # Run temporal reductions and rename DataArrays
        ndvi_mean = ndvi_clear.mean('time', keep_attrs=True)
        ndvi_mean['ndvi'].attrs['nodata'] = -9999
        ndvi_mean_int = ndvi_mean.apply(to_int)
        ndvi_mean_int.attrs['crs'] = sr.attrs['crs']
        write_dataset_to_netcdf(ndvi_mean_int, nc_filename,
                                netcdfparams={'zlib': True})
        return nc_filename
    except Exception as e:
        print('Tile (%d, %d) not processed. %s' % (tile[0][0], tile[0][1], e))
        # Re-raise so callers see the failure; the trailing `return None`
        # that followed `raise` was unreachable and has been removed
        raise
def cloud_coverage(dataset):
    from datacube.storage import masking  # Import masking capabilities
    from .dc_mosaic import create_median_mosaic

    clean_pixel_mask = masking.make_mask(dataset.quality,
                                         cloud=False,
                                         radiometric_saturation='none',
                                         terrain_occlusion=False)
    masked_cloud = dataset.where((dataset != 0) & clean_pixel_mask)
    mosaic = create_median_mosaic(masked_cloud, clean_pixel_mask, no_data=0)
    mosaic = mosaic.expand_dims({'time': 1})
    return threshold_percentage(mosaic.red)
def return_good_pixels(nbar, pq):
    """
    This function uses pixel quality information to mask out and remove
    pixel quality artefacts from extracted data.
    """
    mask_components = {'cloud_acca': 'no_cloud',
                       'cloud_shadow_acca': 'no_cloud_shadow',
                       'cloud_shadow_fmask': 'no_cloud_shadow',
                       'cloud_fmask': 'no_cloud',
                       'blue_saturated': False,
                       'green_saturated': False,
                       'red_saturated': False,
                       'nir_saturated': False,
                       'swir1_saturated': False,
                       'swir2_saturated': False,
                       'contiguous': True}
    pqmask = masking.make_mask(pq.pixelquality, **mask_components)
    return nbar.where(pqmask)
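
# Hedged usage sketch: the product names and `query` are assumptions, but the
# NBAR/PQ pairing (and aligning NBAR to the PQ timesteps before masking)
# mirrors the loads used elsewhere in this module.
nbar = dc.load(product='ls8_nbar_albers', group_by='solar_day', **query)
pq = dc.load(product='ls8_pq_albers', group_by='solar_day',
             fuse_func=ga_pq_fuser, **query)
clean = return_good_pixels(nbar.sel(time=pq.time), pq)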
def run(tile, center_dt, path):
    """Basic datapreparation recipe 001

    Computes mean NDVI for a landsat collection over a given time frame

    Args:
        tile (tuple): Tuple of (tile indices, Tile object). Tile object can be
            loaded as xarray.Dataset using gwf.load()
        center_dt (datetime): Date to be used in making the filename
        path (str): Directory where files generated are to be written

    Return:
        str: The filename of the netcdf file created
    """
    try:
        center_dt = center_dt.strftime("%Y-%m-%d")
        nc_filename = os.path.join(
            path, 'ndvi_mean_%d_%d_%s.nc' % (tile[0][0], tile[0][1], center_dt))
        if os.path.isfile(nc_filename):
            logger.warning('%s already exists. Returning filename for database indexing',
                           nc_filename)
            return nc_filename
        # Load Landsat sr
        sr = GridWorkflow.load(tile[1], dask_chunks={'x': 1667, 'y': 1667})
        # Compute ndvi
        sr['ndvi'] = (sr.nir - sr.red) / (sr.nir + sr.red) * 10000
        clear = masking.make_mask(sr.pixel_qa, clear=True)
        ndvi = sr.drop(['pixel_qa', 'blue', 'red', 'green', 'nir', 'swir1', 'swir2'])
        ndvi_clear = ndvi.where(clear)
        # Run temporal reductions and rename DataArrays
        ndvi_mean = ndvi_clear.mean('time', keep_attrs=True)
        ndvi_mean['ndvi'].attrs['nodata'] = -9999
        ndvi_mean_int = ndvi_mean.apply(to_int)
        ndvi_mean_int.attrs['crs'] = sr.attrs['crs']
        write_dataset_to_netcdf(ndvi_mean_int, nc_filename,
                                netcdfparams={'zlib': True})
        return nc_filename
    except Exception as e:
        logger.info('Tile (%d, %d) not processed. %s' % (tile[0][0], tile[0][1], e))
        return None
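
# Hedged driver sketch for run(): the product name and query bounds are
# assumptions, but the (cell index, Tile) tuples yielded by
# GridWorkflow.list_cells(...).items() match the `tile` argument this
# recipe expects.
gwf = GridWorkflow(dc.index, product='ls8_espa_mexico')
tiles = gwf.list_cells(product='ls8_espa_mexico',
                       time=('2017-01-01', '2017-12-31'))
for tile in tiles.items():
    run(tile, center_dt=datetime(2017, 7, 1), path='/tmp/ndvi')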
def compute_mosaic(products, measurements, **parsed_expressions):
    with Datacube() as dc:
        acq_range = parsed_expressions['time']
        click.echo("Processing time range {}".format(acq_range))
        datasets = []

        for prodname in products:
            dataset = dc.load(product=prodname,
                              measurements=measurements,
                              group_by='solar_day',
                              **parsed_expressions)
            if len(dataset) == 0:
                continue
            else:
                click.echo("Found {} time slices of {} during {}.".format(
                    len(dataset['time']), prodname, acq_range))

            pq = dc.load(product=prodname.replace('nbar', 'pq'),
                         group_by='solar_day',
                         fuse_func=pq_fuser,
                         **parsed_expressions)
            if len(pq) == 0:
                click.echo('No PQ found, skipping')
                continue

            crs = dataset.attrs['crs']
            dataset = dataset.where(dataset != -999)
            dataset.attrs['product'] = prodname
            dataset.attrs['crs'] = crs

            cloud_free = make_mask(pq.pixelquality, ga_good_pixel=True)
            dataset = dataset.where(cloud_free)
            if len(dataset) == 0:
                click.echo("Nothing left after PQ masking")
                continue

            datasets.append(dataset)

        dataset = xr.concat(datasets, dim='time')
        return dataset.median(dim='time')
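
# Hedged invocation sketch for compute_mosaic; the product, measurements and
# query values are assumptions. The product name must contain 'nbar' so the
# matching PQ product can be derived via replace('nbar', 'pq').
mosaic = compute_mosaic(products=['ls8_nbar_albers'],
                        measurements=['red', 'green', 'blue'],
                        time=('2016-01-01', '2016-03-31'),
                        x=(148.0, 148.2), y=(-35.3, -35.1))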
def load_slice(i):
    loc = [slice(i, i + 1), slice(None), slice(None)]
    d = GridWorkflow.load(tile[loc], **kwargs)
    if mask_nodata:
        d = sensible_mask_invalid_data(d)

    # Load all masks and combine them into one
    mask = None
    for (m_tile, flags, load_args), invert in zip(masks, inverts):
        m = GridWorkflow.load(m_tile[loc], **load_args)
        m, *other = m.data_vars.values()
        # TODO make use of make_mask_from_spec here
        m = make_mask(m, **flags)
        if invert:
            m = np.logical_not(m)
        if mask is None:
            mask = m
        else:
            mask &= m

    if mask_inplace or not mask_nodata:
        where = sensible_where_inplace
    else:
        where = sensible_where

    if mask is not None:
        # Apply the mask in place if asked, or if we already performed
        # conversion to float32; this avoids reallocation of memory and
        # hence increases the largest dataset size one can load without
        # running out of memory
        d = where(d, mask)

    if geom is not None:
        d = where(d, geometry_mask([geom], d.geobox, invert=True))

    if src_idx is not None:
        d.coords['source'] = ('time', np.repeat(src_idx, d.time.size))

    return d
def run(tile, center_dt, path):
    """Basic datapreparation recipe 001

    Combines temporal statistics of surface reflectance and ndvi with
    terrain metrics

    Args:
        tile (tuple): Tuple of (tile indices, Tile object). Tile object can be
            loaded as xarray.Dataset using gwf.load()
        center_dt (datetime): Date to be used in making the filename
        path (str): Directory where files generated are to be written

    Return:
        str: The filename of the netcdf file created
    """
    try:
        center_dt = center_dt.strftime("%Y-%m-%d")
        nc_filename = os.path.join(
            path, 'madmex_001_%d_%d_%s.nc' % (tile[0][0], tile[0][1], center_dt))
        # Load Landsat sr
        if os.path.isfile(nc_filename):
            logger.warning('%s already exists. Returning filename for database indexing',
                           nc_filename)
            return nc_filename
        sr_0 = GridWorkflow.load(tile[1], dask_chunks={'x': 1667, 'y': 1667})

        # Load terrain metrics using the same spatial parameters as sr
        dc = datacube.Datacube(app='landsat_madmex_001_%s' % randomword(5))
        terrain = dc.load(product='srtm_cgiar_mexico',
                          like=sr_0,
                          time=(datetime(1970, 1, 1), datetime(2018, 1, 1)),
                          dask_chunks={'x': 1667, 'y': 1667})
        dc.close()

        # Mask clouds, shadow, water, ice, ... and drop the qa layer
        clear = masking.make_mask(sr_0.pixel_qa, cloud=False,
                                  cloud_shadow=False, snow=False)
        sr_1 = sr_0.where(clear)
        sr_2 = sr_1.drop('pixel_qa')

        # Convert Landsat data to float (nodata values are converted to np.nan)
        sr_3 = sr_2.apply(func=to_float, keep_attrs=True)

        # Compute ndvi
        sr_3['ndvi'] = ((sr_3.nir - sr_3.red) / (sr_3.nir + sr_3.red)) * 10000
        sr_3['ndvi'].attrs['nodata'] = -9999

        # Run temporal reductions and rename DataArrays
        sr_mean = sr_3.mean('time', keep_attrs=True, skipna=True)
        sr_mean.rename({'blue': 'blue_mean', 'green': 'green_mean',
                        'red': 'red_mean', 'nir': 'nir_mean',
                        'swir1': 'swir1_mean', 'swir2': 'swir2_mean',
                        'ndvi': 'ndvi_mean'}, inplace=True)
        sr_min = sr_3.min('time', keep_attrs=True, skipna=True)
        sr_min.rename({'blue': 'blue_min', 'green': 'green_min',
                       'red': 'red_min', 'nir': 'nir_min',
                       'swir1': 'swir1_min', 'swir2': 'swir2_min',
                       'ndvi': 'ndvi_min'}, inplace=True)
        sr_max = sr_3.max('time', keep_attrs=True, skipna=True)
        sr_max.rename({'blue': 'blue_max', 'green': 'green_max',
                       'red': 'red_max', 'nir': 'nir_max',
                       'swir1': 'swir1_max', 'swir2': 'swir2_max',
                       'ndvi': 'ndvi_max'}, inplace=True)
        sr_std = sr_3.std('time', keep_attrs=True, skipna=True)
        sr_std.rename({'blue': 'blue_std', 'green': 'green_std',
                       'red': 'red_std', 'nir': 'nir_std',
                       'swir1': 'swir1_std', 'swir2': 'swir2_std',
                       'ndvi': 'ndvi_std'}, inplace=True)

        # Merge dataarrays
        combined = xr.merge([sr_mean.apply(to_int),
                             sr_min.apply(to_int),
                             sr_max.apply(to_int),
                             sr_std.apply(to_int),
                             terrain])
        combined.attrs['crs'] = sr_0.attrs['crs']
        write_dataset_to_netcdf(combined, nc_filename)
        return nc_filename
    except Exception as e:
        logger.warning('Tile (%d, %d) not processed. %s' % (tile[0][0], tile[0][1], e))
        return None
def load_clearlandsat(dc, query, sensors=['ls5', 'ls7', 'ls8'], bands_of_interest=None,
                      product='nbart', masked_prop=0.99, mask_dict=None,
                      apply_mask=False, ls7_slc_off=False):
    """
    Loads Landsat NBAR, NBART or FC25 and PQ data for multiple sensors (i.e. ls5, ls7,
    ls8), and returns a single xarray dataset containing only observations that contain
    greater than a given proportion of clear pixels.

    This function was designed to extract visually appealing time series of observations
    that are not affected by cloud, for example as an input to the `animated_timeseries`
    function from `DEAPlotting`.

    The proportion of clear pixels is calculated by summing the pixels that are flagged
    as being problematic in the Landsat PQ25 layer. By default only cloudy pixels or
    pixels without valid data in every band are included in the calculation, but this
    can be customised using the `mask_dict` parameter.

    Last modified: August 2018
    Author: Robbi Bishop-Taylor, Bex Dunn

    :param dc:
        A specific Datacube to import from, i.e. `dc = datacube.Datacube(app='Clear Landsat')`.
        This allows you to also use development datacubes if they have been imported
        into the environment.

    :param query:
        A dict containing the query bounds. Can include lat/lon, time etc. If no `time`
        query is given, the function defaults to all timesteps available to all sensors
        (e.g. 1987-2018)

    :param sensors:
        An optional list of Landsat sensor names to load data for. Options are 'ls5',
        'ls7', 'ls8'; defaults to all.

    :param product:
        An optional string specifying 'nbar', 'nbart' or 'fc'. Defaults to 'nbart'. For
        information on the difference, see the 'GettingStartedWithLandsat' or
        'Introduction_to_Fractional_Cover' notebooks on DEA-notebooks.

    :param bands_of_interest:
        An optional list of strings containing the bands to be read in; options include
        'red', 'green', 'blue', 'nir', 'swir1', 'swir2'; defaults to all available bands
        if no bands are specified.

    :param masked_prop:
        An optional float giving the minimum percentage of clear pixels required for a
        Landsat observation to be loaded. Defaults to 0.99 (i.e. only return observations
        with less than 1% of unclear pixels).

    :param mask_dict:
        An optional dict of arguments to the `masking.make_mask` function that can be
        used to identify clear observations from the PQ layer using alternative masking
        criteria. The default value of None masks out pixels flagged as cloud by either
        the ACCA or Fmask algorithms, and that have values for every band (equivalent to:
        `mask_dict={'cloud_acca': 'no_cloud', 'cloud_fmask': 'no_cloud',
        'contiguous': True}`). See the `Landsat5-7-8-PQ` notebook on DEA Notebooks for a
        list of all possible options.

    :param apply_mask:
        An optional boolean indicating whether resulting observations should have the PQ
        mask applied to filter out any remaining unclear cells. For example, if
        `masked_prop=0.99`, the filtered images may still contain up to 1% unclear/cloudy
        pixels. The default of False simply returns the resulting observations without
        masking out these pixels; True removes them using the mask.

    :param ls7_slc_off:
        An optional boolean indicating whether to include data from after the Landsat 7
        SLC failure (i.e. SLC-off). Defaults to False, which removes all Landsat 7
        observations after May 31 2003.

    :returns:
        An xarray dataset containing only Landsat observations that contain greater than
        `masked_prop` proportion of clear pixels.

    :example:

    >>> # Import modules
    >>> import datacube
    >>> import sys
    >>>
    >>> # Import dea-notebooks functions using relative link to Scripts directory
    >>> sys.path.append('../10_Scripts')
    >>> import DEADataHandling
    >>>
    >>> # Define datacube to import from
    >>> dc = datacube.Datacube(app='Clear Landsat')
    >>>
    >>> # Set up spatial and temporal query
    >>> query = {'x': (-191400.0, -183400.0),
    >>>          'y': (-1423460.0, -1415460.0),
    >>>          'time': ('1998-01-01', '2003-01-01'),
    >>>          'crs': 'EPSG:3577'}
    >>>
    >>> # Load in red, green and blue bands for all clear Landsat observations
    >>> # with < 1% unclear values.
    >>> combined_ds = DEADataHandling.load_clearlandsat(dc=dc, query=query,
    >>>                                                 bands_of_interest=['red', 'green', 'blue'],
    >>>                                                 masked_prop=0.99)
    >>> combined_ds
    """

    # List to save results from each sensor
    filtered_sensors = []

    # Iterate through all sensors, returning only observations with > mask_prop clear pixels
    for sensor in sensors:
        try:
            # If bands of interest are given, assign measurements in dc.load call. This is
            # for compatibility with the existing dea-notebooks load_nbarx function.
            if bands_of_interest:
                # Lazily load Landsat data using dask
                data = dc.load(product='{}_{}_albers'.format(sensor, product),
                               measurements=bands_of_interest,
                               group_by='solar_day',
                               dask_chunks={'time': 1},
                               **query)
            # If no bands of interest given, run without specifying measurements, and
            # therefore return all available bands
            else:
                # Lazily load Landsat data using dask
                data = dc.load(product='{}_{}_albers'.format(sensor, product),
                               group_by='solar_day',
                               dask_chunks={'time': 1},
                               **query)

            # Load PQ data
            pq = dc.load(product='{}_pq_albers'.format(sensor),
                         group_by='solar_day',
                         fuse_func=ga_pq_fuser,
                         dask_chunks={'time': 1},
                         **query)

            # Remove Landsat 7 SLC-off from PQ layer if ls7_slc_off=False
            if not ls7_slc_off and sensor == 'ls7':
                print('Ignoring SLC-off observations for ls7')
                data = data.where(data.time < np.datetime64('2003-05-30'), drop=True)

            # Return only Landsat observations that have matching PQ data
            time = (data.time - pq.time).time
            data = data.sel(time=time)
            pq = pq.sel(time=time)

            # Load PQ data using dask
            print('Loading {} PQ'.format(sensor))
            pq = pq.compute()

            # If a custom dict is provided for mask_dict, use these values to make mask from PQ
            if mask_dict:
                # Mask PQ using custom values by unpacking mask_dict **kwarg
                good_quality = masking.make_mask(pq.pixelquality, **mask_dict)
            else:
                # Identify pixels with no clouds in either ACCA or Fmask
                good_quality = masking.make_mask(pq.pixelquality,
                                                 cloud_acca='no_cloud',
                                                 cloud_fmask='no_cloud',
                                                 contiguous=True)

            # Compute good data for each observation as a percentage of total array pixels
            data_perc = good_quality.sum(dim=['x', 'y']) / (good_quality.shape[1] *
                                                            good_quality.shape[2])

            # Add data_perc data to Landsat dataset as a new xarray variable
            data['data_perc'] = xr.DataArray(data_perc, [('time', data.time)])

            # Filter and finally import data using dask
            filtered = data.where(data.data_perc >= masked_prop, drop=True)
            print('    Loading {} filtered {} timesteps'.format(len(filtered.time), sensor))
            filtered = filtered.compute()

            # Optionally apply mask (instead of only filtering)
            if apply_mask:
                filtered = filtered.where(good_quality)

            # Append result to list
            filtered_sensors.append(filtered)

            # Close datasets
            filtered = None
            good_quality = None
            data = None
            pq = None

        except Exception:
            # If there is no data for the sensor or if another error occurs, skip it
            print('    Skipping {}'.format(sensor))

    # Concatenate all sensors into one big xarray dataset, and then sort by time
    print('Combining and sorting ls5, ls7 and ls8 data')
    combined_ds = xr.concat(filtered_sensors, dim='time')
    combined_ds = combined_ds.sortby('time')

    # Filter to replace no data values with nans
    combined_ds = masking.mask_invalid_data(combined_ds)

    # Return combined dataset
    return combined_ds
def load_nbarx(dc, sensor, query, product='nbart', bands_of_interest='', filter_pq=True):
    """
    Loads NBAR (Nadir BRDF Adjusted Reflectance) or NBAR-T (terrain corrected NBAR)
    data for a sensor, masks using pixel quality (PQ), then optionally filters out
    terrain -999s (for NBAR-T). Returns an xarray dataset and CRS and Affine objects
    defining map projection and geotransform.

    Last modified: May 2018
    Author: Bex Dunn
    Modified by: Claire Krause, Robbi Bishop-Taylor, Bex Dunn

    inputs
    dc - Handle for the Datacube to import from. This allows you to also use dev
        environments if they have been imported into the environment.
    sensor - Options are 'ls5', 'ls7', 'ls8'
    query - A dict containing the query bounds. Can include lat/lon, time etc.

    optional
    product - 'nbar' or 'nbart'. Defaults to nbart unless otherwise specified
    bands_of_interest - List of strings containing the bands to be read in;
        defaults to all bands, options include 'red', 'green', 'blue', 'nir',
        'swir1', 'swir2'
    filter_pq - boolean. Will filter clouds and saturated pixels using PQ unless
        set to False

    outputs
    ds - Extracted and optionally PQ filtered dataset
    crs - CRS object defining dataset coordinate reference system
    affine - Affine object defining dataset affine transformation
    """

    product_name = '{}_{}_albers'.format(sensor, product)
    mask_product = '{}_{}_albers'.format(sensor, 'pq')
    print('Loading {}'.format(product_name))

    # If bands of interest are given, assign measurements in dc.load call
    if bands_of_interest:
        ds = dc.load(product=product_name, measurements=bands_of_interest,
                     group_by='solar_day', **query)
    # If no bands of interest given, run without specifying measurements
    else:
        ds = dc.load(product=product_name, group_by='solar_day', **query)

    # Proceed if the resulting call returns data
    if ds.variables:
        crs = ds.crs
        affine = ds.affine
        print('Loaded {}'.format(product_name))

        # If pixel quality filtering is enabled, extract PQ data to use as mask
        if filter_pq:
            sensor_pq = dc.load(product=mask_product, fuse_func=ga_pq_fuser,
                                group_by='solar_day', **query)

            # If PQ call returns data, use to mask input data
            if sensor_pq.variables:
                print('Generating mask {}'.format(mask_product))
                good_quality = masking.make_mask(sensor_pq.pixelquality,
                                                 cloud_acca='no_cloud',
                                                 cloud_shadow_acca='no_cloud_shadow',
                                                 cloud_shadow_fmask='no_cloud_shadow',
                                                 cloud_fmask='no_cloud',
                                                 blue_saturated=False,
                                                 green_saturated=False,
                                                 red_saturated=False,
                                                 nir_saturated=False,
                                                 swir1_saturated=False,
                                                 swir2_saturated=False,
                                                 contiguous=True)

                # Apply mask to preserve only good data
                ds = ds.where(good_quality)

            ds.attrs['crs'] = crs
            ds.attrs['affine'] = affine

        # Replace nodata values with nans
        ds = masking.mask_invalid_data(ds)
        return ds, crs, affine

    else:
        print('Failed to load {}'.format(product_name))
        return None, None, None
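
# Hedged usage sketch for load_nbarx; the query bounds are illustrative only,
# and `dc` is assumed to be an existing Datacube handle.
query = {'x': (153.40, 153.46), 'y': (-27.45, -27.50),
         'time': ('2017-01-01', '2017-12-31')}
ds, crs, affine = load_nbarx(dc, sensor='ls8', query=query,
                             bands_of_interest=['red', 'green', 'blue'])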
def load_cloudmaskedlandsat(dc, query, platforms=['ls5', 'ls7', 'ls8'],
                            bands=['red', 'green', 'blue', 'nir', 'swir1', 'swir2']):
    '''
    This function returns cloud-masked Landsat `{platform}_usgs_sr_scene` data by
    loading Landsat data and masking out any pixels affected by cloud or cloud
    shadow, or any pixels missing data in any band.

    Last modified: August 2019
    Author: Robbi Bishop-Taylor

    Parameters
    ----------
    dc : datacube Datacube object
        A specific Datacube to import from, i.e. `dc = datacube.Datacube(app='Clear Landsat')`.
        This allows you to also use development datacubes if required.
    query : dict
        A dict containing the query bounds. Can include lat/lon, time etc. If no
        `time` query is given, the function defaults to all timesteps available to
        all sensors (e.g. 1987-2018)
    platforms : list, optional
        An optional list of Landsat platform names to load data from. Options are
        'ls5', 'ls7', 'ls8'.
    bands : list, optional
        An optional list of strings containing the bands to be read in; options
        default to 'red', 'green', 'blue', 'nir', 'swir1', 'swir2'.

    Returns
    -------
    landsat_ds : xarray Dataset
        An xarray dataset containing pixel-quality masked Landsat observations
    '''

    from datacube.storage import masking

    # If bands do not include the pixel quality band, add it
    new_bands = set(bands)
    new_bands.add('pixel_qa')

    platform_data = []
    for platform in platforms:

        # Load landsat data
        landsat_ds = dc.load(product=f'{platform}_usgs_sr_scene',
                             dask_chunks={'time': 1},
                             measurements=list(new_bands),
                             group_by='solar_day',
                             **query)

        # Mask out all pixels affected by cloud, cloud shadow, or other invalid data
        valid_data = masking.make_mask(landsat_ds.pixel_qa,
                                       cloud='no_cloud',
                                       cloud_shadow='no_cloud_shadow',
                                       # water='no_water',
                                       nodata=False)
        landsat_ds = landsat_ds.where(valid_data)
        platform_data.append(landsat_ds)

    return xr.concat(platform_data, dim='time').sortby('time')
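
# Hedged usage sketch for load_cloudmaskedlandsat; the spatial and temporal
# bounds are assumptions for illustration.
query = {'x': (31.0, 31.2), 'y': (-25.0, -25.2),
         'time': ('2018-01-01', '2018-06-30')}
masked_ds = load_cloudmaskedlandsat(dc, query, platforms=['ls8'],
                                    bands=['red', 'green', 'blue'])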
def extractNDVIFromCube(tileFile, minLat, maxLat, minLon, maxLon, year):
    dc = datacube.Datacube(app='ExtractAnnualNDVI')

    # Define wavelengths/bands of interest; remove this kwarg to retrieve all bands
    bands_of_interest = ['red', 'nir']

    # Define sensors of interest
    sensors = ['ls8', 'ls7', 'ls5']

    # Define temporal range
    start_of_epoch = year + '-01-01'
    # Latest observation
    end_of_epoch = year + '-12-31'

    query = {
        'time': (start_of_epoch, end_of_epoch),
    }
    query['x'] = (minLon, maxLon)
    query['y'] = (maxLat, minLat)
    query['crs'] = 'EPSG:4326'

    # Define which pixel quality artefacts you want removed from the results
    mask_components = {
        'cloud_acca': 'no_cloud',
        'cloud_shadow_acca': 'no_cloud_shadow',
        'cloud_shadow_fmask': 'no_cloud_shadow',
        'cloud_fmask': 'no_cloud',
        'blue_saturated': False,
        'green_saturated': False,
        'red_saturated': False,
        'nir_saturated': False,
        'swir1_saturated': False,
        'swir2_saturated': False,
        'contiguous': True
    }

    print("Read pixel image data into memory.")
    sensor_clean = {}
    for sensor in sensors:
        print(sensor)
        # Load the NBAR and corresponding PQ
        sensor_nbar = dc.load(product=sensor + '_nbar_albers',
                              group_by='solar_day',
                              measurements=bands_of_interest,
                              **query)
        if bool(sensor_nbar):
            sensor_pq = dc.load(product=sensor + '_pq_albers',
                                group_by='solar_day',
                                fuse_func=pq_fuser,
                                **query)

            # Grab the projection info before masking/sorting
            crs = sensor_nbar.crs
            crswkt = sensor_nbar.crs.wkt
            affine = sensor_nbar.affine

            # This line is to make sure there's PQ to go with the NBAR
            sensor_nbar = sensor_nbar.sel(time=sensor_pq.time)

            # Apply the PQ masks to the NBAR
            cloud_free = masking.make_mask(sensor_pq, **mask_components)
            good_data = cloud_free.pixelquality.loc[start_of_epoch:end_of_epoch]
            sensor_nbar = sensor_nbar.where(good_data)
            sensor_clean[sensor] = sensor_nbar

    if bool(sensor_clean):
        # Concatenate data from different sensors together and sort so that
        # observations are sorted by time rather than sensor
        print("Merge data from different sensors.")
        nbar_clean = xarray.concat(sensor_clean.values(), dim='time')
        time_sorted = nbar_clean.time.argsort()
        nbar_clean = nbar_clean.isel(time=time_sorted)
        nbar_clean.attrs['affine'] = affine
        nbar_clean.attrs['crs'] = crswkt

        print("Calculate NDVI.")
        ndvi = ((nbar_clean.nir - nbar_clean.red) /
                (nbar_clean.nir + nbar_clean.red))
        ndvi.attrs['affine'] = affine
        ndvi.attrs['crs'] = crswkt

        print("Create Composite")
        ndviMean = ndvi.mean(dim='time')
        ndviMean.attrs['affine'] = affine
        ndviMean.attrs['crs'] = crswkt

        print("Save Composite to netcdf")
        ndviMean.to_netcdf(path=tileFile, mode='w')
print("Read pixel image data into memory.") sensor_clean = {} for sensor in sensors: print(sensor) #Load the NBAR and corresponding PQ sensor_nbar = dc.load(product= sensor+'_nbar_albers', group_by='solar_day', measurements = bands_of_interest, **query) if bool(sensor_nbar): sensor_pq = dc.load(product=sensor+'_pq_albers', group_by='solar_day', fuse_func=pq_fuser, **query) #grab the projection info before masking/sorting crs = sensor_nbar.crs crswkt = sensor_nbar.crs.wkt affine = sensor_nbar.affine #this line is to make sure there's PQ to go with the NBAR sensor_nbar = sensor_nbar.sel(time = sensor_pq.time) #Apply the PQ masks to the NBAR cloud_free = masking.make_mask(sensor_pq, **mask_components) good_data = cloud_free.pixelquality.loc[start_of_epoch:end_of_epoch] sensor_nbar = sensor_nbar.where(good_data) sensor_clean[sensor] = sensor_nbar if bool(sensor_clean): #Concatenate data from different sensors together and sort so that observations are sorted by time rather than sensor print("Merge data from different sensors.") nbar_clean = xarray.concat(sensor_clean.values(), dim='time') time_sorted = nbar_clean.time.argsort() nbar_clean = nbar_clean.isel(time=time_sorted) nbar_clean.attrs['affine'] = affine nbar_clean.attrs['crs'] = crswkt print("\'Clean\' up the Red and NIR bands to remove any values less than zero.") nbar_clean['red'] = nbar_clean.red.where(nbar_clean.red>0)
def load_ard(dc,
             products=None,
             min_gooddata=0.0,
             pq_categories_s2=['vegetation', 'snow or ice', 'water',
                               'bare soils', 'unclassified',
                               'dark area pixels'],
             pq_categories_ls=None,
             mask_pixel_quality=True,
             ls7_slc_off=True,
             filter_func=None,
             **extras):
    '''
    Loads USGS Landsat Collection 1 and Collection 2 data for multiple
    satellites (i.e. Landsat 5, 7, 8), and returns a single masked xarray
    dataset containing only observations that contain greater than a given
    proportion of good quality pixels. This can be used to extract clean
    time series of observations that are not affected by cloud, for example
    as an input to the `animated_timeseries` function from
    `deafrica-sandbox-notebooks/deafrica_plotting`.

    The proportion of good quality pixels is calculated by summing the
    pixels flagged as good quality in the product's pixel quality band
    (i.e. 'pixel_qa' for USGS Collection 1, and 'quality_l2_aerosol' for
    USGS Collection 2). By default non-cloudy or non-shadowed pixels are
    considered as good data, but this can be customised using the
    `pq_categories_s2` and `pq_categories_ls` parameters.

    Last modified: February 2020

    Parameters
    ----------
    dc : datacube Datacube object
        The Datacube to connect to, i.e. `dc = datacube.Datacube()`. This
        allows you to also use development datacubes if required.
    products : list
        A list of product names to load data from. Valid options are
        ['ls5_usgs_sr_scene', 'ls7_usgs_sr_scene', 'ls8_usgs_sr_scene'] for
        Landsat C1, ['usgs_ls8c_level2_2'] for Landsat C2, and
        ['s2a_msil2a', 's2b_msil2a'] for Sentinel 2.
    min_gooddata : float, optional
        An optional float giving the minimum percentage of good quality
        pixels required for a satellite observation to be loaded. Defaults
        to 0.0 which will return all observations regardless of pixel
        quality (set to e.g. 0.99 to return only observations with more
        than 99% good quality pixels).
    pq_categories_s2 : list, optional
        An optional list of S2 Scene Classification Layer (SCL) names to
        treat as good quality observations in the above `min_gooddata`
        calculation. The default is ['vegetation', 'snow or ice', 'water',
        'bare soils', 'unclassified', 'dark area pixels'] which will return
        non-cloudy or shadowed land, snow, water, veg, and non-veg pixels.
    pq_categories_ls : dict, optional
        An optional dictionary that is used to generate a good quality
        pixel mask from the selected USGS product's pixel quality band
        (i.e. 'pixel_qa' for USGS Collection 1, and 'quality_l2_aerosol'
        for USGS Collection 2). This mask is used for both masking out low
        quality pixels (e.g. cloud or shadow), and for dropping
        observations entirely based on the above `min_gooddata`
        calculation. Default is None, which will apply the following mask
        for USGS Collection 1: `{'cloud': 'no_cloud',
        'cloud_shadow': 'no_cloud_shadow', 'nodata': False}`, and for USGS
        Collection 2: `{'cloud_shadow': 'not_cloud_shadow',
        'cloud_or_cirrus': 'not_cloud_or_cirrus', 'nodata': False}`.
    mask_pixel_quality : bool, optional
        An optional boolean indicating whether to apply the good data mask
        to all observations that were not filtered out for having less
        good quality pixels than `min_gooddata`. E.g. if
        `min_gooddata=0.99`, the filtered observations may still contain
        up to 1% poor quality pixels. False simply returns the resulting
        observations without masking out these pixels; True (the default)
        masks them and sets them to NaN using the good data mask. This
        will convert numeric values to floating point values which can
        cause memory issues; set to False to prevent this.
    ls7_slc_off : bool, optional
        An optional boolean indicating whether to include data from after
        the Landsat 7 SLC failure (i.e. SLC-off). Defaults to True, which
        keeps all Landsat 7 observations > May 31 2003.
    filter_func : function, optional
        An optional function that can be passed in to restrict the
        datasets that are loaded by the function. A filter function should
        take a `datacube.model.Dataset` object as an input (i.e. as
        returned from `dc.find_datasets`), and return a boolean. For
        example, a filter function could be used to return True on only
        datasets acquired in January:
        `dataset.time.begin.month == 1`
    **extras :
        A set of keyword arguments to `dc.load` that define the
        spatiotemporal query used to extract data. This typically includes
        `measurements`, `x`, `y`, `time`, `resolution`, `resampling`,
        `group_by` and `crs`. Keyword arguments can either be listed
        directly in the `load_ard` call like any other parameter (e.g.
        `measurements=['nbart_red']`), or by passing in a query kwarg
        dictionary (e.g. `**query`). For a list of possible options, see
        the `dc.load` documentation:
        https://datacube-core.readthedocs.io/en/latest/dev/api/generate/datacube.Datacube.load.html

    Returns
    -------
    combined_ds : xarray Dataset
        An xarray dataset containing only satellite observations that
        contains greater than `min_gooddata` proportion of good quality
        pixels.
    '''

    #########
    # Setup #
    #########

    extras = deepcopy(extras)
    query = _dc_query_only(**extras)

    # We deal with `dask_chunks` separately
    dask_chunks = extras.pop('dask_chunks', None)
    requested_measurements = extras.pop('measurements', None)

    # Warn user if they combine lazy load with min_gooddata
    if (min_gooddata > 0.0) and dask_chunks is not None:
        warnings.warn("Setting 'min_gooddata' percentage to > 0.0 "
                      "will cause dask arrays to compute when "
                      "loading pixel-quality data to calculate "
                      "'good pixel' percentage. This can "
                      "slow the return of your dataset.")

    # Verify that products were provided
    if not products:
        raise ValueError(f'Please provide a list of product names '
                         f'to load data from. Valid options include '
                         f'{c1_products}, {c2_products} and {s2_products}')
    elif all(['level2' in product for product in products]):
        product_type = 'c2'
    elif all(['sr' in product for product in products]):
        product_type = 'c1'
    elif all(['s2' in product for product in products]):
        product_type = 's2'

    # If `measurements` are specified but do not include pixel quality bands,
    # add these to `measurements` according to collection
    if product_type == 'c2':
        print('Using pixel quality parameters for USGS Collection 2')
        fmask_band = 'quality_l2_aerosol'
    elif product_type == 'c1':
        print('Using pixel quality parameters for USGS Collection 1')
        fmask_band = 'pixel_qa'
    elif product_type == 's2':
        print('Using pixel quality parameters for Sentinel 2')
        fmask_band = 'scl'

    measurements = requested_measurements.copy() if requested_measurements else None
    if measurements:
        if fmask_band not in measurements:
            measurements.append(fmask_band)

    #################
    # Find datasets #
    #################

    # Extract datasets for each product using subset of dcload_kwargs
    dataset_list = []

    # Get list of datasets for each product
    print('Finding datasets')
    for product in products:

        # Obtain list of datasets for product
        print(f'    {product}')
        datasets = dc.find_datasets(product=product, **query)

        # Remove Landsat 7 SLC-off observations if ls7_slc_off=False
        # !!! Update when we have C2 LS7 !!!
        if not ls7_slc_off and product in ['ls7_usgs_sr_scene',
                                           'usgs_ls7e_level2_2']:
            print('    Ignoring SLC-off observations for ls7')
            datasets = [i for i in datasets
                        if i.time.begin < datetime.datetime(2003, 5, 31)]

        # Add any returned datasets to list
        dataset_list.extend(datasets)

    # Raise exception if no datasets are returned
    if len(dataset_list) == 0:
        raise ValueError("No data available for query: ensure that "
                         "the products specified have data for the "
                         "time and location requested")

    # If filter_func is specified, use this function to filter the list
    # of datasets prior to load
    if filter_func:
        print(f'Filtering datasets using filter function')
        dataset_list = [ds for ds in dataset_list if filter_func(ds)]

    # Raise exception if filtering removes all datasets
    if len(dataset_list) == 0:
        raise ValueError("No data available after filtering with "
                         "filter function")

    #############
    # Load data #
    #############

    # Note we always load using dask here so that
    # we can lazy load data before filtering by good data
    ds = dc.load(datasets=dataset_list,
                 measurements=measurements,
                 dask_chunks={} if dask_chunks is None else dask_chunks,
                 **extras)

    ###############
    # Apply masks #
    ###############

    # We need to distinguish between products due to different
    # "fmask" band properties

    # Collection 2 USGS
    if product_type == 'c2':
        if pq_categories_ls is None:
            quality_flags_prod = {'cloud_shadow': 'not_cloud_shadow',
                                  'cloud_or_cirrus': 'not_cloud_or_cirrus',
                                  'nodata': False}
        else:
            quality_flags_prod = pq_categories_ls

        pq_mask = masking.make_mask(ds[fmask_band], **quality_flags_prod)

    # Collection 1 USGS
    if product_type == 'c1':
        if pq_categories_ls is None:
            quality_flags_prod = {'cloud': 'no_cloud',
                                  'cloud_shadow': 'no_cloud_shadow',
                                  'nodata': False}
        else:
            quality_flags_prod = pq_categories_ls

        pq_mask = masking.make_mask(ds[fmask_band], **quality_flags_prod)

    # Sentinel 2
    if product_type == 's2':
        pq_mask = odc.algo.fmask_to_bool(ds[fmask_band],
                                         categories=pq_categories_s2)

    # Generate good quality data mask
    mask = None
    if mask_pixel_quality:
        print('Applying pixel quality/cloud mask')
        mask = pq_mask

    # Mask data if either of the above masks were generated
    if mask is not None:
        ds = odc.algo.keep_good_only(ds, where=mask)

    ####################
    # Filter good data #
    ####################

    # The good data percentage calculation has to load in all `fmask`
    # data, which can be slow. If the user has chosen no filtering
    # by using the default `min_gooddata = 0`, we can skip this step
    # completely to save processing time
    if min_gooddata > 0.0:

        # Compute good data for each observation as % of total pixels
        print('Counting good quality pixels for each time step')
        data_perc = (pq_mask.sum(axis=[1, 2], dtype='int32') /
                     (pq_mask.shape[1] * pq_mask.shape[2]))

        # Filter by `min_gooddata` to drop low quality observations
        total_obs = len(ds.time)
        ds = ds.sel(time=data_perc >= min_gooddata)
        print(f'Filtering to {len(ds.time)} out of {total_obs} '
              f'time steps with at least {min_gooddata:.1%} '
              f'good quality pixels')

    # Drop bands not originally requested by user
    if requested_measurements:
        ds = ds[requested_measurements]

    ###############
    # Return data #
    ###############

    # Set nodata values using odc.algo tools to reduce peak memory
    # use when converting data to a float32 dtype
    ds = odc.algo.to_f32(ds)

    # If user supplied dask_chunks, return data as a dask array without
    # actually loading it in
    if dask_chunks is not None:
        print(f'Returning {len(ds.time)} time steps as a dask array')
        return ds
    else:
        print(f'Loading {len(ds.time)} time steps')
        return ds.compute()
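
# Hedged usage sketch for load_ard; the query values (bounds, CRS, resolution)
# are assumptions for illustration. The product name and keyword arguments
# match those documented in the docstring above.
ds = load_ard(dc,
              products=['ls8_usgs_sr_scene'],
              min_gooddata=0.90,
              measurements=['red', 'green', 'blue'],
              x=(31.0, 31.2), y=(-25.0, -25.2),
              time=('2018-01-01', '2018-12-31'),
              output_crs='EPSG:6933', resolution=(-30, 30))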
def load_masked_usgs(dc,
                     products=None,
                     min_gooddata=0.0,
                     quality_flags=None,
                     mask_pixel_quality=True,
                     mask_invalid_data=True,
                     ls7_slc_off=True,
                     product_metadata=False,
                     dask_chunks={'time': 1},
                     lazy_load=False,
                     **dcload_kwargs):
    '''
    Loads USGS Landsat Collection 1 and Collection 2 data for multiple
    satellites (i.e. Landsat 5, 7, 8), and returns a single masked xarray
    dataset containing only observations that contain greater than a given
    proportion of good quality pixels. This can be used to extract clean
    time series of observations that are not affected by cloud, for example
    as an input to the `animated_timeseries` function from
    `deafrica-sandbox-notebooks/deafrica_plotting`.

    The proportion of good quality pixels is calculated by summing the
    pixels flagged as good quality in the product's pixel quality band
    (i.e. 'pixel_qa' for USGS Collection 1, and 'quality_l2_aerosol' for
    USGS Collection 2). By default non-cloudy or non-shadowed pixels are
    considered as good data, but this can be customised using the
    `quality_flags` parameter.

    MEMORY ISSUES: For large data extractions, it can be advisable to set
    `mask_pixel_quality=False`. The masking step coerces all numeric values
    to float32 when NaN values are inserted into the array, potentially
    causing your data to use twice the memory. Be aware that the resulting
    arrays will contain invalid values which may affect future analyses.

    Last modified: October 2019

    Parameters
    ----------
    dc : datacube Datacube object
        The Datacube to connect to, i.e. `dc = datacube.Datacube()`. This
        allows you to also use development datacubes if required.
    products : list
        A list of product names to load data from. Valid options for USGS
        Collection 1 are ['ls5_usgs_sr_scene', 'ls7_usgs_sr_scene',
        'ls8_usgs_sr_scene'], and for USGS Collection 2 are
        ['usgs_ls5t_level2_2', 'usgs_ls7e_level2_2', 'usgs_ls8c_level2_2'].
    min_gooddata : float, optional
        An optional float giving the minimum percentage of good quality
        pixels required for a satellite observation to be loaded. Defaults
        to 0.0 which will return all observations regardless of pixel
        quality (set to e.g. 0.99 to return only observations with more
        than 99% good quality pixels).
    quality_flags : dict, optional
        An optional dictionary that is used to generate a good quality
        pixel mask from the selected product's pixel quality band (i.e.
        'pixel_qa' for USGS Collection 1, and 'quality_l2_aerosol' for
        USGS Collection 2). This mask is used for both masking out low
        quality pixels (e.g. cloud or shadow), and for dropping
        observations entirely based on the above `min_gooddata`
        calculation. Default is None, which will apply the following mask
        for USGS Collection 1: `{'cloud': 'no_cloud',
        'cloud_shadow': 'no_cloud_shadow', 'nodata': False}`, and for USGS
        Collection 2: `{'cloud_shadow': 'not_cloud_shadow',
        'cloud_or_cirrus': 'not_cloud_or_cirrus', 'nodata': False}`.
    mask_pixel_quality : bool, optional
        An optional boolean indicating whether to apply the good data mask
        to all observations that were not filtered out for having less
        good quality pixels than `min_gooddata`. E.g. if
        `min_gooddata=0.99`, the filtered observations may still contain
        up to 1% poor quality pixels. False simply returns the resulting
        observations without masking out these pixels; True (the default)
        masks them out and sets them to NaN using the good data mask. This
        will convert numeric values to float32 which can cause memory
        issues; set to False to prevent this.
    ls7_slc_off : bool, optional
        An optional boolean indicating whether to include data from after
        the Landsat 7 SLC failure (i.e. SLC-off). Defaults to True, which
        keeps all Landsat 7 observations > May 31 2003.
    product_metadata : bool, optional
        An optional boolean indicating whether to return the dataset with
        a `product` variable that gives the name of the product that each
        observation in the time series came from (e.g.
        'usgs_ls8c_level2_2'). Defaults to False.
    dask_chunks : dict, optional
        An optional dictionary containing the coords and sizes you wish to
        create dask chunks over. Usually used in combination with
        `lazy_load=True` (see below). For example:
        `dask_chunks = {'x': 500, 'y': 500}`
    lazy_load : boolean, optional
        Setting this variable to True will delay the computation of the
        function until you explicitly run `ds.compute()`. If used in
        conjunction with `dask.distributed.Client()` this will allow for
        automatic parallel computation. Be aware that computation will
        still occur if min_gooddata > 0, as the pixel quality will be
        loaded to compute the 'good data' percentage.
    **dcload_kwargs :
        A set of keyword arguments to `dc.load` that define the
        spatiotemporal query used to extract data. This can include `x`,
        `y`, `time`, `resolution`, `resampling`, `group_by`, `crs` etc,
        and can either be listed directly in the `load_ard` call (e.g.
        `x=(150.0, 151.0)`), or by passing in a query kwarg (e.g.
        `**query`). For a full list of possible options, see:
        https://datacube-core.readthedocs.io/en/latest/dev/api/generate/datacube.Datacube.load.html

    Returns
    -------
    combined_ds : xarray Dataset
        An xarray dataset containing only satellite observations that
        contains greater than `min_gooddata` proportion of good quality
        pixels.
    '''

    # Due to a possible bug in xarray 0.13.0, define a temporary function
    # which converts dtypes in a way that preserves attributes
    def astype_attrs(da, dtype=np.float32):
        '''
        Loop through all data variables in the dataset, record attributes,
        convert to float32, then reassign attributes. If the data variable
        cannot be converted to float32 (e.g. for a non-numeric dtype like
        strings), skip and return the variable unchanged.
        '''
        try:
            da_attr = da.attrs
            da = da.astype(dtype)
            da = da.assign_attrs(**da_attr)
            return da
        except ValueError:
            return da

    # List of valid USGS Collection 1 products
    c1_products = ['ls5_usgs_sr_scene', 'ls7_usgs_sr_scene', 'ls8_usgs_sr_scene']

    # List of valid USGS Collection 2 products
    c2_products = ['usgs_ls5t_level2_2', 'usgs_ls7e_level2_2', 'usgs_ls8c_level2_2']

    # Verify that products were provided
    if not products:
        raise ValueError(f'Please provide a list of product names '
                         f'to load data from. Valid options include '
                         f'{c1_products} and {c2_products}')

    # Verify that all provided products are valid
    not_in_list = [i for i in products if i not in c1_products + c2_products]
    if not_in_list:
        raise ValueError(f'The product(s) {not_in_list} are not '
                         f'supported by this function. Valid options '
                         f'include {c1_products} and {c2_products}')

    # Warn user if they combine lazy load with min_gooddata
    if (min_gooddata > 0.0) & lazy_load:
        warnings.warn("Setting 'min_gooddata' percentage to > 0.0 "
                      "will cause dask arrays to compute when "
                      "loading pixel-quality data to calculate "
                      "'good pixel' percentage. This will "
                      "significantly slow the return of your dataset.")

    # Create a list to hold data for each product
    product_data = []

    # Iterate through each requested product
    for product in products:

        try:
            print(f'Loading {product} data')

            # Set quality band according to collection
            if product in c2_products:
                print('    Using pixel quality parameters for USGS Collection 2')
                quality_band = 'quality_l2_aerosol'
            elif product in c1_products:
                print('    Using pixel quality parameters for USGS Collection 1')
                quality_band = 'pixel_qa'

            # Set quality flags according to collection
            if (product in c2_products) and not quality_flags:
                quality_flags_prod = {'cloud_shadow': 'not_cloud_shadow',
                                      'cloud_or_cirrus': 'not_cloud_or_cirrus',
                                      'nodata': False}
            elif (product in c1_products) and not quality_flags:
                quality_flags_prod = {'cloud': 'no_cloud',
                                      'cloud_shadow': 'no_cloud_shadow',
                                      'nodata': False}
            elif quality_flags:
                quality_flags_prod = quality_flags

            # If `measurements` are specified but do not include fmask, add it
            if (('measurements' in dcload_kwargs) and
                    (quality_band not in dcload_kwargs['measurements'])):
                dcload_kwargs['measurements'].append(quality_band)

            # Load data
            try:
                ds = dc.load(product=f'{product}',
                             dask_chunks=dask_chunks,
                             **dcload_kwargs)
            except KeyError as e:
                raise ValueError(f'Band {e} does not exist in this product. '
                                 f'Verify all requested `measurements` exist '
                                 f'in {products}')

            # Keep a record of the original number of observations
            total_obs = len(ds.time)

            # Remove Landsat 7 SLC-off observations if ls7_slc_off=False
            if not ls7_slc_off and product in ['ls7_usgs_sr_scene',
                                               'usgs_ls7e_level2_2']:
                print('    Ignoring SLC-off observations for ls7')
                ds = ds.sel(time=ds.time < np.datetime64('2003-05-30'))

            # Identify all pixels not affected by cloud/shadow/invalid
            good_quality = masking.make_mask(ds[quality_band], **quality_flags_prod)

            # The good data percentage calculation has to load in all `fmask`
            # data, which can be slow. If the user has chosen no filtering
            # by using the default `min_gooddata = 0`, we can skip this step
            # completely to save processing time
            if min_gooddata > 0.0:

                # Compute good data for each observation as % of total pixels
                data_perc = (good_quality.sum(axis=1).sum(axis=1) /
                             (good_quality.shape[1] * good_quality.shape[2]))

                # Filter by `min_gooddata` to drop low quality observations
                ds = ds.sel(time=data_perc >= min_gooddata)
                print(f'    Filtering to {len(ds.time)} '
                      f'out of {total_obs} observations')

            # Optionally apply pixel quality mask to observations remaining
            # after the filtering step above to mask out all remaining
            # bad quality pixels
            if mask_pixel_quality & (len(ds.time) > 0):
                print('    Applying pixel quality mask')

                # First change dtype to float32, then mask out values using
                # `.where()`. By casting to float32, we prevent `.where()`
                # from automatically casting to float64, using 2x the memory.
                # We need to do this by applying a custom function to every
                # variable in the dataset instead of using `.astype()`, due
                # to a possible bug in xarray 0.13.0 that drops attributes
                ds = ds.apply(astype_attrs, dtype=np.float32, keep_attrs=True)
                ds = ds.where(good_quality)

            # Optionally add satellite/product name as a new variable
            if product_metadata:
                ds['product'] = xr.DataArray([product] * len(ds.time),
                                             [('time', ds.time)])

            # If any data was returned, add result to list
            if len(ds.time) > 0:
                product_data.append(ds.drop(quality_band))

        # If AttributeError due to there being no pixel quality variable
        # in the dataset, skip this product and move on to the next
        except AttributeError:
            print(f'    No data for {product}')

    # If any data was returned above, combine into one xarray
    if len(product_data) > 0:

        # Concatenate results and sort by time
        try:
            print(f'Combining and sorting data')
            combined_ds = xr.concat(product_data, dim='time').sortby('time')
        except KeyError as e:
            raise ValueError(f'The requested products {products} contain '
                             f'bands with non-matching names (e.g. {e}). Please '
                             f'select products with identical band names.')

        # If `lazy_load` is True, return data as a dask array without
        # actually loading it in
        if lazy_load:
            print(f'    Returning {len(combined_ds.time)} observations'
                  ' as a dask array')
            return combined_ds
        else:
            print(f'    Returning {len(combined_ds.time)} observations ')
            return combined_ds.compute()

    # If no data was returned:
    else:
        print('No data returned for query')
        return None
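
# Hedged usage sketch for load_masked_usgs; the query values are assumptions.
# `measurements` is passed through **dcload_kwargs, and the pixel quality band
# is appended automatically if it is missing.
ds = load_masked_usgs(dc,
                      products=['usgs_ls8c_level2_2'],
                      min_gooddata=0.95,
                      measurements=['red', 'green', 'blue'],
                      x=(31.0, 31.2), y=(-25.0, -25.2),
                      time=('2018-01-01', '2018-12-31'))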
autumn_valid = autumn.where(autumn != autumn.attrs['nodata'])
autumn_valid.plot(col='time', col_wrap=3)
plt.savefig('plot-nodata.png')

pq = dc.load(product='ls5_pq_albers',
             x=(132.2, 132.4),
             y=(-12.45, -12.14),
             time=('2003-01', '2003-12'))

pq_autumn = pq.pixelquality.loc['2003-3':'2003-8']
pq_autumn.plot(col='time', col_wrap=3)
plt.savefig('plot-pxlqual.png')

pandas.DataFrame.from_dict(masking.get_flags_def(pq), orient='index')

good_data = masking.make_mask(pq,
                              cloud_acca='no_cloud',
                              cloud_fmask='no_cloud',
                              contiguous=True)
autumn_good_data = good_data.pixelquality.loc['2003-3':'2003-8']
autumn_good_data.plot(col='time', col_wrap=3)
plt.savefig('plot-clouds.png')

autumn_cloud_free = autumn_valid.where(autumn_good_data)
autumn_cloud_free.plot(col='time', col_wrap=3)
plt.savefig('plot-cloudfree.png')

red = nbar.red.where(nbar.red != nbar.red.attrs['nodata'])
nir = nbar.nir.where(nbar.nir != nbar.nir.attrs['nodata'])
cloud_free = masking.make_mask(pq,
                               cloud_acca='no_cloud',
                               cloud_fmask='no_cloud',
                               contiguous=True).pixelquality
             y=(22, 21),
             time=(datetime(2017, 6, 1), datetime(2018, 1, 1)),
             group_by='solar_day',
             dask_chunks={'x': 2000, 'y': 2000})

sr['ndvi'] = (sr.nir - sr.red) / (sr.nir + sr.red)

terrain = dc.load(product='srtm_cgiar_mexico',
                  like=sr,
                  time=(datetime(1970, 1, 1), datetime(2018, 1, 1)),
                  dask_chunks={'x': 2000, 'y': 2000})

clear = masking.make_mask(sr.pixel_qa, clear=True)
sr_clear = sr.where(clear)
sr_clear2 = sr_clear.drop('pixel_qa')

sr_mean = sr_clear2.mean('time', keep_attrs=True)
sr_mean.rename({'blue': 'blue_mean', 'green': 'green_mean',
                'red': 'red_mean', 'nir': 'nir_mean',
                'swir1': 'swir1_mean', 'swir2': 'swir2_mean',
                'ndvi': 'ndvi_mean'}, inplace=True)

sr_min = sr_clear2.min('time', keep_attrs=True)
def loadBands():
    global time_sorted
    global nbar_clean
    global all_nbr_sorted
    global query
    global start_of_epoch
    global end_of_epoch
    global valid_bit

    # Define temporal range
    #start_of_epoch = '2000-01-01'
    # need a variable here that defines a rolling 'latest observation'
    #end_of_epoch = '2016-07-31'

    # Define wavelengths/bands of interest; remove this kwarg to retrieve all bands
    bands_of_interest = [
        #'blue',
        'green',
        #'red',
        'nir',
        #'swir1',
        'swir2'
    ]

    # Define sensors of interest
    sensor1 = 'ls8'
    sensor2 = 'ls7'
    sensor3 = 'ls5'

    # Group PQ by solar day to avoid idiosyncrasies of N/S overlap differences
    # in PQ algorithm performance
    pq_albers_product = dc.index.products.get_by_name(sensor1 + '_pq_albers')
    valid_bit = pq_albers_product.measurements['pixelquality'][
        'flags_definition']['contiguous']['bits']

    # Load sensor-specific band adjustment tuples for TSS
    ls5_tss_constant = 3983
    ls5_tss_exponent = 1.6246
    ls7_tss_constant = 3983
    ls7_tss_exponent = 1.6246
    ls8_tss_constant = 3957
    ls8_tss_exponent = 1.6436

    # Retrieve the NBAR and PQ data for sensor 1
    sensor1_nbar = dc.load(product=sensor1 + '_nbar_albers',
                           group_by='solar_day',
                           measurements=bands_of_interest,
                           **query)
    sensor1_pq = dc.load(product=sensor1 + '_pq_albers',
                         group_by='solar_day',
                         fuse_func=pq_fuser,
                         **query)
    affine = sensor1_nbar.affine

    # This line exists to make sure that there's a 1:1 match between NBAR and PQ
    sensor1_nbar = sensor1_nbar.sel(time=sensor1_pq.time)

    # Generate PQ masks and apply those masks to remove cloud, cloud shadow
    # and saturated observations
    s1_cloud_free = masking.make_mask(sensor1_pq, ga_good_pixel=True)
    s1_good_data = s1_cloud_free.pixelquality.loc[start_of_epoch:end_of_epoch]
    sensor1_nbar = sensor1_nbar.where(s1_good_data)

    # Fix the TSS coefficients for each sensor
    all_indices = [
        #'BRIGHT', 'GREEN', 'WET',
        'NDVI',
        'NBR',
        'NDWI',
        'TSS'
    ]
    sensor1_rsindex = {}
    for i, name in enumerate(all_indices):
        #sensor1_rsindex['BRIGHT'] = pd.DataFrame((s1[0]*0.3037)+(s1[1]*0.2793)+(s1[2]*0.4343)+(s1[3]*0.5585)+(s1[4]*0.5082)+(s1[0]*0.1863))
        #sensor1_rsindex['GREEN'] = pd.DataFrame((s1[0]*-0.2848)+(s1[1]*-0.2435)+(s1[2]*-0.5436)+(s1[3]*0.7243)+(s1[4]*0.0840)+(s1[0]*-0.1800))
        #sensor1_rsindex['WET'] = pd.DataFrame((s1[0]*0.1509)+(s1[1]*0.1793)+(s1[2]*0.3299)+(s1[3]*0.3406)+(s1[4]*-0.7112)+(s1[0]*-0.4572))
        #sensor1_rsindex['NDVI'] = ((sensor1_nbar['nir']-sensor1_nbar['red'])/(sensor1_nbar['nir']+sensor1_nbar['red']))
        #sensor1_rsindex['NDWI'] = ((sensor1_nbar['swir1']-sensor1_nbar['green'])/(sensor1_nbar['swir1']+sensor1_nbar['green']))
        sensor1_rsindex['NBR'] = (
            (sensor1_nbar['nir'] - sensor1_nbar['swir2']) /
            (sensor1_nbar['nir'] + sensor1_nbar['swir2']))
        # Need this to reference into a tuple - check with Damien
        #sensor1_rsindex['TSS'] = (ls8_tss_constant*((sensor1_nbar['green']+sensor1_nbar['red'])/20000)**ls8_tss_exponent)

    # Retrieve the NBAR and PQ data for sensor 2
    sensor2_nbar = dc.load(product=sensor2 + '_nbar_albers',
                           group_by='solar_day',
                           measurements=bands_of_interest,
                           **query)
    sensor2_pq = dc.load(product=sensor2 + '_pq_albers',
                         group_by='solar_day',
                         fuse_func=pq_fuser,
                         **query)
    sensor2_nbar = sensor2_nbar.sel(time=sensor2_pq.time)
    s2_cloud_free = masking.make_mask(sensor2_pq, ga_good_pixel=True)
    s2_good_data = s2_cloud_free.pixelquality.loc[start_of_epoch:end_of_epoch]
    sensor2_nbar = sensor2_nbar.where(s2_good_data)

    all_indices = [
        #'BRIGHT', 'GREEN', 'WET',
        'NDVI',
        'NBR',
        'NDWI',
        'TSS'
    ]
    sensor2_rsindex = {}
    for i, name in enumerate(all_indices):
        #sensor2_rsindex['BRIGHT'] = pd.DataFrame((s1[0]*0.3037)+(s1[1]*0.2793)+(s1[2]*0.4343)+(s1[3]*0.5585)+(s1[4]*0.5082)+(s1[0]*0.1863))
        #sensor2_rsindex['GREEN'] = pd.DataFrame((s1[0]*-0.2848)+(s1[1]*-0.2435)+(s1[2]*-0.5436)+(s1[3]*0.7243)+(s1[4]*0.0840)+(s1[0]*-0.1800))
        #sensor2_rsindex['WET'] = pd.DataFrame((s1[0]*0.1509)+(s1[1]*0.1793)+(s1[2]*0.3299)+(s1[3]*0.3406)+(s1[4]*-0.7112)+(s1[0]*-0.4572))
        #sensor2_rsindex['NDVI'] = ((sensor2_nbar['nir']-sensor2_nbar['red'])/(sensor2_nbar['nir']+sensor2_nbar['red']))
        #sensor2_rsindex['NDWI'] = ((sensor2_nbar['swir1']-sensor2_nbar['green'])/(sensor2_nbar['swir1']+sensor2_nbar['green']))
        sensor2_rsindex['NBR'] = (
            (sensor2_nbar['nir'] - sensor2_nbar['swir2']) /
            (sensor2_nbar['nir'] + sensor2_nbar['swir2']))
        #sensor2_rsindex['TSS'] = (ls7_tss_constant*((sensor2_nbar['green']+sensor2_nbar['red'])/20000)**ls7_tss_exponent)

    # Retrieve the NBAR and PQ data for sensor 3
    sensor3_nbar = dc.load(product=sensor3 + '_nbar_albers',
                           group_by='solar_day',
                           measurements=bands_of_interest,
                           **query)
    sensor3_pq = dc.load(product=sensor3 + '_pq_albers',
                         group_by='solar_day',
                         fuse_func=pq_fuser,
                         **query)
    sensor3_nbar = sensor3_nbar.sel(time=sensor3_pq.time)
    s3_cloud_free = masking.make_mask(sensor3_pq, ga_good_pixel=True)
    s3_good_data = s3_cloud_free.pixelquality.loc[start_of_epoch:end_of_epoch]
    sensor3_nbar = sensor3_nbar.where(s3_good_data)

    all_indices = [
        #'BRIGHT', 'GREEN', 'WET',
        'NDVI',
        'NBR',
        'NDWI',
        'TSS'
    ]
    sensor3_rsindex = {}
    for i, name in enumerate(all_indices):
        #sensor2_rsindex['BRIGHT'] = pd.DataFrame((s1[0]*0.3037)+(s1[1]*0.2793)+(s1[2]*0.4343)+(s1[3]*0.5585)+(s1[4]*0.5082)+(s1[0]*0.1863))
        #sensor2_rsindex['GREEN'] = pd.DataFrame((s1[0]*-0.2848)+(s1[1]*-0.2435)+(s1[2]*-0.5436)+(s1[3]*0.7243)+(s1[4]*0.0840)+(s1[0]*-0.1800))
        #sensor2_rsindex['WET'] = pd.DataFrame((s1[0]*0.1509)+(s1[1]*0.1793)+(s1[2]*0.3299)+(s1[3]*0.3406)+(s1[4]*-0.7112)+(s1[0]*-0.4572))
        #sensor3_rsindex['NDVI'] = ((sensor3_nbar['nir']-sensor3_nbar['red'])/(sensor3_nbar['nir']+sensor3_nbar['red']))
        #sensor3_rsindex['NDWI'] = ((sensor3_nbar['swir1']-sensor3_nbar['green'])/(sensor3_nbar['swir1']+sensor3_nbar['green']))
        sensor3_rsindex['NBR'] = (
            (sensor3_nbar['nir'] - sensor3_nbar['swir2']) /
            (sensor3_nbar['nir'] + sensor3_nbar['swir2']))
        #sensor3_rsindex['TSS'] = ((sensor3_nbar['green']+sensor3_nbar['red'])/2)
        #sensor3_rsindex['TSS'] = (ls5_tss_constant*((sensor3_nbar['green']+sensor3_nbar['red'])/20000)**ls5_tss_exponent)

    # Concatenate and sort the different sensor xarrays into a single xarray
    nbar_clean = xr.concat([sensor1_nbar, sensor2_nbar, sensor3_nbar],
                           dim='time')
    time_sorted = nbar_clean.time.argsort()
    nbar_clean = nbar_clean.isel(time=time_sorted)
    nbar_clean.attrs['affine'] = affine

    '''
    all_tss_sorted = xr.concat([sensor1_rsindex['TSS'],
                                sensor2_rsindex['TSS'],
                                sensor3_rsindex['TSS']], dim='time')
    time_sorted = all_tss_sorted.time.argsort()
    all_tss_sorted = all_tss_sorted.isel(time=time_sorted)
    '''
    """
    all_ndvi_sorted = xr.concat([sensor1_rsindex['NDVI'],
                                 sensor2_rsindex['NDVI'],
                                 sensor3_rsindex['NDVI']], dim='time')
    time_sorted = all_ndvi_sorted.time.argsort()
    all_ndvi_sorted = all_ndvi_sorted.isel(time=time_sorted)
    """

    all_nbr_sorted = xr.concat([sensor1_rsindex['NBR'],
                                sensor2_rsindex['NBR'],
                                sensor3_rsindex['NBR']], dim='time')
    time_sorted = all_nbr_sorted.time.argsort()
    all_nbr_sorted = all_nbr_sorted.isel(time=time_sorted)
    all_nbr_sorted.attrs['affine'] = affine

    # Clean up per-sensor xarrays to free up some memory
    del sensor1_nbar
    del sensor2_nbar
    del sensor3_nbar
    del sensor1_rsindex
    del sensor2_rsindex
    del sensor3_rsindex

    print('The number of time slices at this location is')
    print(all_nbr_sorted.shape[0])
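
# Hedged usage sketch for loadBands(): the function reads its spatial query
# and epoch bounds from module-level globals, so these must be assigned
# before the call. All values below are illustrative placeholders.
query = {
    'x': (142.0, 142.5),    # hypothetical longitude range
    'y': (-32.5, -32.0),    # hypothetical latitude range
    'crs': 'EPSG:4326',
}
start_of_epoch = '2000-01-01'
end_of_epoch = '2016-07-31'
loadBands()
print(nbar_clean)       # cloud-masked NBAR stack, sorted by time
print(all_nbr_sorted)   # matching NBR index stack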
print(lon_range, lat_range)
print(crs)

for platform in platform_list:
    product_name = '{}_{}_albers'.format(platform, product_type)
    print('Loading product: {}'.format(product_name))
    output_file = '/g/data/u46/users/dra547/erf_07_09_2013_' + product_name + '.cdf'
    print(output_file)

    dataset = dc.load(product=product_name,
                      x=lon_range,
                      y=lat_range,
                      time=(acq_min, acq_max),
                      group_by='solar_day',
                      crs=crs,
                      measurements=measurements_list)

    # Load PQ Mask
    mask_product = '{}_{}_albers'.format(platform, 'pq')
    sensor_pq = dc.load(product=mask_product,
                        group_by='solar_day',
                        fuse_func=ga_pq_fuser,
                        like=dataset)
    cloud_free = make_mask(sensor_pq.pixelquality, ga_good_pixel=True)

    dataset = dataset.where(cloud_free).fillna(-999).astype('int16')
    # Temporarily required until xarray issue #1009 gets into a release
    dataset.attrs['crs'] = sensor_pq.crs

    print(dataset)
    write_dataset_to_netcdf(dataset, output_file)
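
# Hedged sketch: reading one of the NetCDF files written above back in and
# restoring the -999 int16 sentinel to NaN. The filename follows the output
# pattern above but is otherwise an illustrative placeholder.
import xarray as xr

ds = xr.open_dataset('/g/data/u46/users/dra547/erf_07_09_2013_ls8_nbar_albers.cdf')
ds = ds.where(ds != -999)   # masked pixels were filled with -999 before the int16 cast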
def get_map(args):
    # Version parameter
    # GetMap 1.1.1 must be supported for Terria
    version = get_arg(args, "version", "WMS version",
                      permitted_values=["1.1.1", "1.3.0"])

    # CRS parameter
    if version == "1.1.1":
        crs_arg = "srs"
    else:
        crs_arg = "crs"
    crsid = get_arg(args, crs_arg, "Coordinate Reference System",
                    errcode=WMSException.INVALID_CRS,
                    permitted_values=service_cfg["published_CRSs"].keys())
    crs = geometry.CRS(crsid)

    # Layers and Styles parameters
    product = get_product_from_arg(args)
    styles = args.get("styles", "").split(",")
    if len(styles) != 1:
        raise WMSException("Multi-layer GetMap requests not supported")
    style_r = styles[0]
    if not style_r:
        style_r = product.platform.default_style
    style = product.platform.style_index.get(style_r)
    if not style:
        raise WMSException("Style %s is not defined" % style_r,
                           WMSException.STYLE_NOT_DEFINED,
                           locator="Style parameter")

    # Format parameter
    fmt = get_arg(args, "format", "image format",
                  errcode=WMSException.INVALID_FORMAT,
                  lower=True,
                  permitted_values=["image/png"])

    # BBox, height and width parameters
    geobox = _get_geobox(args, crs)

    # Zoom Factor
    zf = zoom_factor(args, crs)

    # Time parameter
    time = get_time(args, product)

    # Tiling
    tiler = RGBTileGenerator(product, geobox, time, style=style)
    dc = get_cube()
    datasets = tiler.datasets(dc.index)
    if style.pq_mask_flags:
        pq_datasets = tiler.datasets(dc.index, mask=True)
    else:
        pq_datasets = None

    if not datasets:
        body = _write_empty(geobox)
    elif zf < product.min_zoom:
        # Zoomed out too far to properly render data.
        # Construct a polygon which is the union of the extents of the
        # matching datasets.
        extent = None
        for ds in datasets:
            if extent:
                extent = extent.union(ds.extent)
            else:
                extent = ds.extent
        extent = extent.to_crs(geobox.crs)
        body = _write_polygon(geobox, extent, product.zoom_fill)
    else:
        masks = []
        data = tiler.data(datasets)
        for band in style.needed_bands:
            extent_mask = (data[band] != data[band].attrs['nodata'])
        if pq_datasets:
            # ??????
            # sources = datacube.Datacube.group_datasets(datasets, datacube.api.query.query_group_by())
            # pq_sources = datacube.Datacube.group_datasets(pq_datasets, datacube.api.query.query_group_by())
            # sources, pq_sources = xarray.align(sources, pq_sources)
            pq_data = tiler.data(pq_datasets, mask=True)
            mask = make_mask(pq_data, **style.pq_mask_flags)
            mask_data = mask.pixelquality
            masks.append(mask_data)
        if data:
            body = _write_png(data, style, extent_mask, *masks)
        else:
            body = _write_empty(geobox)
    release_cube(dc)
    return body, 200, resp_headers({"Content-Type": "image/png"})
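
# Hedged example of a GetMap request this handler would service, expressed
# as the parsed `args` mapping the function receives. Layer name, CRS and
# bounding box are illustrative placeholders; `crs` must be one of the CRSs
# published in `service_cfg`.
example_args = {
    "version": "1.3.0",
    "request": "GetMap",
    "layers": "ls8_nbart_albers",
    "styles": "",
    "crs": "EPSG:3577",
    "bbox": "1500000,-4000000,1600000,-3900000",
    "width": "256",
    "height": "256",
    "format": "image/png",
    "time": "2017-06-01",
}
body, status, headers = get_map(example_args)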
def load_clearlandsat(dc, query, sensors=('ls5', 'ls7', 'ls8'), product='nbart',
                      bands_of_interest=None, masked_prop=0.99, mask_dict=None,
                      mask_pixel_quality=False, mask_invalid_data=True,
                      ls7_slc_off=False, satellite_metadata=False):
    """Load cloud-free data from multiple Landsat satellites as an xarray dataset

    Loads Landsat NBAR, NBART or FC25 and PQ data for multiple sensors (i.e.
    ls5, ls7, ls8) and returns a single xarray dataset containing only
    observations that contain greater than a given proportion of good quality
    pixels. This function can be used to extract visually appealing time
    series of observations that are not affected by cloud, for example as an
    input to the `animated_timeseries` function from `DEAPlotting`.

    The proportion of clear pixels is calculated by summing the pixels that
    are not flagged as being poor quality in the Landsat PQ25 layer. By
    default only cloudy pixels or pixels that are missing data in any band
    are used to calculate the number of poor quality pixels, but this can be
    customised using the `mask_dict` parameter.

    Last modified: October 2018
    Author: Robbi Bishop-Taylor, Bex Dunn

    Parameters
    ----------
    dc : datacube Datacube object
        A specific Datacube to import from, i.e.
        `dc = datacube.Datacube(app='Clear Landsat')`. This allows you to
        also use development datacubes if they have been imported into the
        environment.
    query : dict
        A dict containing the query bounds. Can include lat/lon, time etc.
        If no `time` query is given, the function defaults to all timesteps
        available to all sensors (e.g. 1987-2018).
    sensors : list, optional
        An optional list of Landsat sensor names to load data for. Options
        are 'ls5', 'ls7', 'ls8'; defaults to all.
    product : str, optional
        An optional string specifying 'nbar', 'nbart' or 'fc'. Defaults to
        'nbart'. For information on the difference, see the
        '02_DEA_datasets/Introduction_to_Landsat' or
        '02_DEA_datasets/Introduction_to_Fractional_Cover' notebooks from
        DEA-notebooks.
    bands_of_interest : list, optional
        An optional list of strings containing the bands to be read in;
        options include 'red', 'green', 'blue', 'nir', 'swir1', 'swir2';
        defaults to all available bands if no bands are specified.
    masked_prop : float, optional
        An optional float giving the minimum percentage of clear pixels
        required for a Landsat observation to be loaded. Defaults to 0.99
        (i.e. only return observations with less than 1% of poor quality
        pixels).
    mask_dict : dict, optional
        An optional dict of arguments to the `masking.make_mask` function
        that can be used to identify good/poor quality pixels from the PQ
        layer using alternative masking criteria. The default value of None
        masks out pixels flagged as cloud by either the ACCA or Fmask
        algorithms, or pixels that are missing data in any band (equivalent
        to: `mask_dict={'cloud_acca': 'no_cloud', 'cloud_fmask': 'no_cloud',
        'contiguous': True}`). See the
        `02_DEA_datasets/Introduction_to_LandsatPQ.ipynb` notebook on DEA
        Notebooks for a list of all possible options.
    mask_pixel_quality : bool, optional
        An optional boolean indicating whether to apply the pixel quality
        mask to all observations that were not filtered out for having fewer
        good quality pixels than `masked_prop`. For example, if
        `masked_prop=0.99`, the filtered images may still contain up to 1%
        poor quality pixels. The default of False simply returns the
        resulting observations without masking out these pixels; True masks
        them out and sets them to NaN using the pixel quality mask, but has
        the side effect of changing the data type of the output arrays from
        int16 to float64 which can cause memory issues. To reduce memory
        usage, set to False.
    mask_invalid_data : bool, optional
        An optional boolean indicating whether invalid -999 nodata values
        should be replaced with NaN. Defaults to True; this has the side
        effect of changing the data type of the output arrays from int16 to
        float64 which can cause memory issues. To reduce memory usage, set
        to False.
    ls7_slc_off : bool, optional
        An optional boolean indicating whether to include data from after
        the Landsat 7 SLC failure (i.e. SLC-off). Defaults to False, which
        removes all Landsat 7 observations after May 31 2003.
    satellite_metadata : bool, optional
        An optional boolean indicating whether to return the dataset with a
        `satellite` variable that gives the name of the satellite that made
        each observation in the timeseries (i.e. ls5, ls7, ls8). Defaults
        to False.

    Returns
    -------
    combined_ds : xarray Dataset
        An xarray dataset containing only Landsat observations that contain
        greater than `masked_prop` proportion of clear pixels.

    Notes
    -----
    Memory issues: For large data extractions, it is recommended that you
    set both `mask_pixel_quality=False` and `mask_invalid_data=False`.
    Otherwise, all output variables will be coerced to float64 when NaN
    values are inserted into the array, potentially causing your data to use
    4x as much memory. Be aware that the resulting arrays will contain
    invalid -999 values which should be considered in analyses.

    Example
    -------
    >>> # Import modules
    >>> import datacube
    >>> import sys
    >>> # Import dea-notebooks functions using relative link to 10_Scripts directory
    >>> sys.path.append('../10_Scripts')
    >>> import DEADataHandling
    >>> # Connect to a datacube containing Landsat data
    >>> dc = datacube.Datacube(app='load_clearlandsat')
    >>> # Set up spatial and temporal query
    >>> query = {'x': (954163, 972163),
    ...          'y': (-3573891, -3555891),
    ...          'time': ('2011-06-01', '2013-06-01'),
    ...          'crs': 'EPSG:3577'}
    >>> # Load observations with less than 25% cloud from ls5, ls7 and ls8 as a single combined dataset
    >>> landsat_ds = DEADataHandling.load_clearlandsat(dc=dc, query=query, sensors=['ls5', 'ls7', 'ls8'],
    ...                                                bands_of_interest=['red', 'green', 'blue'],
    ...                                                masked_prop=0.75, mask_pixel_quality=True, ls7_slc_off=True)
    Loading ls5 pixel quality
        Loading 4 filtered ls5 timesteps
    Loading ls7 pixel quality
        Loading 29 filtered ls7 timesteps
    Loading ls8 pixel quality
        Loading 3 filtered ls8 timesteps
    Combining and sorting ls5, ls7, ls8 data
        Replacing invalid -999 values with NaN (data will be coerced to float64)
    >>> # Test that function returned data
    >>> len(landsat_ds.time) > 0
    True
    """

    # List to save results from each sensor and list to keep names of
    # successfully processed sensors
    filtered_sensors = []
    successfully_returned = []

    # Iterate through all sensors, returning only observations with
    # > masked_prop clear pixels
    for sensor in sensors:
        try:
            # If bands of interest are given, assign measurements in the
            # dc.load call. This is for compatibility with the existing
            # dea-notebooks load_nbarx function.
            if bands_of_interest:
                # Lazily load Landsat data using dask
                data = dc.load(product=f'{sensor}_{product}_albers',
                               measurements=bands_of_interest,
                               group_by='solar_day',
                               dask_chunks={'time': 1},
                               **query)
            # If no bands of interest are given, run without specifying
            # measurements and therefore return all available bands
            else:
                # Lazily load Landsat data using dask
                data = dc.load(product=f'{sensor}_{product}_albers',
                               group_by='solar_day',
                               dask_chunks={'time': 1},
                               **query)

            # Load PQ data
            pq = dc.load(product=f'{sensor}_pq_albers',
                         group_by='solar_day',
                         fuse_func=ga_pq_fuser,
                         dask_chunks={'time': 1},
                         **query)

            # Remove Landsat 7 SLC-off from PQ layer if ls7_slc_off=False
            if not ls7_slc_off and sensor == 'ls7':
                print('Ignoring SLC-off observations for ls7')
                data = data.sel(time=data.time < np.datetime64('2003-05-30'))

            # Return only Landsat observations that have matching PQ data
            time = (data.time - pq.time).time
            data = data.sel(time=time)
            pq = pq.sel(time=time)

            # Load PQ data using dask
            print('Loading {} pixel quality'.format(sensor))
            pq = pq.compute()

            # If a custom dict is provided for mask_dict, use these values
            # to make a mask from PQ
            if mask_dict:
                # Mask PQ using custom values by unpacking mask_dict **kwargs
                good_quality = masking.make_mask(pq.pixelquality, **mask_dict)
            else:
                # Identify pixels with no clouds in either ACCA or Fmask
                good_quality = masking.make_mask(pq.pixelquality,
                                                 cloud_acca='no_cloud',
                                                 cloud_fmask='no_cloud',
                                                 contiguous=True)

            # Compute good data for each observation as a percentage of
            # total array pixels
            data_perc = good_quality.sum(dim=['x', 'y']) / (
                good_quality.shape[1] * good_quality.shape[2])

            # Add data_perc data to Landsat dataset as a new xarray variable
            data['data_perc'] = xr.DataArray(data_perc, [('time', data.time)])

            # Filter by data_perc to drop low quality observations and
            # finally import data using dask
            filtered = data.sel(time=data.data_perc >= masked_prop)
            print(f'    Loading {len(filtered.time)} filtered {sensor} timesteps')
            filtered = filtered.compute()

            # Optionally apply pixel quality mask to all observations that
            # were not dropped in the previous step
            if mask_pixel_quality:
                filtered = filtered.where(good_quality)

            # Optionally add satellite name variable
            if satellite_metadata:
                filtered['satellite'] = xr.DataArray(
                    [sensor] * len(filtered.time), [('time', filtered.time)])

            # Append result to list and add sensor name to the list of
            # successfully processed sensors
            filtered_sensors.append(filtered)
            successfully_returned.append(sensor)

            # Close datasets
            filtered = None
            good_quality = None
            data = None
            pq = None

        except:
            # If there is no data for the sensor or if another error occurs:
            print(f'Loading {sensor} pixel quality\n'
                  f'    Skipping {sensor}; no valid data for query')

    # Concatenate all sensors into one big xarray dataset, then sort by time
    sensor_string = ", ".join(successfully_returned)
    print(f'Combining and sorting {sensor_string} data')
    combined_ds = xr.concat(filtered_sensors, dim='time')
    combined_ds = combined_ds.sortby('time')

    # Optionally filter to replace nodata values with NaN
    if mask_invalid_data:
        print('    Replacing invalid -999 values with NaN '
              '(data will be coerced to float64)')
        combined_ds = masking.mask_invalid_data(combined_ds)

    # Return combined dataset
    return combined_ds
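
# Hedged example: a stricter custom `mask_dict` that also screens out cloud
# shadow and saturated pixels (the same PQ flags used elsewhere in this
# document), passed through to `masking.make_mask` via **kwargs. Assumes `dc`
# and `query` are defined as in the docstring example above.
strict_mask = {'cloud_acca': 'no_cloud',
               'cloud_fmask': 'no_cloud',
               'cloud_shadow_acca': 'no_cloud_shadow',
               'cloud_shadow_fmask': 'no_cloud_shadow',
               'blue_saturated': False,
               'green_saturated': False,
               'red_saturated': False,
               'contiguous': True}
landsat_ds = load_clearlandsat(dc=dc, query=query,
                               mask_dict=strict_mask,
                               masked_prop=0.9)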
def calcMangNDVIMangPxlFromCube(tileNCFile, tileNCAMCFile, tileNCCMCFile,
                                tileAFile, minLat, maxLat, minLon, maxLon,
                                year, mangShpExt, ndviThresLow, ndviThresHigh):
    dc = datacube.Datacube(app='CalcAnnualMangroveExtent')

    # Define wavelengths/bands of interest; remove this kwarg to retrieve all bands
    bands_of_interest = ['red', 'nir']

    # Define sensors of interest
    sensors = ['ls8', 'ls7', 'ls5']

    # Define temporal range
    start_of_epoch = year + '-01-01'
    # Latest observation
    end_of_epoch = year + '-12-31'

    query = {
        'time': (start_of_epoch, end_of_epoch),
    }
    query['x'] = (minLon, maxLon)
    query['y'] = (maxLat, minLat)
    query['crs'] = 'EPSG:4326'

    # Define which pixel quality artefacts you want removed from the results
    mask_components = {
        'cloud_acca': 'no_cloud',
        'cloud_shadow_acca': 'no_cloud_shadow',
        'cloud_shadow_fmask': 'no_cloud_shadow',
        'cloud_fmask': 'no_cloud',
        'blue_saturated': False,
        'green_saturated': False,
        'red_saturated': False,
        'nir_saturated': False,
        'swir1_saturated': False,
        'swir2_saturated': False,
        'contiguous': True
    }

    print("Read pixel image data into memory.")
    sensor_clean = {}
    for sensor in sensors:
        print(sensor)
        # Load the NBAR and corresponding PQ
        sensor_nbar = dc.load(product=sensor + '_nbar_albers',
                              group_by='solar_day',
                              measurements=bands_of_interest,
                              **query)
        if bool(sensor_nbar):
            sensor_pq = dc.load(product=sensor + '_pq_albers',
                                group_by='solar_day',
                                fuse_func=pq_fuser,
                                **query)
            # Get the projection info
            crswkt = sensor_nbar.crs.wkt
            affine = sensor_nbar.affine

            # Apply the PQ masks to the NBAR
            cloud_free = masking.make_mask(sensor_pq, **mask_components)
            good_data = cloud_free.pixelquality.loc[start_of_epoch:end_of_epoch]
            sensor_nbar = sensor_nbar.where(good_data)
            sensor_clean[sensor] = sensor_nbar

    if bool(sensor_clean):
        print("Merge data from different sensors.")
        nbar_clean = xarray.concat(sensor_clean.values(), dim='time')
        time_sorted = nbar_clean.time.argsort()
        nbar_clean = nbar_clean.isel(time=time_sorted)
        nbar_clean.attrs['affine'] = affine
        nbar_clean.attrs['crs'] = crswkt

        print("'Clean' up the Red and NIR bands to remove any values less than zero.")
        nbar_clean['red'] = nbar_clean.red.where(nbar_clean.red > 0)
        nbar_clean['nir'] = nbar_clean.nir.where(nbar_clean.nir > 0)

        print("Calculate NDVI.")
        ndvi = ((nbar_clean.nir - nbar_clean.red) /
                (nbar_clean.nir + nbar_clean.red))
        ndvi.attrs['affine'] = affine
        ndvi.attrs['crs'] = crswkt

        print("Create Composite")
        ndviMean = ndvi.mean(dim='time')
        ndviMean.attrs['affine'] = affine
        ndviMean.attrs['crs'] = crswkt

        print("Rasterise the GMW extent map for the area of interest.")
        # Define pixel size and NoData value of the new raster
        xres = nbar_clean.attrs['affine'][0]
        yres = nbar_clean.attrs['affine'][4]
        noDataVal = -9999

        # Set the geotransform properties
        xcoord = ndviMean.coords['x'].min()
        ycoord = ndviMean.coords['y'].max()
        geotransform = (xcoord - (xres * 0.5), xres, 0,
                        ycoord + (yres * 0.5), 0, yres)

        # Open the data source and read in the extent
        source_ds = ogr.Open(mangShpExt)
        source_layer = source_ds.GetLayer()
        source_srs = source_layer.GetSpatialRef()
        # This is the extent of Australia
        vx_min, vx_max, vy_min, vy_max = source_layer.GetExtent()

        # Create the destination extent
        yt, xt = ndviMean.shape

        # Set up an in-memory GDAL image (one byte band) to rasterise the
        # shapefile into
        target_ds = gdal.GetDriverByName('MEM').Create('', xt, yt, 1,
                                                       gdal.GDT_Byte)
        target_ds.SetGeoTransform(geotransform)
        albers = osr.SpatialReference()
        albers.ImportFromEPSG(3577)
        target_ds.SetProjection(albers.ExportToWkt())
        band = target_ds.GetRasterBand(1)
        band.SetNoDataValue(noDataVal)

        # Rasterise
        gdal.RasterizeLayer(target_ds, [1], source_layer, burn_values=[1])

        # Read the GMW mask as an array
        gmwMaskArr = band.ReadAsArray()

        print("Apply the GMW Mask to the NDVI values")
        mangroveNDVIMean = ndviMean.where(gmwMaskArr == 1)

        print("Apply thresholds to NDVI to find total mangrove mask and "
              "closed canopy mangrove mask.")
        mangroveAreaPxlC = mangroveNDVIMean > ndviThresLow
        clMangroveAreaPxlC = mangroveNDVIMean > ndviThresHigh

        print("Calculate the number of pixels within the mangrove mask and "
              "write to CSV file.")
        numMangPxls = numpy.sum(mangroveAreaPxlC.data)
        numClMangPxls = numpy.sum(clMangroveAreaPxlC.data)
        pxlCountSeries = pandas.Series([numMangPxls, numClMangPxls],
                                       index=['MangPxls', 'MangPxlsCl'])
        pxlCountSeries.to_csv(tileAFile)

        mangroveAreaPxlC.attrs['affine'] = affine
        mangroveAreaPxlC.attrs['crs'] = crswkt
        print("Save MangroveAreaPxlC to netcdf")
        mangroveAreaPxlC.to_netcdf(path=tileNCAMCFile, mode='w')

        clMangroveAreaPxlC.attrs['affine'] = affine
        clMangroveAreaPxlC.attrs['crs'] = crswkt
        print("Save clMangroveAreaPxlC to netcdf")
        clMangroveAreaPxlC.to_netcdf(path=tileNCCMCFile, mode='w')

        mangroveNDVIMean.attrs['affine'] = affine
        mangroveNDVIMean.attrs['crs'] = crswkt
        print("Save Composite to netcdf")
        mangroveNDVIMean.to_netcdf(path=tileNCFile, mode='w')
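
# Hedged usage sketch for calcMangNDVIMangPxlFromCube; every path, extent and
# threshold below is an illustrative placeholder. Note that `year` is used in
# string concatenation above and so is passed as a string.
calcMangNDVIMangPxlFromCube(tileNCFile='ndvi_composite.nc',
                            tileNCAMCFile='mangrove_mask.nc',
                            tileNCCMCFile='closed_canopy_mask.nc',
                            tileAFile='pixel_counts.csv',
                            minLat=-18.0, maxLat=-17.5,
                            minLon=139.0, maxLon=139.5,
                            year='2016',
                            mangShpExt='gmw_extent.shp',
                            ndviThresLow=0.4, ndviThresHigh=0.7)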
def load_ard(dc,
             products=None,
             min_gooddata=0.0,
             pq_categories_s2=['vegetation', 'snow or ice', 'water',
                               'bare soils', 'unclassified',
                               'dark area pixels'],
             pq_categories_ls=None,
             mask_pixel_quality=True,
             ls7_slc_off=True,
             predicate=None,
             dtype='auto',
             **kwargs):
    '''
    Loads and combines Landsat Collections 1 or 2, and Sentinel-2 for
    multiple sensors (i.e. ls5t, ls7e and ls8c for Landsat; s2a and s2b for
    Sentinel-2), optionally applies pixel quality masks, and keeps only time
    steps that contain greater than a minimum proportion of good quality
    (e.g. non-cloudy or non-shadowed) pixels.

    The function supports loading the following DEA Africa products:

        ls5_usgs_sr_scene
        ls7_usgs_sr_scene
        ls8_usgs_sr_scene
        usgs_ls8c_level2_2
        ga_ls8c_fractional_cover_2
        s2a_msil2a
        s2b_msil2a

    Last modified: March 2020

    Parameters
    ----------
    dc : datacube Datacube object
        The Datacube to connect to, i.e. `dc = datacube.Datacube()`. This
        allows you to also use development datacubes if required.
    products : list
        A list of product names to load data from. Valid options are
        Landsat C1: ['ls5_usgs_sr_scene', 'ls7_usgs_sr_scene',
        'ls8_usgs_sr_scene'],
        Landsat C2: ['usgs_ls8c_level2_2'],
        Sentinel-2: ['s2a_msil2a', 's2b_msil2a']
    min_gooddata : float, optional
        An optional float giving the minimum percentage of good quality
        pixels required for a satellite observation to be loaded. Defaults
        to 0.0 which will return all observations regardless of pixel
        quality (set to e.g. 0.99 to return only observations with more
        than 99% good quality pixels).
    pq_categories_s2 : list, optional
        An optional list of Sentinel-2 Scene Classification Layer (SCL)
        names to treat as good quality observations in the above
        `min_gooddata` calculation. The default is ['vegetation',
        'snow or ice', 'water', 'bare soils', 'unclassified',
        'dark area pixels'] which will return non-cloudy or non-shadowed
        land, snow, water, veg, and non-veg pixels.
    pq_categories_ls : dict, optional
        An optional dictionary that is used to generate a good quality
        pixel mask from the selected USGS product's pixel quality band
        (i.e. 'pixel_qa' for USGS Collection 1, and 'quality_l2_aerosol'
        for USGS Collection 2). This mask is used for both masking out low
        quality pixels (e.g. cloud or shadow), and for dropping
        observations entirely based on the above `min_gooddata`
        calculation. Default is None, which will apply the following mask
        for USGS Collection 1: `{'cloud': 'no_cloud',
        'cloud_shadow': 'no_cloud_shadow', 'nodata': False}`, and for USGS
        Collection 2: `{'cloud_shadow': 'not_cloud_shadow',
        'cloud_or_cirrus': 'not_cloud_or_cirrus', 'nodata': False}`.
    mask_pixel_quality : bool, optional
        An optional boolean indicating whether to apply the good data mask
        to all observations that were not filtered out for having fewer
        good quality pixels than `min_gooddata`. E.g. if
        `min_gooddata=0.99`, the filtered observations may still contain
        up to 1% poor quality pixels. False simply returns the resulting
        observations without masking out these pixels; True (the default)
        masks them and sets them to NaN using the good data mask. This
        will convert numeric values to floating point values which can
        cause memory issues; set to False to prevent this.
    ls7_slc_off : bool, optional
        An optional boolean indicating whether to include data from after
        the Landsat 7 SLC failure (i.e. SLC-off). Defaults to True, which
        keeps all Landsat 7 observations, including those acquired after
        May 31 2003.
    predicate : function, optional
        An optional function that can be passed in to restrict the
        datasets that are loaded by the function. A filter function should
        take a `datacube.model.Dataset` object as an input (i.e. as
        returned from `dc.find_datasets`), and return a boolean. For
        example, a filter function could be used to return True on only
        datasets acquired in January:
        `dataset.time.begin.month == 1`
    dtype : string, optional
        An optional parameter that controls the data type/dtype that
        layers are coerced to after loading. Valid values: 'native',
        'auto', 'float{16|32|64}'. When 'auto' is used, the data will be
        converted to `float32` if masking is used, otherwise data will be
        returned in the native data type of the data. Be aware that if
        data is loaded in its native dtype, nodata and masked pixels will
        be returned with the data's native nodata value (typically -999),
        not NaN.
    **kwargs :
        A set of keyword arguments to `dc.load` that define the
        spatiotemporal query used to extract data. This typically includes
        `measurements`, `x`, `y`, `time`, `resolution`, `resampling`,
        `group_by` and `crs`. Keyword arguments can either be listed
        directly in the `load_ard` call like any other parameter (e.g.
        `measurements=['nbart_red']`), or by passing in a query kwarg
        dictionary (e.g. `**query`). For a list of possible options, see
        the `dc.load` documentation:
        https://datacube-core.readthedocs.io/en/latest/dev/api/generate/datacube.Datacube.load.html

    Returns
    -------
    combined_ds : xarray Dataset
        An xarray dataset containing only satellite observations that
        contain greater than `min_gooddata` proportion of good quality
        pixels.
    '''

    #########
    # Setup #
    #########

    # Prevent function from altering the original query object
    kwargs = deepcopy(kwargs)

    # We deal with `dask_chunks` separately
    dask_chunks = kwargs.pop('dask_chunks', None)
    requested_measurements = kwargs.pop('measurements', None)

    # Warn user if they combine lazy load with min_gooddata
    if (min_gooddata > 0.0) and dask_chunks is not None:
        warnings.warn("Setting 'min_gooddata' percentage to > 0.0 "
                      "will cause dask arrays to compute when "
                      "loading pixel-quality data to calculate "
                      "'good pixel' percentage. This can "
                      "slow the return of your dataset.")

    # Verify that products were provided and determine if Sentinel-2
    # or Landsat data is being loaded
    if not products:
        raise ValueError('Please provide a list of product names '
                         'to load data from.')
    elif all(['level2' in product for product in products]):
        product_type = 'c2'
    elif all(['sr' in product for product in products]):
        product_type = 'c1'
    elif all(['s2' in product for product in products]):
        product_type = 's2'
    elif all(['fractional_cover' in product for product in products]):
        product_type = 'fc'
    else:
        # Mixed or unrecognised product lists cannot be masked consistently
        raise ValueError('All products must come from the same collection '
                         '(Landsat C1, Landsat C2, Sentinel-2 or '
                         'fractional cover)')

    # Set the pixel quality band according to collection
    if (product_type == 'c2') or (product_type == 'fc'):
        print('Using pixel quality parameters for USGS Collection 2')
        fmask_band = 'quality_l2_aerosol'
    elif product_type == 'c1':
        print('Using pixel quality parameters for USGS Collection 1')
        fmask_band = 'pixel_qa'
    elif product_type == 's2':
        print('Using pixel quality parameters for Sentinel 2')
        fmask_band = 'scl'

    measurements = (requested_measurements.copy()
                    if requested_measurements else None)

    # Deal with the "load all" case: pick a set of bands common across
    # all products
    if measurements is None:
        if product_type == 'fc':
            measurements = ['pv', 'npv', 'bs', 'ue']
        else:
            measurements = _common_bands(dc, products)

    # If `measurements` are specified but do not include the pixel quality
    # band, add it (skip for FC, which gets its PQ from a separate product)
    if measurements and product_type != 'fc':
        if fmask_band not in measurements:
            measurements.append(fmask_band)

    # Get lists of data and mask bands so that we can later exclude
    # mask bands from being masked themselves
    if product_type != 'fc':
        data_bands = [band for band in measurements
                      if band not in (fmask_band,)]
        mask_bands = [band for band in measurements
                      if band not in data_bands]

    #################
    # Find datasets #
    #################

    # Pull out query params only to pass to dc.find_datasets
    query = _dc_query_only(**kwargs)

    # Extract datasets for each product using a subset of dcload_kwargs
    dataset_list = []

    # Get list of datasets for each product
    print('Finding datasets')
    for product in products:

        # Obtain list of datasets for product
        print(f'    {product}')
        datasets = dc.find_datasets(product=product, **query)

        # Remove Landsat 7 SLC-off observations if ls7_slc_off=False
        if not ls7_slc_off and product in ['ls7_usgs_sr_scene',
                                           'usgs_ls7e_level2_2']:
            print('    Ignoring SLC-off observations for ls7')
            datasets = [i for i in datasets
                        if i.time.begin < datetime.datetime(2003, 5, 31)]

        # Add any returned datasets to list
        dataset_list.extend(datasets)

    # Raise exception if no datasets are returned
    if len(dataset_list) == 0:
        raise ValueError("No data available for query: ensure that "
                         "the products specified have data for the "
                         "time and location requested")

    # If predicate is specified, use this function to filter the list
    # of datasets prior to load
    if predicate:
        print('Filtering datasets using filter function')
        dataset_list = [ds for ds in dataset_list if predicate(ds)]

    # Raise exception if filtering removes all datasets
    if len(dataset_list) == 0:
        raise ValueError("No data available after filtering with "
                         "filter function")

    # Load fmask from USGS C2 for masking FC, and filter if required.
    # NOTE: This works because only one sensor (ls8) has FC; if/when FC is
    # calculated for LS7 and LS5, this section will need to move into the
    # for loop above.
    if product_type == 'fc':

        print('    PQ data from USGS C2')
        dataset_list_fc_pq = dc.find_datasets(product='usgs_ls8c_level2_2',
                                              **query)

        if predicate:
            print('Filtering datasets using filter function')
            dataset_list_fc_pq = [ds for ds in dataset_list_fc_pq
                                  if predicate(ds)]

    #############
    # Load data #
    #############

    # Note we always load using dask here so that we can lazily load data
    # before filtering by good data
    ds = dc.load(datasets=dataset_list,
                 measurements=measurements,
                 dask_chunks={} if dask_chunks is None else dask_chunks,
                 **kwargs)

    if product_type == 'fc':
        ds_fc_pq = dc.load(datasets=dataset_list_fc_pq,
                           dask_chunks={} if dask_chunks is None
                           else dask_chunks,
                           **kwargs)

    ####################
    # Filter good data #
    ####################

    # Need to distinguish between products due to different pixel quality
    # band properties

    # Collection 2 USGS or FC
    if (product_type == 'c2') or (product_type == 'fc'):
        if pq_categories_ls is None:
            quality_flags_prod = {'cloud_shadow': 'not_cloud_shadow',
                                  'cloud_or_cirrus': 'not_cloud_or_cirrus',
                                  'nodata': False}
        else:
            quality_flags_prod = pq_categories_ls

        if product_type == 'fc':
            pq_mask = masking.make_mask(ds_fc_pq[fmask_band],
                                        **quality_flags_prod)
        else:
            pq_mask = masking.make_mask(ds[fmask_band],
                                        **quality_flags_prod)

    # Collection 1 USGS
    if product_type == 'c1':
        if pq_categories_ls is None:
            quality_flags_prod = {'cloud': 'no_cloud',
                                  'cloud_shadow': 'no_cloud_shadow',
                                  'nodata': False}
        else:
            quality_flags_prod = pq_categories_ls

        pq_mask = masking.make_mask(ds[fmask_band], **quality_flags_prod)

    # Sentinel 2
    if product_type == 's2':
        pq_mask = odc.algo.fmask_to_bool(ds[fmask_band],
                                         categories=pq_categories_s2)

    # The good data percentage calculation has to load in all `fmask` data,
    # which can be slow. If the user has chosen no filtering by using the
    # default `min_gooddata = 0`, we can skip this step completely to save
    # processing time
    if min_gooddata > 0.0:

        # Compute good data for each observation as % of total pixels
        print('Counting good quality pixels for each time step')
        data_perc = (pq_mask.sum(axis=[1, 2], dtype='int32') /
                     (pq_mask.shape[1] * pq_mask.shape[2]))
        keep = data_perc >= min_gooddata

        # Filter by `min_gooddata` to drop low quality observations
        total_obs = len(ds.time)
        ds = ds.sel(time=keep)
        pq_mask = pq_mask.sel(time=keep)
        print(f'Filtering to {len(ds.time)} out of {total_obs} '
              f'time steps with at least {min_gooddata:.1%} '
              f'good quality pixels')

    ###############
    # Apply masks #
    ###############

    # Generate good quality data mask
    mask = None
    if mask_pixel_quality:
        print('Applying pixel quality/cloud mask')
        mask = pq_mask

    # Split into data/mask bands, as conversion to float and masking should
    # only be applied to data bands
    if product_type == 'fc':
        ds_data = ds
    else:
        ds_data = ds[data_bands]
        ds_masks = ds[mask_bands]

    # Mask data if either of the above masks were generated
    if mask is not None:
        ds_data = odc.algo.keep_good_only(ds_data, where=mask)

    # Automatically set dtype to either native or float32 depending on
    # whether masking was requested
    if dtype == 'auto':
        dtype = 'native' if mask is None else 'float32'

    # Set nodata values using odc.algo tools to reduce peak memory usage
    # when converting data dtype
    if dtype != 'native':
        ds_data = odc.algo.to_float(ds_data, dtype=dtype)

    # Put data and mask bands back together
    if product_type == 'fc':
        attrs = ds.attrs
        ds = ds_data
        ds.attrs.update(attrs)
    else:
        attrs = ds.attrs
        ds = xr.merge([ds_data, ds_masks])
        ds.attrs.update(attrs)

    ###############
    # Return data #
    ###############

    # Drop bands not originally requested by user
    if requested_measurements:
        ds = ds[requested_measurements]

    # If user supplied dask_chunks, return data as a dask array without
    # actually loading it in
    if dask_chunks is not None:
        print(f'Returning {len(ds.time)} time steps as a dask array')
        return ds
    else:
        print(f'Loading {len(ds.time)} time steps')
        return ds.compute()
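
# Hedged usage sketch for load_ard, based on the parameters documented above.
# Extents, resolution and output CRS are illustrative placeholders passed
# through to dc.load via **kwargs.
import datacube

dc = datacube.Datacube(app='load_ard_example')
ds = load_ard(dc,
              products=['ls8_usgs_sr_scene'],
              measurements=['red', 'green', 'blue'],
              x=(31.0, 31.5), y=(-25.0, -24.5),     # hypothetical extents
              time=('2018-01-01', '2018-12-31'),
              min_gooddata=0.90,
              output_crs='EPSG:6933',
              resolution=(-30, 30))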