Example 1
import numpy as np
import pytest
from xarray import DataArray, Dataset

from datacube.utils.masking import mask_invalid_data


def test_mask_valid_data():
    test_attrs = {
        'one': 1,
        'nodata': -999,
    }

    expected_data_array = DataArray(np.array([[1., np.nan, np.nan], [2, 3, np.nan], [np.nan, np.nan, np.nan]],
                                             dtype='float'),
                                    attrs=test_attrs, name='var_one')

    data_array = DataArray([[1, -999, -999], [2, 3, -999], [-999, -999, -999]], attrs=test_attrs)
    dataset = Dataset(data_vars={'var_one': data_array}, attrs={'ds_attr': 'still here'})

    # Make sure test is actually changing something
    assert not data_array.equals(expected_data_array)

    output_ds = mask_invalid_data(dataset, keep_attrs=True)
    assert output_ds.attrs['ds_attr'] == 'still here'
    assert output_ds.data_vars['var_one'].equals(expected_data_array)
    assert output_ds.data_vars['var_one'].attrs['one'] == 1

    output_da = mask_invalid_data(data_array, keep_attrs=True)
    assert output_da.equals(expected_data_array)
    assert output_da.attrs['one'] == 1

    missing_nodata = data_array.copy()
    del missing_nodata.attrs['nodata']
    assert not hasattr(missing_nodata, 'nodata')
    np.testing.assert_array_equal(missing_nodata, mask_invalid_data(missing_nodata))

    with pytest.raises(TypeError):
        mask_invalid_data({})
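
The behaviour this test pins down can be sketched in a few lines (a simplified model of mask_invalid_data, not the library's actual implementation): values equal to the `nodata` attribute become NaN.

from xarray import DataArray

def mask_invalid_data_sketch(da: DataArray) -> DataArray:
    """Replace values equal to the `nodata` attribute with NaN (sketch only)."""
    nodata = da.attrs.get('nodata')
    if nodata is None:
        # No nodata attribute: nothing to mask, return the array unchanged
        return da
    return da.where(da != nodata)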
Example 2
import datacube
from datacube.utils.masking import mask_invalid_data

query = {
    'time': ('1990-01-01', '1991-01-01'),
    'lat': (-35.2, -35.4),
    'lon': (149.0, 149.2),
}

dc = datacube.Datacube(app='plot-rgb-recipe')
data = dc.load(product='ls5_nbar_albers', measurements=['red', 'green', 'blue'], **query)
data = mask_invalid_data(data)

fake_saturation = 4000
rgb = data.to_array(dim='color')
rgb = rgb.transpose(*(rgb.dims[1:]+rgb.dims[:1]))  # make 'color' the last dimension
rgb = rgb.where((rgb <= fake_saturation).all(dim='color'))  # mask out pixels where any band is 'saturated'
rgb /= fake_saturation  # scale to [0, 1] range for imshow

rgb.plot.imshow(x=data.crs.dimensions[1], y=data.crs.dimensions[0],
                col='time', col_wrap=5, add_colorbar=False)
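
On xarray 0.14 or later, the dims-rotation on the transpose line above can be written more directly with an Ellipsis (an equivalent alternative, assuming a recent xarray):

rgb = rgb.transpose(..., 'color')  # move 'color' to the last dimension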
Example 3
from datacube.helpers import ga_pq_fuser
from datacube.utils import masking


def load_nbarx(dc,
               sensor,
               query,
               product='nbart',
               bands_of_interest='',
               filter_pq=True):
    """
    Loads NBAR (Nadir BRDF Adjusted Reflectance) or NBAR-T (terrain corrected NBAR) data for a
    sensor, masks using pixel quality (PQ), then optionally filters out terrain -999s (for NBAR-T).
    Returns an xarray dataset plus CRS and Affine objects defining the map projection and geotransform.

    Last modified: May 2018
    Author: Bex Dunn
    Modified by: Claire Krause, Robbi Bishop-Taylor, Bex Dunn

    inputs
    dc - Handle for the Datacube to import from. This allows you to also use dev environments
    if they have been imported into the environment.
    sensor - Options are 'ls5', 'ls7', 'ls8'
    query - A dict containing the query bounds. Can include lat/lon, time etc. 

    optional
    product - 'nbar' or 'nbart'. Defaults to nbart unless otherwise specified
    bands_of_interest - List of strings containing the bands to be read in; defaults to all bands,
                        options include 'red', 'green', 'blue', 'nir', 'swir1', 'swir2'
    filter_pq - boolean. Will filter clouds and saturated pixels using PQ unless set to False


    outputs
    ds - Extracted and optionally PQ filtered dataset
    crs - CRS object defining dataset coordinate reference system
    affine - Affine object defining dataset affine transformation
    """

    product_name = '{}_{}_albers'.format(sensor, product)
    mask_product = '{}_{}_albers'.format(sensor, 'pq')
    print('Loading {}'.format(product_name))

    # If bands of interest are given, assign measurements in dc.load call
    if bands_of_interest:

        ds = dc.load(product=product_name,
                     measurements=bands_of_interest,
                     group_by='solar_day',
                     **query)

    # If no bands of interest given, run without specifying measurements
    else:

        ds = dc.load(product=product_name, group_by='solar_day', **query)

    # Proceed if the resulting call returns data
    if ds.variables:

        crs = ds.crs
        affine = ds.affine
        print('Loaded {}'.format(product_name))

        # If pixel quality filtering is enabled, extract PQ data to use as mask
        if filter_pq:

            sensor_pq = dc.load(product=mask_product,
                                fuse_func=ga_pq_fuser,
                                group_by='solar_day',
                                **query)

            # If PQ call returns data, use to mask input data
            if sensor_pq.variables:
                print('Generating mask {}'.format(mask_product))
                good_quality = masking.make_mask(
                    sensor_pq.pixelquality,
                    cloud_acca='no_cloud',
                    cloud_shadow_acca='no_cloud_shadow',
                    cloud_shadow_fmask='no_cloud_shadow',
                    cloud_fmask='no_cloud',
                    blue_saturated=False,
                    green_saturated=False,
                    red_saturated=False,
                    nir_saturated=False,
                    swir1_saturated=False,
                    swir2_saturated=False,
                    contiguous=True)

                # Apply mask to preserve only good data
                ds = ds.where(good_quality)

            ds.attrs['crs'] = crs
            ds.attrs['affine'] = affine

            # Replace nodata values with NaN
            ds = masking.mask_invalid_data(ds)

        return ds, crs, affine

    else:

        print('Failed to load {}'.format(product_name))
        return None, None, None
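
A minimal call sketch (the ls8 sensor name and query bounds here are illustrative; the products actually available depend on the datacube you are connected to):

import datacube

dc = datacube.Datacube(app='load-nbarx-example')
query = {
    'time': ('2016-01-01', '2016-12-31'),
    'lat': (-35.2, -35.4),
    'lon': (149.0, 149.2),
}

# Load terrain-corrected Landsat 8 data, PQ-masked, with CRS and affine returned alongside
ds, crs, affine = load_nbarx(dc, 'ls8', query,
                             product='nbart',
                             bands_of_interest=['red', 'green', 'blue'])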
Example 4
import numpy as np
import xarray as xr

from datacube.utils import masking


def load_clearsentinel2(dc,
                        query,
                        sensors=('s2a', 's2b'),
                        product='ard',
                        bands_of_interest=('nbart_red', 'nbart_green',
                                           'nbart_blue', 'nbart_nir_1',
                                           'nbart_swir_2', 'nbart_swir_3'),
                        masked_prop=0.0,
                        mask_values=(0, 2, 3),
                        pixel_quality_band='fmask',
                        mask_pixel_quality=True,
                        mask_invalid_data=True,
                        satellite_metadata=False):
    """
    Loads Sentinel 2 data for multiple sensors (i.e. s2a, s2b), and returns a single xarray dataset containing 
    only observations that contain greater than a given proportion of good quality pixels. This can be used to extract
    visually appealing time series of observations that are not affected by cloud, for example as an input to the
    `animated_timeseries` function from `DEAPlotting`.
    
    The proportion of good quality pixels is calculated by summing the pixels flagged as good quality
    in the Sentinel pixel quality array. By default, pixels flagged as nodata, cloud or shadow are treated as poor
    quality in this calculation, but this can be customised using the `mask_values` parameter.
    
    MEMORY ISSUES: For large data extractions, it is recommended that you set both `mask_pixel_quality=False` and 
    `mask_invalid_data=False`. Otherwise, all output variables will be coerced to float64 when NaN values are 
    inserted into the array, potentially causing your data to use 4x as much memory. Be aware that the resulting
    arrays will contain invalid -999 values which should be considered in analyses.
    
    Last modified: March 2019
    Author: Robbi Bishop-Taylor
    
    :param dc: 
        A specific Datacube to import from, i.e. `dc = datacube.Datacube(app='Sentinel datacube')`. This allows you 
        to also use development datacubes if they have been imported into the environment.
    
    :param query: 
        A dict containing the query bounds. Can include lat/lon, time etc. If no `time` query is given, the 
        function defaults to all time steps available to all sensors (e.g. 2015 onward)

    :param sensors:
        An optional list of Sentinel 2 sensors to load data for. Options are 's2a', and 's2b'; defaults to both.

    :param product:
        An optional string specifying the product to load. Defaults to 'ard', which is equivalent to loading
        e.g. `s2a_ard_granule`. 
        
    :param bands_of_interest:
        An optional list of strings containing the bands to be read in; to view full list run the following:
        `dc.list_measurements().loc['s2b_ard_granule']`. Defaults to `('nbart_red', 'nbart_green', 'nbart_blue', 
        'nbart_nir_1', 'nbart_swir_2', 'nbart_swir_3')`.

    :param masked_prop:
        An optional float giving the minimum percentage of good quality pixels required for a Sentinel 2 observation
        to be loaded. Defaults to 0.0 which will return all observations regardless of pixel quality (set to e.g. 0.99 
        to return only observations with more than 99% good quality pixels).
    
    :param mask_values:
        An optional list of pixel quality values to treat as poor quality observations in the above `masked_prop`
        calculation. The default is `[0, 2, 3]` which treats nodata, cloud and cloud shadow as poor quality.
        Choose from: `{'0': 'nodata', '1': 'valid', '2': 'cloud', '3': 'shadow', '4': 'snow', '5': 'water'}`.
        
    :param pixel_quality_band:
        An optional string giving the name of the pixel quality band contained in the Sentinel 2 dataset. The default
        value is 'fmask'.
      
    :param mask_pixel_quality:
        An optional boolean indicating whether to apply the pixel quality mask to all observations that were not
        filtered out for having fewer good quality pixels than `masked_prop`. For example, if `masked_prop=0.99`, the
        filtered images may still contain up to 1% poor quality pixels. The default of True masks poor quality pixels
        out and sets them to NaN using the pixel quality mask. This has the side effect of changing the data type of
        the output arrays from int16 to float64, which can cause memory issues. To reduce memory usage, set to False.
        
    :param mask_invalid_data:
        An optional boolean indicating whether invalid -999 nodata values should be replaced with NaN. Defaults to
        True; this has the side effect of changing the data type of the output arrays from int16 to float64 which can
        cause memory issues. To reduce memory usage, set to False.
        
    :param satellite_metadata:
        An optional boolean indicating whether to return the dataset with a `satellite` variable that gives the name
        of the satellite that made each observation in the time series (i.e. s2a, s2b). Defaults to False.
        
    :returns:
        An xarray dataset containing only Sentinel 2 observations that contain greater than `masked_prop`
        proportion of clear pixels.  
        
    :example:
    
    >>> # Import modules
    >>> import datacube
    >>> import sys

    >>> # Import dea-notebooks functions using relative link to 10_Scripts directory
    >>> sys.path.append('../10_Scripts')
    >>> import DEADataHandling

    >>> # Connect to a datacube containing Sentinel data
    >>> dc = datacube.Datacube(app='load_clearsentinel')

    >>> # Set up spatial and temporal query; note that 'output_crs' and 'resolution' need to be set
    >>> query = {'x': (-191400.0, -183400.0),
    ...          'y': (-1423460.0, -1415460.0),
    ...          'time': ('2018-01-01', '2018-03-01'),
    ...          'crs': 'EPSG:3577',
    ...          'output_crs': 'EPSG:3577',
    ...          'resolution': (10, 10)}   

    >>> # Load observations with less than 70% cloud from both S2A and S2B as a single combined dataset
    >>> sentinel_ds = DEADataHandling.load_clearsentinel2(dc=dc, query=query, sensors=['s2a', 's2b'], 
    ...                                    bands_of_interest=['nbart_red', 'nbart_green', 'nbart_blue'], 
    ...                                    masked_prop=0.3, mask_pixel_quality=True)
    Loading s2a pixel quality
        Loading 3 filtered s2a timesteps
    Loading s2b pixel quality
        Loading 2 filtered s2b timesteps
    Combining and sorting s2a, s2b data
        Replacing invalid -999 values with NaN (data will be coerced to float64)

    >>> # Test that function returned data
    >>> len(sentinel_ds.time) > 0
    True
      
    """

    # Dictionary to save results from each sensor
    filtered_sensors = {}

    # Iterate through all sensors, returning only observations with > mask_prop clear pixels
    for sensor in sensors:

        # If bands of interest are given, assign measurements in dc.load call. This is
        # for compatibility with the existing dea-notebooks load_nbarx function.
        if bands_of_interest:

            # Lazily load Sentinel 2 data using dask
            data = dc.load(product=f'{sensor}_{product}_granule',
                           measurements=bands_of_interest,
                           group_by='solar_day',
                           dask_chunks={'time': 1},
                           **query)

        # If no bands of interest given, run without specifying measurements, and
        # therefore return all available bands
        else:

            # Lazily load Sentinel 2 data using dask
            data = dc.load(product=f'{sensor}_{product}_granule',
                           group_by='solar_day',
                           dask_chunks={'time': 1},
                           **query)

        # Load PQ data
        print(f'Loading {sensor} pixel quality')
        pq = dc.load(product=f'{sensor}_{product}_granule',
                     measurements=[pixel_quality_band],
                     group_by='solar_day',
                     dask_chunks={'time': 1},
                     **query)

        # If resulting dataset has data, continue:
        if data.variables:

            # If more than 0 timesteps
            if len(data.time) > 0:

                # Identify pixels with valid data
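                # (np.isin flags pixels whose quality value is NOT in mask_values;
                # re-wrapping via .where().notnull() turns the bare numpy boolean
                # array back into a DataArray carrying the PQ dataset's coordinates)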
                good_quality = np.isin(pq[pixel_quality_band],
                                       test_elements=mask_values,
                                       invert=True)
                good_quality = pq[pixel_quality_band].where(
                    good_quality).notnull()

                # Compute good data for each observation as a percentage of total array pixels
                data_perc = good_quality.sum(axis=1).sum(
                    axis=1) / (good_quality.shape[1] * good_quality.shape[2])

                # Add data_perc data to Sentinel 2 dataset as a new xarray variable
                data['data_perc'] = xr.DataArray(data_perc,
                                                 [('time', data.time)])

                # Filter by data_perc to drop low quality observations and finally import data using dask
                filtered = data.sel(time=data.data_perc >= masked_prop)
                print(
                    f'    Loading {len(filtered.time)} filtered {sensor} timesteps'
                )

                # Optionally apply pixel quality mask to all observations that were not dropped in previous step
                if mask_pixel_quality:
                    filtered = filtered.where(good_quality)

                # Optionally add satellite name
                if satellite_metadata:
                    filtered['satellite'] = xr.DataArray(
                        [sensor] * len(filtered.time),
                        [('time', filtered.time)])

                # Add result to dictionary
                filtered_sensors[sensor] = filtered.compute()

                # Drop references to intermediate arrays so memory can be freed
                filtered = None
                good_quality = None
                data = None

            else:

                # If there is no data for sensor or if another error occurs:
                print(f'    Skipping {sensor}; no valid data for query')

        else:

            # If there is no data for sensor or if another error occurs:
            print(f'    Skipping {sensor}; no valid data for query')

    ############################
    # Combine multiple sensors #
    ############################

    # Proceed with concatenating only if there is more than 1 sensor processed
    if len(filtered_sensors) > 1:

        # Concatenate all sensors into one big xarray dataset, and then sort by time
        sensor_string = ", ".join(filtered_sensors.keys())
        print(f'Combining and sorting {sensor_string} data')
        combined_ds = xr.concat(filtered_sensors.values(), dim='time')
        combined_ds = combined_ds.sortby('time')

        # Optionally filter to replace invalid data values with nans
        if mask_invalid_data:

            print(
                '    Replacing invalid -999 values with NaN (data will be coerced to float64)'
            )
            combined_ds = masking.mask_invalid_data(combined_ds)

        # Return combined dataset
        return combined_ds

    # Return the single dataset if only one sensor was processed
    elif len(filtered_sensors) == 1:

        sensor_string = ", ".join(filtered_sensors.keys())
        print(f'Combining and sorting {sensor_string} data')
        sensor_ds = list(filtered_sensors.values())[0]

        # Optionally filter to replace no data values with nans
        if mask_invalid_data:

            print(
                '    Replacing invalid -999 values with NaN (data will be coerced to float64)'
            )
            sensor_ds = masking.mask_invalid_data(sensor_ds)

        return sensor_ds

    else:

        print(
            f'No data returned for query for any sensor in {", ".join(sensors)} '
            f'and time range {"-".join(query["time"])}')
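
The MEMORY ISSUES note above is easy to verify in isolation: inserting NaN into an int16 array forces a float dtype (a small standalone illustration, independent of datacube):

import numpy as np
import xarray as xr

da = xr.DataArray(np.array([[1, -999], [2, 3]], dtype='int16'))
print(da.dtype)                    # int16 (2 bytes per pixel)
print(da.where(da != -999).dtype)  # float64 (8 bytes per pixel); NaN has no int representation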
Example 5
import warnings

import numpy as np
import xarray as xr

from datacube.helpers import ga_pq_fuser
from datacube.utils import masking


def load_clearlandsat(dc,
                      query,
                      sensors=('ls5', 'ls7', 'ls8'),
                      product='nbart',
                      dask_chunks={'time': 1},
                      lazy_load=False,
                      bands_of_interest=None,
                      masked_prop=0.0,
                      mask_dict=None,
                      mask_pixel_quality=True,
                      mask_invalid_data=True,
                      ls7_slc_off=False,
                      satellite_metadata=False):
    """Loads Landsat NBAR, NBART or FC25 and PQ data for multiple sensors (i.e. ls5, ls7, ls8) and returns a single 
    xarray dataset containing only observations that contain greater than a given proportion of good quality pixels.
    
    This function can be used to extract visually appealing time series of observations that are not affected by cloud,
    for example as an input to the `animated_timeseries` function from `DEAPlotting`.
    
    The proportion of clear pixels is calculated by summing the pixels that are marked as being good quality
    in the Landsat PQ25 layer. By default cloud, cloud shadow, saturated pixels and pixels missing data for any band 
    are considered poor quality data, but this can be customised using the `mask_dict` parameter.
    
    Last modified: March 2019
    Author: Robbi Bishop-Taylor, Bex Dunn    
    
    Parameters
    ----------    
    dc : datacube Datacube object
        A specific Datacube to import from, i.e. `dc = datacube.Datacube(app='Clear Landsat')`. This allows you to 
        also use development datacubes if they have been imported into the environment.    
    query : dict
        A dict containing the query bounds. Can include lat/lon, time etc. If no `time` query is given, the 
        function defaults to all timesteps available to all sensors (e.g. 1987-2018)
    sensors : list, optional
        An optional list of Landsat sensor names to load data for. Options are 'ls5', 'ls7', 'ls8'; defaults to all.
    product : str, optional
        An optional string specifying 'nbar', 'nbart' or 'fc'. Defaults to 'nbart'. For information on the difference, 
        see the '02_DEA_datasets/Introduction_to_Landsat' or '02_DEA_datasets/Introduction_to_Fractional_Cover'
        notebooks from DEA-notebooks.
    dask_chunks : dict, optional
        An optional dictionary containing the coords and sizes you wish to create dask chunks over. Usually
        used in combination with lazy_load=True (see below). example: dask_chunks = {'x': 500, 'y': 500}
    lazy_load : boolean, optional
        Setting this variable to 'True' will delay the computation of the function until you explicitly
        run ds.compute(). If used in conjunction with dask.distributed.Client(), this will allow
        for automatic parallel computation.
    bands_of_interest : list, optional
        An optional list of strings containing the bands to be read in; options include 'red', 'green', 'blue', 
        'nir', 'swir1', 'swir2'; defaults to all available bands if no bands are specified.
    masked_prop : float, optional
        An optional float giving the minimum percentage of good quality pixels required for a Landsat observation to 
        be loaded. Defaults to 0.0 which will return all observations regardless of pixel quality (set to e.g. 0.99 
        to return only observations with more than 99% good quality pixels).
    mask_dict : dict, optional
        An optional dict of arguments to the `masking.make_mask` function that can be used to identify poor
        quality pixels from the PQ layer using alternative masking criteria. The default value of None masks
        out pixels flagged as cloud or cloud shadow by either the ACCA or Fmask algorithms, any saturated pixels, 
        or any pixels that are missing data in any band (equivalent to: `mask_dict={'cloud_acca': 'no_cloud', 
        'cloud_shadow_acca': 'no_cloud_shadow', 'cloud_shadow_fmask': 'no_cloud_shadow', 'cloud_fmask': 'no_cloud', 
        'blue_saturated': False, 'green_saturated': False, 'red_saturated': False, 'nir_saturated': False, 
        'swir1_saturated': False, 'swir2_saturated': False, 'contiguous': True}`. See the 
        `02_DEA_datasets/Introduction_to_LandsatPQ.ipynb` notebook on DEA Notebooks for a list of all possible options.
    mask_pixel_quality : bool, optional
        An optional boolean indicating whether to apply the pixel quality mask to all observations that were not
        filtered out for having less good quality pixels that `masked_prop`. For example, if `masked_prop=0.99`, the
        filtered images may still contain up to 1% poor quality pixels. The default of False simply returns the
        resulting observations without masking out these pixels; True masks them out and sets them to NaN using the
        pixel quality mask, but has the side effect of changing the data type of the output arrays from int16 to
        float32 which can cause memory issues. To reduce memory usage, set to False.
    mask_invalid_data : bool, optional
        An optional boolean indicating whether invalid -999 nodata values should be replaced with NaN. Defaults to
        True; this has the side effect of changing the data type of the output arrays from int16 to float32 which
        can cause memory issues. To reduce memory usage, set to False.
    ls7_slc_off : bool, optional
        An optional boolean indicating whether to include data from after the Landsat 7 SLC failure (i.e. SLC-off).
        Defaults to False, which removes all Landsat 7 observations after May 31 2003. 
    satellite_metadata : bool, optional
        An optional boolean indicating whether to return the dataset with a `satellite` variable that gives the name 
        of the satellite that made each observation in the timeseries (i.e. ls5, ls7, ls8). Defaults to False. 
    
    Returns
    -------
    combined_ds : xarray Dataset
        An xarray dataset containing only Landsat observations that contain greater than `masked_prop`
        proportion of clear pixels.   
        
    Notes
    -----
    Memory issues: For large data extractions, it is recommended that you set both `mask_pixel_quality=False` and 
    `mask_invalid_data=False`. Otherwise, all output variables will be coerced to float32 when NaN values are 
    inserted into the array, potentially causing your data to use 2x as much memory. Be aware that the resulting
    arrays will contain invalid -999 values which should be considered in analyses.
        
    Example
    -------    
    >>> # Import modules
    >>> import datacube
    >>> import sys
    >>> # Import dea-notebooks functions using relative link to 10_Scripts directory
    >>> sys.path.append('../10_Scripts')
    >>> import DEADataHandling
    >>> # Connect to a datacube containing Landsat data
    >>> dc = datacube.Datacube(app='load_clearlandsat')
    >>> # Set up spatial and temporal query
    >>> query = {'x': (954163, 972163),
    ...          'y': (-3573891, -3555891),
    ...          'time': ('2011-06-01', '2013-06-01'),
    ...          'crs': 'EPSG:3577'}   
    >>> # Load observations with more than 75% good quality pixels from ls5, ls7 and ls8 as a combined dataset
    >>> landsat_ds = DEADataHandling.load_clearlandsat(dc=dc, query=query, sensors=['ls5', 'ls7', 'ls8'], 
    ...                                    bands_of_interest=['red', 'green', 'blue'], 
    ...                                    masked_prop=0.75, mask_pixel_quality=True, ls7_slc_off=True)
    Loading ls5
        Loading 4 filtered ls5 timesteps
    Loading ls7
        Loading 29 filtered ls7 timesteps
    Loading ls8
        Loading 3 filtered ls8 timesteps
    Combining and sorting ls5, ls7, ls8 data
        Replacing invalid -999 values with NaN (data will be coerced to float32)
    >>> # Test that function returned data
    >>> len(landsat_ds.time) > 0
    True
                
    """

    #######################
    # Process each sensor #
    #######################

    # Warn if loading a PQ bitstring product and attempting to mask it (and therefore cast to float)
    if product == 'pq' and (mask_invalid_data or mask_pixel_quality):
        warnings.warn('You are attempting to load a pixel quality product with a '
                      'mask flag (mask_invalid_data or mask_pixel_quality). Pixel '
                      'quality is a bitstring (it only makes sense as an int) and '
                      'masking casts it to float32.')

    # Dictionary to save results from each sensor
    filtered_sensors = {}

    # Iterate through all sensors, returning only observations with > mask_prop clear pixels
    for sensor in sensors:

        # Load PQ data using dask
        print(f'Loading {sensor}')

        # If bands of interest are given, assign measurements in dc.load call. This is
        # for compatibility with the existing dea-notebooks load_nbarx function.
        if bands_of_interest:

            # Lazily load Landsat data using dask
            data = dc.load(product=f'{sensor}_{product}_albers',
                           measurements=bands_of_interest,
                           group_by='solar_day',
                           dask_chunks=dask_chunks,
                           **query)

        # If no bands of interest given, run without specifying measurements, and
        # therefore return all available bands
        else:

            # Lazily load Landsat data using dask
            data = dc.load(product=f'{sensor}_{product}_albers',
                           group_by='solar_day',
                           dask_chunks=dask_chunks,
                           **query)

        # Load PQ data
        pq = dc.load(product=f'{sensor}_pq_albers',
                     group_by='solar_day',
                     fuse_func=ga_pq_fuser,
                     dask_chunks=dask_chunks,
                     **query)

        # If resulting dataset has data, continue:
        if data.variables:

            # Remove Landsat 7 SLC-off observations if ls7_slc_off=False
            if not ls7_slc_off and sensor == 'ls7':

                print('    Ignoring SLC-off observations for ls7')
                data = data.sel(time=data.time < np.datetime64('2003-05-30'))

            # If more than 0 timesteps
            if len(data.time) > 0:

                # Return only Landsat observations that have matching PQ data
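                # (subtracting the two time coordinates aligns them on their
                # intersection, so `time` holds only the timestamps present in
                # both the data and PQ datasets)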
                time = (data.time - pq.time).time
                data = data.sel(time=time)
                pq = pq.sel(time=time)

                # If a custom dict is provided for mask_dict, use these values to make mask from PQ
                if mask_dict:

                    # Mask PQ using custom values by unpacking mask_dict **kwarg
                    good_quality = masking.make_mask(pq.pixelquality,
                                                     **mask_dict)

                else:

                    # Identify pixels with no clouds in either ACCA or Fmask
                    good_quality = masking.make_mask(
                        pq.pixelquality,
                        cloud_acca='no_cloud',
                        cloud_shadow_acca='no_cloud_shadow',
                        cloud_shadow_fmask='no_cloud_shadow',
                        cloud_fmask='no_cloud',
                        blue_saturated=False,
                        green_saturated=False,
                        red_saturated=False,
                        nir_saturated=False,
                        swir1_saturated=False,
                        swir2_saturated=False,
                        contiguous=True)

                # Compute good data for each observation as a percentage of total array pixels. Need to
                # sum over x and y axes individually so that the function works with lat-lon dimensions,
                # and because it isn't currently possible to pass a list of axes (bug with xarray?)
                data_perc = good_quality.sum(axis=1).sum(
                    axis=1) / (good_quality.shape[1] * good_quality.shape[2])

                # Add data_perc data to Landsat dataset as a new xarray variable
                data['data_perc'] = xr.DataArray(data_perc,
                                                 [('time', data.time)])

                # Filter by data_perc to drop low quality observations and finally import data using dask
                filtered = data.sel(time=data.data_perc >= masked_prop)
                print(
                    f'    Loading {len(filtered.time)} filtered {sensor} timesteps'
                )

                # Optionally apply pixel quality mask to all observations that were not dropped in previous step
                if mask_pixel_quality:

                    # First change dtype to float32, then mask out values using
                    # `.where()`. By casting to float32, we prevent `.where()`
                    # from automatically casting to float64, using 2x the memory
                    # We also need to manually reset attributes due to a possible
                    # bug in recent xarray version
                    filtered = filtered.astype(
                        np.float32).assign_attrs(crs=filtered.crs)
                    filtered = filtered.where(good_quality)

                # Optionally add satellite name variable
                if satellite_metadata:
                    filtered['satellite'] = xr.DataArray(
                        [sensor] * len(filtered.time),
                        [('time', filtered.time)])

                # Add result to dictionary
                if lazy_load:
                    filtered_sensors[sensor] = filtered
                else:
                    filtered_sensors[sensor] = filtered.compute()

                # Drop references to intermediate arrays so memory can be freed
                filtered = None
                good_quality = None
                data = None
                pq = None

            else:

                # If there is no data for sensor or if another error occurs:
                print(f'    Skipping {sensor}; no valid data for query')

        else:

            # If there is no data for sensor or if another error occurs:
            print(f'    Skipping {sensor}; no valid data for query')

    ############################
    # Combine multiple sensors #
    ############################

    # Proceed with concatenating only if there is more than 1 sensor processed
    if len(filtered_sensors) > 1:

        # Concatenate all sensors into one big xarray dataset, and then sort by time
        sensor_string = ", ".join(filtered_sensors.keys())
        print(f'Combining and sorting {sensor_string} data')
        combined_ds = xr.concat(filtered_sensors.values(), dim='time')
        combined_ds = combined_ds.sortby('time')

        # Optionally filter to replace no data values with nans
        if mask_invalid_data:

            print(
                '    Replacing invalid -999 values with NaN (data will be coerced to float32)'
            )

            # First change dtype to float32, then mask out values using
            # `.where()`. By casting to float32, we prevent `.where()`
            # from automatically casting to float64, using 2x the memory
            # We also need to manually reset attributes due to a possible
            # bug in recent xarray version
            combined_ds = (combined_ds.astype(
                np.float32).assign_attrs(crs=combined_ds.crs))
            combined_ds = masking.mask_invalid_data(combined_ds)

        # reset pixel quality attributes
        if product == 'pq':
            combined_ds.pixelquality.attrs.update(
                list(filtered_sensors.values())[0].pixelquality.attrs)

        # Return combined dataset
        return combined_ds

    # Return the single dataset if only one sensor was processed
    elif len(filtered_sensors) == 1:

        sensor_string = ", ".join(filtered_sensors.keys())
        print(f'Returning {sensor_string} data')
        sensor_ds = list(filtered_sensors.values())[0]

        # Optionally filter to replace no data values with nans
        if mask_invalid_data:

            print(
                '    Replacing invalid -999 values with NaN (data will be coerced to float32)'
            )

            # First change dtype to float32, then mask out values using
            # `.where()`. By casting to float32, we prevent `.where()`
            # from automatically casting to float64, using 2x the memory
            # We also need to manually reset attributes due to a possible
            # bug in recent xarray version
            sensor_ds = (sensor_ds.astype(
                np.float32).assign_attrs(crs=sensor_ds.crs))
            sensor_ds = masking.mask_invalid_data(sensor_ds)

        return sensor_ds

    else:

        print(
            f'No data returned for query for any sensor in {", ".join(sensors)} '
            f'and time range {"-".join(query["time"])}')
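
The float32 pre-cast pattern used above can also be demonstrated in isolation (a small sketch; the halved memory use is exactly what the inline comments describe):

import numpy as np
import xarray as xr

da = xr.DataArray(np.arange(6, dtype='int16').reshape(2, 3))
mask = da > 2

print(da.where(mask).dtype)                     # float64: default NaN coercion
print(da.astype(np.float32).where(mask).dtype)  # float32: the pre-cast dtype is kept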