def create_median_mosaic(dataset_in, clean_mask=None, no_data=-9999, dtype=None, **kwargs):
    """
    Method for calculating the median pixel value for a given dataset.

    Parameters
    ----------
    dataset_in: xarray.Dataset
        A dataset retrieved from the Data Cube; should contain:
        coordinates: time, latitude, longitude
        variables: variables to be mosaicked (e.g. red, green, and blue bands)
    clean_mask: np.ndarray
        A boolean mask combined with `dataset_in != no_data` in `Dataset.where()`;
        values are kept where it is True. If no clean mask is specified, one is
        obtained from `create_default_clean_mask(dataset_in)`.
    no_data: int or float
        The no data value.
    dtype: str or numpy.dtype
        A string denoting a Python datatype name (e.g. int, float) or a NumPy dtype (e.g.
        np.int16, np.float32) to convert the data to.
    **kwargs:
        Accepted and ignored.

    Returns
    -------
    dataset_out: xarray.Dataset
        Composited data with the format:
        coordinates: latitude, longitude
        variables: same as dataset_in
    """
    # Default to masking nothing.
    if clean_mask is None:
        clean_mask = create_default_clean_mask(dataset_in)

    # The band list is handed to restore_or_convert_dtypes() unconditionally, so it
    # must be bound even when the caller supplies an explicit `dtype`.
    band_list = list(dataset_in.data_vars)

    dataset_in_dtypes = None
    if dtype is None:
        # Save dtypes because masking with Dataset.where() converts to float64.
        dataset_in_dtypes = {band: dataset_in[band].dtype for band in band_list}

    # Mask out clouds and Landsat 7 scan lines.
    dataset_in = dataset_in.where((dataset_in != no_data) & (clean_mask))
    dataset_out = dataset_in.median(dim='time', skipna=True, keep_attrs=False)

    # Handle datatype conversions.
    dataset_out = restore_or_convert_dtypes(dtype, band_list, dataset_in_dtypes, dataset_out, no_data)
    return dataset_out
def nazeer_chlorophyll(dataset_in, clean_mask=None, no_data=0):
    """
    Compute the chlorophyll-a estimate `0.57 * red / blue**2 - 2.61`.

    The red and blue bands are cast to float64 and multiplied by 0.0001
    before the formula is applied.

    Parameters
    ----------
    dataset_in: xarray.Dataset
        Must expose `red` and `blue` variables and `time`, `latitude`
        and `longitude` coordinates.
    clean_mask: np.ndarray
        Boolean mask; pixels where it is False are set to `no_data`.
        If omitted, one is obtained from `create_default_clean_mask(dataset_in)`.
    no_data: int or float
        Value written to pixels that are not clean.

    Returns
    -------
    dataset_out: xarray.Dataset
        Dataset with a single `nazeer_chlorophyll` variable on the
        time/latitude/longitude coordinates of `dataset_in`.
    """
    # Default to masking nothing.
    if clean_mask is None:
        clean_mask = create_default_clean_mask(dataset_in)

    scale_factor = 0.0001
    scaled_red = dataset_in.red.astype('float64') * scale_factor
    scaled_blue = dataset_in.blue.astype('float64') * scale_factor
    chlorophyll = (0.57 * scaled_red / scaled_blue**2) - 2.61

    # Only clear pixels keep their computed value.
    rejected = np.invert(clean_mask)
    chlorophyll.values[rejected] = no_data

    output_coords = {
        'time': dataset_in.time,
        'latitude': dataset_in.latitude,
        'longitude': dataset_in.longitude,
    }
    return xr.Dataset({'nazeer_chlorophyll': chlorophyll}, coords=output_coords)
def watanabe_chlorophyll(dataset_in, clean_mask=None, no_data=0):
    """
    Compute the chlorophyll-a estimate `925.001 * (nir / red) - 77.16`.

    Both bands are cast to float64 before the ratio is taken.

    Parameters
    ----------
    dataset_in: xarray.Dataset
        Must contain `red` and `nir` variables (checked with an assertion)
        and `time`, `latitude` and `longitude` coordinates.
    clean_mask: np.ndarray
        Boolean mask; pixels where it is False are set to `no_data`.
        If omitted, one is obtained from `create_default_clean_mask(dataset_in)`.
    no_data: int or float
        Value written to pixels that are not clean.

    Returns
    -------
    dataset_out: xarray.Dataset
        Dataset with a single `watanabe_chlorophyll` variable on the
        time/latitude/longitude coordinates of `dataset_in`.
    """
    assert all(band in dataset_in for band in ('red', 'nir')), "Red and NIR bands are required for the Watanabe Chlorophyll analysis."

    # Default to masking nothing.
    if clean_mask is None:
        clean_mask = create_default_clean_mask(dataset_in)

    nir_to_red = dataset_in.nir.astype('float64') / dataset_in.red.astype('float64')
    chlorophyll = 925.001 * nir_to_red - 77.16

    # Only clear pixels keep their computed value.
    rejected = np.invert(clean_mask)
    chlorophyll.values[rejected] = no_data

    output_coords = {
        'time': dataset_in.time,
        'latitude': dataset_in.latitude,
        'longitude': dataset_in.longitude,
    }
    return xr.Dataset({'watanabe_chlorophyll': chlorophyll}, coords=output_coords)
def tsm(dataset_in, clean_mask=None, no_data=0):
    """
    Compute `3983 * _tsmi(dataset_in)**1.6246` and mask out unclean pixels.

    Inputs:
        dataset_in (xarray.Dataset) - dataset retrieved from the Data Cube;
            must contain `red` and `green` variables (checked with an assertion).
    Optional Inputs:
        clean_mask (numpy.ndarray with dtype boolean) - true for values the user
            considers clean; if omitted, a mask is obtained from
            `create_default_clean_mask(dataset_in)`.
        no_data (int/float) - value written to pixels that are not clean;
            default: 0
    Output:
        dataset_out (xarray.Dataset) - dataset with a single `tsm` variable,
            using every dimension of `dataset_in` as a coordinate.
    Throws:
        AssertionError - if the red or green band is missing.
    """
    required_bands_present = 'red' in dataset_in and 'green' in dataset_in
    assert required_bands_present, "Red and Green bands are required for the TSM analysis."

    # Default to masking nothing.
    if clean_mask is None:
        clean_mask = create_default_clean_mask(dataset_in)

    tsm_values = 3983 * _tsmi(dataset_in)**1.6246

    # Only clear pixels keep their computed value.
    rejected = np.invert(clean_mask)
    tsm_values.values[rejected] = no_data

    # Carry over one coordinate per dimension of the input dataset.
    dimension_coords = {}
    for dim_name in dataset_in.dims.keys():
        dimension_coords[dim_name] = dataset_in[dim_name]

    return xr.Dataset({'tsm': tsm_values}, coords=dimension_coords)
def create_hdmedians_multiple_band_mosaic(dataset_in,
                                          clean_mask=None,
                                          no_data=-9999,
                                          dtype=None,
                                          intermediate_product=None,
                                          operation="median",
                                          **kwargs):
    """
    Calculates the geomedian or geomedoid using a multi-band processing method.

    Parameters
    ----------
    dataset_in: xarray.Dataset
        A dataset retrieved from the Data Cube. Every data variable is stacked
        into one (band, time, rows, columns) array, so each variable must be
        three-dimensional with time as its first axis. The dataset must also
        provide `y` and `x` coordinates, which are attached to the output.
    clean_mask: np.ndarray
        A boolean mask combined with `dataset_in != no_data` in `Dataset.where()`;
        values are kept where it is True. If no clean mask is specified, one is
        obtained from `create_default_clean_mask(dataset_in)`.
    no_data: int or float
        The no data value.
    dtype: str or numpy.dtype
        A string denoting a Python datatype name (e.g. int, float) or a NumPy dtype (e.g.
        np.int16, np.float32) to convert the data to.
    intermediate_product:
        Accepted for signature parity with the other mosaic functions; unused here.
    operation: str in ['median', 'medoid']
        'median' uses `hd.nangeomedian`, 'medoid' uses `hd.nanmedoid`.

    Returns
    -------
    dataset_out: xarray.Dataset
        Composited data with the format:
        coordinates: y, x
        variables: same as dataset_in
    """
    # Default to masking nothing.
    if clean_mask is None:
        clean_mask = create_default_clean_mask(dataset_in)

    assert operation in ['median', 'medoid'], "Only median and medoid operations are supported."

    band_list = list(dataset_in.data_vars)

    dataset_in_dtypes = None
    if dtype is None:
        # Save dtypes because masking with Dataset.where() converts to float64.
        dataset_in_dtypes = {band: dataset_in[band].dtype for band in band_list}

    # Mask out clouds and scan lines.
    dataset_in = dataset_in.where((dataset_in != no_data) & clean_mask)

    stacked_data = np.stack([dataset_in[band] for band in band_list])
    num_bands, num_times, num_rows, num_cols = (stacked_data.shape[axis] for axis in range(4))

    # Collapse the two spatial axes so each pixel is one column of samples.
    num_pixels = num_rows * num_cols
    pixel_series = stacked_data.reshape(num_bands, num_times, num_pixels)

    composite = np.zeros((num_bands, num_pixels))

    # For each pixel, reduce its (band, time) samples across time.
    for pixel in range(pixel_series.shape[2]):
        try:
            if operation == "median":
                composite[:, pixel] = hd.nangeomedian(pixel_series[:, :, pixel], axis=1)
            else:
                composite[:, pixel] = hd.nanmedoid(pixel_series[:, :, pixel], axis=1)
        except ValueError:
            # If all bands have nan values across time, the result is nan.
            composite[:, pixel] = np.full((num_bands), np.nan)

    output_dict = {}
    for band_index, band in enumerate(band_list):
        band_image = composite[band_index, :].reshape(num_rows, num_cols)
        output_dict[band] = (('y', 'x'), band_image)

    spatial_coords = {'y': dataset_in['y'], 'x': dataset_in['x']}
    dataset_out = xr.Dataset(output_dict, coords=spatial_coords, attrs=dataset_in.attrs)

    # Handle datatype conversions.
    dataset_out = restore_or_convert_dtypes(dtype, band_list, dataset_in_dtypes, dataset_out, no_data)
    return dataset_out
def create_mosaic(dataset_in, clean_mask=None, no_data=-9999, dtype=None, intermediate_product=None, **kwargs):
    """
    Creates a mosaic by filling no-data pixels from successive time slices.

    Time slices are visited in ascending index order, or in descending order
    when a `reverse_time` key is present in `kwargs` (its value is not
    inspected). The first slice visited (or `intermediate_product`, when
    given) seeds the mosaic; each later slice only fills pixels that still
    equal `no_data`.

    Parameters
    ----------
    dataset_in: xarray.Dataset
        A dataset retrieved from the Data Cube; should contain:
        coordinates: time, latitude, longitude
        variables: variables to be mosaicked (e.g. red, green, and blue bands)
        The input is deep-copied and is not modified.
    clean_mask: np.ndarray
        A boolean mask; values where it is False are set to `no_data` before
        compositing. If no clean mask is specified, one is obtained from
        `create_default_clean_mask(dataset_in)`.
    no_data: int or float
        The no data value.
    dtype: str or numpy.dtype
        A string denoting a Python datatype name (e.g. int, float) or a NumPy dtype (e.g.
        np.int16, np.float32) to convert the data to.
    intermediate_product: xarray.Dataset
        A previously computed mosaic to continue filling; it is deep-copied.

    Returns
    -------
    dataset_out: xarray.Dataset
        Composited data with the format:
        coordinates: latitude, longitude
        variables: same as dataset_in
    """
    dataset_in = dataset_in.copy(deep=True)

    # Default to masking nothing.
    if clean_mask is None:
        clean_mask = create_default_clean_mask(dataset_in)

    # The band list is handed to restore_or_convert_dtypes() unconditionally, so it
    # must be bound even when the caller supplies an explicit `dtype`.
    band_list = list(dataset_in.data_vars)

    # Mask data with clean_mask. All values where clean_mask==False are set to no_data.
    rejected = np.invert(clean_mask)
    for band in band_list:
        dataset_in[band].values[rejected] = no_data

    dataset_in_dtypes = None
    if dtype is None:
        # Record the input dtypes so they can be restored when no explicit dtype is requested.
        dataset_in_dtypes = {band: dataset_in[band].dtype for band in band_list}

    if intermediate_product is not None:
        dataset_out = intermediate_product.copy(deep=True)
    else:
        dataset_out = None

    num_times = len(dataset_in.time)
    # Only the presence of the 'reverse_time' key matters, as in the original contract.
    time_slices = reversed(range(num_times)) if 'reverse_time' in kwargs else range(num_times)

    for index in time_slices:
        dataset_slice = dataset_in.isel(time=index).drop('time')
        if dataset_out is None:
            dataset_out = dataset_slice.copy(deep=True)
            utilities.clear_attrs(dataset_out)
        else:
            for band in band_list:
                # Fill only pixels that are still empty. Compare against the caller's
                # no_data value (the same one written by the masking step above)
                # rather than a hard-coded -9999.
                unfilled = dataset_out[band].values == no_data
                dataset_out[band].values[unfilled] = dataset_slice[band].values[unfilled]
                dataset_out[band].attrs = OrderedDict()

    # Handle datatype conversions.
    dataset_out = restore_or_convert_dtypes(dtype, band_list, dataset_in_dtypes, dataset_out, no_data)
    return dataset_out
def create_min_ndvi_mosaic(dataset_in, clean_mask=None, no_data=-9999, dtype=None, intermediate_product=None, **kwargs):
    """
    Method for calculating the pixel value for the min ndvi value.

    For every time slice, NDVI is computed as `(nir - red) / (nir + red)`;
    each output pixel takes all of its variables from the slice with the
    lowest NDVI seen so far. Pixels rejected by `clean_mask` get an NDVI of
    1000000000 so they never win the comparison.

    Parameters
    ----------
    dataset_in: xarray.Dataset
        A dataset retrieved from the Data Cube; should contain:
        coordinates: time, latitude, longitude
        variables: variables to be mosaicked; `nir` and `red` are required
        The input is deep-copied and is not modified.
    clean_mask: np.ndarray
        A boolean mask whose first axis is time; values are kept where it is
        True. If no clean mask is specified, one is obtained from
        `create_default_clean_mask(dataset_in)`.
    no_data: int or float
        The no data value.
    dtype: str or numpy.dtype
        A string denoting a Python datatype name (e.g. int, float) or a NumPy dtype (e.g.
        np.int16, np.float32) to convert the data to.
    intermediate_product: xarray.Dataset
        A previously computed mosaic to continue from; it is deep-copied and
        must already contain an `ndvi` variable, since it is compared against.

    Returns
    -------
    dataset_out: xarray.Dataset
        Composited data with the format:
        coordinates: latitude, longitude
        variables: same as dataset_in, plus `ndvi`
    """
    dataset_in = dataset_in.copy(deep=True)

    # Default to masking nothing.
    if clean_mask is None:
        clean_mask = create_default_clean_mask(dataset_in)

    dataset_in_dtypes = None
    if dtype is None:
        # Save dtypes because masking with Dataset.where() converts to float64.
        dataset_in_dtypes = {band: dataset_in[band].dtype for band in list(dataset_in.data_vars)}

    # Mask out clouds and scan lines, honouring the caller's no_data value
    # instead of a hard-coded -9999.
    dataset_in = dataset_in.where((dataset_in != no_data) & clean_mask)

    if intermediate_product is not None:
        dataset_out = intermediate_product.copy(deep=True)
    else:
        dataset_out = None

    rejected = np.invert(clean_mask)
    for timeslice in range(len(dataset_in.time)):
        dataset_slice = dataset_in.isel(time=timeslice).drop('time')
        ndvi = (dataset_slice.nir - dataset_slice.red) / (dataset_slice.nir + dataset_slice.red)
        # Push unclean pixels far above any real NDVI so they are never selected.
        ndvi.values[rejected[timeslice, ::]] = 1000000000
        dataset_slice['ndvi'] = ndvi
        if dataset_out is None:
            dataset_out = dataset_slice.copy(deep=True)
            utilities.clear_attrs(dataset_out)
        else:
            # Decide once, before any variable is overwritten, which pixels this
            # slice wins. Recomputing it per variable would depend on `ndvi`
            # being the last variable updated.
            lower_ndvi = dataset_slice.ndvi.values < dataset_out.ndvi.values
            for key in list(dataset_slice.data_vars):
                dataset_out[key].values[lower_ndvi] = dataset_slice[key].values[lower_ndvi]

    # Handle datatype conversions.
    dataset_out = restore_or_convert_dtypes(dtype, None, dataset_in_dtypes, dataset_out, no_data)
    return dataset_out
def wofs_classify(dataset_in,
                  clean_mask=None,
                  x_coord='longitude',
                  y_coord='latitude',
                  time_coord='time',
                  no_data=-9999,
                  mosaic=False,
                  enforce_float64=False):
    """
    Description:
      Performs WOfS algorithm on given dataset.
    Assumption:
      - The WOfS algorithm is defined for Landsat 5/Landsat 7
    References:
      - Mueller, et al. (2015) "Water observations from space: Mapping surface water from 25 years of
        Landsat imagery across Australia." Remote Sensing of Environment.
      - https://github.com/GeoscienceAustralia/eo-tools/blob/stable/eotools/water_classifier.py
    -----
    Inputs:
      dataset_in (xarray.Dataset) - dataset retrieved from the Data Cube; should contain
        coordinates: time, latitude, longitude
        variables: blue, green, red, nir, swir1, swir2
      x_coord, y_coord, time_coord: (str) - Names of DataArrays in `dataset_in` to use as x, y,
        and time coordinates.
    Optional Inputs:
      clean_mask (nd numpy array with dtype boolean) - true for values user considers clean;
        if user does not provide a clean mask, one is obtained from
        `create_default_clean_mask(dataset_in)`
      no_data (int/float) - value written to output pixels that are not clean; default: -9999
      mosaic (boolean) - flag to indicate if dataset_in is a mosaic. If mosaic = False, dataset_in
        should have a time coordinate and wofs will run over each time slice; otherwise, dataset_in
        should not have a time coordinate and wofs will run over the single mosaicked image
      enforce_float64 (boolean) - flag to indicate whether or not to enforce float64 calculations;
        will use float32 if false (data that is already float64 is left as float64)
    Output:
      dataset_out (xarray.Dataset) - dataset with a single float64 `wofs` variable holding the
        classification: 0 - not water; 1 - water; `no_data` where `clean_mask` is False
    Notes:
      The six bands of `dataset_in` are cast in place for the calculation; afterwards
      `restore_or_convert_dtypes` is called on `dataset_in` with the saved dtypes.
    """

    def _band_ratio(a, b):
        """
        Calculates a normalized ratio index
        """
        return (a - b) / (a + b)

    def _run_regression(band1, band2, band3, band4, band5, band7):
        """
        Classify pixels with the WOfS decision tree.

        Takes six equally shaped arrays (blue, green, red, nir, swir1, swir2 in
        that order at the call site) and returns a uint8 array of the same
        shape: 1 for water, 0 for not water.
        """
        # Compute normalized ratio indices
        ndi_52 = _band_ratio(band5, band2)
        ndi_43 = _band_ratio(band4, band3)
        ndi_72 = _band_ratio(band7, band2)

        # Every pixel reaches exactly one leaf of the tree below (each split
        # assigns both outcomes; NaN comparisons are False and fall through to
        # node 36), so this initial value never survives. It must still fit in
        # uint8: the previous fill with `no_data` (-9999 by default) is out of
        # range, which older NumPy silently wraps and newer NumPy rejects.
        classified = np.zeros(band1.shape, dtype='uint8')

        # Start with the tree's left branch, finishing nodes as needed

        # Left branch
        r1 = ndi_52 <= -0.01

        r2 = band1 <= 2083.5
        classified[r1 & ~r2] = 0  #Node 3

        r3 = band7 <= 323.5
        _tmp = r1 & r2
        _tmp2 = _tmp & r3
        _tmp &= ~r3

        r4 = ndi_43 <= 0.61
        classified[_tmp2 & r4] = 1  #Node 6
        classified[_tmp2 & ~r4] = 0  #Node 7

        r5 = band1 <= 1400.5
        _tmp2 = _tmp & ~r5

        r6 = ndi_43 <= -0.01
        classified[_tmp2 & r6] = 1  #Node 10
        classified[_tmp2 & ~r6] = 0  #Node 11

        _tmp &= r5

        r7 = ndi_72 <= -0.23
        _tmp2 = _tmp & ~r7

        r8 = band1 <= 379
        classified[_tmp2 & r8] = 1  #Node 14
        classified[_tmp2 & ~r8] = 0  #Node 15

        _tmp &= r7

        r9 = ndi_43 <= 0.22
        classified[_tmp & r9] = 1  #Node 17
        _tmp &= ~r9

        r10 = band1 <= 473
        classified[_tmp & r10] = 1  #Node 19
        classified[_tmp & ~r10] = 0  #Node 20

        # Left branch complete; cleanup
        del r2, r3, r4, r5, r6, r7, r8, r9, r10
        gc.collect()

        # Right branch of regression tree
        r1 = ~r1

        r11 = ndi_52 <= 0.23
        _tmp = r1 & r11

        r12 = band1 <= 334.5
        _tmp2 = _tmp & ~r12
        classified[_tmp2] = 0  #Node 23

        _tmp &= r12

        r13 = ndi_43 <= 0.54
        _tmp2 = _tmp & ~r13
        classified[_tmp2] = 0  #Node 25

        _tmp &= r13

        r14 = ndi_52 <= 0.12
        _tmp2 = _tmp & r14
        classified[_tmp2] = 1  #Node 27

        _tmp &= ~r14

        r15 = band3 <= 364.5
        _tmp2 = _tmp & r15

        r16 = band1 <= 129.5
        classified[_tmp2 & r16] = 1  #Node 31
        classified[_tmp2 & ~r16] = 0  #Node 32

        _tmp &= ~r15

        r17 = band1 <= 300.5
        _tmp2 = _tmp & ~r17
        _tmp &= r17
        classified[_tmp] = 1  #Node 33
        classified[_tmp2] = 0  #Node 34

        _tmp = r1 & ~r11

        r18 = ndi_52 <= 0.34
        classified[_tmp & ~r18] = 0  #Node 36
        _tmp &= r18

        r19 = band1 <= 249.5
        classified[_tmp & ~r19] = 0  #Node 38
        _tmp &= r19

        r20 = ndi_43 <= 0.45
        classified[_tmp & ~r20] = 0  #Node 40
        _tmp &= r20

        r21 = band3 <= 364.5
        classified[_tmp & ~r21] = 0  #Node 42
        _tmp &= r21

        r22 = band1 <= 129.5
        classified[_tmp & r22] = 1  #Node 44
        classified[_tmp & ~r22] = 0  #Node 45

        # Completed regression tree

        return classified

    # Default to masking nothing.
    if clean_mask is None:
        clean_mask = create_default_clean_mask(dataset_in)

    # Extract dataset bands needed for calculations
    blue = dataset_in.blue
    green = dataset_in.green
    red = dataset_in.red
    nir = dataset_in.nir
    swir1 = dataset_in.swir1
    swir2 = dataset_in.swir2

    # This assumes all dataset bands will have the same dtype (should be a
    # reasonable assumption).
    dtype = blue.values.dtype

    # Save dtypes because the `astype()` calls below modify `dataset_in`.
    band_list = ['red', 'green', 'blue', 'nir', 'swir1', 'swir2']
    dataset_in_dtypes = {band: dataset_in[band].dtype for band in band_list}

    # Enforce float calculations - float64 if user specified, otherwise float32 will do.
    if enforce_float64:
        cast_to = 'float64' if dtype != 'float64' else None
    elif dtype != 'float64' and dtype != 'float32':
        cast_to = 'float32'
    else:
        cast_to = None

    if cast_to is not None:
        for band in (blue, green, red, nir, swir1, swir2):
            band.values = band.values.astype(cast_to)

    classified = _run_regression(blue.values, green.values, red.values, nir.values, swir1.values, swir2.values)

    # Only clear pixels keep their classification; everything else is no_data.
    classified_clean = np.full(classified.shape, no_data, dtype='float64')
    classified_clean[clean_mask] = classified[clean_mask]

    # Create xarray of data
    if mosaic:
        dims = [y_coord, x_coord]
    else:
        dims = [time_coord, y_coord, x_coord]
    coords = [dataset_in[dim] for dim in dims]

    data_array = xr.DataArray(classified_clean, coords=coords, dims=dims)
    dataset_out = xr.Dataset({'wofs': data_array}, coords=dict(zip(dims, coords)))

    # Handle datatype conversions. The return value is intentionally unused, as
    # in the original; this presumably relies on the helper updating
    # `dataset_in` in place - TODO confirm against restore_or_convert_dtypes.
    restore_or_convert_dtypes(None, band_list, dataset_in_dtypes, dataset_in, no_data)
    return dataset_out
def frac_coverage_classify(dataset_in, clean_mask=None, no_data=-9999):
    """
    Description:
      Performs fractional coverage algorithm on given dataset.
    Assumption:
      - The implemented algorithm is defined for Landsat 5/Landsat 7; in order for it to
        be used for Landsat 8, the bands will need to be adjusted
    References:
      - Guerschman, Juan P., et al. "Assessing the effects of site heterogeneity and soil
        properties when unmixing photosynthetic vegetation, non-photosynthetic vegetation
        and bare soil fractions from Landsat and MODIS data." Remote Sensing of Environment
        161 (2015): 12-26.
    -----
    Inputs:
      dataset_in (xarray.Dataset) - dataset retrieved from the Data Cube (can be a derived
        product, such as a cloudfree mosaic); should contain
          coordinates: latitude, longitude
          variables: blue, green, red, nir, swir1, swir2
        The result is reshaped to (latitude.size, longitude.size, 3), so the bands
        must be two-dimensional (a single acquisition or mosaic).
    Optional Inputs:
      clean_mask (nd numpy array with dtype boolean) - true for values user considers clean;
        if none is provided, one is obtained from `create_default_clean_mask(dataset_in)`.
      no_data (int/float) - value written to pixels that are not clean; default: -9999
    Output:
      dataset_out (xarray.Dataset) - fractional coverage results (float32); containing
          coordinates: latitude, longitude
          variables: bs, pv, npv
        where bs -> bare soil, pv -> photosynthetic vegetation, npv -> non-photosynthetic vegetation
    """
    # Default to masking nothing.
    if clean_mask is None:
        clean_mask = create_default_clean_mask(dataset_in)

    mosaic_clean_mask = clean_mask.flatten()

    # Scaled reflectance per band as flat float64 vectors; unclean pixels are NaN.
    reflectance = []
    for band in [
            dataset_in.blue.values, dataset_in.green.values, dataset_in.red.values,
            dataset_in.nir.values, dataset_in.swir1.values, dataset_in.swir2.values
    ]:
        band = (band.astype(np.float32) * 0.0001).flatten()
        band_clean = np.full(band.shape, np.nan)
        band_clean[mosaic_clean_mask] = band[mosaic_clean_mask]
        reflectance.append(band_clean)

    num_bands = len(reflectance)
    log_reflectance = [np.log(band) for band in reflectance]
    band_pairs = [(b, b2) for b in range(num_bands) for b2 in range(b + 1, num_bands)]

    # Feature columns, in this order: the 6 bands, their 6 logs, 6 band*log
    # products, 15 pairwise band products, 15 pairwise log products and 15
    # normalized differences. Building the list first and stacking once avoids
    # re-copying the whole matrix for every added column.
    features = list(reflectance)
    features.extend(log_reflectance)
    features.extend(np.multiply(reflectance[b], log_reflectance[b]) for b in range(num_bands))
    features.extend(np.multiply(reflectance[b], reflectance[b2]) for b, b2 in band_pairs)
    features.extend(np.multiply(log_reflectance[b], log_reflectance[b2]) for b, b2 in band_pairs)
    features.extend(
        np.divide(reflectance[b2] - reflectance[b], reflectance[b2] + reflectance[b]) for b, b2 in band_pairs)

    band_stack = np.nan_to_num(np.column_stack(features))  # Now a n x 63 matrix (assuming one acquisition)

    ones = np.ones((band_stack.shape[0], 1))
    band_stack = np.concatenate((band_stack, ones), axis=1)  # Now a n x 64 matrix (assuming one acquisition)

    # One column per cover fraction. After the sum-to-one row is appended the
    # matrix must have as many rows as band_stack has columns for nnls() to
    # accept it - NOTE(review): confirm the row count of the CSV.
    end_members = np.loadtxt(csv_file_path, delimiter=',')

    SumToOneWeight = 0.02
    sum_to_one_row = np.full((1, end_members.shape[1]), SumToOneWeight)
    end_members = np.concatenate((end_members, sum_to_one_row), axis=0).astype(np.float32)

    # n x 3 result, pre-filled with no_data so only clean pixels need solving.
    result = np.full((band_stack.shape[0], end_members.shape[1]), no_data, dtype=np.float32)

    for i in np.flatnonzero(mosaic_clean_mask):
        # Non-negative least squares, clipped to [0, 2.54], scaled by 100 and
        # truncated to whole numbers.
        result[i, :] = (opt.nnls(end_members, band_stack[i, :])[0].clip(0, 2.54) * 100).astype(np.int16)

    latitude = dataset_in.latitude
    longitude = dataset_in.longitude

    result = result.reshape(latitude.size, longitude.size, 3)

    pv_band = result[:, :, 0]
    npv_band = result[:, :, 1]
    bs_band = result[:, :, 2]

    rapp_bands = collections.OrderedDict([('bs', (['latitude', 'longitude'], bs_band)),
                                          ('pv', (['latitude', 'longitude'], pv_band)),
                                          ('npv', (['latitude', 'longitude'], npv_band))])

    rapp_dataset = xr.Dataset(rapp_bands, coords={'latitude': latitude, 'longitude': longitude})

    return rapp_dataset