def _replace_no_data_and_cast(data, dtype, no_data):
    """Swap NaN / `no_data` as appropriate for `dtype`, then cast `data` to `dtype`."""
    if no_data is not None:
        if np.issubdtype(dtype, np.integer):  # This also works for Python int type.
            # Integer types can't represent nan, so store `no_data` instead.
            data = data.where(~xr_nan(data), no_data)
        elif np.issubdtype(dtype, np.floating):  # Also matches Python float.
            # Float types can represent missing data natively as nan.
            data = data.where(data != no_data, np.nan)
    return data.astype(dtype)


def restore_or_convert_dtypes(dtype_for_all=None, dataset_in_dtypes=None, dataset_out=None, no_data=-9999):
    """
    Converts datatypes of data variables in an xarray Dataset.

    Before casting to an integer type, NaN values are replaced with `no_data`
    (integers cannot represent NaN). Before casting to a floating point type,
    values equal to `no_data` are replaced with NaN.

    Parameters
    ----------
    dtype_for_all: str or numpy.dtype
        A string denoting a Python datatype name (e.g. int, float) or a NumPy dtype (e.g.
        numpy.int16, numpy.float32) to convert every data variable to.
    dataset_in_dtypes: dict
        A dictionary mapping data variable names to datatypes. Only the listed
        variables are converted.
        One of `dtype_for_all` or `dataset_in_dtypes` must be `None`.
        If both are `None`, `dataset_out` is returned unchanged.
    dataset_out: xarray.Dataset
        The Dataset whose data variables are to be converted.
        When `dataset_in_dtypes` is used, the converted variables are assigned
        back into this object, so it is modified in place.
    no_data: int, float, or None
        The no data value (default -9999). Set to None if there is no such value,
        in which case no NaN / no data substitution is performed before casting.

    Returns
    -------
    dataset_out: xarray.Dataset
        The output Dataset.
    """
    assert dtype_for_all is None or dataset_in_dtypes is None, \
        "One of `dtype_for_all` or `dataset_in_dtypes` must be `None`."
    if dtype_for_all is not None:
        return _replace_no_data_and_cast(dataset_out, dtype_for_all, no_data)
    if dataset_in_dtypes is None:
        # Neither conversion was requested, so there is nothing to do.
        return dataset_out
    # Restore dtypes to state before masking. The decision between the integer
    # and float handling must be based on each variable's own dtype.
    for data_var, data_var_dtype in dataset_in_dtypes.items():
        dataset_out[data_var] = _replace_no_data_and_cast(
            dataset_out[data_var], data_var_dtype, no_data)
    return dataset_out
def clustering_pre_processing(dataset_in, bands):
    """
    Builds a feature matrix for clustering from the pixels that have no NaN values.

    Parameters
    ----------
    dataset_in: xarray.Dataset
        The Dataset to extract features from. A pixel is discarded if any of
        the Dataset's data variables is flagged by `xr_nan` at that pixel.
    bands: list of str
        Names of the data variables to use as features, in column order.

    Returns
    -------
    features: numpy.ndarray
        The values of `bands` at the retained pixels, with one row per retained
        pixel and one column per band.
    no_nan_mask: numpy.ndarray
        Boolean mask over the dimensions of `dataset_in` marking the retained pixels.

    Notes
    -----
    This function also calls `np.set_printoptions(suppress=True)`, which changes
    NumPy's global print options.
    """
    # Put all data variables along a trailing 'variable' axis so that a pixel
    # can be tested across every variable at once.
    dim_names = list(dataset_in.dims)
    stacked = dataset_in.to_array().transpose(*dim_names, 'variable')
    has_nan = xr_nan(stacked).any('variable')
    no_nan_mask = xr_not(has_nan).values

    # One row per band, holding only the retained pixels...
    per_band = [dataset_in[band].values[no_nan_mask].flatten() for band in bands]
    # ...then swap the axes so that each row is a pixel and each column a band.
    features = np.swapaxes(np.array(per_band), 0, 1)

    np.set_printoptions(suppress=True)
    return features, no_nan_mask
def lone_object_filter(image, min_size=2, connectivity=1, kernel_size=3, unique_vals=None):
    """
    Replaces isolated, contiguous regions of values in a raster with values
    representing the surrounding pixels.

    More specifically, this reduces noise in a raster by setting
    contiguous regions of values smaller than a specified minimum size to
    the modal value in a specified neighborhood.

    The default argument values filter out lone, singular pixels.
    This filter is not idempotent, so it may need to be applied repeatedly
    until the output stops changing or the results are acceptable.

    Internally the image is rescaled to 8-bit levels (0-255) and scaled back
    afterwards, so values are quantized to 256 levels. Both directions round
    to the nearest level, and integer images are rounded before being cast
    back to their original dtype.

    Args:
        image (numpy.ndarray or dask.array.Array):
            The image to filter. Must not contain NaNs.
        min_size (int):
            Defines the minimum number of contiguous pixels that will not
            be set to the modal value of their neighborhood. Must be at least 2.
            Setting this to 1 is pointless, since this function will then do
            nothing to the raster.
        connectivity (int):
            The maximum distance between any two pixels such that they are
            considered one group. For example, a connectivity of 1 considers
            only adjacent values to be within one group (contiguous areas), but
            a connectivity of 2 also considers diagonally connected values to be
            within one group. Must be greater than 0.
        kernel_size (int or float):
            The diameter of the circular kernel to use for the modal filter.
            Must be an odd number.
            If there are still pixels that should be set to the
            modal value of their neighborhood, increase this value to remove them.
            Note that the larger this value is, the more detail will tend to be
            lost in the image and the slower this function will run.
        unique_vals (numpy.ndarray):
            The unique values in `image`. If this is not supplied, the unique
            values will be determined on each call.

    Returns:
        The filtered image, with the same dtype as `image`.

    Raises:
        AssertionError: If `kernel_size` is not odd.
        TypeError: If `image` is neither a NumPy array nor a Dask array.
        ValueError: If `unique_vals` is supplied but empty.

    Authors:
        Andrew Lubawy ([email protected])
        John Rattz ([email protected])
    """
    # Importing the submodule guarantees that `dask.array` is actually loaded.
    import dask.array
    from .clean_mask import create_circular_mask
    from skimage.filters.rank import modal
    from skimage.morphology import remove_small_objects
    from .unique import dask_array_uniques

    assert kernel_size % 2 == 1, "The parameter `kernel_size` must be an odd number."

    is_numpy = isinstance(image, np.ndarray)
    if not is_numpy and not isinstance(image, dask.array.core.Array):
        raise TypeError("The parameter `image` must be a NumPy array or a Dask array, "
                        "not {}.".format(type(image).__name__))

    image_min, image_max = image.min(), image.max()
    image_dtype = image.dtype
    # A constant image has nothing to filter, and rescaling it would divide by
    # zero. (Only checked eagerly for NumPy to avoid forcing a Dask compute.)
    if is_numpy and image_max == image_min:
        return image.copy()
    value_range = image_max - image_min

    def to_uint8(values):
        # Scale to the range [0, 255], rounding to the nearest level.
        return (((values - image_min) / value_range) * 255).round().astype(np.uint8)

    kernel = create_circular_mask(kernel_size, kernel_size)
    image = to_uint8(image)
    if is_numpy:
        modal_filtered = modal(image, kernel)
    else:
        # Pass the kernel positionally, exactly as in the NumPy branch.
        modal_filtered = image.map_blocks(lambda block: modal(block, kernel))

    image_da = xr.DataArray(image)
    if unique_vals is None:
        unique_vals = np.unique(image) if is_numpy else dask_array_uniques(image)
    else:
        # The supplied values are in the original units of `image`.
        unique_vals = to_uint8(unique_vals)

    filtered = None
    for val in unique_vals:
        # Determine the pixels with this value that will not be filtered (True to keep).
        if is_numpy:
            layer = remove_small_objects(image == val, min_size=min_size,
                                         connectivity=connectivity)
        else:
            layer = (image == val).map_blocks(remove_small_objects, min_size=min_size,
                                              connectivity=connectivity)
        # Select the values from the image that will remain (filter it).
        kept = image_da.where(layer)
        filtered = kept if filtered is None else filtered.combine_first(kept)
    if filtered is None:
        raise ValueError("The parameter `unique_vals` must not be empty.")

    # Fill in the removed values with their local modes.
    filtered_nan_mask = xr_nan(filtered).data
    filtered = filtered.where(~filtered_nan_mask, modal_filtered)

    # Undo the scaling. Integer images must be rounded first, since the cast
    # truncates and would otherwise shift values down (e.g. 0.996 -> 0).
    filtered = (filtered / 255) * value_range + image_min
    if np.issubdtype(image_dtype, np.integer):
        filtered = filtered.round()
    return filtered.astype(image_dtype).data
def create_output_products(self, data, task_id=None):
    """Create the final output products for this algorithm.

    Opens the final dataset and metadata and generates all remaining metadata.
    Converts and writes the dataset to various formats (NetCDF, GeoTIFF, one PNG
    per band, and optionally an animated GIF and a clean-pixel plot) and
    registers all values in the task model. Updates the task status and exits.

    Args:
        data: tuple in the format of processing_task function - path, metadata, and {chunk ids}
        task_id: primary key of the TsmTask to finalize.

    Returns:
        None if the task was cancelled, otherwise True.
    """
    task = TsmTask.objects.get(pk=task_id)
    if check_cancel_task(self, task):
        return

    def result_file(filename):
        # Path of an output product inside the task's result directory.
        return os.path.join(task.get_result_path(), filename)

    full_metadata = data[1]

    # Load the combined dataset and derive the remaining variables.
    dataset = xr.open_dataset(data[0]).astype('float64')
    dataset['variability'] = dataset['max'] - dataset['normalized_data']
    dataset['wofs'] = dataset.wofs / dataset.wofs_total_clean
    dataset = dataset.where(~xr_nan(dataset), 0)
    dataset_masked = mask_water_quality(dataset, dataset.wofs)

    # Register the output locations on the task.
    task.result_path = result_file("tsm.png")
    task.clear_observations_path = result_file("clear_observations.png")
    task.water_percentage_path = result_file("water_percentage.png")
    task.data_path = result_file("data_tif.tif")
    task.data_netcdf_path = result_file("data_netcdf.nc")
    if task.animated_product.animation_id != 'none':
        task.animation_path = result_file("animation.gif")
    else:
        task.animation_path = ""

    task.final_metadata_from_dataset(dataset_masked)
    task.metadata_from_dict(full_metadata)

    # Each band is written to the PNG path at the same position.
    bands = [task.query_type.data_variable, 'total_clean', 'wofs']
    band_paths = [task.result_path, task.clear_observations_path, task.water_percentage_path]

    export_xarray_to_netcdf(dataset_masked, task.data_netcdf_path)
    write_geotiff_from_xr(task.data_path, dataset_masked, bands=bands,
                          no_data=task.satellite.no_data_value)

    for band, band_path in zip(bands, band_paths):
        write_single_band_png_from_xr(
            band_path,
            dataset_masked,
            band,
            color_scale=task.color_scales[band],
            fill_color='black',
            interpolate=False,
            no_data=task.satellite.no_data_value)

    if task.animated_product.animation_id != "none":
        with imageio.get_writer(task.animation_path, mode='I', duration=1.0) as writer:
            for index in range(len(full_metadata)):
                path = os.path.join(task.get_temp_path(),
                                    "animation_final_{}.nc".format(index))
                # Indices without an animation file on disk are skipped.
                if not os.path.exists(path):
                    continue
                png_path = os.path.join(task.get_temp_path(),
                                        "animation_{}.png".format(index))
                # Only "scene" animations use the frame data without water masking.
                if task.animated_product.animation_id != "scene":
                    animated_data = mask_water_quality(
                        xr.open_dataset(path).astype('float64'), dataset.wofs)
                else:
                    animated_data = xr.open_dataset(path)
                write_single_band_png_from_xr(
                    png_path,
                    animated_data,
                    task.animated_product.data_variable,
                    color_scale=task.color_scales[task.animated_product.data_variable],
                    fill_color='black',
                    interpolate=False,
                    no_data=task.satellite.no_data_value)
                writer.append_data(imageio.imread(png_path))

    dates = [datetime.strptime(acquisition, "%m/%d/%Y")
             for acquisition in task._get_field_as_list('acquisition_list')]
    # The plot is only created when there is more than one acquisition.
    if len(dates) > 1:
        task.plot_path = os.path.join(task.get_result_path(), "plot_path.png")
        create_2d_plot(
            task.plot_path,
            dates=dates,
            datasets=task._get_field_as_list('clean_pixel_percentages_per_acquisition'),
            data_labels="Clean Pixel Percentage (%)",
            titles="Clean Pixel Percentage Per Acquisition")

    logger.info("All products created.")
    task.update_bounds_from_dataset(dataset_masked)
    task.complete = True
    task.execution_end = datetime.now()
    task.update_status("OK", "All products have been generated. Your result will be loaded on the map.")
    return True