def restore_or_convert_dtypes(dtype_for_all=None,
                              dataset_in_dtypes=None,
                              dataset_out=None,
                              no_data=-9999):
    """
    Converts the datatypes of the data variables of an xarray Dataset.

    Before casting to an integer type, NaN values are replaced with `no_data`
    (integers cannot represent NaN). Before casting to a floating-point type,
    values equal to `no_data` are replaced with NaN.

    Parameters
    ----------
    dtype_for_all: str or numpy.dtype
        A string denoting a Python datatype name (e.g. int, float) or a NumPy dtype (e.g.
        numpy.int16, numpy.float32) to convert all of the data to.
    dataset_in_dtypes: dict
        A dictionary mapping data variable names to datatypes.
        One of `dtype_for_all` or `dataset_in_dtypes` must be `None`.
        If both are `None`, `dataset_out` is returned unchanged.
    dataset_out: xarray.Dataset
        The Dataset to convert. When `dataset_in_dtypes` is used, the converted
        variables are assigned back into this object.
    no_data: int, float, or None
        The no data value (default -9999). Set to None if there is no such value,
        in which case no NaN/`no_data` substitution is performed.

    Returns
    -------
    dataset_out: xarray.Dataset
        The output Dataset.
    """
    assert dtype_for_all is None or dataset_in_dtypes is None, \
        "One of `dtype_for_all` or `dataset_in_dtypes` must be `None`."
    if dtype_for_all is not None:
        return _cast_with_no_data(dataset_out, dtype_for_all, no_data)
    if dataset_in_dtypes is None:
        # Nothing was requested, so there is nothing to convert.
        return dataset_out
    # Restore dtypes to state before masking, using each variable's own dtype.
    for data_var, data_var_dtype in dataset_in_dtypes.items():
        dataset_out[data_var] = _cast_with_no_data(dataset_out[data_var],
                                                   data_var_dtype, no_data)
    return dataset_out


def _cast_with_no_data(data, dtype, no_data):
    """Casts `data` to `dtype`, swapping NaN and `no_data` as the target type requires."""
    if no_data is not None:
        # Integer types can't represent nan. This also works for the Python int type.
        if np.issubdtype(dtype, np.integer):
            data = data.where(~xr_nan(data), no_data)
        # Convert the no_data value to nan for float types.
        # `np.floating` matches every float width and the Python float type.
        if np.issubdtype(dtype, np.floating):
            data = data.where(data != no_data, np.nan)
    return data.astype(dtype)
Beispiel #2
0
def clustering_pre_processing(dataset_in, bands):
    """
    Builds a feature matrix for clustering from the NaN-free pixels of a Dataset.

    Parameters
    ----------
    dataset_in: xarray.Dataset
        The Dataset to extract features from.
    bands: list of str
        Names of the data variables in `dataset_in` to use as features.

    Returns
    -------
    features: numpy.ndarray
        Array with one row per selected pixel and one column per entry of `bands`.
    no_nan_mask: numpy.ndarray
        Boolean mask over the dimensions of `dataset_in` that is True where
        none of the Dataset's data variables is NaN.
    """
    # Stack every data variable along a trailing 'variable' axis so a pixel can
    # be rejected as soon as any variable is NaN there.
    dim_names = list(dataset_in.dims)
    stacked = dataset_in.to_array().transpose(*dim_names, 'variable')
    has_nan = xr_nan(stacked).any('variable')
    no_nan_mask = xr_not(has_nan).values
    # One flat vector of valid pixels per requested band: shape (bands, pixels).
    per_band = [
        dataset_in[band].values[no_nan_mask].flatten() for band in bands
    ]
    # Reorient to (pixels, bands).
    features = np.swapaxes(np.array(per_band), 0, 1)
    # Side effect: changes NumPy's global print options to suppress
    # scientific notation.
    np.set_printoptions(suppress=True)
    return features, no_nan_mask
def lone_object_filter(image,
                       min_size=2,
                       connectivity=1,
                       kernel_size=3,
                       unique_vals=None):
    """
    Replaces isolated, contiguous regions of values in a raster with values
    representing the surrounding pixels.

    More specifically, this reduces noise in a raster by setting
    contiguous regions of values smaller than a specified minimum size to
    the modal value in a specified neighborhood.

    The default argument values filter out lone, singular pixels.
    This filter is not idempotent, so it may need to be applied repeatedly
    until the output stops changing or the results are acceptable.

    Internally the image is rescaled to 8-bit values, so images whose values
    span more than 256 distinct levels lose precision.

    Args:
        image (numpy.ndarray or dask.array.Array):
            The image to filter. Must not contain NaNs.
        min_size (int):
            Contiguous regions with fewer pixels than this are set to the
            modal value of their neighborhood. Should be at least 2.
            Setting this to 1 is pointless, since this function will then do nothing to the raster.
        connectivity (int):
            The maximum distance between any two pixels such that they are
            considered one group. For example, a connectivity of 1 considers
            only adjacent values to be within one group (contiguous areas), but
            a connectivity of 2 also considers diagonally connected values to be
            within one group. Must be greater than 0.
        kernel_size (int or float):
            The diameter of the circular kernel to use for the modal filter.
            Must be odd.
            If there are still pixels that should be set to the
            modal value of their neighborhood, increase this value to remove them.
            Note that the larger this value is, the more detail will tend to be
            lost in the image and the slower this function will run.
        unique_vals: numpy.ndarray
            The unique values in `image`. If this is not supplied, the unique values will be
            determined on each call.

    Returns:
        The filtered image, with the same dtype as `image`.

    Raises:
        TypeError: If `image` is neither a NumPy array nor a dask array.

    Authors:
        Andrew Lubawy ([email protected])\n
        John Rattz    ([email protected])
    """
    # Importing the submodule guarantees `dask.array` is loaded.
    import dask.array
    from .clean_mask import create_circular_mask
    from skimage.filters.rank import modal
    from skimage.morphology import remove_small_objects
    from .unique import dask_array_uniques

    assert kernel_size % 2 == 1, "The parameter `kernel_size` must be an odd number."
    is_numpy = isinstance(image, np.ndarray)
    if not (is_numpy or isinstance(image, dask.array.core.Array)):
        raise TypeError(
            "The parameter `image` must be a numpy.ndarray or a dask array.")

    image_min, image_max = image.min(), image.max()
    image_dtype = image.dtype
    # A constant image has nothing to filter, and rescaling it would divide by
    # zero. Only checked for NumPy input so dask input stays lazy.
    if is_numpy and image_max == image_min:
        return image.copy()
    value_range = image_max - image_min

    def to_uint8(values):
        # Round instead of truncating so each value maps to its nearest 8-bit
        # level; truncation can shift values by one level.
        return (((values - image_min) / value_range) *
                255).round().astype(np.uint8)

    image = to_uint8(image)
    kernel = create_circular_mask(kernel_size, kernel_size)
    if is_numpy:
        modal_filtered = modal(image, kernel)
    else:
        # NOTE(review): newer scikit-image releases name this parameter
        # `footprint` - confirm against the installed version.
        modal_filtered = image.map_blocks(modal, selem=kernel)

    image_da = xr.DataArray(image)
    if unique_vals is None:
        unique_vals = np.unique(image) if is_numpy else dask_array_uniques(
            image)
    else:  # Scale to the range [0, 255], exactly like the image.
        unique_vals = to_uint8(unique_vals)

    for i, val in enumerate(unique_vals):
        # Determine the pixels with this value that will not be filtered (True to keep).
        if is_numpy:
            layer = remove_small_objects(image == val,
                                         min_size=min_size,
                                         connectivity=connectivity)
        else:
            layer = (image == val).map_blocks(remove_small_objects,
                                              min_size=min_size,
                                              connectivity=connectivity)
        # Select the values from the image that will remain (filter it).
        kept = image_da.where(layer)
        filtered = kept if i == 0 else filtered.combine_first(kept)
    # Fill in the removed values with their local modes.
    filtered_nan_mask = xr_nan(filtered).data
    filtered = filtered.where(~filtered_nan_mask, modal_filtered)
    # Undo the 8-bit scaling.
    filtered = (filtered / 255) * value_range + image_min
    if np.issubdtype(image_dtype, np.integer):
        # `astype` truncates toward zero, so round first to recover the
        # nearest original integer value.
        filtered = filtered.round()
    return filtered.astype(image_dtype).data
Beispiel #4
0
def create_output_products(self, data, task_id=None):
    """Create the final output products for this algorithm.

    Open the final dataset and metadata and generate all remaining metadata.
    Convert and write the dataset to various formats and register all values in the task model.
    Update status and exit.

    Args:
        data: tuple in the format of processing_task function - path, metadata, and {chunk ids}
        task_id: primary key used to look up the TsmTask.

    Returns:
        True once all products have been generated; None if
        `check_cancel_task` reports that the task should stop.
    """
    task = TsmTask.objects.get(pk=task_id)
    if check_cancel_task(self, task):
        return

    def result_file(filename):
        # Path of `filename` inside the task's result directory.
        return os.path.join(task.get_result_path(), filename)

    def temp_file(filename):
        # Path of `filename` inside the task's temporary directory.
        return os.path.join(task.get_temp_path(), filename)

    full_metadata = data[1]
    # NOTE(review): datasets opened in this function are never explicitly
    # closed - confirm whether that is intentional.
    dataset = xr.open_dataset(data[0]).astype('float64')
    dataset['variability'] = dataset['max'] - dataset['normalized_data']
    dataset['wofs'] = dataset.wofs / dataset.wofs_total_clean
    # Replace NaN values with 0.
    dataset = dataset.where(~xr_nan(dataset), 0)
    masked = mask_water_quality(dataset, dataset.wofs)

    # Register the output locations on the task model.
    task.result_path = result_file("tsm.png")
    task.clear_observations_path = result_file("clear_observations.png")
    task.water_percentage_path = result_file("water_percentage.png")
    task.data_path = result_file("data_tif.tif")
    task.data_netcdf_path = result_file("data_netcdf.nc")
    if task.animated_product.animation_id != 'none':
        task.animation_path = result_file("animation.gif")
    else:
        task.animation_path = ""
    task.final_metadata_from_dataset(masked)
    task.metadata_from_dict(full_metadata)

    bands = [task.query_type.data_variable, 'total_clean', 'wofs']
    band_paths = [
        task.result_path,
        task.clear_observations_path,
        task.water_percentage_path,
    ]

    export_xarray_to_netcdf(masked, task.data_netcdf_path)

    write_geotiff_from_xr(
        task.data_path,
        masked,
        bands=bands,
        no_data=task.satellite.no_data_value)

    # One PNG per band, each with its own color scale.
    for band, band_path in zip(bands, band_paths):
        write_single_band_png_from_xr(
            band_path,
            masked,
            band,
            color_scale=task.color_scales[band],
            fill_color='black',
            interpolate=False,
            no_data=task.satellite.no_data_value)

    if task.animated_product.animation_id != "none":
        with imageio.get_writer(task.animation_path, mode='I', duration=1.0) as writer:
            for index in range(len(full_metadata)):
                path = temp_file("animation_final_{}.nc".format(index))
                # Indices without a frame file on disk are skipped.
                if not os.path.exists(path):
                    continue
                png_path = temp_file("animation_{}.png".format(index))
                if task.animated_product.animation_id != "scene":
                    animated_data = mask_water_quality(
                        xr.open_dataset(path).astype('float64'), dataset.wofs)
                else:
                    animated_data = xr.open_dataset(path)
                write_single_band_png_from_xr(
                    png_path,
                    animated_data,
                    task.animated_product.data_variable,
                    color_scale=task.color_scales[task.animated_product.data_variable],
                    fill_color='black',
                    interpolate=False,
                    no_data=task.satellite.no_data_value)
                frame = imageio.imread(png_path)
                writer.append_data(frame)

    dates = [
        datetime.strptime(date_string, "%m/%d/%Y")
        for date_string in task._get_field_as_list('acquisition_list')
    ]
    # The plot is only created when there is more than one acquisition date.
    if len(dates) > 1:
        task.plot_path = result_file("plot_path.png")
        create_2d_plot(
            task.plot_path,
            dates=dates,
            datasets=task._get_field_as_list('clean_pixel_percentages_per_acquisition'),
            data_labels="Clean Pixel Percentage (%)",
            titles="Clean Pixel Percentage Per Acquisition")

    logger.info("All products created.")
    task.update_bounds_from_dataset(masked)
    task.complete = True
    task.execution_end = datetime.now()
    task.update_status("OK", "All products have been generated. Your result will be loaded on the map.")
    return True