Beispiel #1
0
def test_2d_convolution_gpu(convolve_2d_data, kernel_circle_1_1_1,
                            convolution_kernel_circle_1_1_1,
                            kernel_annulus_2_2_2_1,
                            convolution_kernel_annulus_2_2_1):
    """Check ``convolve_2d`` on CuPy input against the expected fixtures.

    Three kernels are exercised: a 1x1 kernel of ones (the output must equal
    the input), the circle kernel fixture and the annulus kernel fixture.
    Every result must be a ``cupy.ndarray`` whose host copy matches the
    corresponding expected array, with NaNs comparing equal.

    Finally, a dask array wrapping CuPy data must make ``convolve_2d`` raise
    ``NotImplementedError``.
    """
    import cupy
    cupy_data = cupy.asarray(convolve_2d_data)

    kernel_custom = np.ones((1, 1))
    result_kernel_custom = convolve_2d(cupy_data, kernel_custom)
    assert isinstance(result_kernel_custom, cupy.ndarray)
    # kernel is [[1]], thus the result equals input data
    np.testing.assert_allclose(result_kernel_custom.get(),
                               convolve_2d_data,
                               equal_nan=True)

    result_kernel_circle = convolve_2d(cupy_data, kernel_circle_1_1_1)
    assert isinstance(result_kernel_circle, cupy.ndarray)
    np.testing.assert_allclose(result_kernel_circle.get(),
                               convolution_kernel_circle_1_1_1,
                               equal_nan=True)

    result_kernel_annulus = convolve_2d(cupy_data, kernel_annulus_2_2_2_1)
    assert isinstance(result_kernel_annulus, cupy.ndarray)
    np.testing.assert_allclose(result_kernel_annulus.get(),
                               convolution_kernel_annulus_2_2_1,
                               equal_nan=True)

    # dask + cupy case not implemented
    dask_cupy_agg = xr.DataArray(
        da.from_array(cupy.asarray(convolve_2d_data), chunks=(3, 3)))
    # pytest.raises itself fails the test when nothing is raised; a statement
    # placed after the raising call inside the block would never execute.
    with pytest.raises(NotImplementedError):
        convolve_2d(dask_cupy_agg.data, kernel_custom)
Beispiel #2
0
def _hotspots_dask_numpy(raster, kernel):
    """Build the hotspot computation for a raster whose data is a dask array.

    The raster data is convolved with ``kernel`` scaled by its own sum, the
    result is turned into z-scores using the raster-wide NaN-aware mean and
    standard deviation, and ``_calc_hotspots_numpy`` is mapped over the
    z-scores with an overlap of half the kernel size on each axis.

    No check for a zero standard deviation is made here: testing it would
    force an early compute of the dask graph.

    Parameters
    ----------
    raster
        Object exposing the values to analyse through ``.data``.
    kernel
        2D array; only its shape and sum are used besides the convolution.

    Returns
    -------
    The result of ``map_overlap`` applied to the z-score array.
    """
    source = raster.data

    # focal mean: convolve with the kernel normalised by the sum of its weights
    focal_mean = convolve_2d(source, kernel / kernel.sum())

    # z-scores relative to the statistics of the whole raster
    overall_mean = da.nanmean(source)
    overall_std = da.nanstd(source)
    z_scores = (focal_mean - overall_mean) / overall_std

    # neighbouring chunks must share half a kernel on each axis
    overlap = (kernel.shape[0] // 2, kernel.shape[1] // 2)

    return z_scores.map_overlap(
        partial(_calc_hotspots_numpy),
        depth=overlap,
        boundary=np.nan,
        meta=np.array(()),
    )
Beispiel #3
0
def test_convolution_numpy(convolve_2d_data, kernel_circle_1_1_1,
                           convolution_kernel_circle_1_1_1,
                           kernel_annulus_2_2_2_1,
                           convolution_kernel_annulus_2_2_1):
    """Check ``convolve_2d`` on NumPy input against the expected fixtures.

    Each (kernel, expected) pair must yield an ``np.ndarray`` that matches
    the expected array, with NaNs comparing equal.
    """
    scenarios = (
        # kernel is [[1]], thus the result equals input data
        (np.ones((1, 1)), convolve_2d_data),
        (kernel_circle_1_1_1, convolution_kernel_circle_1_1_1),
        (kernel_annulus_2_2_2_1, convolution_kernel_annulus_2_2_1),
    )

    for kernel, expected in scenarios:
        actual = convolve_2d(convolve_2d_data, kernel)
        assert isinstance(actual, np.ndarray)
        np.testing.assert_allclose(actual, expected, equal_nan=True)
Beispiel #4
0
def test_convolution_dask_numpy(convolve_2d_data, kernel_circle_1_1_1,
                                convolution_kernel_circle_1_1_1,
                                kernel_annulus_2_2_2_1,
                                convolution_kernel_annulus_2_2_1):
    """Check ``convolve_2d`` on dask input (3x3 chunks) against the fixtures.

    Every result must stay a ``da.Array`` and, once computed, match the
    expected array with NaNs comparing equal.
    """
    dask_data = da.from_array(convolve_2d_data, chunks=(3, 3))

    def check(kernel, expected):
        # the convolution must stay lazy; only the comparison computes it
        lazy_result = convolve_2d(dask_data, kernel)
        assert isinstance(lazy_result, da.Array)
        np.testing.assert_allclose(lazy_result.compute(),
                                   expected,
                                   equal_nan=True)

    # kernel is [[1]], thus the result equals input data
    check(np.ones((1, 1)), convolve_2d_data)
    check(kernel_circle_1_1_1, convolution_kernel_circle_1_1_1)
    check(kernel_annulus_2_2_2_1, convolution_kernel_annulus_2_2_1)
Beispiel #5
0
def test_convolution_numpy(convolve_2d_data, convolution_custom_kernel,
                           kernel_circle_1_1_1,
                           convolution_kernel_circle_1_1_1,
                           kernel_annulus_2_2_2_1,
                           convolution_kernel_annulus_2_2_1):
    """Check ``convolve_2d`` on NumPy input for custom, circle and annulus kernels.

    ``convolution_custom_kernel`` is unpacked into a kernel and its expected
    result. Each result must be an ``np.ndarray`` matching its expected
    array, with NaNs comparing equal.
    """
    kernel_custom, expected_result_custom = convolution_custom_kernel

    kernels = (kernel_custom, kernel_circle_1_1_1, kernel_annulus_2_2_2_1)
    expectations = (expected_result_custom,
                    convolution_kernel_circle_1_1_1,
                    convolution_kernel_annulus_2_2_1)

    for kernel, expected in zip(kernels, expectations):
        outcome = convolve_2d(convolve_2d_data, kernel)
        assert isinstance(outcome, np.ndarray)
        np.testing.assert_allclose(outcome, expected, equal_nan=True)
Beispiel #6
0
def _hotspots_cupy(raster, kernel):
    """Compute hotspots for a raster whose ``.data`` is handled with CuPy.

    The data is convolved with ``kernel`` scaled by its own sum, converted to
    z-scores with the raster-wide NaN-aware mean and standard deviation, and
    handed to ``_calc_hotspots_cupy``.

    Raises
    ------
    ValueError
        If the data's scalar type is neither an integer nor a floating type.
    ZeroDivisionError
        If the standard deviation of the raster values is 0.
    """
    scalar_type = raster.data.dtype.type
    if not issubclass(scalar_type, (cupy.integer, cupy.floating)):
        raise ValueError("data type must be integer or float")

    # focal mean: convolve with the kernel normalised by the sum of its weights
    focal_mean = convolve_2d(raster.data, kernel / kernel.sum())

    # statistics of the whole raster, ignoring NaNs
    overall_mean = cupy.nanmean(raster.data)
    overall_std = cupy.nanstd(raster.data)
    if overall_std == 0:
        raise ZeroDivisionError(
            "Standard deviation of the input raster values is 0.")

    z_scores = (focal_mean - overall_mean) / overall_std
    return _calc_hotspots_cupy(z_scores)
Beispiel #7
0
def test_2d_convolution_gpu_equals_cpu():
    """Check that ``convolve_2d`` gives the same result on NumPy and CuPy data.

    For a 1x1 kernel, a plus-shaped kernel and a plus-shaped kernel with an
    empty centre, the CuPy result must be a ``cupy.ndarray`` whose host copy
    matches the NumPy result, with NaNs comparing equal.

    Finally, a dask array wrapping CuPy data must make ``convolve_2d`` raise
    ``NotImplementedError``.
    """
    import cupy

    # NOTE(review): `convolve_2d_data` is looked up as a module-level name
    # here, not received as a fixture argument - confirm it exists in the
    # test module.
    data = convolve_2d_data
    numpy_agg = xr.DataArray(data)
    cupy_agg = xr.DataArray(cupy.asarray(data))

    kernels = (
        np.ones((1, 1)),
        np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]]),
        np.array([[0, 1, 0], [1, 0, 1], [0, 1, 0]]),
    )

    for kernel in kernels:
        output_numpy = convolve_2d(numpy_agg.data, kernel)
        output_cupy = convolve_2d(cupy_agg.data, kernel)
        assert isinstance(output_cupy, cupy.ndarray)
        np.testing.assert_allclose(output_numpy,
                                   output_cupy.get(),
                                   equal_nan=True)

    # dask + cupy case not implemented
    dask_cupy_agg = xr.DataArray(
        da.from_array(cupy.asarray(data), chunks=(3, 3)))
    # pytest.raises itself fails the test when nothing is raised; a statement
    # placed after the raising call inside the block would never execute.
    with pytest.raises(NotImplementedError):
        convolve_2d(dask_cupy_agg.data, kernels[-1])
Beispiel #8
0
def _hotspots_cupy(raster, kernel):
    """Compute hotspots for a raster whose ``.data`` is handled with CuPy.

    The data is cast to ``float32``, convolved with ``kernel`` scaled by its
    own sum and converted to z-scores with the NaN-aware mean and standard
    deviation of the cast data. ``_run_gpu_hotspots`` is then launched with
    the z-scores and an ``int8`` output array of the same shape, which is
    returned.

    Raises
    ------
    ValueError
        If the data's scalar type is neither an integer nor a floating type.
    ZeroDivisionError
        If the standard deviation of the raster values is 0.
    """
    scalar_type = raster.data.dtype.type
    if not issubclass(scalar_type, (cupy.integer, cupy.floating)):
        raise ValueError("data type must be integer or float")

    values = raster.data.astype(cupy.float32)

    # focal mean: convolve with the kernel normalised by the sum of its weights
    focal_mean = convolve_2d(values, kernel / kernel.sum())

    # statistics of the whole raster, ignoring NaNs
    overall_mean = cupy.nanmean(values)
    overall_std = cupy.nanstd(values)
    if overall_std == 0:
        raise ZeroDivisionError(
            "Standard deviation of the input raster values is 0.")
    z_scores = (focal_mean - overall_mean) / overall_std

    # the kernel launch fills `labels` in place
    labels = cupy.zeros_like(z_scores, dtype=cupy.int8)
    grid_size, block_size = cuda_args(z_scores.shape)
    _run_gpu_hotspots[grid_size, block_size](z_scores, labels)
    return labels
Beispiel #9
0
def test_convolution():
    """Compare ``convolve_2d`` on NumPy and dask input with hand-written results.

    Three kernels are checked: ``[[1]]``, a plus shape, and a plus shape with
    an empty centre. For each one the NumPy call must return an
    ``np.ndarray`` and the dask call a ``da.Array``; both must be
    element-wise close to the expected array, with NaNs comparing equal.
    """
    data = convolve_2d_data
    dask_data = da.from_array(data, chunks=(3, 3))
    nan = np.nan

    identity_kernel = np.ones((1, 1))
    identity_expected = np.array([[0., 1., 1., 1., 1., 1.],
                                  [1., 0., 1., 1., 1., 1.],
                                  [1., 1., 0., 1., 1., 1.],
                                  [1., 1., 1., nan, 1., 1.],
                                  [1., 1., 1., 1., 0., 1.],
                                  [1., 1., 1., 1., 1., 0.]])

    # The two kernels below are 3x3, thus the border edge is 1 cell long.
    # Currently the border edge is ignored (i.e. values in edges are all NaN).
    plus_kernel = np.array([[0, 1, 0],
                            [1, 1, 1],
                            [0, 1, 0]])
    plus_expected = np.array([
        [nan, nan, nan, nan, nan, nan],
        [nan, 4., 3., 5., 5., nan],
        [nan, 3., nan, nan, nan, nan],
        [nan, 5., nan, nan, nan, nan],
        [nan, 5., nan, nan, nan, nan],
        [nan, nan, nan, nan, nan, nan]
    ])

    hollow_plus_kernel = np.array([[0, 1, 0],
                                   [1, 0, 1],
                                   [0, 1, 0]])
    hollow_plus_expected = np.array([
        [nan, nan, nan, nan, nan, nan],
        [nan, 4., 2., 4., 4., nan],
        [nan, 2., nan, nan, nan, nan],
        [nan, 4., nan, nan, nan, nan],
        [nan, 4., nan, nan, nan, nan],
        [nan, nan, nan, nan, nan, nan]
    ])

    scenarios = (
        (identity_kernel, identity_expected),
        (plus_kernel, plus_expected),
        (hollow_plus_kernel, hollow_plus_expected),
    )

    for kernel, expected in scenarios:
        numpy_output = convolve_2d(data, kernel)
        assert isinstance(numpy_output, np.ndarray)
        assert np.isclose(numpy_output, expected, equal_nan=True).all()

        dask_output = convolve_2d(dask_data, kernel)
        assert isinstance(dask_output, da.Array)
        assert np.isclose(
            dask_output.compute(), expected, equal_nan=True
        ).all()
Beispiel #10
0
def test_convolution():
    """Check ``convolve_2d`` sums and means on a 6x6 raster of ones.

    The raster has NaN on its diagonal. Three kernels are used: ``[[1]]``,
    the kernel returned by ``circle_kernel`` and the kernel returned by
    ``annulus_kernel``. Sums are computed on the NaN-zeroed raster with
    ``pad=False``; means are computed on an all-ones array with ``pad=True``
    and must stay all ones.
    """
    n, m = 6, 6
    raster = xr.DataArray(np.ones((n, m)), dims=['y', 'x'])
    # dims are (y, x) with shape (n, m): x needs m coordinates and y needs n
    raster['x'] = np.linspace(0, m, m)
    raster['y'] = np.linspace(0, n, n)
    cellsize_x, cellsize_y = calc_cellsize(raster)

    # add some nan pixels
    nan_cells = [(i, i) for i in range(n)]
    for cell in nan_cells:
        raster[cell[0], cell[1]] = np.nan

    # kernel array = [[1]]
    kernel = np.ones((1, 1))

    # np.nansum(np.array([np.nan])) = 0.0
    expected_out_sum_1 = np.array([[0., 1., 1., 1., 1., 1.],
                                   [1., 0., 1., 1., 1., 1.],
                                   [1., 1., 0., 1., 1., 1.],
                                   [1., 1., 1., 0., 1., 1.],
                                   [1., 1., 1., 1., 0., 1.],
                                   [1., 1., 1., 1., 1., 0.]])
    # Convolution will return np.nan, so convert nan to 0
    # NOTE(review): this only verifies that the expected array itself holds
    # no NaN/inf; no convolution output is compared against it.
    assert np.all(np.nan_to_num(expected_out_sum_1) == expected_out_sum_1)

    # np.nanmean(np.array([np.nan])) = nan
    mean_output_1 = convolve_2d(raster.values, kernel / kernel.sum())
    for cell in nan_cells:
        assert np.isnan(mean_output_1[cell[0], cell[1]])
    # remaining cells are 1s
    for i in range(n):
        for j in range(m):
            if i != j:
                assert mean_output_1[i, j] == 1

    # kernel array: [[0, 1, 0],
    #                [1, 1, 1],
    #                [0, 1, 0]]
    kernel = circle_kernel(cellsize_x, cellsize_y, 2)
    sum_output_2 = convolve_2d(np.nan_to_num(raster.values), kernel, pad=False)
    expected_out_sum_2 = np.array([[2., 2., 4., 4., 4., 3.],
                                   [2., 4., 3., 5., 5., 4.],
                                   [4., 3., 4., 3., 5., 4.],
                                   [4., 5., 3., 4., 3., 4.],
                                   [4., 5., 5., 3., 4., 2.],
                                   [3., 4., 4., 4., 2., 2.]])

    assert np.all(sum_output_2 == expected_out_sum_2)

    mean_output_2 = convolve_2d(np.ones((n, m)),
                                kernel / kernel.sum(),
                                pad=True)
    expected_mean_output_2 = np.ones((n, m))
    assert np.all(mean_output_2 == expected_mean_output_2)

    # kernel array: [[0, 1, 0],
    #                [1, 0, 1],
    #                [0, 1, 0]]
    kernel = annulus_kernel(cellsize_x, cellsize_y, 2.0, 0.5)
    sum_output_3 = convolve_2d(np.nan_to_num(raster.values), kernel, pad=False)
    expected_out_sum_3 = np.array([[2., 1., 3., 3., 3., 2.],
                                   [1., 4., 2., 4., 4., 3.],
                                   [3., 2., 4., 2., 4., 3.],
                                   [3., 4., 2., 4., 2., 3.],
                                   [3., 4., 4., 2., 4., 1.],
                                   [2., 3., 3., 3., 1., 2.]])

    assert np.all(sum_output_3 == expected_out_sum_3)

    mean_output_3 = convolve_2d(np.ones((n, m)),
                                kernel / kernel.sum(),
                                pad=True)
    expected_mean_output_3 = np.ones((n, m))
    assert np.all(mean_output_3 == expected_mean_output_3)
Beispiel #11
0
def _focal_sum_cupy(data, kernel):
    """Return ``convolve_2d(data, kernel)``, i.e. the kernel applied as-is."""
    return convolve_2d(data, kernel)
Beispiel #12
0
def _focal_mean_cupy(data, kernel):
    """Convolve ``data`` with ``kernel`` divided by the sum of its weights."""
    normalized_kernel = kernel / kernel.sum()
    return convolve_2d(data, normalized_kernel)
Beispiel #13
0
def hotspots(raster: xr.DataArray,
             kernel: xr.DataArray,
             x: Optional[str] = 'x',
             y: Optional[str] = 'y') -> xr.DataArray:
    """
    Identify statistically significant hot spots and cold spots in an input
    raster. To be a statistically significant hot spot, a feature will have a
    high value and be surrounded by other features with high values as well.
    Neighborhood of a feature defined by the input kernel, which currently
    support a shape of circle, annulus, or custom kernel.

    The result should be a raster with the following 7 values:
         90 for 90% confidence high value cluster
         95 for 95% confidence high value cluster
         99 for 99% confidence high value cluster
        -90 for 90% confidence low value cluster
        -95 for 95% confidence low value cluster
        -99 for 99% confidence low value cluster
         0 for no significance

    Parameters:
    ----------
    raster: xarray.DataArray
        2D Input raster image with shape = (height, width).
    kernel: Numpy Array
        2D array where values of 1 indicate the kernel.
    x: str, default 'x'
        Name of the second dimension of `raster`.
    y: str, default 'y'
        Name of the first dimension of `raster`.

    Returns:
    ----------
    xarray.DataArray
        2D array of hotspots with values indicating confidence level.
        Coordinates, dimensions and attributes are taken from `raster`.

    Raises:
    ----------
    TypeError
        If `raster` is not a DataArray.
    ValueError
        If `raster` is not 2D, does not hold integers or floats, or its
        dimensions are not named ``(y, x)``.
    ZeroDivisionError
        If the standard deviation of the raster values is 0.

    Examples:
    ----------
        Imports
    >>> import numpy as np
    >>> import xarray as xr
    >>> from xrspatial import focal

    Create Data Array
    >>> agg = xr.DataArray(np.array([[0, 0, 0, 0, 0, 0, 0],
    ...                              [0, 0, 0, 0, 0, 0, 0],
    ...                              [0, 0, 10, 10, 10, 0, 0],
    ...                              [0, 0, 10, 10, 10, 0, 0],
    ...                              [0, 0, 10, 10, 10, 0, 0],
    ...                              [0, 0, 0, 0, 0, 0, 0],
    ...                              [0, 0, 0, 0, 0, 0, 0]]),
    ...                              dims = ["lat", "lon"])
    >>> height, width = agg.shape
    >>> _lon = np.linspace(0, width - 1, width)
    >>> _lat = np.linspace(0, height - 1, height)
    >>> agg["lon"] = _lon
    >>> agg["lat"] = _lat

        Create Kernel
    >>> kernel = focal.circle_kernel(1, 1, 1)

        Create Hotspot Data Array
    >>> focal.hotspots(agg, kernel, x = 'lon', y = 'lat')
    <xarray.DataArray (lat: 7, lon: 7)>
    array([[ 0,  0,  0,  0,  0,  0,  0],
           [ 0,  0,  0,  0,  0,  0,  0],
           [ 0,  0,  0,  0,  0,  0,  0],
           [ 0,  0,  0, 95,  0,  0,  0],
           [ 0,  0,  0,  0,  0,  0,  0],
           [ 0,  0,  0,  0,  0,  0,  0],
           [ 0,  0,  0,  0,  0,  0,  0]], dtype=int8)
    Coordinates:
      * lon      (lon) float64 0.0 1.0 2.0 3.0 4.0 5.0 6.0
      * lat      (lat) float64 0.0 1.0 2.0 3.0 4.0 5.0 6.0
    """

    # validate raster
    if not isinstance(raster, DataArray):
        raise TypeError("`raster` must be instance of DataArray")

    if raster.ndim != 2:
        raise ValueError("`raster` must be 2D")

    if not (issubclass(raster.values.dtype.type, np.integer)
            or issubclass(raster.values.dtype.type, np.floating)):
        raise ValueError("`raster` must be an array of integers or float")

    raster_dims = raster.dims
    if raster_dims != (y, x):
        # str.format only fills `{}` fields; `%s` would be emitted verbatim
        raise ValueError("raster.coords should be named as coordinates: "
                         "({}, {})".format(y, x))

    # apply kernel to raster values
    mean_array = convolve_2d(raster.values, kernel / kernel.sum(), pad=True)

    # calculate z-scores
    global_mean = np.nanmean(raster.values)
    global_std = np.nanstd(raster.values)
    if global_std == 0:
        raise ZeroDivisionError("Standard deviation "
                                "of the input raster values is 0.")
    z_array = (mean_array - global_mean) / global_std

    out = _hotspots(z_array)

    result = DataArray(out,
                       coords=raster.coords,
                       dims=raster.dims,
                       attrs=raster.attrs)

    return result
import matplotlib.pyplot as plt
from xrspatial import convolution

# load datasets
hh_path = '/home/[email protected]/Documents/sm_paper/smapvex16/insitu_handheld/SV16M_PSM_SoilMoistureHandheld_Vers3_w_coords.csv'
s1_path = '/home/[email protected]/Documents/sm_paper/smapvex16/s1sm/SMCS1_20160719_001513_063_A.tif'

# handheld measurements, indexed by CSV columns 1 and 2 (column 1 parsed as
# dates)
hh_data = pd.read_csv(hh_path, index_col=[1, 2], parse_dates=[1])
# disabled alternatives: aggregate per site instead of selecting one location
# hh_data['SITE_ID'] = [x.split("-")[0] for x in hh_data['SITE_ID']]
# hh_data = hh_data.groupby('SITE_ID').mean()
# hh_data = hh_data.groupby(level=0).mean()
# keep the rows whose LOCATION index level is 'Top', then those dated
# 2016-07-19
hh_data = hh_data.xs('Top', level='LOCATION')
hh_data = hh_data.loc[hh_data.index.date == dt.date(year=2016, month=7,
                                                    day=19)]
s1_data = xr.open_rasterio(s1_path)
# NOTE(review): the 3x3 weights are 1/3 each, so they sum to 3 rather than 1;
# if a 3x3 mean was intended the weight would be 1/9 - confirm.
s1_data = convolution.convolve_2d(s1_data, np.full((3, 3), 1 / 3))

# extract values
smlist = list()

# NOTE(review): `.interp` is called on whatever convolve_2d returned; if that
# is not an xarray object every iteration ends in the except branch - confirm.
for irow in range(hh_data.shape[0]):
    try:
        tmp = s1_data.interp(x=hh_data['Lon'].iloc[irow],
                             y=hh_data['Lat'].iloc[irow],
                             method='linear').values[0]
        # if tmp > 40:
        #     hh_data['SOIL_MOISTURE'].iloc[irow] = hh_data['SOIL_MOISTURE'].iloc[irow] + 0.1
        smlist.append(tmp)
    except Exception:
        # best effort: record a gap for points that cannot be sampled, but
        # let KeyboardInterrupt / SystemExit propagate
        smlist.append(np.nan)
Beispiel #15
0
def hotspots(raster, kernel, x='x', y='y'):
    """Identify statistically significant hot spots and cold spots in an input
    raster. To be a statistically significant hot spot, a feature will have a
    high value and be surrounded by other features with high values as well.
    Neighborhood of a feature defined by the input kernel, which currently
    support a shape of circle, annulus, or custom kernel.

    The result should be a raster with the following 7 values:
    90 for 90% confidence high value cluster
    95 for 95% confidence high value cluster
    99 for 99% confidence high value cluster
    -90 for 90% confidence low value cluster
    -95 for 95% confidence low value cluster
    -99 for 99% confidence low value cluster
    0 for no significance

    Parameters
    ----------
    raster: xarray.DataArray
        Input raster image with shape=(height, width)
    kernel: Kernel
        Neighborhood weights; divided by their sum before being applied.
    x: str, default 'x'
        Name of the second dimension of `raster`.
    y: str, default 'y'
        Name of the first dimension of `raster`.

    Returns
    -------
    hotspots: xarray.DataArray
        Carries the coordinates, dimensions and attributes of `raster`.

    Raises
    ------
    TypeError
        If `raster` is not a DataArray.
    ValueError
        If `raster` is not 2D, does not hold integers or floats, or its
        dimensions are not named ``(y, x)``.
    ZeroDivisionError
        If the standard deviation of the raster values is 0.
    """

    # validate raster
    if not isinstance(raster, DataArray):
        raise TypeError("`raster` must be instance of DataArray")

    if raster.ndim != 2:
        raise ValueError("`raster` must be 2D")

    if not (issubclass(raster.values.dtype.type, np.integer)
            or issubclass(raster.values.dtype.type, np.floating)):
        raise ValueError("`raster` must be an array of integers or float")

    raster_dims = raster.dims
    if raster_dims != (y, x):
        # str.format only fills `{}` fields; `%s` would be emitted verbatim
        raise ValueError("raster.coords should be named as coordinates: "
                         "({}, {})".format(y, x))

    # apply kernel to raster values
    mean_array = convolve_2d(raster.values, kernel / kernel.sum(), pad=True)

    # calculate z-scores
    global_mean = np.nanmean(raster.values)
    global_std = np.nanstd(raster.values)
    if global_std == 0:
        raise ZeroDivisionError("Standard deviation "
                                "of the input raster values is 0.")
    z_array = (mean_array - global_mean) / global_std

    out = _hotspots(z_array)

    result = DataArray(out,
                       coords=raster.coords,
                       dims=raster.dims,
                       attrs=raster.attrs)

    return result