Example 1
 def test_x_ticks_are_rotated_for_time(self):
     time = pd.date_range('2000-01-01', '2000-01-10')
     a = DataArray(np.arange(len(time)), [('t', time)])
     a.plot.line()
     rotation = plt.gca().get_xticklabels()[0].get_rotation()
     self.assertNotEqual(rotation, 0)
Example 2
def impedance(n,
              branch_components=None,
              snapshot=None,
              pu_system=True,
              linear=True,
              skip_pre=False):
    """
    Calculate the impedance of the network branches.

    Naturally, the impedance of controllable branches is not defined. However,
    https://www.preprints.org/manuscript/202001.0352/v1 presents a method for
    calculating the impedance that controllable branches would have if they
    were passive AC lines. If 'Link' is included in branch_components,
    the flow-dependent pseudo-impedance is calculated based on the formulation
    presented in the paper. Note that in this case the flow must be given
    for all branches.

    Parameters
    ----------
    n : pypsa.Network
    branch_components : list, optional
        List of branch components. The default None results in
        n.passive_branch_components.
    snapshot : str/pd.Timestamp, optional
        Only relevant if 'Link' in branch_components. The default None results
        in the first snapshot of n.
    pu_system : bool, optional
        Whether to use the per unit system for the impedance.
        The default is True.
    linear : bool, optional
        Whether to use the linear approximation. The default is True.
    skip_pre : bool, optional
        Whether to calculate dependent quantities beforehand. The default is False.

    Returns
    -------
    z : xr.DataArray
        Impedance for each branch in branch_components.

    """
    # standard impedance, note z must not be inf or nan
    branch_components = check_passive_branch_comps(branch_components, n)
    x = 'x_pu' if pu_system else 'x'
    r = 'r_pu' if pu_system else 'r'

    if not skip_pre:
        if pu_system and (n.lines[x] == 0).all():
            n.calculate_dependent_values()

    comps = sorted(set(branch_components) & n.passive_branch_components)
    if linear:
        z = pd.concat({
            c: n.df(c)[x].where(
                n.df(c).bus0.map(n.buses.carrier) == 'AC',
                n.df(c)[r])
            for c in comps
        })
    else:
        z = pd.concat({c: n.df(c).eval(f'{r} + 1.j * {x}') for c in comps})
    if not n.lines.empty:
        assert not np.isinf(z).any() | z.isna().any(), (
            'There '
            f'seems to be a problem with your {x} or {r} values. At least one of '
            f'these is nan or inf. Please check the values in components {comps}.'
        )
    z = DataArray(z.rename_axis(['component', 'branch_i']), dims='branch')

    if ('Link' not in branch_components) | n.links.empty:
        return z

    # add pseudo impedance for links, in dependence on the current flow:
    if snapshot is None:
        logger.warning('Link in argument "branch_components", but no '
                       'snapshot given. Falling back to first snapshot.')
        snapshot = n.snapshots[0]

    f = network_flow(n, snapshot)
    branches_i = f.get_index('branch')
    C = Cycles(n, branches_i[abs(f).values > 1e-8])\
        .reindex(branch=branches_i, fill_value=0)
    # C_mix is all the active cycles where at least one link is included
    C_mix = C[:, ((C != 0) &
                  (f != 0)).groupby('component').any().loc['Link'].values]

    if not C_mix.size:
        sub = f.loc['Link'][abs(f.loc['Link']).values > 1e-8]
        omega = DataArray(1, sub.coords)
    elif not z.size:
        omega = null(C_mix.loc['Link'] * f.loc['Link'])[0]
    else:
        d = {'branch': 'Link'}
        omega = -dot(pinv(dot(C_mix.loc['Link'].T, diag(f.loc['Link']))),
                     dot(C_mix.drop_sel(d).T, diag(z), f.drop_sel(d)))

    omega = omega.round(10).assign_coords({'component': 'Link'})
    omega[(omega == 0) & (f.loc['Link'] != 0)] = 1
    Z = z.reindex_like(f).copy()
    links_i = branches_i[branches_i.get_loc('Link')]
    Z.loc[links_i] = omega.reindex_like(f.loc['Link'], fill_value=0)
    return Z.assign_coords(snapshot=snapshot)
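
A minimal usage sketch (illustrative, not from the source; assumes PyPSA is installed and ships the ac_dc_meshed example network):

import pypsa  # assumption: pypsa available

n = pypsa.examples.ac_dc_meshed()
n.lpf()  # linear power flow, so link flows exist for the pseudo-impedance

# Standard impedance of the passive branches only:
z_lines = impedance(n, branch_components=['Line'])

# Include the flow-dependent pseudo-impedance of links at a given snapshot:
z_all = impedance(n, branch_components=['Line', 'Link'],
                  snapshot=n.snapshots[0])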
Example 3
def array_with_type(general, specific):
    return DataArray(0.0, attrs={"datatype": (general, specific)})
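A quick usage sketch (the argument values are illustrative): the "datatype" attribute carries the (general, specific) pair for later inspection.

da = array_with_type("number_density", "electrons")
general, specific = da.attrs["datatype"]
assert (general, specific) == ("number_density", "electrons")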
Example 4
    def value_check(self):
        Ne = 0 * self.Ne
        with self.assertRaises(ValueError):
            utilities.input_check("Ne",
                                  Ne,
                                  np.ndarray,
                                  greater_than_or_equal_zero=False)

        Ne = -1 * self.Ne
        with self.assertRaises(ValueError):
            utilities.input_check("Ne",
                                  Ne,
                                  np.ndarray,
                                  greater_than_or_equal_zero=True)

        Ne = np.nan * self.Ne
        with self.assertRaises(ValueError):
            utilities.input_check("Ne", Ne, np.ndarray)

        Ne = np.inf * self.Ne
        with self.assertRaises(ValueError):
            utilities.input_check("Ne", Ne, np.ndarray)

        Ne = -np.inf * self.Ne
        with self.assertRaises(ValueError):
            utilities.input_check("Ne", Ne, np.ndarray)

        Ne = self.Ne[:, np.newaxis]
        with self.assertRaises(ValueError):
            utilities.input_check("Ne", Ne, np.ndarray, ndim_to_check=1)

        # Check dropped channel handling
        t = np.array([78.5, 80.5, 82.5])
        rho = np.linspace(0, 1, 11)
        Ne = np.logspace(19.0, 16.0, 11)
        Ne = np.tile(Ne, [3, 1])
        Ne[1, :] /= 10.0
        Ne[2, :] *= 10.0

        dropped_t_coord = np.array([80.5])
        dropped_rho_coord = np.array([rho[3], rho[7]])

        Ne = DataArray(
            data=Ne,
            coords=[("t", t), ("rho_poloidal", rho)],
            dims=["t", "rho_poloidal"],
        )

        dropped = Ne.sel({"t": dropped_t_coord})
        dropped = dropped.sel({"rho_poloidal": dropped_rho_coord})

        Ne.loc[{
            "t": dropped_t_coord,
            "rho_poloidal": dropped_rho_coord
        }] = np.nan

        Ne.attrs["dropped"] = dropped

        # Valid input: should not raise
        utilities.input_check("Ne", Ne, DataArray, ndim_to_check=2)
Example 5
def test_contains_cftime_datetimes_dask_1d(data):
    assert contains_cftime_datetimes(data.time.chunk())


@requires_cftime
def test_contains_cftime_datetimes_3d(times_3d):
    assert contains_cftime_datetimes(times_3d)


@requires_cftime
@requires_dask
def test_contains_cftime_datetimes_dask_3d(times_3d):
    assert contains_cftime_datetimes(times_3d.chunk())


@pytest.mark.parametrize("non_cftime_data", [DataArray([]), DataArray([1, 2])])
def test_contains_cftime_datetimes_non_cftimes(non_cftime_data):
    assert not contains_cftime_datetimes(non_cftime_data)


@requires_dask
@pytest.mark.parametrize("non_cftime_data", [DataArray([]), DataArray([1, 2])])
def test_contains_cftime_datetimes_non_cftimes_dask(non_cftime_data):
    assert not contains_cftime_datetimes(non_cftime_data.chunk())


@requires_cftime
@pytest.mark.parametrize("shape", [(24, ), (8, 3), (2, 4, 3)])
def test_encode_cf_datetime_overflow(shape):
    # Test for fix to GH 2272
    dates = pd.date_range("2100", periods=24).values.reshape(shape)
Example 6
xi, yi, mski = d['lont'], d['latt'], np.bool_(d['volmsk'])
msk = np.logical_and(msko, ~mski)
TAREAmsk = TAREA[msk]
A = TAREAmsk.sum()

# fnames = fnames[-90:]
t = np.array([])
Cterms = {term: np.array([], ndmin=1) for term in terms}
n = 0
for fname in fnames:
    date = fname.split('.')[0][-10:]
    print(date)
    t = np.append(t, Timestamp(date).to_pydatetime())
    ds = open_dataset(fname)
    for term in terms:
        dsterm = ds[term]
        # Exclude first and last columns because curl did not wrap around.
        dsterm[:, 0] = np.nan
        dsterm[:, -1] = np.nan
        aux = np.nansum(dsterm.values[msk] * TAREAmsk) / A  # Area-averaging.
        Cterms[term] = np.append(Cterms[term], aux)

for term in terms:
    Cterms[term] = DataArray(Cterms[term], coords=dict(t=t), dims='t')

ds = Dataset(data_vars=Cterms, coords=dict(t=t))

fout = head + 'data/circulation_terms_circumpolar.nc'
ds.to_netcdf(fout)
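
The area-averaging step above is a standard area-weighted mean over the masked cells. A minimal self-contained sketch with synthetic data (all names and values illustrative):

import numpy as np

TAREA = np.ones((4, 4))                # cell areas
field = np.arange(16.0).reshape(4, 4)  # quantity to average
msk = field > 5                        # region of interest

A = TAREA[msk].sum()
mean_val = np.nansum(field[msk] * TAREA[msk]) / A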
Example 7
def apply(raster, kernel, func=_calc_mean, name='focal_apply'):
    """
    Returns an array with a custom function applied over a user-defined kernel window.

    Parameters
    ----------
    raster : xarray.DataArray
        2D array of input values to be filtered. Can be a NumPy backed,
        or Dask with NumPy backed DataArray.
    kernel : numpy.ndarray
        2D array where values of 1 indicate the kernel.
    func : callable, default=xrspatial.focal._calc_mean
        Function which takes an input array and returns an array.

    Returns
    -------
    agg : xarray.DataArray of same type as `raster`
        2D aggregate array of filtered values.

    Examples
    --------
    Focal apply works with a NumPy backed xarray DataArray.

    .. sourcecode:: python

        >>> import numpy as np
        >>> import xarray as xr
        >>> from xrspatial.convolution import circle_kernel
        >>> from xrspatial.focal import apply
        >>> data = np.arange(20, dtype=np.float64).reshape(4, 5)
        >>> raster = xr.DataArray(data, dims=['y', 'x'], name='raster')
        >>> print(raster)
        <xarray.DataArray 'raster' (y: 4, x: 5)>
        array([[ 0.,  1.,  2.,  3.,  4.],
               [ 5.,  6.,  7.,  8.,  9.],
               [10., 11., 12., 13., 14.],
               [15., 16., 17., 18., 19.]])
        Dimensions without coordinates: y, x
        >>> kernel = circle_kernel(2, 2, 3)
        >>> kernel
        array([[0., 1., 0.],
               [1., 1., 1.],
               [0., 1., 0.]])
        >>> # apply kernel mean by default
        >>> apply_mean_agg = apply(raster, kernel)
        >>> apply_mean_agg
        <xarray.DataArray 'focal_apply' (y: 4, x: 5)>
        array([[ 2.        ,  2.25      ,  3.25      ,  4.25      ,  5.33333333],
               [ 5.25      ,  6.        ,  7.        ,  8.        ,  8.75      ],
               [10.25      , 11.        , 12.        , 13.        , 13.75      ],
               [13.66666667, 14.75      , 15.75      , 16.75      , 17.        ]])
        Dimensions without coordinates: y, x

    Focal apply also works with a Dask with NumPy backed xarray DataArray.
    Note that if the input raster is a NumPy or Dask with NumPy backed
    DataArray, the applied function must be decorated with ``numba.jit``.
    xrspatial already provides the ``ngjit`` decorator, where
    ``ngjit = numba.jit(nopython=True, nogil=True)``.

    .. sourcecode:: python

        >>> from xrspatial.utils import ngjit
        >>> from xrspatial.convolution import custom_kernel
        >>> kernel = custom_kernel(np.array([
        ...     [0, 1, 0],
        ...     [0, 1, 1],
        ...     [0, 1, 0],
        ... ]))
        >>> @ngjit
        ... def func(kernel_data):
        ...     weight = np.array([
        ...         [0, 0.5, 0],
        ...         [0, 1, 0.5],
        ...         [0, 0.5, 0],
        ...     ])
        ...     return np.nansum(kernel_data * weight)

        >>> import dask.array as da
        >>> data_da = da.from_array(np.ones((6, 4), dtype=np.float64), chunks=(3, 2))
        >>> raster_da = xr.DataArray(data_da, dims=['y', 'x'], name='raster_da')
        >>> print(raster_da)
        <xarray.DataArray 'raster_da' (y: 6, x: 4)>
        dask.array<array, shape=(6, 4), dtype=float64, chunksize=(3, 2), chunktype=numpy.ndarray>  # noqa
        Dimensions without coordinates: y, x
        >>> apply_func_agg = apply(raster_da, kernel, func)
        >>> print(apply_func_agg)
        <xarray.DataArray 'focal_apply' (y: 6, x: 4)>
        dask.array<_trim, shape=(6, 4), dtype=float64, chunksize=(3, 2), chunktype=numpy.ndarray>  # noqa
        Dimensions without coordinates: y, x
        >>> print(apply_func_agg.compute())
        <xarray.DataArray 'focal_apply' (y: 6, x: 4)>
        array([[2. , 2. , 2. , 1.5],
               [2.5, 2.5, 2.5, 2. ],
               [2.5, 2.5, 2.5, 2. ],
               [2.5, 2.5, 2.5, 2. ],
               [2.5, 2.5, 2.5, 2. ],
               [2. , 2. , 2. , 1.5]])
        Dimensions without coordinates: y, x
    """
    # validate raster
    if not isinstance(raster, DataArray):
        raise TypeError("`raster` must be instance of DataArray")

    if raster.ndim != 2:
        raise ValueError("`raster` must be 2D")

    # Validate the kernel
    kernel = custom_kernel(kernel)

    # apply kernel to raster values
    # if raster is a numpy or dask with numpy backed data array,
    # the function func must be a @ngjit
    mapper = ArrayTypeFunctionMapping(
        numpy_func=_apply_numpy,
        cupy_func=lambda *args: not_implemented_func(
            *args, messages='apply() does not support cupy backed DataArray.'),
        dask_func=_apply_dask_numpy,
        dask_cupy_func=lambda *args: not_implemented_func(
            *args,
            messages=
            'apply() does not support dask with cupy backed DataArray.'),
    )
    out = mapper(raster)(raster.data, kernel, func)
    result = DataArray(out,
                       name=name,
                       coords=raster.coords,
                       dims=raster.dims,
                       attrs=raster.attrs)
    return result
Example 8
def quantile(agg: xr.DataArray,
             k: int = 4,
             name: Optional[str] = 'quantile') -> xr.DataArray:
    """
    Groups data for array (agg) into quantiles by distributing
    the values into groups that contain an equal number of values.
    The number of quantiles produced is based on (k) with a default value
    of 4. The result is an xarray.DataArray.

    Parameters:
    ----------
    agg: xarray.DataArray
        2D array of values to bin:
        NumPy, CuPy, NumPy-backed Dask, or Cupy-backed Dask array.
    k: int
        Number of quantiles to be produced, default = 4.
    name: str, optional (default = "quantile")
        Name of the output aggregate array.

    Returns:
    ----------
    xarray.DataArray, quantiled aggregate
        2D array, of the same type as the input, of quantile allocations.
        All other input attributes are preserved.

    Notes:
    ----------
    Adapted from PySAL:
    - https://pysal.org/mapclassify/_modules/mapclassify/classifiers.html#Quantiles # noqa

    Note that dask's percentile algorithm is approximate, while numpy's is
    exact. This may cause some differences between the results of the vanilla
    numpy and dask versions of the input agg.
    - https://github.com/dask/dask/issues/3099

    Examples:
    ----------
    Imports
    >>> import numpy as np
    >>> import xarray as xr
    >>> from xrspatial.classify import quantile

    Create DataArray
    >>> np.random.seed(0)
    >>> agg = xr.DataArray(np.random.rand(4, 4),
    ...                    dims=["lat", "lon"])
    >>> height, width = agg.shape
    >>> _lat = np.linspace(0, height - 1, height)
    >>> _lon = np.linspace(0, width - 1, width)
    >>> agg["lat"] = _lat
    >>> agg["lon"] = _lon
    >>> print(agg)
    <xarray.DataArray (lat: 4, lon: 4)>
    array([[0.5488135 , 0.71518937, 0.60276338, 0.54488318],
           [0.4236548 , 0.64589411, 0.43758721, 0.891773  ],
           [0.96366276, 0.38344152, 0.79172504, 0.52889492],
           [0.56804456, 0.92559664, 0.07103606, 0.0871293 ]])
    Coordinates:
      * lon      (lon) float64 0.0 1.0 2.0 3.0
      * lat      (lat) float64 0.0 1.0 2.0 3.0

    Create Quantile Aggregate
    >>> quantile_agg = quantile(agg)
    >>> print(quantile_agg)
    <xarray.DataArray 'quantile' (lat: 4, lon: 4)>
    array([[1., 2., 2., 1.],
           [0., 2., 1., 3.],
           [3., 0., 3., 1.],
           [2., 3., 0., 0.]], dtype=float32)
    Coordinates:
      * lon      (lon) float64 0.0 1.0 2.0 3.0
      * lat      (lat) float64 0.0 1.0 2.0 3.0

    With k quantiles
    >>> quantile_agg = quantile(agg, k = 6, name = "Six Quantiles")
    >>> print(quantile_agg)
    <xarray.DataArray 'Six Quantiles' (lat: 4, lon: 4)>
    array([[2., 4., 3., 2.],
           [1., 3., 1., 5.],
           [5., 0., 4., 1.],
           [3., 5., 0., 0.]], dtype=float32)
    Coordinates:
      * lon      (lon) float64 0.0 1.0 2.0 3.0
      * lat      (lat) float64 0.0 1.0 2.0 3.0
    """

    q = _quantile(agg, k)
    k_q = q.shape[0]
    if k_q < k:
        print("Quantile Warning: Not enough unique values "
              "for k classes (using {} bins)".format(k_q))
        k = k_q

    out = _bin(agg.data, bins=q, new_values=np.arange(k))

    return DataArray(out,
                     name=name,
                     dims=agg.dims,
                     coords=agg.coords,
                     attrs=agg.attrs)
Example 9
def natural_breaks(agg: xr.DataArray,
                   num_sample: Optional[int] = None,
                   name: Optional[str] = 'natural_breaks',
                   k: int = 5) -> xr.DataArray:
    """
    Groups data for array (agg) by distributing
    values using the Jenks Natural Breaks or k-means
    clustering method. Values are grouped so that
    similar values are placed in the same group and
    space between groups is maximized.
    The result is an xarray.DataArray.

    Parameters:
    ----------
    agg: xarray.DataArray
        2D array of values to bin.
        NumPy, CuPy, NumPy-backed Dask, or Cupy-backed Dask array
    num_sample: int (optional)
        Number of sample data points used to fit the model.
        Natural Breaks (Jenks) classification has O(n²) complexity,
        where n is the total number of data points, i.e. agg.size.
        When n is large, the model should be fit on a small sub-sample
        of the data instead of the whole dataset.
    k: int (default = 5)
        Number of classes to be produced.
    name: str, optional (default = "natural_breaks")
        Name of output aggregate.

    Returns:
    ----------
    natural_breaks_agg: xarray.DataArray
        2D array, of the same type as the input, of class allocations.

    Algorithm References:
    ----------
    Map Classify:
    - https://pysal.org/mapclassify/_modules/mapclassify/classifiers.html#NaturalBreaks # noqa
    perrygeo:
    - https://github.com/perrygeo/jenks/blob/master/jenks.pyx

    Examples:
    ----------
    Imports
    >>> import numpy as np
    >>> import xarray as xr
    >>> from xrspatial.classify import natural_breaks

    Create DataArray
    >>> np.random.seed(0)
    >>> agg = xr.DataArray(np.random.rand(4, 4),
    ...                    dims=["lat", "lon"])
    >>> height, width = agg.shape
    >>> _lat = np.linspace(0, height - 1, height)
    >>> _lon = np.linspace(0, width - 1, width)
    >>> agg["lat"] = _lat
    >>> agg["lon"] = _lon
    >>> print(agg)
    <xarray.DataArray (lat: 4, lon: 4)>
    array([[0.5488135 , 0.71518937, 0.60276338, 0.54488318],
           [0.4236548 , 0.64589411, 0.43758721, 0.891773  ],
           [0.96366276, 0.38344152, 0.79172504, 0.52889492],
           [0.56804456, 0.92559664, 0.07103606, 0.0871293 ]])
    Coordinates:
    * lon      (lon) float64 0.0 1.0 2.0 3.0
    * lat      (lat) float64 0.0 1.0 2.0 3.0

    Create Natural Breaks Aggregate
    >>> natural_breaks_agg = natural_breaks(agg, k = 5)
    >>> print(natural_breaks_agg)
    <xarray.DataArray 'natural_breaks' (lat: 4, lon: 4)>
    array([[2., 3., 2., 2.],
           [1., 2., 1., 4.],
           [4., 1., 3., 2.],
           [2., 4., 0., 0.]], dtype=float32)
    Coordinates:
      * lat      (lat) float64 0.0 1.0 2.0 3.0
      * lon      (lon) float64 0.0 1.0 2.0 3.0
    """

    # numpy case
    if isinstance(agg.data, np.ndarray):
        out = _run_numpy_natural_break(agg.data, num_sample, k)

    # cupy case
    elif has_cuda() and isinstance(agg.data, cupy.ndarray):
        out = _run_cupy_natural_break(agg.data, num_sample, k)

    else:
        raise TypeError('Unsupported Array Type: {}'.format(type(agg.data)))

    return DataArray(out,
                     name=name,
                     coords=agg.coords,
                     dims=agg.dims,
                     attrs=agg.attrs)
Example 10
 def test_2d_before_squeeze(self):
     a = DataArray(easy_array((1, 5)))
     a.plot()
Example 11
def reclassify(agg: xr.DataArray,
               bins: List[int],
               new_values: List[int],
               name: Optional[str] = 'reclassify') -> xr.DataArray:
    """
    Reclassifies data for array (agg) into new values based on bins.

    Parameters:
    ----------
    agg: xarray.DataArray
        2D array of values to be reclassified.
        NumPy, CuPy, NumPy-backed Dask, or Cupy-backed Dask array.
    bins: array-like object
        Values or ranges of values to be changed.
    new_values: array-like object
        New values for each bin.
    name: str, optional (default = "reclassify")
        Name of output aggregate.

    Returns:
    ----------
    xarray.DataArray, reclassified aggregate.
        2D array of new values. All input attributes are preserved.

    Notes:
    ----------
    Adapted from PySal:
        - https://pysal.org/mapclassify/_modules/mapclassify/classifiers.html

    Examples:
    ----------
    Imports
    >>> import numpy as np
    >>> import xarray as xr
    >>> from xrspatial.classify import reclassify

    Create Initial DataArray
    >>> np.random.seed(1)
    >>> agg = xr.DataArray(np.random.randint(2, 8, (4, 4)),
    ...                    dims=["lat", "lon"])
    >>> height, width = agg.shape
    >>> _lon = np.linspace(0, width - 1, width)
    >>> _lat = np.linspace(0, height - 1, height)
    >>> agg["lon"] = _lon
    >>> agg["lat"] = _lat
    >>> print(agg)
    <xarray.DataArray (lat: 4, lon: 4)>
    array([[7, 5, 6, 2],
           [3, 5, 7, 2],
           [2, 3, 6, 7],
           [6, 3, 4, 6]])
    Coordinates:
      * lon      (lon) float64 0.0 1.0 2.0 3.0
      * lat      (lat) float64 0.0 1.0 2.0 3.0

    Reclassify
    >>> bins = list(range(2, 8))
    >>> new_val = list(range(20, 80, 10))
    >>> reclassify_agg = reclassify(agg, bins, new_val)
    >>> print(reclassify_agg)
    <xarray.DataArray 'reclassify' (lat: 4, lon: 4)>
    array([[70., 50., 60., 20.],
           [30., 50., 70., 20.],
           [20., 30., 60., 70.],
           [60., 30., 40., 60.]], dtype=float32)
    Coordinates:
      * lon      (lon) float64 0.0 1.0 2.0 3.0
      * lat      (lat) float64 0.0 1.0 2.0 3.0
    """

    if len(bins) != len(new_values):
        raise ValueError('bins and new_values mismatch. '
                         'Should have same length.')
    out = _bin(agg.data, bins, new_values)
    return DataArray(out,
                     name=name,
                     dims=agg.dims,
                     coords=agg.coords,
                     attrs=agg.attrs)
Example 12
 def setUp(self):
     self.darray = DataArray(easy_array((2, 3, 4)))
Example 13
 def test_nonnumeric_index_raises_typeerror(self):
     a = DataArray(easy_array((3, 2)), coords=[['a', 'b', 'c'], ['d', 'e']])
     with raises_regex(TypeError, r'[Pp]lot'):
         self.plotfunc(a)
Example 14
 def test_3d_raises_valueerror(self):
     a = DataArray(easy_array((2, 3, 4)))
     with raises_regex(ValueError, r'DataArray must be 2d'):
         self.plotfunc(a)
Example 15
def binary(agg, values):
    return DataArray(_binary(agg.data, values),
                     name='binary',
                     dims=agg.dims,
                     coords=agg.coords,
                     attrs=agg.attrs)
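
`binary` delegates to a private `_binary` helper that is not shown here. A plausible minimal sketch (an assumption, not the actual xrspatial implementation) marks cells whose value is a member of `values`:

import numpy as np

def _binary_sketch(data, values):
    # 1 where the cell value is in `values`, else 0 (illustrative only).
    return np.isin(data, values).astype(int)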
Example 16
def equal_interval(agg: xr.DataArray,
                   k: int = 5,
                   name: Optional[str] = 'equal_interval') -> xr.DataArray:
    """
    Groups data for array (agg) by distributing values into equal intervals.
    The result is an xarray.DataArray.

    Parameters:
    ----------
    agg: xarray.DataArray
        2D array of values to bin.
        NumPy, CuPy, NumPy-backed Dask, or Cupy-backed Dask array
    k: int
        Number of classes to be produced.
    name: str, optional (default = "equal_interval")
        Name of output aggregate.

    Returns:
    ----------
    equal_interval_agg: xarray.DataArray
        2D array, of the same type as the input, of class allocations.

    Notes:
    ----------
    Intervals are defined to have equal width.

    Algorithm References:
    ----------
    PySal:
    - https://pysal.org/mapclassify/_modules/mapclassify/classifiers.html#EqualInterval # noqa
    SciKit:
    - https://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html#sphx-glr-auto-examples-classification-plot-classifier-comparison-py # noqa

    Examples:
    ----------
    Imports
    >>> import numpy as np
    >>> import xarray as xr
    >>> from xrspatial.classify import equal_interval, natural_breaks

    Create Initial DataArray
    >>> np.random.seed(1)
    >>> agg = xr.DataArray(np.random.randint(2, 8, (4, 4)),
    ...                    dims=["lat", "lon"])
    >>> height, width = agg.shape
    >>> _lon = np.linspace(0, width - 1, width)
    >>> _lat = np.linspace(0, height - 1, height)
    >>> agg["lon"] = _lon
    >>> agg["lat"] = _lat
    >>> print(agg)
    <xarray.DataArray (lat: 4, lon: 4)>
    array([[7, 5, 6, 2],
           [3, 5, 7, 2],
           [2, 3, 6, 7],
           [6, 3, 4, 6]])
    Coordinates:
      * lon      (lon) float64 0.0 1.0 2.0 3.0
      * lat      (lat) float64 0.0 1.0 2.0 3.0

    Create Equal Interval DataArray
    >>> equal_interval_agg = equal_interval(agg, k = 5)
    >>> print(equal_interval_agg)
    <xarray.DataArray 'equal_interval' (lat: 4, lon: 4)>
    array([[4., 2., 3., 0.],
           [0., 2., 4., 0.],
           [0., 0., 3., 4.],
           [3., 0., 1., 3.]], dtype=float32)
    Coordinates:
      * lon      (lon) float64 0.0 1.0 2.0 3.0
      * lat      (lat) float64 0.0 1.0 2.0 3.0
    """

    # numpy case
    if isinstance(agg.data, np.ndarray):
        out = _run_numpy_equal_interval(agg.data, k)

    # cupy case
    elif has_cuda() and isinstance(agg.data, cupy.ndarray):
        out = _run_cupy_equal_interval(agg.data, k)

    # dask + cupy case (agg.data is a dask array whose chunks are cupy arrays)
    elif has_cuda() and \
            isinstance(agg.data, da.Array) and \
            is_cupy_backed(agg):
        out = _run_dask_cupy_equal_interval(agg.data, k)

    # dask + numpy case
    elif isinstance(agg.data, da.Array):
        out = _run_dask_numpy_equal_interval(agg.data, k)

    else:
        raise TypeError('Unsupported Array Type: {}'.format(type(agg.data)))

    return DataArray(out,
                     name=name,
                     coords=agg.coords,
                     dims=agg.dims,
                     attrs=agg.attrs)
Example 17
def generate_terrain(canvas, seed=10, zfactor=4000, full_extent=None):
    """
    Generates pseudo-random terrain, which can be helpful for testing
    raster functions.

    Parameters
    ----------
    canvas : ds.Canvas
        Canvas instance defining the output dimensions and ranges.
    seed : int
        Seed for the random number generator.
    zfactor : int
        Multiplier applied to z values.
    full_extent : str or tuple(4), optional
        Full extent of the coordinate system, either as a wkid string
        (e.g. '3857') or as bbox<xmin, ymin, xmax, ymax>.

    Returns
    -------
    terrain: DataArray

    Notes:
    ------
    Algorithm References:
     - This was inspired by Michael McHugh's 2016 PyCon Canada talk:
       https://www.youtube.com/watch?v=O33YV4ooHSo
     - https://www.redblobgames.com/maps/terrain-from-noise/
    """
    def _gen_heights(bumps):
        out = np.zeros(len(bumps))
        for i, b in enumerate(bumps):
            x = b[0]
            y = b[1]
            val = agg.data[y, x]
            if val >= 0.33 and val <= 3:
                out[i] = 0.1
        return out

    def _scale(value, old_range, new_range):
        return ((value - old_range[0]) /
                (old_range[1] - old_range[0])) * (new_range[1] -
                                                  new_range[0]) + new_range[0]

    if not isinstance(canvas, Canvas):
        raise TypeError('canvas must be instance type datashader.Canvas')

    mercator_extent = (-np.pi * 6378137, -np.pi * 6378137, np.pi * 6378137,
                       np.pi * 6378137)
    crs_extents = {'3857': mercator_extent}

    if isinstance(full_extent, str):
        full_extent = crs_extents[full_extent]

    elif full_extent is None:
        full_extent = (canvas.x_range[0], canvas.y_range[0], canvas.x_range[1],
                       canvas.y_range[1])

    elif not isinstance(full_extent, (list, tuple)) or len(full_extent) != 4:
        raise TypeError('full_extent must be tuple(4) or str wkid')

    full_xrange = (full_extent[0], full_extent[2])
    full_yrange = (full_extent[1], full_extent[3])

    x_range_scaled = (_scale(canvas.x_range[0], full_xrange, (0.0, 1.0)),
                      _scale(canvas.x_range[1], full_xrange, (0.0, 1.0)))

    y_range_scaled = (_scale(canvas.y_range[0], full_yrange, (0.0, 1.0)),
                      _scale(canvas.y_range[1], full_yrange, (0.0, 1.0)))

    data = _gen_terrain(canvas.plot_width,
                        canvas.plot_height,
                        seed,
                        x_range=x_range_scaled,
                        y_range=y_range_scaled)

    data = (data - np.min(data)) / np.ptp(data)
    data[data < 0.3] = 0  # create water
    data *= zfactor

    # DataArray coords were coming back different from cvs.points...
    hack_agg = canvas.points(pd.DataFrame({'x': [], 'y': []}), 'x', 'y')
    agg = DataArray(data,
                    name='terrain',
                    coords=hack_agg.coords,
                    dims=hack_agg.dims,
                    attrs={'res': 1})

    return agg
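
A minimal usage sketch (assumes datashader is installed; the ranges are illustrative):

import datashader as ds

cvs = ds.Canvas(plot_width=300, plot_height=200,
                x_range=(-20e6, 20e6), y_range=(-20e6, 20e6))
terrain = generate_terrain(cvs, seed=10, zfactor=4000)
# `terrain` is a DataArray named 'terrain' with attrs={'res': 1}.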
Example 18
 def test_download_from_github(self, tmp_path) -> None:
     cache_dir = tmp_path / tutorial._default_cache_dir_name
     ds = tutorial.open_dataset(self.testfile, cache_dir=cache_dir).load()
     tiny = DataArray(range(5), name="tiny").to_dataset()
     assert_identical(ds, tiny)
Example 19
def mean(agg, passes=1, excludes=[np.nan], name='mean'):
    """
    Returns a mean-filtered array using a 3x3 window.
    By default, NaNs are excluded from the calculations.

    Parameters
    ----------
    agg : xarray.DataArray
        2D array of input values to be filtered.
    passes : int, default=1
        Number of times to run mean.
    excludes : list, default=[np.nan]
        Values to exclude from the mean calculation.
    name : str, default='mean'
        Output xr.DataArray.name property.

    Returns
    -------
    mean_agg : xarray.DataArray of same type as `agg`
        2D aggregate array of filtered values.

    Examples
    --------
    Focal mean works with a NumPy backed xarray DataArray.

    .. sourcecode:: python

        >>> import numpy as np
        >>> import xarray as xr
        >>> from xrspatial.focal import mean
        >>> data = np.array([
        ...     [0., 0., 0., 0., 0.],
        ...     [0., 0., 0., 0., 0.],
        ...     [0., 0., 9., 0., 0.],
        ...     [0., 0., 0., 0., 0.],
        ...     [0., 0., 0., 0., 0.]])
        >>> raster = xr.DataArray(data)
        >>> mean_agg = mean(raster)
        >>> print(mean_agg)
        <xarray.DataArray 'mean' (dim_0: 5, dim_1: 5)>
        array([[0., 0., 0., 0., 0.],
               [0., 1., 1., 1., 0.],
               [0., 1., 1., 1., 0.],
               [0., 1., 1., 1., 0.],
               [0., 0., 0., 0., 0.]])
        Dimensions without coordinates: dim_0, dim_1

    Focal mean also works with a Dask with NumPy backed xarray DataArray.
    Increase the number of runs by setting the `passes` parameter.

    .. sourcecode:: python

        >>> import dask.array as da
        >>> data_da = da.from_array(data, chunks=(3, 3))
        >>> raster_da = xr.DataArray(data_da, dims=['y', 'x'], name='raster_da')  # noqa
        >>> print(raster_da)
        <xarray.DataArray 'raster_da' (y: 5, x: 5)>
        dask.array<array, shape=(5, 5), dtype=int64, chunksize=(3, 3), chunktype=numpy.ndarray>  # noqa
        Dimensions without coordinates: y, x
        >>> mean_da = mean(raster_da, passes=2)
        >>> print(mean_da)
        <xarray.DataArray 'mean' (y: 5, x: 5)>
        dask.array<_trim, shape=(5, 5), dtype=float64, chunksize=(3, 3), chunktype=numpy.ndarray>  # noqa
        Dimensions without coordinates: y, x
        >>> print(mean_da.compute())
        <xarray.DataArray 'mean' (y: 5, x: 5)>
        array([[0.25      , 0.33333333, 0.5       , 0.33333333, 0.25      ],
               [0.33333333, 0.44444444, 0.66666667, 0.44444444, 0.33333333],
               [0.5       , 0.66666667, 1.        , 0.66666667, 0.5       ],
               [0.33333333, 0.44444444, 0.66666667, 0.44444444, 0.33333333],
               [0.25      , 0.33333333, 0.5       , 0.33333333, 0.25      ]])
        Dimensions without coordinates: y, x

    Focal mean also works with a CuPy backed xarray DataArray.
    In this example, `passes` is set to the number of elements of the array,
    so we get a mean array where every element has the same value.

    .. sourcecode:: python

        >>> import cupy
        >>> raster_cupy = xr.DataArray(cupy.asarray(data), name='raster_cupy')
        >>> mean_cupy = mean(raster_cupy, passes=25)
        >>> print(type(mean_cupy.data))
        <class 'cupy.core.core.ndarray'>
        >>> print(mean_cupy)
        <xarray.DataArray 'mean' (dim_0: 5, dim_1: 5)>
        array([[0.47928995, 0.47928995, 0.47928995, 0.47928995, 0.47928995],
               [0.47928995, 0.47928995, 0.47928995, 0.47928995, 0.47928995],
               [0.47928995, 0.47928995, 0.47928995, 0.47928995, 0.47928995],
               [0.47928995, 0.47928995, 0.47928995, 0.47928995, 0.47928995],
               [0.47928995, 0.47928995, 0.47928995, 0.47928995, 0.47928995]])
        Dimensions without coordinates: dim_0, dim_1
    """

    out = agg.data.astype(float)
    for i in range(passes):
        out = _mean(out, tuple(excludes))

    return DataArray(out,
                     name=name,
                     dims=agg.dims,
                     coords=agg.coords,
                     attrs=agg.attrs)
Example 20
    def get_test_content(self, filename, filename_info, filetype_info):
        """Mimic reader input file content"""
        start_time = filename_info['start_time']
        end_time = filename_info['end_time'].replace(year=start_time.year,
                                                     month=start_time.month,
                                                     day=start_time.day)

        prefix1 = 'Data_Products/{file_group}'.format(**filetype_info)
        prefix2 = '{prefix}/{file_group}_Aggr'.format(prefix=prefix1,
                                                      **filetype_info)
        prefix3 = 'All_Data/{file_group}_All'.format(**filetype_info)
        begin_date = start_time.strftime('%Y%m%d')
        begin_time = start_time.strftime('%H%M%S.%fZ')
        ending_date = end_time.strftime('%Y%m%d')
        ending_time = end_time.strftime('%H%M%S.%fZ')
        if filename[:3] == 'SVI':
            geo_prefix = 'GIMGO'
        elif filename[:3] == 'SVM':
            geo_prefix = 'GMODO'
        else:
            geo_prefix = None
        file_content = {
            "{prefix2}/attr/AggregateBeginningDate":
            begin_date,
            "{prefix2}/attr/AggregateBeginningTime":
            begin_time,
            "{prefix2}/attr/AggregateEndingDate":
            ending_date,
            "{prefix2}/attr/AggregateEndingTime":
            ending_time,
            "{prefix2}/attr/G-Ring_Longitude":
            np.array([0.0, 0.1, 0.2, 0.3]),
            "{prefix2}/attr/G-Ring_Latitude":
            np.array([0.0, 0.1, 0.2, 0.3]),
            "{prefix2}/attr/AggregateBeginningOrbitNumber":
            "{0:d}".format(filename_info['orbit']),
            "{prefix2}/attr/AggregateEndingOrbitNumber":
            "{0:d}".format(filename_info['orbit']),
            "{prefix1}/attr/Instrument_Short_Name":
            "VIIRS",
            "/attr/Platform_Short_Name":
            "NPP",
        }
        if geo_prefix:
            file_content['/attr/N_GEO_Ref'] = geo_prefix + filename[5:]
        for k, v in list(file_content.items()):
            file_content[k.format(prefix1=prefix1, prefix2=prefix2)] = v

        if filename[:3] in ['SVM', 'SVI', 'SVD']:
            if filename[2:5] in ['M{:02d}'.format(x)
                                 for x in range(12)] + ['I01', 'I02', 'I03']:
                keys = ['Radiance', 'Reflectance']
            elif filename[2:5] in ['M{:02d}'.format(x)
                                   for x in range(12, 17)] + ['I04', 'I05']:
                keys = ['Radiance', 'BrightnessTemperature']
            else:
                # DNB
                keys = ['Radiance']

            for k in keys:
                k = prefix3 + "/" + k
                file_content[k] = DEFAULT_FILE_DATA.copy()
                file_content[k + "/shape"] = DEFAULT_FILE_SHAPE
                file_content[k + "Factors"] = DEFAULT_FILE_FACTORS.copy()
        elif filename[0] == 'G':
            if filename[:5] in ['GMODO', 'GIMGO']:
                lon_data = np.linspace(
                    15, 55, DEFAULT_FILE_SHAPE[1]).astype(DEFAULT_FILE_DTYPE)
                lat_data = np.linspace(
                    55, 75, DEFAULT_FILE_SHAPE[1]).astype(DEFAULT_FILE_DTYPE)
            else:
                lon_data = np.linspace(
                    5, 45, DEFAULT_FILE_SHAPE[1]).astype(DEFAULT_FILE_DTYPE)
                lat_data = np.linspace(
                    45, 65, DEFAULT_FILE_SHAPE[1]).astype(DEFAULT_FILE_DTYPE)

            for k in ["Latitude"]:
                k = prefix3 + "/" + k
                file_content[k] = lat_data
                file_content[k] = np.repeat([file_content[k]],
                                            DEFAULT_FILE_SHAPE[0],
                                            axis=0)
                file_content[k + "/shape"] = DEFAULT_FILE_SHAPE
            for k in ["Longitude"]:
                k = prefix3 + "/" + k
                file_content[k] = lon_data
                file_content[k] = np.repeat([file_content[k]],
                                            DEFAULT_FILE_SHAPE[0],
                                            axis=0)
                file_content[k + "/shape"] = DEFAULT_FILE_SHAPE

        # convert to xarrays
        from xarray import DataArray
        import dask.array as da
        for key, val in file_content.items():
            if isinstance(val, np.ndarray):
                val = da.from_array(val, chunks=val.shape)
                if val.ndim > 1:
                    file_content[key] = DataArray(val, dims=('y', 'x'))
                else:
                    file_content[key] = DataArray(val)

        return file_content
Example 21
def hotspots(raster, kernel):
    """
    Identify statistically significant hot spots and cold spots in an
    input raster. To be a statistically significant hot spot, a feature
    will have a high value and be surrounded by other features with
    high values as well.
    The neighborhood of a feature is defined by the input kernel, which
    currently supports circle, annulus, or custom kernel shapes.

    The result should be a raster with the following 7 values:
        - 90 for 90% confidence high value cluster
        - 95 for 95% confidence high value cluster
        - 99 for 99% confidence high value cluster
        - -90 for 90% confidence low value cluster
        - -95 for 95% confidence low value cluster
        - -99 for 99% confidence low value cluster
        - 0 for no significance

    Parameters
    ----------
    raster : xarray.DataArray
        2D Input raster image with `raster.shape` = (height, width).
        Can be a NumPy backed, CuPy backed, or Dask with NumPy backed DataArray
    kernel : Numpy Array
        2D array where values of 1 indicate the kernel.

    Returns
    -------
    hotspots_agg : xarray.DataArray of same type as `raster`
        2D array of hotspots with values indicating confidence level.

    Examples
    --------
    .. sourcecode:: python

        >>> import numpy as np
        >>> import xarray as xr
        >>> from xrspatial.convolution import custom_kernel
        >>> kernel = custom_kernel(np.array([[1, 1, 0]]))
        >>> data = np.array([
        ...    [0, 1000, 1000, 0, 0, 0],
        ...    [0, 0, 0, -1000, -1000, 0],
        ...    [0, -900, -900, 0, 0, 0],
        ...    [0, 100, 1000, 0, 0, 0]])
        >>> from xrspatial.focal import hotspots
        >>> hotspots(xr.DataArray(data), kernel)
        <xarray.DataArray (dim_0: 4, dim_1: 6)>
        array([[  0,   0,  95,   0,   0,   0],
               [  0,   0,   0,   0, -90,   0],
               [  0,   0, -90,   0,   0,   0],
               [  0,   0,   0,   0,   0,   0]], dtype=int8)
        Dimensions without coordinates: dim_0, dim_1
    """

    # validate raster
    if not isinstance(raster, DataArray):
        raise TypeError("`raster` must be instance of DataArray")

    if raster.ndim != 2:
        raise ValueError("`raster` must be 2D")

    mapper = ArrayTypeFunctionMapping(
        numpy_func=_hotspots_numpy,
        cupy_func=_hotspots_cupy,
        dask_func=_hotspots_dask_numpy,
        dask_cupy_func=lambda *args: not_implemented_func(
            *args,
            messages=
            'hotspots() does not support dask with cupy backed DataArray.'
        ),  # noqa
    )
    out = mapper(raster)(raster, kernel)

    attrs = copy.deepcopy(raster.attrs)
    attrs['unit'] = '%'

    return DataArray(out, coords=raster.coords, dims=raster.dims, attrs=attrs)
Example 22
def from_series_or_scalar(se):
    try:
        return DataArray.from_series(se)
    except AttributeError:  # scalar case
        return DataArray(se)
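
A quick usage sketch: a pandas Series becomes a labeled DataArray, while a bare scalar (which lacks the Series interface, so `DataArray.from_series` raises AttributeError) falls back to a 0-d DataArray.

import pandas as pd

da_1d = from_series_or_scalar(pd.Series([1.0, 2.0], index=['a', 'b']))
da_0d = from_series_or_scalar(3.14)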
Example 23
    def raster(self,
               source,
               band=1,
               upsample_method='linear',
               downsample_method='mean'):
        """Sample a raster dataset by canvas size and bounds. Note: requires
        `rasterio`. Missing values (those having the value indicated by the
        "nodata" attribute of the raster) are replaced with `NaN` if floats, and
        0 if int.

        Parameters
        ----------
        source : xarray.DataArray
            input datasource, most likely obtained from `xr.open_rasterio()`.
        band : int (unused)
            source band number : optional default=1. Not yet implemented.
        upsample_method : str, optional default=linear
            resample mode when upsampling raster.
            options include: nearest, linear.
        downsample_method : str, optional default=mean
            resample mode when downsampling raster.
            options include: first, last, mean, mode, var, std

        Returns
        -------
        data : xarray.Dataset

        Notes
        -------
        requires `rasterio`.
        """
        try:
            import rasterio as rio
        except ImportError:
            raise ImportError('install rasterio to use this feature')

        upsample_methods = dict(nearest=US_NEAREST, linear=US_LINEAR)

        downsample_methods = dict(first=DS_FIRST,
                                  last=DS_LAST,
                                  mean=DS_MEAN,
                                  mode=DS_MODE,
                                  var=DS_VAR,
                                  std=DS_STD)

        if upsample_method not in upsample_methods.keys():
            raise ValueError(
                'Invalid upsample method: options include {}'.format(
                    list(upsample_methods.keys())))
        if downsample_method not in downsample_methods.keys():
            raise ValueError(
                'Invalid downsample method: options include {}'.format(
                    list(downsample_methods.keys())))

        res = calc_res(source)
        left, bottom, right, top = calc_bbox(source.x.values, source.y.values,
                                             res)

        # window coordinates
        xmin = max(self.x_range[0], left)
        ymin = max(self.y_range[0], bottom)
        xmax = min(self.x_range[1], right)
        ymax = min(self.y_range[1], top)

        width_ratio = (xmax - xmin) / (self.x_range[1] - self.x_range[0])
        height_ratio = (ymax - ymin) / (self.y_range[1] - self.y_range[0])

        if np.isclose(width_ratio, 0) or np.isclose(height_ratio, 0):
            raise ValueError(
                'Canvas x_range or y_range values do not match closely '
                'enough with the data source to be able to accurately '
                'rasterize. Please provide ranges that are more accurate.')

        w = int(np.ceil(self.plot_width * width_ratio))
        h = int(np.ceil(self.plot_height * height_ratio))

        data = resample_2d(source.values[0].astype(np.float32),
                           w,
                           h,
                           ds_method=downsample_methods[downsample_method],
                           us_method=upsample_methods[upsample_method])

        if w != self.plot_width or h != self.plot_height:
            num_height = self.plot_height - h
            num_width = self.plot_width - w

            lpad = xmin - self.x_range[0]
            rpad = self.x_range[1] - xmax
            lpct = lpad / (lpad + rpad) if lpad + rpad > 0 else 0
            left = int(np.ceil(num_width * lpct))
            right = num_width - left
            left_pad = np.empty(shape=(self.plot_height, left)).astype(
                source.dtype) * np.nan
            right_pad = np.empty(shape=(self.plot_height, right)).astype(
                source.dtype) * np.nan

            tpad = ymin - self.y_range[0]
            bpad = self.y_range[1] - ymax
            tpct = tpad / (tpad + bpad) if tpad + bpad > 0 else 0
            top = int(np.ceil(num_height * tpct))
            bottom = num_height - top
            top_pad = np.empty(shape=(top, w)).astype(source.dtype) * np.nan
            bottom_pad = np.empty(shape=(bottom, w)).astype(
                source.dtype) * np.nan

            data = np.concatenate((bottom_pad, data, top_pad), axis=0)
            data = np.concatenate((left_pad, data, right_pad), axis=1)

        data = np.flipud(data)
        attrs = dict(res=res[0], nodata=source._file_obj.nodata)
        return DataArray(data, dims=['y', 'x'], attrs=attrs)
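
A hypothetical usage sketch (the file path is illustrative; requires rasterio and an actual raster on disk):

import xarray as xr
import datashader as ds

src = xr.open_rasterio('example.tif')  # hypothetical input file
cvs = ds.Canvas(plot_width=400, plot_height=300)
agg = cvs.raster(src)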
Example 24
def test_cftime_datetime_mean_dask_error():
    times = cftime_range('2000', periods=4)
    da = DataArray(times, dims=['time']).chunk()
    with pytest.raises(NotImplementedError):
        da.mean()
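
A possible workaround sketch for the NotImplementedError above (an assumption, reusing the same `da`): load the chunked cftime array into memory before reducing.

da.compute().mean()  # computing first yields an in-memory array, where mean works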
Example 25
 def test_download_from_github(self):
     ds = tutorial.open_dataset(self.testfile).load()
     tiny = DataArray(range(5), name="tiny").to_dataset()
     assert_identical(ds, tiny)
Example 26
def bump(width: int,
         height: int,
         count: Optional[int] = None,
         height_func=None,
         spread: int = 1) -> xr.DataArray:
    """
    Generate a simple bump map to simulate the appearance of land
    features.

    Using a user-defined height function, determines at what elevation
    a specific bump height is acceptable. Bumps of number `count` are
    applied over the area `width` x `height`.

    Parameters
    ----------
    width : int
        Total width, in pixels, of the image.
    height : int
        Total height, in pixels, of the image.
    count : int, optional
        Number of bumps to generate. Defaults to `width * height // 10`.
    height_func : callable, optional
        Function that takes an (N, 2) array of (x, y) locations and
        returns an array of N bump heights; used to vary bump heights
        with elevation.
    spread : int, default=1
        Number of pixels to spread on all sides.

    Returns
    -------
    bump_agg : xarray.DataArray
        2D aggregate array of calculated bump heights.

    References
    ----------
        - ICA: http://www.mountaincartography.org/mt_hood/pdfs/nighbert_bump1.pdf # noqa

    Examples
    --------
    .. plot::
       :include-source:

        from functools import partial

        import matplotlib.pyplot as plt
        import numpy as np
        import xarray as xr

        from xrspatial import generate_terrain, bump


        # Generate Example Terrain
        W = 500
        H = 300

        template_terrain = xr.DataArray(np.zeros((H, W)))
        x_range=(-20e6, 20e6)
        y_range=(-20e6, 20e6)

        terrain_agg = generate_terrain(
            template_terrain, x_range=x_range, y_range=y_range
        )

        # Edit Attributes
        terrain_agg = terrain_agg.assign_attrs(
            {
                'Description': 'Example Terrain',
                'units': 'km',
                'Max Elevation': '4000',
            }
        )

        terrain_agg = terrain_agg.rename({'x': 'lon', 'y': 'lat'})
        terrain_agg = terrain_agg.rename('Elevation')

        # Create Height Function
        def heights(locations, src, src_range, height = 20):
            num_bumps = locations.shape[0]
            out = np.zeros(num_bumps, dtype = np.uint16)
            for r in range(0, num_bumps):
                loc = locations[r]
                x = loc[0]
                y = loc[1]
                val = src[y, x]
                if val >= src_range[0] and val < src_range[1]:
                    out[r] = height
            return out

        # Create Bump Map Aggregate Array
        bump_count = 10000
        src = terrain_agg.data

        # Short Bumps from z = 1000 to z = 1300
        bump_agg = bump(width = W, height = H, count = bump_count,
                        height_func = partial(heights, src = src,
                                            src_range = (1000, 1300),
                                            height = 5))

        # Tall Bumps from z = 1300 to z = 1700
        bump_agg += bump(width = W, height = H, count = bump_count // 2,
                        height_func = partial(heights, src = src,
                                            src_range = (1300, 1700),
                                            height=20))

        # Short Bumps from z = 1700 to z = 2000
        bump_agg += bump(width = W, height = H, count = bump_count // 3,
                        height_func = partial(heights, src = src,
                                            src_range = (1700, 2000),
                                            height=5))
        # Edit Attributes
        bump_agg = bump_agg.assign_attrs({'Description': 'Example Bump Map',
                                          'units': 'km'})

        bump_agg = bump_agg.rename('Bump Height')

        # Rename Coordinates
        bump_agg = bump_agg.assign_coords({'x': terrain_agg.coords['lon'].data,
                                           'y': terrain_agg.coords['lat'].data})

        # Remove zeros
        bump_agg.data[bump_agg.data == 0] = np.nan

        # Plot Terrain
        terrain_agg.plot(cmap = 'terrain', aspect = 2, size = 4)
        plt.title("Terrain")
        plt.ylabel("latitude")
        plt.xlabel("longitude")

        # Plot Bump Map
        bump_agg.plot(cmap = 'summer', aspect = 2, size = 4)
        plt.title("Bump Map")
        plt.ylabel("latitude")
        plt.xlabel("longitude")

    .. sourcecode:: python

        >>> print(terrain_agg[200:203, 200:202])
        <xarray.DataArray 'Elevation' (lat: 3, lon: 2)>
        array([[1264.02296597, 1261.947921  ],
               [1285.37105519, 1282.48079719],
               [1306.02339636, 1303.4069579 ]])
        Coordinates:
        * lon      (lon) float64 -3.96e+06 -3.88e+06
        * lat      (lat) float64 6.733e+06 6.867e+06 7e+06
        Attributes:
            res:            (80000.0, 133333.3333333333)
            Description:    Example Terrain
            units:          km
            Max Elevation:  4000

    .. sourcecode:: python

        >>> print(bump_agg[200:205, 200:206])
        <xarray.DataArray 'Bump Height' (y: 5, x: 6)>
        array([[nan, nan, nan, nan,  5.,  5.],
               [nan, nan, nan, nan, nan,  5.],
               [nan, nan, nan, nan, nan, nan],
               [nan, nan, nan, nan, nan, nan],
               [nan, nan, nan, nan, nan, nan]])
        Coordinates:
        * x        (x) float64 -3.96e+06 -3.88e+06 -3.8e+06 ... -3.64e+06 -3.56e+06
        * y        (y) float64 6.733e+06 6.867e+06 7e+06 7.133e+06 7.267e+06
        Attributes:
            res:          1
            Description:  Example Bump Map
            units:        km
    """
    linx = range(width)
    liny = range(height)

    if count is None:
        count = width * height // 10

    if height_func is None:
        height_func = lambda bumps: np.ones(len(bumps))  # noqa

    # create 2d array of random x, y for bump locations
    locs = np.empty((count, 2), dtype=np.uint16)
    locs[:, 0] = np.random.choice(linx, count)
    locs[:, 1] = np.random.choice(liny, count)

    heights = height_func(locs)

    bumps = _finish_bump(width, height, locs, heights, spread)
    return DataArray(bumps, dims=['y', 'x'], attrs=dict(res=1))
Example 27
    def get_test_content(self, filename, filename_info, filetype_info):
        """Mimic reader input file content"""
        file_content = {
            '/attr/time_coverage_start':
            filename_info['start_time'].strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
            '/attr/time_coverage_end':
            filename_info['end_time'].strftime('%Y-%m-%dT%H:%M:%S.%fZ'),
            '/attr/start_orbit_number':
            1,
            '/attr/end_orbit_number':
            2,
            '/attr/platform_name':
            'NPP',
            '/attr/instrument_name':
            'CrIS, ATMS, VIIRS',
        }
        for k, units, standard_name in [
            ('Solar_Zenith', 'degrees', 'solar_zenith_angle'),
            ('Topography', 'meters', ''),
            ('Land_Fraction', '1', ''),
            ('Surface_Pressure', 'mb', ''),
            ('Skin_Temperature', 'Kelvin', 'surface_temperature'),
        ]:
            file_content[k] = DEFAULT_FILE_DATA
            file_content[k + '/shape'] = DEFAULT_FILE_SHAPE
            file_content[k + '/attr/units'] = units
            file_content[k + '/attr/valid_range'] = (0., 120.)
            file_content[k + '/attr/_FillValue'] = -9999.
            if standard_name:
                file_content[k + '/attr/standard_name'] = standard_name
        for k, units, standard_name in [
            ('Temperature', 'Kelvin', 'air_temperature'),
            ('H2O', '1', ''),
            ('H2O_MR', 'g/g', ''),
            ('O3', '1', ''),
            ('O3_MR', '1', ''),
            ('Liquid_H2O', '1', ''),
            ('Liquid_H2O_MR', 'g/g', 'cloud_liquid_water_mixing_ratio'),
            ('CO', '1', ''),
            ('CO_MR', '1', ''),
            ('CH4', '1', ''),
            ('CH4_MR', '1', ''),
            ('CO2', '1', ''),
            ('HNO3', '1', ''),
            ('HNO3_MR', '1', ''),
            ('N2O', '1', ''),
            ('N2O_MR', '1', ''),
            ('SO2', '1', ''),
            ('SO2_MR', '1', ''),
        ]:
            file_content[k] = DEFAULT_PRES_FILE_DATA
            file_content[k + '/shape'] = DEFAULT_PRES_FILE_SHAPE
            file_content[k + '/attr/units'] = units
            file_content[k + '/attr/valid_range'] = (0., 120.)
            file_content[k + '/attr/_FillValue'] = -9999.
            if standard_name:
                file_content[k + '/attr/standard_name'] = standard_name
        k = 'Pressure'
        file_content[k] = ALL_PRESSURE_LEVELS
        file_content[k + '/shape'] = DEFAULT_PRES_FILE_SHAPE
        file_content[k + '/attr/units'] = 'mb'
        file_content[k + '/attr/valid_range'] = (0., 2000.)
        file_content[k + '/attr/_FillValue'] = -9999.

        k = 'Quality_Flag'
        file_content[k] = DEFAULT_FILE_DATA.astype(np.int32)
        file_content[k + '/shape'] = DEFAULT_FILE_SHAPE
        file_content[k + '/attr/valid_range'] = (0, 31)
        file_content[k + '/attr/_FillValue'] = -9999.

        k = 'Longitude'
        file_content[k] = DEFAULT_LON_DATA
        file_content[k + '/shape'] = DEFAULT_FILE_SHAPE
        file_content[k + '/attr/units'] = 'degrees_east'
        file_content[k + '/attr/valid_range'] = (-180., 180.)
        file_content[k + '/attr/standard_name'] = 'longitude'
        file_content[k + '/attr/_FillValue'] = -9999.

        k = 'Latitude'
        file_content[k] = DEFAULT_LAT_DATA
        file_content[k + '/shape'] = DEFAULT_FILE_SHAPE
        file_content[k + '/attr/units'] = 'degrees_north'
        file_content[k + '/attr/valid_range'] = (-90., 90.)
        file_content[k + '/attr/standard_name'] = 'latitude'
        file_content[k + '/attr/_FillValue'] = -9999.

        # convert to xarrays
        from xarray import DataArray
        for key, val in file_content.items():
            if isinstance(val, np.ndarray):
                attrs = {}
                for a in [
                        '_FillValue', 'flag_meanings', 'flag_values', 'units'
                ]:
                    if key + '/attr/' + a in file_content:
                        attrs[a] = file_content[key + '/attr/' + a]
                if val.ndim == 1:
                    file_content[key] = DataArray(val,
                                                  dims=('number_of_FORs', ),
                                                  attrs=attrs)
                elif val.ndim > 1:
                    file_content[key] = DataArray(val,
                                                  dims=('number_of_FORs',
                                                        'number_of_p_levels'),
                                                  attrs=attrs)
                else:
                    file_content[key] = DataArray(val, attrs=attrs)

        return file_content
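
# A minimal sketch (not part of the original test) of how the flat
# "Name/attr/..." mapping built above can be consumed: attributes are
# collected from the '<var>/attr/<name>' keys and attached to the array.
# All names below are illustrative stand-ins, not the real reader's data.
import numpy as np
from xarray import DataArray

file_content = {
    'Temperature': np.zeros((4, 2)),
    'Temperature/attr/units': 'Kelvin',
    'Temperature/attr/_FillValue': -9999.,
}
attrs = {key.rsplit('/', 1)[1]: val
         for key, val in file_content.items() if '/attr/' in key}
temp = DataArray(file_content['Temperature'],
                 dims=('number_of_FORs', 'number_of_p_levels'),
                 attrs=attrs)
assert temp.attrs['units'] == 'Kelvin'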
Example n. 28
    def raster(
            self,
            source,
            layer=None,
            upsample_method='linear',  # Deprecated as of datashader=0.6.4
            downsample_method=rd.mean(),  # Deprecated as of datashader=0.6.4
            nan_value=None,
            agg=None,
            interpolate=None,
            chunksize=None,
            max_mem=None):
        """Sample a raster dataset by canvas size and bounds.

        Handles 2D or 3D xarray DataArrays, assuming that the last two
        array dimensions are the y- and x-axes to be resampled. If a 3D
        array is supplied, a ``layer`` may be specified to select a
        single layer along the first dimension before resampling.

        Missing values (those having the value indicated by the
        "nodata" attribute of the raster) are replaced with `NaN` for
        float arrays and with 0 for integer arrays.

        Also supports resampling out-of-core DataArrays backed by dask
        arrays. By default the output array keeps the input chunksize,
        but a custom ``chunksize`` may be provided. Memory constraints
        can be expressed with the ``max_mem`` parameter, which bounds
        how large the in-memory chunks may be.

        Parameters
        ----------
        source : xarray.DataArray or xarray.Dataset
            2D or 3D labelled array (if a Dataset, the agg reduction must
            define the data variable).
        layer : float, optional
            For a 3D array, the coordinate value selecting a single layer
            along the first (z) dimension. Default is None.
        upsample_method : str, optional
            Deprecated since datashader 0.6.4; use ``interpolate`` instead.
        downsample_method : Reduction, optional
            Deprecated since datashader 0.6.4; use ``agg`` instead.
        nan_value : int or float, optional
            Optional nan_value which will be masked out when applying
            the resampling.
        agg : Reduction, optional, default=mean()
            Resampling mode when downsampling the raster. Options include:
            first, last, mean, mode, var, std, min, max. Accepts a
            Reduction instance, a Reduction class, or a string name.
        interpolate : str, optional, default=linear
            Resampling mode when upsampling the raster. Options include:
            nearest, linear.
        chunksize : tuple(int, int), optional
            Size of the output chunks. By default the chunk size is
            inherited from the *source* array.
        max_mem : int, optional
            The maximum number of bytes that should be loaded into memory
            during the regridding operation.

        Returns
        -------
        data : xarray.DataArray
        """
        # For backwards compatibility
        if agg is None: agg = downsample_method
        if interpolate is None: interpolate = upsample_method

        upsample_methods = ['nearest', 'linear']

        downsample_methods = {
            'first': 'first',
            rd.first: 'first',
            'last': 'last',
            rd.last: 'last',
            'mode': 'mode',
            rd.mode: 'mode',
            'mean': 'mean',
            rd.mean: 'mean',
            'var': 'var',
            rd.var: 'var',
            'std': 'std',
            rd.std: 'std',
            'min': 'min',
            rd.min: 'min',
            'max': 'max',
            rd.max: 'max'
        }

        if interpolate not in upsample_methods:
            raise ValueError(
                'Invalid interpolate method: options include {}'.format(
                    upsample_methods))

        if not isinstance(source, (DataArray, Dataset)):
            raise ValueError('Expected xarray DataArray or Dataset as '
                             'the data source, found %s.' %
                             type(source).__name__)

        column = None
        if isinstance(agg, rd.Reduction):
            agg, column = type(agg), agg.column
            if (isinstance(source, DataArray) and column is not None
                    and source.name != column):
                agg_repr = '%s(%r)' % (agg.__name__, column)
                raise ValueError('DataArray name %r does not match '
                                 'supplied reduction %s.' %
                                 (source.name, agg_repr))

        if isinstance(source, Dataset):
            data_vars = list(source.data_vars)
            if column is None:
                raise ValueError('When supplying a Dataset the agg reduction '
                                 'must specify the variable to aggregate. '
                                 'Available data_vars include: %r.' %
                                 data_vars)
            elif column not in source.data_vars:
                raise KeyError('Supplied reduction column %r not found '
                               'in Dataset, expected one of the following '
                               'data variables: %r.' % (column, data_vars))
            source = source[column]

        if agg not in downsample_methods:
            raise ValueError(
                'Invalid aggregation method: options include {}'.format(
                    list(downsample_methods.keys())))
        ds_method = downsample_methods[agg]

        if source.ndim not in [2, 3]:
            raise ValueError('Raster aggregation expects a 2D or 3D '
                             'DataArray, found %s dimensions' % source.ndim)

        res = calc_res(source)
        ydim, xdim = source.dims[-2:]
        xvals, yvals = source[xdim].values, source[ydim].values
        left, bottom, right, top = calc_bbox(xvals, yvals, res)
        if layer is not None:
            source = source.sel(**{source.dims[0]: layer})
        array = orient_array(source, res)
        dtype = array.dtype

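        # Mask the user-supplied nan_value so it is excluded from resampling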
        if nan_value is not None:
            mask = array == nan_value
            array = np.ma.masked_array(array, mask=mask, fill_value=nan_value)
            fill_value = nan_value
        else:
            fill_value = np.nan

        if self.x_range is None: self.x_range = (left, right)
        if self.y_range is None: self.y_range = (bottom, top)

        # window coordinates
        xmin = max(self.x_range[0], left)
        ymin = max(self.y_range[0], bottom)
        xmax = min(self.x_range[1], right)
        ymax = min(self.y_range[1], top)

        width_ratio = min((xmax - xmin) / (self.x_range[1] - self.x_range[0]),
                          1)
        height_ratio = min((ymax - ymin) / (self.y_range[1] - self.y_range[0]),
                           1)

        if np.isclose(width_ratio, 0) or np.isclose(height_ratio, 0):
            raise ValueError(
                'Canvas x_range or y_range values do not match closely '
                'enough with the data source to be able to accurately '
                'rasterize. Please provide ranges that are more accurate.')

        w = max(int(round(self.plot_width * width_ratio)), 1)
        h = max(int(round(self.plot_height * height_ratio)), 1)
        cmin, cmax = get_indices(xmin, xmax, xvals, res[0])
        rmin, rmax = get_indices(ymin, ymax, yvals, res[1])

        kwargs = dict(w=w,
                      h=h,
                      ds_method=ds_method,
                      us_method=interpolate,
                      fill_value=fill_value)
        if array.ndim == 2:
            source_window = array[rmin:rmax + 1, cmin:cmax + 1]
            if ds_method in ['var', 'std']:
                source_window = source_window.astype('f')
            if isinstance(source_window, da.Array):
                data = resample_2d_distributed(source_window,
                                               chunksize=chunksize,
                                               max_mem=max_mem,
                                               **kwargs)
            else:
                data = resample_2d(source_window, **kwargs)
            layers = 1
        else:
            source_window = array[:, rmin:rmax + 1, cmin:cmax + 1]
            if ds_method in ['var', 'std']:
                source_window = source_window.astype('f')
            arrays = []
            for arr in source_window:
                if isinstance(arr, da.Array):
                    arr = resample_2d_distributed(arr,
                                                  chunksize=chunksize,
                                                  max_mem=max_mem,
                                                  **kwargs)
                else:
                    arr = resample_2d(arr, **kwargs)
                arrays.append(arr)
            data = np.dstack(arrays)
            layers = len(arrays)

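        # Pad the resampled window with fill values so the output covers
        # the full canvas extent requested by x_range/y_range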
        if w != self.plot_width or h != self.plot_height:
            num_height = self.plot_height - h
            num_width = self.plot_width - w

            lpad = xmin - self.x_range[0]
            rpad = self.x_range[1] - xmax
            lpct = lpad / (lpad + rpad) if lpad + rpad > 0 else 0
            left = max(int(np.ceil(num_width * lpct)), 0)
            right = max(num_width - left, 0)
            lshape, rshape = (self.plot_height, left), (self.plot_height,
                                                        right)
            if layers > 1:
                lshape, rshape = lshape + (layers, ), rshape + (layers, )
            left_pad = np.full(lshape, fill_value, source_window.dtype)
            right_pad = np.full(rshape, fill_value, source_window.dtype)

            tpad = ymin - self.y_range[0]
            bpad = self.y_range[1] - ymax
            tpct = tpad / (tpad + bpad) if tpad + bpad > 0 else 0
            top = max(int(np.ceil(num_height * tpct)), 0)
            bottom = max(num_height - top, 0)
            tshape, bshape = (top, w), (bottom, w)
            if layers > 1:
                tshape, bshape = tshape + (layers, ), bshape + (layers, )
            top_pad = np.full(tshape, fill_value, source_window.dtype)
            bottom_pad = np.full(bshape, fill_value, source_window.dtype)

            concat = (da.concatenate if isinstance(data, da.Array)
                      else np.concatenate)
            if top_pad.shape[0] > 0:
                data = concat((top_pad, data, bottom_pad), axis=0)
            if left_pad.shape[1] > 0:
                data = concat((left_pad, data, right_pad), axis=1)

        # Reorient array to original orientation
        if res[1] > 0: data = data[::-1]
        if res[0] < 0: data = data[:, ::-1]

        # Restore nan_value from masked array
        if nan_value is not None:
            data = data.filled()

        # Restore original dtype
        if dtype != data.dtype:
            data = data.astype(dtype)

        # Compute DataArray metadata
        xs, ys = compute_coords(self.plot_width, self.plot_height,
                                self.x_range, self.y_range, res)
        coords = {xdim: xs, ydim: ys}
        dims = [ydim, xdim]
        attrs = dict(res=res[0])
        if source._file_obj is not None and hasattr(source._file_obj,
                                                    'nodata'):
            attrs['nodata'] = source._file_obj.nodata

        # Handle DataArray with layers
        if data.ndim == 3:
            data = data.transpose([2, 0, 1])
            layer_dim = source.dims[0]
            coords[layer_dim] = source.coords[layer_dim]
            dims = [layer_dim] + dims
        return DataArray(data, coords=coords, dims=dims, attrs=attrs)
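
# A minimal usage sketch, assuming datashader and xarray are installed;
# the synthetic DataArray below stands in for a real raster dataset.
import numpy as np
import xarray as xr
import datashader as ds

raster = xr.DataArray(np.random.rand(10, 20),
                      coords={'y': np.arange(10.), 'x': np.arange(20.)},
                      dims=('y', 'x'))
cvs = ds.Canvas(plot_width=5, plot_height=5)
agg = cvs.raster(raster, agg='mean', interpolate='linear')
print(agg.shape)  # (5, 5)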
Example n. 29
def manhattan(data, colora="#5689AC", colorb="#21334F", 
              anno_pv_max=None, pts_kws=None, ax=None):
    """
    Produce a Manhattan plot.

    Parameters
    ----------
    data : DataFrame, dict
        DataFrame or dict containing the chromosome ("chrom"), base-pair
        position ("pos"), and p-value ("pv") of each candidate.
    colora : matplotlib color
        Points color of the first group.
    colorb : matplotlib color
        Points color of the second group.
    anno_pv_max : float, optional
        Maximum p-value; candidates whose p-value falls below this
        threshold are annotated with ``data['id']`` in the plot.
    pts_kws : dict, optional
        Keyword arguments forwarded to the matplotlib function used for
        plotting the points.
    ax : matplotlib Axes, optional
        The target handle for this figure. If ``None``, the current axes
        are used.

    Example
    -------
    .. plot::

        >>> import limix_plot as lp
        >>> from numpy import log10
        >>>
        >>> df = lp.load_dataset('gwas')
        >>> df = df.rename(columns={"chr": "chrom"})
        >>> print(df.head())
            chrom     pos       pv
        234    10  224239  0.00887
        239    10  229681  0.00848
        253    10  240788  0.00721
        258    10  246933  0.00568
        266    10  255222  0.00593
        >>> lp.manhattan(df)
        >>> plt = lp.get_pyplot()
        >>> _ = plt.axhline(-log10(1e-7), color='red')
        >>> _ = plt.ylim(2, plt.ylim()[1])
    """
    from numpy import log10, unique, where
    from xarray import DataArray
    import pandas as pd

    plt = get_pyplot()

    if isinstance(data, pd.DataFrame):
        data = DataArray(
            data=data["pv"],
            dims=["candidate"],
            coords={k: ("candidate", data[k]) for k in data.columns},
        )
    else:
        data = DataArray(data=data)

    if len(data) == 0:
        raise ValueError("DataFrame is empty.")

    if pts_kws is None:
        pts_kws = dict()

    ax = plt.gca() if ax is None else ax

    data["chrom"] = data["chrom"].astype(str)
    data["pos"] = data["pos"].astype(int)
    chr_order = _chr_precedence(data)
    data["order"] = ("candidate", [chr_order[i] for i in data["chrom"].values])

    data = data.sortby(["order", "pos"])

    data = _abs_pos(data)

    if "markersize" not in pts_kws:
        pts_kws["markersize"] = 2
    if "marker" not in pts_kws:
        pts_kws["marker"] = "."
    if "linestyle" not in pts_kws:
        pts_kws["linestyle"] = ""

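    # alternate the point color between consecutive chromosomes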
    colors = {0: colora, 1: colorb}

    for i, c in enumerate(unique(data["order"])):
        ok = data["order"] == c
        pts_kws["color"] = colors[i % 2]
        x = data.loc[ok]["abs_pos"]
        y = -log10(data.loc[ok].values)
        ax.plot(x, y, **pts_kws)
        
        if anno_pv_max is not None:
            _idx = where(y > -log10(anno_pv_max))[0]
            for _ii in _idx:
                if 'id' in data.coords:
                    _txt = data['id'].loc[ok].loc[_ii].values
                else:
                    _txt = (str(data['chrom'].loc[ok].loc[_ii].values) + "_" + 
                            str(data['pos'].loc[ok].loc[_ii].values))
                ax.annotate(_txt, (x[_ii], y[_ii]), ha='center')

    ax.set_xlim(data["abs_pos"].min(), data["abs_pos"].max())
    ax.set_ylim(0, ax.get_ylim()[1])

    ax.set_ylabel("-log$_{10}$pv")
    ax.set_xlabel("chromosome")

    u = unique(data["chrom"].values)
    chrom_labels = sorted(u, key=lambda x: chr_order[x])
    _set_ticks(ax, _chrom_bounds(data), chrom_labels)
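
# Hypothetical usage of the annotation threshold added above: candidates
# with pv < 1e-6 are labelled with their 'id' coordinate when present,
# otherwise with a "chrom_pos" fallback string.
import limix_plot as lp

df = lp.load_dataset('gwas')
df = df.rename(columns={'chr': 'chrom'})
lp.manhattan(df, anno_pv_max=1e-6)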
Example n. 30
 def test_nonnumeric_index_raises_typeerror(self):
     a = DataArray([1, 2, 3], {'letter': ['a', 'b', 'c']}, dims='letter')
     with raises_regex(TypeError, r'[Pp]lot'):
         a.plot.line()
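
# A quick standalone illustration (assuming xarray and matplotlib are
# installed) of the behaviour asserted by the test above:
import xarray as xr

a = xr.DataArray([1, 2, 3], {'letter': ['a', 'b', 'c']}, dims='letter')
try:
    a.plot.line()
except TypeError as err:
    print(err)  # xarray refuses to line-plot a non-numeric coordinate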