Example #1
0
def test_dataset_join():
    import numpy as np

    ds0 = xr.Dataset({'a': ('x', [1, 2]), 'x': [0, 1]})
    ds1 = xr.Dataset({'a': ('x', [99, 3]), 'x': [1, 2]})

    # dataset_join without a dataset_fill_value is rejected
    with pytest.raises(TypeError):
        apply_ufunc(operator.add, ds0, ds1, dataset_join='outer')

    def add(a, b, join, dataset_join):
        """Add two objects with the given join settings and NaN fill."""
        return apply_ufunc(
            operator.add, a, b,
            join=join, dataset_join=dataset_join,
            dataset_fill_value=np.nan)

    expected = xr.Dataset({'a': ('x', [np.nan, 101, np.nan]),
                           'x': [0, 1, 2]})
    # with matching variable names 'inner' and 'outer' agree
    for ds_join in ('inner', 'outer'):
        assert_identical(add(ds0, ds1, 'outer', ds_join), expected)

    # if variables don't match, join will perform add with np.nan
    ds2 = xr.Dataset({'b': ('x', [99, 3]), 'x': [1, 2]})
    assert_identical(add(ds0, ds2, 'outer', 'inner'),
                     xr.Dataset({'x': [0, 1, 2]}))

    all_nan = ('x', [np.nan, np.nan, np.nan])
    expected = xr.Dataset({'a': all_nan, 'b': all_nan, 'x': [0, 1, 2]})
    assert_identical(add(ds0, ds2, 'outer', 'outer'), expected)
Example #2
0
def test_apply_dask_multiple_inputs():
    import dask.array as da

    def covariance(x, y):
        # sample covariance along the last axis
        x_dev = x - x.mean(axis=-1, keepdims=True)
        y_dev = y - y.mean(axis=-1, keepdims=True)
        return (x_dev * y_dev).mean(axis=-1)

    rs = np.random.RandomState(42)
    array1 = da.from_array(rs.randn(4, 4), chunks=(2, 4))
    array2 = da.from_array(rs.randn(4, 4), chunks=(2, 4))
    data_array_1 = xr.DataArray(array1, dims=('x', 'z'))
    data_array_2 = xr.DataArray(array2, dims=('y', 'z'))

    core_dims = [['z'], ['z']]
    # reference computed eagerly on the in-memory data
    expected = apply_ufunc(
        covariance, data_array_1.compute(), data_array_2.compute(),
        input_core_dims=core_dims)

    # both lazy modes must stay lazy and match the eager result
    for extra in ({'dask': 'allowed'},
                  {'dask': 'parallelized', 'output_dtypes': [float]}):
        actual = apply_ufunc(
            covariance, data_array_1, data_array_2,
            input_core_dims=core_dims, **extra)
        assert isinstance(actual.data, da.Array)
        xr.testing.assert_allclose(expected, actual.compute())
Example #3
0
def test_apply_exclude():

    def concatenate(objects, dim='x'):
        """Concatenate ``objects`` along ``dim`` via apply_ufunc."""
        result = apply_ufunc(
            lambda *arrays: np.concatenate(arrays, axis=-1),
            *objects,
            input_core_dims=[[dim]] * len(objects),
            output_core_dims=[[dim]],
            exclude_dims={dim})
        if isinstance(result, (xr.Dataset, xr.DataArray)):
            # note: this will fail if dim is not a coordinate on any input
            result.coords[dim] = np.concatenate(
                [obj.coords[dim] for obj in objects])
        return result

    arrays = [np.array([1]), np.array([2, 3])]
    variables = [xr.Variable('x', a) for a in arrays]
    data_arrays = [
        xr.DataArray(v, {'x': c, 'y': ('x', range(len(c)))})
        for v, c in zip(variables, [['a'], ['b', 'c']])
    ]
    datasets = [xr.Dataset({'data': arr}) for arr in data_arrays]

    expected_array = np.array([1, 2, 3])
    expected_variable = xr.Variable('x', expected_array)
    expected_data_array = xr.DataArray(expected_variable, [('x', list('abc'))])
    expected_dataset = xr.Dataset({'data': expected_data_array})

    for expected, objects in [(expected_array, arrays),
                              (expected_variable, variables),
                              (expected_data_array, data_arrays),
                              (expected_dataset, datasets)]:
        assert_identical(expected, concatenate(objects))

    # excluded dims must also be core dimensions
    with pytest.raises(ValueError):
        apply_ufunc(identity, variables[0], exclude_dims={'x'})
Example #4
0
def test_apply_exclude():

    def concatenate(objects, dim='x'):
        """Concatenate along ``dim`` using the legacy ``signature`` kwarg."""
        sig = ([(dim,)] * len(objects), [(dim,)])
        # merge coordinate values up front; plain ndarrays contribute nothing
        new_coord = np.concatenate(
            [obj.coords[dim] if hasattr(obj, 'coords') else []
             for obj in objects])

        def func(*x):
            return np.concatenate(x, axis=-1)

        result = apply_ufunc(func, *objects, signature=sig,
                             exclude_dims={dim})
        if isinstance(result, (xr.Dataset, xr.DataArray)):
            result.coords[dim] = new_coord
        return result

    arrays = [np.array([1]), np.array([2, 3])]
    variables = [xr.Variable('x', a) for a in arrays]
    data_arrays = [
        xr.DataArray(v, {'x': c, 'y': ('x', range(len(c)))})
        for v, c in zip(variables, [['a'], ['b', 'c']])
    ]
    datasets = [xr.Dataset({'data': arr}) for arr in data_arrays]

    expected_array = np.array([1, 2, 3])
    expected_variable = xr.Variable('x', expected_array)
    expected_data_array = xr.DataArray(expected_variable, [('x', list('abc'))])
    expected_dataset = xr.Dataset({'data': expected_data_array})

    for expected, objects in [(expected_array, arrays),
                              (expected_variable, variables),
                              (expected_data_array, data_arrays),
                              (expected_dataset, datasets)]:
        assert_identical(expected, concatenate(objects))

    def identity(x):
        return x

    # excluded dims must also be core dimensions
    with pytest.raises(ValueError):
        apply_ufunc(identity, variables[0], exclude_dims={'x'})
Example #5
0
def xr_polyfit(obj, dim, ix=None, deg=0.5, poly='hermite'):
    """Fit a polynomial of degree ``deg`` using least-squares along ``dim``.

    Parameters
    ----------
    obj : xarray.Dataset or xarray.DataArray
        The object to fit.
    dim : str, optional
        The dimension to fit along.
    ix : {None, int, array_like}, optional
        If ``None``, interpolate the polynomial at the original x points.
        If ``int``, linearly space this many points along the range of the
        original data and interpolate with these.
        If array-like, interpolate at these given points.
    deg : int or float, optional
        The degree of the polynomial to fit. Used directly if integer. If float
        supplied, with ``0.0 < deg < 1.0``, the proportion of the total
        possible degree to use.
    poly : {'chebyshev', 'polynomial', 'legendre',
            'laguerre', 'hermite'}, optional
        The type of polynomial to fit.

    Returns
    -------
    new_xobj : xarray.DataArray or xarray.Dataset

    See Also
    --------
    xr_unispline
    """
    # the gufunc works on the trailing axis (axis=-1 below), fed the
    # coordinate values first, then the data
    input_core_dims = [(dim,), (dim,)]
    args = (obj[dim], obj)

    if ix is None:
        # evaluate at the original sample points, so the output keeps ``dim``
        kwargs = {'ix': ix, 'axis': -1, 'deg': deg, 'poly': poly}
        output_core_dims = [(dim,)]
        return apply_ufunc(_broadcast_polyfit, *args, kwargs=kwargs,
                           input_core_dims=input_core_dims,
                           output_core_dims=output_core_dims)

    if isinstance(ix, int):
        # expand an integer count into evenly spaced sample points
        ix = np.linspace(float(obj[dim].min()), float(obj[dim].max()), ix)

    kwargs = {'ix': ix, 'axis': -1, 'deg': deg, 'poly': poly}
    # new sample points form a new dimension, renamed back to ``dim`` below
    output_core_dims = [('__temp_dim__',)]

    result = apply_ufunc(_broadcast_polyfit, *args, kwargs=kwargs,
                         input_core_dims=input_core_dims,
                         output_core_dims=output_core_dims)
    result['__temp_dim__'] = ix
    return result.rename({'__temp_dim__': dim})
Example #6
0
def test_output_wrong_number():
    variable = xr.Variable('x', np.arange(10))

    # declaring two outputs but returning one must fail
    with raises_regex(ValueError, 'number of outputs'):
        apply_ufunc(lambda x: x, variable, output_core_dims=[(), ()])

    # declaring two outputs but returning three must fail
    with raises_regex(ValueError, 'number of outputs'):
        apply_ufunc(lambda x: (x, x, x), variable,
                    output_core_dims=[(), ()])
Example #7
0
def test_apply_input_core_dimension():

    def first_element(obj, dim):
        # reduce away ``dim`` by selecting its first entry
        return apply_ufunc(lambda x: x[..., 0], obj,
                           input_core_dims=[[dim]])

    array = np.array([[1, 2], [3, 4]])
    variable = xr.Variable(['x', 'y'], array)
    data_array = xr.DataArray(variable, {'x': ['a', 'b'], 'y': [-1, -2]})
    dataset = xr.Dataset({'data': data_array})

    expected_variable_x = xr.Variable(['y'], [1, 2])
    expected_data_array_x = xr.DataArray(expected_variable_x, {'y': [-1, -2]})
    expected_dataset_x = xr.Dataset({'data': expected_data_array_x})

    expected_variable_y = xr.Variable(['x'], [1, 3])
    expected_data_array_y = xr.DataArray(expected_variable_y,
                                         {'x': ['a', 'b']})
    expected_dataset_y = xr.Dataset({'data': expected_data_array_y})

    cases = [
        (variable, expected_variable_x, expected_variable_y),
        (data_array, expected_data_array_x, expected_data_array_y),
        (dataset, expected_dataset_x, expected_dataset_y),
    ]
    for obj, expected_x, expected_y in cases:
        assert_identical(expected_x, first_element(obj, 'x'))
        assert_identical(expected_y, first_element(obj, 'y'))

    # grouped objects behave like their ungrouped counterparts
    assert_identical(expected_data_array_x,
                     first_element(data_array.groupby('y'), 'x'))
    assert_identical(expected_dataset_x,
                     first_element(dataset.groupby('y'), 'x'))

    def multiply(*args):
        product = args[0]
        for factor in args[1:]:
            product = product * factor
        return product

    # regression test for GH:2341
    with pytest.raises(ValueError):
        apply_ufunc(multiply, data_array, data_array['y'].values,
                    input_core_dims=[['y']], output_core_dims=[['y']])
    expected = xr.DataArray(multiply(data_array, data_array['y']),
                            dims=['x', 'y'], coords=data_array.coords)
    actual = apply_ufunc(multiply, data_array, data_array['y'].values,
                         input_core_dims=[['y'], []],
                         output_core_dims=[['y']])
    assert_identical(expected, actual)
Example #8
0
 def original_and_stack_negative(obj):
     """Return ``obj`` plus a stacked (x, -x) copy with a new 'sign' dim."""
     def func(x):
         return x, xr.core.npcompat.stack([x, -x], axis=-1)

     sig = ([()], [(), ('sign',)])
     result = apply_ufunc(func, obj, signature=sig)
     if isinstance(result[1], (xr.Dataset, xr.DataArray)):
         result[1].coords['sign'] = [1, -1]
     return result
Example #9
0
def xr_diff_u_err(obj, dim):
    """Propagate error through uneven-third-order finite difference derivative.

    If you have calculated a derivative already using ``xr_diff_u``, and you
    have data about the uncertainty on the original data, this function
    propagates that error through to be an error on the derivative.

    Parameters
    ----------
    obj : xarray.Dataset or xarray.DataArray
        The object to differentiate.
    dim : str
        The dimension to differentiate along.

    Returns
    -------
    new_xobj : xarray.DataArray or xarray.Dataset
    """
    # the gufunc operates on the trailing axis, fed the coordinate then data
    return apply_ufunc(
        _broadcast_diff_u_err, obj[dim], obj,
        input_core_dims=[(dim,), (dim,)],
        output_core_dims=[(dim,)],
        kwargs={'axis': -1})
Example #10
0
def xr_diff_u(obj, dim):
    """Uneven-third-order finite difference derivative [1].

    [1] Singh, Ashok K., and B. S. Bhadauria. "Finite difference formulae for
    unequal sub-intervals using lagrange’s interpolation formula."
    International Journal of Mathematics and Analysis 3.17 (2009): 815-827.

    Parameters
    ----------
    obj : xarray.Dataset or xarray.DataArray
        The object to differentiate.
    dim : str
        The dimension to differentiate along.

    Returns
    -------
    new_xobj : xarray.DataArray or xarray.Dataset
    """
    # the gufunc operates on the trailing axis, fed the coordinate then data
    return apply_ufunc(
        _broadcast_diff_u, obj[dim], obj,
        input_core_dims=[(dim,), (dim,)],
        output_core_dims=[(dim,)],
        kwargs={'axis': -1})
Example #11
0
def test_vectorize():
    data_array = xr.DataArray([[0, 1, 2], [1, 2, 3]], dims=('x', 'y'))
    result = apply_ufunc(
        pandas_median, data_array,
        input_core_dims=[['y']], vectorize=True)
    # median reduces away 'y', leaving one value per 'x'
    assert_identical(xr.DataArray([1, 2], dims=['x']), result)
Example #12
0
 def original_and_stack_negative(obj):
     """Return ``obj`` alongside np.stack([obj, -obj]) with a 'sign' dim."""
     stack_both = lambda x: (x, np.stack([x, -x], axis=-1))
     result = apply_ufunc(stack_both, obj,
                          output_core_dims=[[], ['sign']])
     if isinstance(result[1], (xr.Dataset, xr.DataArray)):
         result[1].coords['sign'] = [1, -1]
     return result
Example #13
0
 def stack_negative(obj):
     """Stack ``obj`` with its negation along a new trailing 'sign' dim."""
     result = apply_ufunc(lambda x: np.stack([x, -x], axis=-1), obj,
                          output_core_dims=[['sign']])
     if isinstance(result, (xr.Dataset, xr.DataArray)):
         result.coords['sign'] = [1, -1]
     return result
Example #14
0
def xr_interp(obj, dim, ix=100, order=3):
    """Interpolate along axis ``dim`` using :func:`scipy.interpolate.interp1d`.

    Parameters
    ----------
    obj : xarray.Dataset or xarray.DataArray
        The object to interpolate.
    dim : str
        The axis to interpolate along.
    ix : {None, int, array}
        If ``None``, evaluate at the original sample points. If int,
        interpolate to this many points spaced evenly along the range
        of the original data. If array, interpolate to those points directly.
    order : int
        Supplied to :func:`scipy.interpolate.interp1d` as the order of
        interpolation.

    Returns
    -------
    new_xobj : xarray.DataArray or xarray.Dataset

    See Also
    --------
    xr_pchip
    """

    # the gufunc works on the trailing axis (axis=-1 below), fed the
    # coordinate values first, then the data
    input_core_dims = [(dim,), (dim,)]
    args = (obj[dim], obj)
    kwargs = {'ix': ix, 'axis': -1, 'order': order}

    if ix is None:
        # evaluating at the original points keeps ``dim`` unchanged
        output_core_dims = [(dim,)]
        return apply_ufunc(_broadcast_interp, *args, kwargs=kwargs,
                           input_core_dims=input_core_dims,
                           output_core_dims=output_core_dims)

    if isinstance(ix, int):
        # expand an integer count into evenly spaced sample points
        ix = np.linspace(float(obj[dim].min()), float(obj[dim].max()), ix)

    kwargs['ix'] = ix
    # new sample points form a new dimension, renamed back to ``dim`` below
    output_core_dims = [('__temp_dim__',)]

    result = apply_ufunc(_broadcast_interp, *args, kwargs=kwargs,
                         input_core_dims=input_core_dims,
                         output_core_dims=output_core_dims)
    result['__temp_dim__'] = ix
    return result.rename({'__temp_dim__': dim})
Example #15
0
def xr_filter_wiener(obj, dim, mysize=5, noise=1e-2):
    """Apply a Wiener filter to ``obj`` along ``dim``.

    Parameters
    ----------
    obj : xarray.Dataset or xarray.DataArray
        The object to filter.
    dim : str
        The dimension to filter along.
    mysize : int, optional
        Filter window size, forwarded to the underlying filter
        (presumably :func:`scipy.signal.wiener` -- confirm).
    noise : float, optional
        Noise-power estimate, forwarded to the underlying filter.

    Returns
    -------
    new_xobj : xarray.DataArray or xarray.Dataset
    """
    # the gufunc operates on the trailing axis, fed the coordinate then data
    kwargs = {'mysize': mysize, 'noise': noise, 'axis': -1}
    input_core_dims = [(dim,), (dim,)]
    output_core_dims = [(dim,)]
    args = (obj[dim], obj)
    return apply_ufunc(_broadcast_filter_wiener, *args,
                       input_core_dims=input_core_dims,
                       output_core_dims=output_core_dims,
                       kwargs=kwargs)
Example #16
0
def test_vectorize_dask():
    data_array = xr.DataArray([[0, 1, 2], [1, 2, 3]], dims=('x', 'y'))
    result = apply_ufunc(
        pandas_median, data_array.chunk({'x': 1}),
        input_core_dims=[['y']], vectorize=True,
        dask='parallelized', output_dtypes=[float])
    # lazy result must match the eager per-row median
    assert_identical(xr.DataArray([1, 2], dims=['x']), result)
Example #17
0
 def concatenate(objects, dim='x'):
     """Concatenate along ``dim`` using the legacy ``signature`` kwarg."""
     sig = ([(dim,)] * len(objects), [(dim,)])
     # merge coordinate values up front; plain ndarrays contribute nothing
     new_coord = np.concatenate(
         [obj.coords[dim] if hasattr(obj, 'coords') else []
          for obj in objects])

     def func(*x):
         return np.concatenate(x, axis=-1)

     result = apply_ufunc(func, *objects, signature=sig, exclude_dims={dim})
     if isinstance(result, (xr.Dataset, xr.DataArray)):
         result.coords[dim] = new_coord
     return result
Example #18
0
def test_vectorize():
    if LooseVersion(np.__version__) < LooseVersion('1.12.0'):
        pytest.skip('numpy 1.12 or later to support vectorize=True.')

    data_array = xr.DataArray([[0, 1, 2], [1, 2, 3]], dims=('x', 'y'))
    result = apply_ufunc(
        pandas_median, data_array,
        input_core_dims=[['y']], vectorize=True)
    assert_identical(xr.DataArray([1, 2], dims=['x']), result)
Example #19
0
def test_apply_dask():
    import dask.array as da

    array = da.ones((2,), chunks=2)
    variable = _NoCacheVariable('x', array)
    coords = xr.DataArray(variable).coords.variables
    data_array = xr.DataArray(variable, coords, fastpath=True)
    dataset = xr.Dataset({'y': variable})

    def identity(x):
        return x

    # encountered dask array, but did not set dask_array='allowed'
    for obj in (array, variable, data_array, dataset):
        with pytest.raises(ValueError):
            apply_ufunc(identity, obj)

    # unknown setting for dask array handling
    with pytest.raises(ValueError):
        apply_ufunc(identity, array, dask_array='auto')

    def dask_safe_identity(x):
        return apply_ufunc(identity, x, dask_array='allowed')

    # plain dask arrays pass through untouched
    assert array is dask_safe_identity(array)

    for obj in (variable, data_array):
        actual = dask_safe_identity(obj)
        assert isinstance(actual.data, da.Array)
        assert_identical(obj, actual)

    actual = dask_safe_identity(dataset)
    assert isinstance(actual['y'].data, da.Array)
    assert_identical(dataset, actual)
Example #20
0
def test_vectorize_dask():
    if LooseVersion(np.__version__) < LooseVersion('1.12.0'):
        pytest.skip('numpy 1.12 or later to support vectorize=True.')

    data_array = xr.DataArray([[0, 1, 2], [1, 2, 3]], dims=('x', 'y'))
    result = apply_ufunc(
        pandas_median, data_array.chunk({'x': 1}),
        input_core_dims=[['y']], vectorize=True,
        dask='parallelized', output_dtypes=[float])
    assert_identical(xr.DataArray([1, 2], dims=['x']), result)
Example #21
0
 def concatenate(objects, dim='x'):
     """Concatenate ``objects`` along ``dim`` via apply_ufunc."""
     result = apply_ufunc(
         lambda *arrays: np.concatenate(arrays, axis=-1),
         *objects,
         input_core_dims=[[dim]] * len(objects),
         output_core_dims=[[dim]],
         exclude_dims={dim})
     if isinstance(result, (xr.Dataset, xr.DataArray)):
         # note: this will fail if dim is not a coordinate on any input
         result.coords[dim] = np.concatenate(
             [obj.coords[dim] for obj in objects])
     return result
Example #22
0
def test_apply_exclude():
    def concatenate(objects, dim="x"):
        """Concatenate ``objects`` along ``dim`` via apply_ufunc."""
        result = apply_ufunc(
            lambda *arrays: np.concatenate(arrays, axis=-1),
            *objects,
            input_core_dims=[[dim]] * len(objects),
            output_core_dims=[[dim]],
            exclude_dims={dim})
        if isinstance(result, (xr.Dataset, xr.DataArray)):
            # note: this will fail if dim is not a coordinate on any input
            result.coords[dim] = np.concatenate(
                [obj.coords[dim] for obj in objects])
        return result

    arrays = [np.array([1]), np.array([2, 3])]
    variables = [xr.Variable("x", a) for a in arrays]
    data_arrays = [
        xr.DataArray(v, {"x": c, "y": ("x", range(len(c)))})
        for v, c in zip(variables, [["a"], ["b", "c"]])
    ]
    datasets = [xr.Dataset({"data": arr}) for arr in data_arrays]

    expected_array = np.array([1, 2, 3])
    expected_variable = xr.Variable("x", expected_array)
    expected_data_array = xr.DataArray(expected_variable, [("x", list("abc"))])
    expected_dataset = xr.Dataset({"data": expected_data_array})

    for expected, objects in [(expected_array, arrays),
                              (expected_variable, variables),
                              (expected_data_array, data_arrays),
                              (expected_dataset, datasets)]:
        assert_identical(expected, concatenate(objects))

    # must also be a core dimension
    with pytest.raises(ValueError):
        apply_ufunc(identity, variables[0], exclude_dims={"x"})
def test_vectorize_dask_new_output_dims():
    # regression test for GH3574
    # run vectorization in dask.array.gufunc by using `dask='parallelized'`
    data_array = xr.DataArray([[0, 1, 2], [1, 2, 3]], dims=("x", "y"))

    def func(x):
        return x[np.newaxis, ...]

    common = dict(
        output_core_dims=[["z"]],
        vectorize=True,
        dask="parallelized",
        output_dtypes=[float],
    )

    expected = data_array.expand_dims("z")
    actual = apply_ufunc(
        func,
        data_array.chunk({"x": 1}),
        dask_gufunc_kwargs=dict(output_sizes={"z": 1}),
        **common,
    ).transpose(*expected.dims)
    assert_identical(expected, actual)

    # a size for a dimension that is not an output core dim is rejected
    with raises_regex(ValueError, "dimension 'z1' in 'output_sizes' must correspond"):
        apply_ufunc(
            func,
            data_array.chunk({"x": 1}),
            dask_gufunc_kwargs=dict(output_sizes={"z1": 1}),
            **common,
        )

    # a new output core dim without a declared size is rejected
    with raises_regex(
        ValueError, "dimension 'z' in 'output_core_dims' needs corresponding"
    ):
        apply_ufunc(func, data_array.chunk({"x": 1}), **common)
Example #24
0
def test_apply_dask_parallelized():
    import dask.array as da

    array = da.ones((2, 2), chunks=(1, 1))
    data_array = xr.DataArray(array, dims=('x', 'y'))

    actual = apply_ufunc(
        identity, data_array,
        dask='parallelized', output_dtypes=[float])
    # the result stays lazy and keeps the input chunking
    assert isinstance(actual.data, da.Array)
    assert actual.data.chunks == array.chunks
    assert_identical(data_array, actual)
def test_vectorize_dask():
    # run vectorization in dask.array.gufunc by using `dask='parallelized'`
    data_array = xr.DataArray([[0, 1, 2], [1, 2, 3]], dims=("x", "y"))
    result = apply_ufunc(
        pandas_median, data_array.chunk({"x": 1}),
        input_core_dims=[["y"]], vectorize=True,
        dask="parallelized", output_dtypes=[float])
    assert_identical(xr.DataArray([1, 2], dims=["x"]), result)
Example #26
0
def test_vectorize_dask():
    if LooseVersion(np.__version__) < LooseVersion('1.12.0'):
        pytest.skip('numpy 1.12 or later to support vectorize=True.')

    expected = xr.DataArray([1, 2], dims=['x'])
    data_array = xr.DataArray([[0, 1, 2], [1, 2, 3]], dims=('x', 'y'))
    actual = apply_ufunc(
        pandas_median, data_array.chunk({'x': 1}),
        input_core_dims=[['y']],
        vectorize=True, dask='parallelized', output_dtypes=[float])
    assert_identical(expected, actual)
Example #27
0
def test_apply_dask_multiple_inputs():
    import dask.array as da

    def covariance(x, y):
        # sample covariance along the last axis
        x_dev = x - x.mean(axis=-1, keepdims=True)
        y_dev = y - y.mean(axis=-1, keepdims=True)
        return (x_dev * y_dev).mean(axis=-1)

    rs = np.random.RandomState(42)
    array1 = da.from_array(rs.randn(4, 4), chunks=(2, 4))
    array2 = da.from_array(rs.randn(4, 4), chunks=(2, 4))
    data_array_1 = xr.DataArray(array1, dims=("x", "z"))
    data_array_2 = xr.DataArray(array2, dims=("y", "z"))

    core_dims = [["z"], ["z"]]
    # reference computed eagerly on the in-memory data
    expected = apply_ufunc(
        covariance,
        data_array_1.compute(),
        data_array_2.compute(),
        input_core_dims=core_dims,
    )

    # both lazy modes must stay lazy and match the eager result
    for extra in ({"dask": "allowed"},
                  {"dask": "parallelized", "output_dtypes": [float]}):
        actual = apply_ufunc(
            covariance, data_array_1, data_array_2,
            input_core_dims=core_dims, **extra)
        assert isinstance(actual.data, da.Array)
        xr.testing.assert_allclose(expected, actual.compute())
Example #28
0
def test_apply_dask():
    import dask.array as da

    array = da.ones((2, ), chunks=2)
    variable = xr.Variable("x", array)
    coords = xr.DataArray(variable).coords.variables
    data_array = xr.DataArray(variable, dims=["x"], coords=coords)
    dataset = xr.Dataset({"y": variable})

    # encountered dask array, but did not set dask='allowed'
    for obj in (array, variable, data_array, dataset):
        with pytest.raises(ValueError):
            apply_ufunc(identity, obj)

    # unknown setting for dask array handling
    with pytest.raises(ValueError):
        apply_ufunc(identity, array, dask="unknown")

    def dask_safe_identity(x):
        return apply_ufunc(identity, x, dask="allowed")

    # plain dask arrays pass through untouched
    assert array is dask_safe_identity(array)

    for obj in (variable, data_array):
        actual = dask_safe_identity(obj)
        assert isinstance(actual.data, da.Array)
        assert_identical(obj, actual)

    actual = dask_safe_identity(dataset)
    assert isinstance(actual["y"].data, da.Array)
    assert_identical(dataset, actual)
Example #29
0
def test_vectorize_dask_dtype_without_output_dtypes(data_array):
    # ensure output_dtypes is preserved with vectorize=True
    # GH4015
    actual = apply_ufunc(
        identity,
        data_array.chunk({"x": 1}),
        vectorize=True,
        dask="parallelized",
    )
    expected = data_array.copy()

    assert_identical(expected, actual)
    # the inferred dtype must match the input's
    assert expected.dtype == actual.dtype
Example #30
0
    def concatenate(objects, dim='x'):
        """Concatenate ``objects`` along ``dim`` via apply_ufunc."""
        result = apply_ufunc(
            lambda *arrays: np.concatenate(arrays, axis=-1),
            *objects,
            input_core_dims=[[dim]] * len(objects),
            output_core_dims=[[dim]],
            exclude_dims={dim})
        if isinstance(result, (xr.Dataset, xr.DataArray)):
            # note: this will fail if dim is not a coordinate on any input
            result.coords[dim] = np.concatenate(
                [obj.coords[dim] for obj in objects])
        return result
Example #31
0
def test_vectorize_exclude_dims():
    # GH 3890: excluded core dims may differ in length between the inputs
    a = xr.DataArray([[0, 1, 2], [1, 2, 3]], dims=("x", "y"))
    b = xr.DataArray([[0, 1, 2, 3, 4], [1, 2, 3, 4, 5]], dims=("x", "y"))

    result = apply_ufunc(
        pandas_median_add, a, b,
        input_core_dims=[["y"], ["y"]],
        vectorize=True,
        exclude_dims=set("y"))
    assert_identical(xr.DataArray([3, 5], dims=["x"]), result)
Example #32
0
def test_dataset_join():
    import numpy as np
    ds0 = xr.Dataset({'a': ('x', [1, 2]), 'x': [0, 1]})
    ds1 = xr.Dataset({'a': ('x', [99, 3]), 'x': [1, 2]})

    # dataset_join without a dataset_fill_value is rejected
    with pytest.raises(TypeError):
        apply_ufunc(operator.add, ds0, ds1, dataset_join='outer')

    def add(a, b, join, dataset_join):
        """Add two objects with the given join settings and NaN fill."""
        return apply_ufunc(operator.add, a, b,
                           join=join,
                           dataset_join=dataset_join,
                           dataset_fill_value=np.nan)

    expected = xr.Dataset({'a': ('x', [np.nan, 101, np.nan]), 'x': [0, 1, 2]})
    # with matching variable names 'inner' and 'outer' agree
    for ds_join in ('inner', 'outer'):
        assert_identical(add(ds0, ds1, 'outer', ds_join), expected)

    # if variables don't match, join will perform add with np.nan
    ds2 = xr.Dataset({'b': ('x', [99, 3]), 'x': [1, 2]})
    assert_identical(add(ds0, ds2, 'outer', 'inner'),
                     xr.Dataset({'x': [0, 1, 2]}))

    all_nan = ('x', [np.nan, np.nan, np.nan])
    expected = xr.Dataset({'a': all_nan, 'b': all_nan, 'x': [0, 1, 2]})
    assert_identical(add(ds0, ds2, 'outer', 'outer'), expected)
Example #33
0
def test_vectorize_exclude_dims_dask():
    # GH 3890: excluded core dims may differ in length between the inputs,
    # also when vectorizing through dask
    a = xr.DataArray([[0, 1, 2], [1, 2, 3]], dims=("x", "y"))
    b = xr.DataArray([[0, 1, 2, 3, 4], [1, 2, 3, 4, 5]], dims=("x", "y"))

    result = apply_ufunc(
        pandas_median_add,
        a.chunk({"x": 1}), b.chunk({"x": 1}),
        input_core_dims=[["y"], ["y"]],
        exclude_dims=set("y"),
        vectorize=True,
        dask="parallelized",
        output_dtypes=[float])
    assert_identical(xr.DataArray([3, 5], dims=["x"]), result)
Example #34
0
def test_vectorize_dask_new_output_dims():
    # regression test for GH3574
    data_array = xr.DataArray([[0, 1, 2], [1, 2, 3]], dims=("x", "y"))

    def add_leading_dim(x):
        return x[np.newaxis, ...]

    expected = data_array.expand_dims("z")
    actual = apply_ufunc(
        add_leading_dim,
        data_array.chunk({"x": 1}),
        output_core_dims=[["z"]],
        vectorize=True,
        dask="parallelized",
        output_dtypes=[float],
        output_sizes={"z": 1},
    ).transpose(*expected.dims)
    assert_identical(expected, actual)
Example #35
0
 def _ffill(arr, axis):
     """
     Generic implementation of ffill borrowed from xarray.core.missing.ffill.
     Requires bottleneck to work.
     """
     import bottleneck as bn
     from xarray.core.computation import apply_ufunc

     # work around for bottleneck 178
     limit = arr.shape[axis]
     return apply_ufunc(bn.push, arr,
                        dask="allowed",
                        keep_attrs=True,
                        output_dtypes=[arr.dtype],
                        kwargs=dict(n=limit, axis=axis))
Example #36
0
def test_vectorize_dask_dtype_meta():
    """``meta``'s dtype must take precedence over ``output_dtypes``."""
    data_array = xr.DataArray([[0, 1, 2], [1, 2, 3]], dims=("x", "y"))
    expected = xr.DataArray([1, 2], dims=["x"])

    actual = apply_ufunc(
        pandas_median,
        data_array.chunk({"x": 1}),
        input_core_dims=[["y"]],
        vectorize=True,
        dask="parallelized",
        output_dtypes=[int],
        # builtin ``float`` instead of ``np.float``: the alias was deprecated
        # in NumPy 1.20 and removed in 1.24; numpy treats it as float64
        meta=np.ndarray((0, 0), dtype=float),
    )

    assert_identical(expected, actual)
    # meta's float dtype wins over the declared int output_dtypes
    assert float == actual.dtype
Example #37
0
def xr_idxmin(obj, dim):
    """Find the coordinate of the minimum along ``dim``.

    Parameters
    ----------
    obj : xarray.DataArray or xarray.Dataset
        Object to find the coordinate of the minimum in.
    dim : str
        Dimension along which to find the minimum.

    Returns
    -------
    new_xobj : xarray.DataArray or xarray.Dataset
    """
    # gufunc gets (data, coords) with ``dim`` moved to the trailing axis
    input_core_dims = [(dim,), (dim,)]
    kwargs = {'axis': -1}
    # +inf fill keeps NaNs out of the minimum; all-NaN slices are masked after
    allna = obj.isnull().all(dim)
    return apply_ufunc(gufunc_idxmin, obj.fillna(np.inf), obj[dim],
                       input_core_dims=input_core_dims, kwargs=kwargs,
                       dask='allowed').where(~allna)
Example #38
0
def test_output_wrong_dims():
    """Outputs whose ndim disagrees with the declared core dims must raise."""
    variable = xr.Variable('x', np.arange(10))

    def gain_axis(x):
        return x[..., np.newaxis]

    def drop_axis(x):
        return x[..., 0]

    message = 'unexpected number of dimensions'

    # two new core dims declared, only one produced
    with raises_regex(ValueError, message):
        apply_ufunc(gain_axis, variable, output_core_dims=[('y', 'z')])

    # an extra dim produced but none declared
    with raises_regex(ValueError, message):
        apply_ufunc(gain_axis, variable)

    # an existing dim silently removed
    with raises_regex(ValueError, message):
        apply_ufunc(drop_axis, variable)
Example #39
0
def test_output_wrong_dims():
    """Dimension-count mismatches in apply_ufunc outputs raise ValueError."""
    variable = xr.Variable("x", np.arange(10))

    def add_dim(x):
        # append a trailing axis
        return x[..., np.newaxis]

    def remove_dim(x):
        # drop the trailing axis
        return x[..., 0]

    cases = [
        (add_dim, dict(output_core_dims=[("y", "z")])),
        (add_dim, {}),
        (remove_dim, {}),
    ]
    for func, extra_kwargs in cases:
        with raises_regex(ValueError, "unexpected number of dimensions"):
            apply_ufunc(func, variable, **extra_kwargs)
Example #40
0
def xr_idxmin(obj, dim):
    """Find the coordinate of the minimum along ``dim``.

    Parameters
    ----------
    obj : xarray.DataArray or xarray.Dataset
        Object to find the coordinate of the minimum in.
    dim : str
        Dimension along which to find the minimum.

    Returns
    -------
    new_xobj : xarray.DataArray or xarray.Dataset
    """
    # apply_ufunc moves the core dim to the last axis, hence axis=-1 below.
    input_core_dims = [(dim, ), (dim, )]
    kwargs = {'axis': -1}
    # Positions where every value along ``dim`` is NaN: fillna(np.inf) would
    # otherwise yield a spurious argmin there, so mask them out afterwards.
    allna = obj.isnull().all(dim)
    return apply_ufunc(gufunc_idxmin,
                       obj.fillna(np.inf),
                       obj[dim],
                       input_core_dims=input_core_dims,
                       kwargs=kwargs,
                       dask='allowed').where(~allna)
Example #41
0
def xr_filtfilt_bessel(obj, dim, N=2, Wn=0.4):
    """Filter (with forward and backward pass) data along ``dim`` using
    the bessel design :py:func:`scipy.signal.bessel`.

    Parameters
    ----------
    obj : xarray.Dataset or xarray.DataArray
        The object to apply signal filtering to.
    dim : str
        The dimension to filter along.
    N : int, optional
        The order of the filter.
    Wn : scalar, optional
        Critical frequency.
    """
    # Both the coordinate and the data carry ``dim`` as their core dim;
    # the filter operates on the trailing axis after apply_ufunc moves it.
    return apply_ufunc(
        _broadcast_filtfilt_bessel,
        obj[dim],
        obj,
        input_core_dims=[(dim,), (dim,)],
        output_core_dims=[(dim,)],
        kwargs={'N': N, 'Wn': Wn, 'axis': -1},
    )
Example #42
0
def test_dataset_join():
    """Exercise join/dataset_join handling in apply_ufunc on Datasets."""
    ds0 = xr.Dataset({"a": ("x", [1, 2]), "x": [0, 1]})
    ds1 = xr.Dataset({"a": ("x", [99, 3]), "x": [1, 2]})

    # by default, cannot have different labels
    with raises_regex(ValueError, "indexes .* are not equal"):
        apply_ufunc(operator.add, ds0, ds1)
    # dataset_join alone is not enough: a fill value is required too
    with raises_regex(TypeError, "must supply"):
        apply_ufunc(operator.add, ds0, ds1, dataset_join="outer")

    def add(a, b, join, dataset_join):
        return apply_ufunc(operator.add, a, b, join=join,
                           dataset_join=dataset_join,
                           dataset_fill_value=np.nan)

    expected = xr.Dataset({"a": ("x", [np.nan, 101, np.nan]),
                           "x": [0, 1, 2]})
    assert_identical(add(ds0, ds1, "outer", "inner"), expected)
    assert_identical(add(ds0, ds1, "outer", "outer"), expected)

    with raises_regex(ValueError, "data variable names"):
        apply_ufunc(operator.add, ds0, xr.Dataset({"b": 1}))

    ds2 = xr.Dataset({"b": ("x", [99, 3]), "x": [1, 2]})
    # 'inner' drops the unmatched variables entirely
    assert_identical(add(ds0, ds2, "outer", "inner"),
                     xr.Dataset({"x": [0, 1, 2]}))

    # 'outer' keeps both variables, filled with the np.nan fill_value above
    nan3 = [np.nan, np.nan, np.nan]
    expected = xr.Dataset(
        {"a": ("x", nan3), "b": ("x", nan3), "x": [0, 1, 2]}
    )
    assert_identical(add(ds0, ds2, "outer", "outer"), expected)
Example #43
0
def xr_filtfilt_bessel(obj, dim, N=2, Wn=0.4):
    """Filter (with forward and backward pass) data along ``dim`` using
    the bessel design :py:func:`scipy.signal.bessel`.

    Parameters
    ----------
    obj : xarray.Dataset or xarray.DataArray
        The object to apply signal filtering to.
    dim : str
        The dimension to filter along.
    N : int, optional
        The order of the filter.
    Wn : scalar, optional
        Critical frequency.
    """
    core = (dim,)
    # axis=-1 because apply_ufunc moves the core dim to the last axis
    filt_kwargs = dict(N=N, Wn=Wn, axis=-1)
    return apply_ufunc(_broadcast_filtfilt_bessel, obj[dim], obj,
                       input_core_dims=[core, core],
                       output_core_dims=[core],
                       kwargs=filt_kwargs)
Example #44
0
def test_dataset_join():
    """join/dataset_join interplay for Dataset inputs to apply_ufunc."""
    ds0 = xr.Dataset({'a': ('x', [1, 2]), 'x': [0, 1]})
    ds1 = xr.Dataset({'a': ('x', [99, 3]), 'x': [1, 2]})

    # by default, cannot have different labels
    with raises_regex(ValueError, 'indexes .* are not equal'):
        apply_ufunc(operator.add, ds0, ds1)
    # dataset_join without dataset_fill_value is an error
    with raises_regex(TypeError, 'must supply'):
        apply_ufunc(operator.add, ds0, ds1, dataset_join='outer')

    def add(a, b, join, dataset_join):
        return apply_ufunc(operator.add, a, b, join=join,
                           dataset_join=dataset_join,
                           dataset_fill_value=np.nan)

    # with matching variable names, both dataset_join modes agree
    expected = xr.Dataset({'a': ('x', [np.nan, 101, np.nan]),
                           'x': [0, 1, 2]})
    for dataset_join in ('inner', 'outer'):
        assert_identical(add(ds0, ds1, 'outer', dataset_join), expected)

    with raises_regex(ValueError, 'data variable names'):
        apply_ufunc(operator.add, ds0, xr.Dataset({'b': 1}))

    ds2 = xr.Dataset({'b': ('x', [99, 3]), 'x': [1, 2]})
    # 'inner' drops the unmatched variables entirely
    assert_identical(add(ds0, ds2, 'outer', 'inner'),
                     xr.Dataset({'x': [0, 1, 2]}))

    # 'outer' keeps both, filled with np.nan (the fill_value in add() above)
    all_nan = ('x', [np.nan] * 3)
    expected = xr.Dataset({'a': all_nan, 'b': all_nan, 'x': [0, 1, 2]})
    assert_identical(add(ds0, ds2, 'outer', 'outer'), expected)
Example #45
0
def test_dataset_join():
    """Datasets with mismatched indexes/variables under apply_ufunc."""
    ds0 = xr.Dataset({'a': ('x', [1, 2]), 'x': [0, 1]})
    ds1 = xr.Dataset({'a': ('x', [99, 3]), 'x': [1, 2]})

    # by default, cannot have different labels
    with raises_regex(ValueError, 'indexes .* are not equal'):
        apply_ufunc(operator.add, ds0, ds1)
    # a dataset_join also needs a dataset_fill_value
    with raises_regex(TypeError, 'must supply'):
        apply_ufunc(operator.add, ds0, ds1, dataset_join='outer')

    def add(a, b, join, dataset_join):
        # np.nan stands in for values/variables missing after alignment
        return apply_ufunc(operator.add, a, b, join=join,
                           dataset_join=dataset_join,
                           dataset_fill_value=np.nan)

    expected = xr.Dataset({'a': ('x', [np.nan, 101, np.nan]),
                           'x': [0, 1, 2]})
    actual = add(ds0, ds1, 'outer', 'inner')
    assert_identical(actual, expected)
    actual = add(ds0, ds1, 'outer', 'outer')
    assert_identical(actual, expected)

    with raises_regex(ValueError, 'data variable names'):
        apply_ufunc(operator.add, ds0, xr.Dataset({'b': 1}))

    ds2 = xr.Dataset({'b': ('x', [99, 3]), 'x': [1, 2]})
    actual = add(ds0, ds2, 'outer', 'inner')
    assert_identical(actual, xr.Dataset({'x': [0, 1, 2]}))

    # we used np.nan as the fill_value in add() above
    actual = add(ds0, ds2, 'outer', 'outer')
    nans = [np.nan, np.nan, np.nan]
    expected = xr.Dataset({'a': ('x', nans), 'b': ('x', nans),
                           'x': [0, 1, 2]})
    assert_identical(actual, expected)
Example #46
0
 def add(a, b, keep_attrs):
     """Add ``a`` and ``b`` via apply_ufunc, forwarding ``keep_attrs``
     only when it is truthy (otherwise rely on apply_ufunc's default)."""
     if not keep_attrs:
         return apply_ufunc(operator.add, a, b)
     return apply_ufunc(operator.add, a, b, keep_attrs=keep_attrs)
Example #47
0
 def parallel_add(x, y):
     """Dask-parallelized elementwise addition of ``x`` and ``y``."""
     return apply_ufunc(
         operator.add, x, y,
         dask='parallelized',
         output_dtypes=[np.int64],
     )
Example #48
0
    def first_element(obj, dim):
        """Return the leading value of ``obj`` along core dim ``dim``."""
        def take_first(x):
            # core dim is last after apply_ufunc rearranges axes
            return x[..., 0]

        return apply_ufunc(take_first, obj, input_core_dims=[[dim]])
Example #49
0
 def apply_truncate_broadcast_invalid(obj):
     """Apply ``truncate`` with no core dims declared (invalid, since
     truncation changes the size of a broadcast dimension)."""
     result = apply_ufunc(truncate, obj)
     return result
Example #50
0
def add(a, b):
    """Elementwise ``a + b`` routed through apply_ufunc."""
    total = apply_ufunc(operator.add, a, b)
    return total
Example #51
0
    def twice(obj):
        """Return two copies of ``obj`` via a two-output apply_ufunc call."""
        def duplicate(x):
            return x, x

        # two empty output_core_dims entries declare two scalar-core outputs
        return apply_ufunc(duplicate, obj, output_core_dims=[[], []])
Example #52
0
 def stack_negative(obj):
     """Stack ``obj`` and its negation along a new 'sign' core dim."""
     def signed_pair(x):
         return np.stack([x, -x], axis=-1)

     # output_sizes is required because 'sign' is a new dim under
     # dask='parallelized'
     return apply_ufunc(
         signed_pair, obj,
         output_core_dims=[['sign']],
         dask='parallelized',
         output_dtypes=[obj.dtype],
         output_sizes={'sign': 2},
     )
Example #53
0
 def add(a, b, join, dataset_join):
     """Add with explicit alignment; np.nan fills unmatched labels and
     variables."""
     return apply_ufunc(
         operator.add, a, b,
         join=join,
         dataset_join=dataset_join,
         dataset_fill_value=np.nan,
     )
Example #54
0
 def apply_truncate_x_z(obj):
     """Truncate along core dim 'x', emitting the result as new dim 'z'."""
     return apply_ufunc(
         truncate, obj,
         input_core_dims=[['x']],
         output_core_dims=[['z']],
     )
Example #55
0
 def dask_safe_identity(x):
     """Identity through apply_ufunc, with dask arrays passed through."""
     result = apply_ufunc(identity, x, dask='allowed')
     return result
Example #56
0
 def apply_truncate_x_x_valid(obj):
     """Truncate along 'x' and re-emit 'x'; exclude_dims permits the
     size change on the core dim."""
     return apply_ufunc(
         truncate, obj,
         input_core_dims=[['x']],
         output_core_dims=[['x']],
         exclude_dims={'x'},
     )
Example #57
0
 def parallel_identity(x):
     """Identity via dask='parallelized', preserving ``x``'s dtype."""
     return apply_ufunc(identity, x, dask='parallelized',
                        output_dtypes=[x.dtype])
Example #58
0
 def stack_negative(obj):
     """Stack ``obj`` together with its negation on a new 'sign' axis."""
     def pair_with_negation(x):
         return np.stack([x, -x], axis=-1)

     # 'sign' is a brand-new dim, so dask='parallelized' needs its size
     ufunc_opts = dict(output_core_dims=[['sign']],
                       dask='parallelized',
                       output_dtypes=[obj.dtype],
                       output_sizes={'sign': 2})
     return apply_ufunc(pair_with_negation, obj, **ufunc_opts)
Example #59
0
def test_apply_dask_parallelized_errors():
    """Invalid dask='parallelized' usages must raise informative errors."""
    import dask.array as da

    array = da.ones((2, 2), chunks=(1, 1))
    data_array = xr.DataArray(array, dims=('x', 'y'))

    # more than one output is unsupported
    with pytest.raises(NotImplementedError):
        apply_ufunc(identity, data_array,
                    output_core_dims=[['z'], ['z']], dask='parallelized')
    # output_dtypes is mandatory
    with raises_regex(ValueError, 'dtypes'):
        apply_ufunc(identity, data_array, dask='parallelized')
    # ... and must be given as a list
    with raises_regex(TypeError, 'list'):
        apply_ufunc(identity, data_array, dask='parallelized',
                    output_dtypes=float)
    # exactly one dtype per output
    with raises_regex(ValueError, 'must have the same length'):
        apply_ufunc(identity, data_array, dask='parallelized',
                    output_dtypes=[float, float])
    # new output dims require output_sizes
    with raises_regex(ValueError, 'output_sizes'):
        apply_ufunc(identity, data_array, output_core_dims=[['z']],
                    output_dtypes=[float], dask='parallelized')
    # raw dask arrays are not accepted
    with raises_regex(ValueError, 'at least one input is an xarray object'):
        apply_ufunc(identity, array, dask='parallelized')

    # core dims may not span multiple chunks
    with raises_regex(ValueError, 'consists of multiple chunks'):
        apply_ufunc(identity, data_array, dask='parallelized',
                    output_dtypes=[float],
                    input_core_dims=[('y',)],
                    output_core_dims=[('y',)])
Example #60
0
def test_apply_dask_parallelized_errors():
    """Each misuse of dask='parallelized' raises a descriptive error."""
    import dask.array as da

    chunked = da.ones((2, 2), chunks=(1, 1))
    data_array = xr.DataArray(chunked, dims=('x', 'y'))

    # multiple outputs: not implemented for parallelized mode
    with pytest.raises(NotImplementedError):
        apply_ufunc(identity,
                    data_array,
                    output_core_dims=[['z'], ['z']],
                    dask='parallelized')
    # missing output_dtypes
    with raises_regex(ValueError, 'dtypes'):
        apply_ufunc(identity, data_array, dask='parallelized')
    # output_dtypes must be a list
    with raises_regex(TypeError, 'list'):
        apply_ufunc(identity,
                    data_array,
                    dask='parallelized',
                    output_dtypes=float)
    # one dtype entry per output
    with raises_regex(ValueError, 'must have the same length'):
        apply_ufunc(identity,
                    data_array,
                    dask='parallelized',
                    output_dtypes=[float, float])
    # a new output dim needs an explicit size
    with raises_regex(ValueError, 'output_sizes'):
        apply_ufunc(identity,
                    data_array,
                    output_core_dims=[['z']],
                    output_dtypes=[float],
                    dask='parallelized')
    # plain dask arrays (no xarray wrapper) are rejected
    with raises_regex(ValueError, 'at least one input is an xarray object'):
        apply_ufunc(identity, chunked, dask='parallelized')

    # a core dim split over several chunks is rejected
    with raises_regex(ValueError, 'consists of multiple chunks'):
        apply_ufunc(identity,
                    data_array,
                    dask='parallelized',
                    output_dtypes=[float],
                    input_core_dims=[('y',)],
                    output_core_dims=[('y',)])