Esempio n. 1
0
def test_align_unsorted():
    """Check that an outer-join `align` copes with unsorted axes.

    Covers three situations: only the second array is unsorted, only the
    first array is unsorted, and both arrays are unsorted. The ``sort=True``
    keyword is exercised as an alternative to calling ``sort_axis`` on the
    results.
    """
    a_sorted = DimArray([1, 2, 3, 4], axes=[[1, 2, 3, 4]], dims=['dim0'])
    a_mess = DimArray([3, 1, 4, 2], axes=[[3, 1, 4, 2]], dims=['dim0'])
    # b_mess has no entry for label 3, hence the NaN in the expected result
    b_sorted = DimArray([11, 22, np.nan, 44], axes=[[1, 2, 3, 4]], dims=['dim0'])
    b_mess = DimArray([22, 44, 11], axes=[[2, 4, 1]], dims=['dim0'])

    # second array unsorted
    a_got, b_got = align([a_sorted, b_mess], join="outer")

    assert_equal_dimarrays(a_got, a_sorted)
    assert_equal_dimarrays(b_got, b_sorted)

    # first array unsorted
    b_got, a_got = align([b_mess, a_sorted], join="outer")

    assert not np.all(a_got.dim0 == a_sorted.dim0)  # not equal because not ordered

    # sorting the results afterwards must recover the sorted references
    a_got = a_got.sort_axis()
    b_got = b_got.sort_axis()

    assert_equal_dimarrays(a_got, a_sorted)
    assert_equal_dimarrays(b_got, b_sorted)

    # same inputs as the first case, but request sorting via the keyword argument
    a_got, b_got = align([a_sorted, b_mess], join="outer", sort=True)

    # these checks were missing: the result used to be overwritten unverified
    assert_equal_dimarrays(a_got, a_sorted)
    assert_equal_dimarrays(b_got, b_sorted)

    # two arrays unsorted
    a_got, b_got = align([a_mess, b_mess], join="outer", sort=True)

    assert_equal_dimarrays(a_got, a_sorted)
    assert_equal_dimarrays(b_got, b_sorted)
Esempio n. 2
0
def concatenate_ds(datasets, axis=0, align=False, **kwargs):
    """ Join several datasets along a dimension they already share

    Parameters
    ----------
    datasets: sequence of datasets
    axis: axis along which to concatenate (default 0)
    align, optional: if True, every dimension other than the concatenation
        axis is aligned (via da.align with strict=True) before concatenating
    **kwargs : optional keyword arguments forwarded to da.align, only used
        if align is True

    Returns
    -------
    Dataset holding, for each variable, the concatenation along axis

    Raises
    ------
    AssertionError if the datasets do not all hold the same variable names

    NOTE: will raise an error if variables are there which do not contain the required dimension

    See Also
    --------
    stack_ds, concatenate, sort_axis

    Examples
    --------
    >>> a = da.zeros(axes=[list('abc')], dims=('x0',))  # 1-D DimArray
    >>> b = da.zeros(axes=[list('abc'), [1,2]], dims=('x0','x1')) # 2-D DimArray
    >>> ds = Dataset(a=a,b=b) # dataset of 2 variables from an experiment
    >>> a2 = da.ones(axes=[list('def')], dims=('x0',)) 
    >>> b2 = da.ones(axes=[list('def'), [1,2]], dims=('x0','x1')) # 2-D DimArray
    >>> ds2 = Dataset(a=a2,b=b2) # dataset of 2 variables from a second experiment
    >>> concatenate_ds([ds, ds2])
    Dataset of 2 variables
    0 / x0 (6): 'a' to 'f'
    1 / x1 (2): 1 to 2
    a: ('x0',)
    b: ('x0', 'x1')
    """
    # the first dataset defines the reference variable names; every other
    # dataset must hold exactly the same names
    variables = None
    for position, candidate in enumerate(datasets):
        if position == 0:
            variables = candidate.keys()
            continue
        assert sorted(candidate.keys()) == sorted(variables), "variables differ across datasets"

    if align:
        # align one dimension at a time, leaving the concatenation axis alone
        concat_name = datasets[0].axes[axis].name
        secondary_dims = [name for name in _get_dims(*datasets) if name != concat_name]
        for name in secondary_dims:
            datasets = da.align(datasets, axis=name, strict=True, **kwargs)

    # concatenate variable by variable; alignment (if any) was done above,
    # so the per-array concatenate is asked not to align again
    # NOTE(review): _no_check presumably skips redundant axis checks once
    # the datasets have been aligned -- confirm against concatenate
    dataset = Dataset()
    for name in variables:
        pieces = [member[name] for member in datasets]
        dataset[name] = concatenate(pieces, axis=axis, align=False, _no_check=align)

    return dataset
Esempio n. 3
0
def test_align():
    """Check `align` with outer and inner joins on two partially overlapping 1-D arrays."""

    def make(values, labels):
        # 1-D DimArray along 'dim0' with the given axis labels
        return DimArray(values, axes=[labels], dims=['dim0'])

    first = make([1, 2, 3, 4], [1, 2, 3, 4])
    second = make([0, 2, 4, 6], [0, 2, 4, 6])

    # outer join: union of both axes, NaN where an array has no value
    first_outer = make([np.nan, 1, 2, 3, 4, np.nan], [0, 1, 2, 3, 4, 6])
    second_outer = make([0, np.nan, 2, np.nan, 4, 6], [0, 1, 2, 3, 4, 6])

    first_outer_got, second_outer_got = align([first, second], join="outer")

    assert_equal_dimarrays(first_outer, first_outer_got)
    assert_equal_dimarrays(second_outer, second_outer_got)

    # inner join: only the labels present in both arrays remain
    first_inner = make([2, 4], [2, 4])
    second_inner = make([2, 4], [2, 4])

    first_inner_got, second_inner_got = align([first, second], join="inner")

    assert_equal_dimarrays(first_inner, first_inner_got)
    assert_equal_dimarrays(second_inner, second_inner_got)
Esempio n. 4
0
def stack_ds(datasets, axis, keys=None, align=False, **kwargs):
    """ Combine several datasets by stacking them along a new dimension

    Parameters
    ----------
    datasets: sequence or dict of datasets
    axis: str, name of the new dimension along which to stack
    keys, optional: values of the stack axis, useful if datasets is a
        sequence, or a non-ordered dictionary
    align, optional: if True, align axes (via da.align with strict=True)
        *prior* to stacking
    **kwargs : optional keyword arguments forwarded to da.align, only used
        if align is True

    Returns
    -------
    stacked dataset

    Raises
    ------
    AssertionError if axis is already a dimension of one of the datasets,
    or if the datasets do not all hold the same variable names

    See Also
    --------
    concatenate_ds, stack, sort_axis

    Examples
    --------
    >>> a = DimArray([1,2,3], dims=('dima',))
    >>> b = DimArray([11,22], dims=('dimb',))
    >>> ds = Dataset(a=a,b=b) # dataset of 2 variables from an experiment
    >>> ds2 = Dataset(a=a*2,b=b*2) # dataset of 2 variables from a second experiment
    >>> stack_ds([ds, ds2], axis='stackdim', keys=['exp1','exp2'])
    Dataset of 2 variables
    0 / stackdim (2): 'exp1' to 'exp2'
    1 / dima (3): 0 to 2
    2 / dimb (2): 0 to 1
    a: ('stackdim', 'dima')
    b: ('stackdim', 'dimb')
    """
    # normalise the input into a sequence of datasets plus matching keys
    datasets, keys = _check_stack_args(datasets, keys) 

    # validate the stacking dimension against the existing dimensions
    existing_dims = _get_dims(*datasets)
    axis = _check_stack_axis(axis, existing_dims) 

    if align:
        datasets = da.align(datasets, strict=True, **kwargs)

    # the first dataset defines the reference variable names; every dataset
    # must lack the stack axis and hold exactly the same names
    variables = None
    for position, candidate in enumerate(datasets):
        assert axis not in candidate.dims, axis+" already exists in the dataset" 

        if position == 0:
            variables = candidate.keys()
            continue
        assert sorted(candidate.keys()) == sorted(variables), "variables differ across datasets"

    # stack variable by variable; alignment (if any) was done above
    dataset = Dataset()
    for name in variables:
        layers = [member[name] for member in datasets]
        dataset[name] = stack(layers, axis=axis, keys=keys, align=False)

    return dataset