def test_align_unsorted():
    """Check that `align` copes with unsorted axes in an outer join.

    Covers an unsorted second array, an unsorted first array (sorted
    afterwards with `sort_axis` or directly via ``sort=True``), and both
    arrays unsorted.
    """
    a_sorted = DimArray([1, 2, 3, 4], axes=[[1, 2, 3, 4]], dims=['dim0'])
    a_mess = DimArray([3, 1, 4, 2], axes=[[3, 1, 4, 2]], dims=['dim0'])
    b_sorted = DimArray([11, 22, np.nan, 44], axes=[[1, 2, 3, 4]], dims=['dim0'])
    b_mess = DimArray([22, 44, 11], axes=[[2, 4, 1]], dims=['dim0'])

    # second array unsorted
    a_got, b_got = align([a_sorted, b_mess], join="outer")
    assert_equal_dimarrays(a_got, a_sorted)
    assert_equal_dimarrays(b_got, b_sorted)

    # first array unsorted
    b_got, a_got = align([b_mess, a_sorted], join="outer")
    assert not np.all(a_got.dim0 == a_sorted.dim0)  # not equal because not ordered
    a_got = a_got.sort_axis()
    b_got = b_got.sort_axis()
    assert_equal_dimarrays(a_got, a_sorted)
    assert_equal_dimarrays(b_got, b_sorted)

    # do the same (first array unsorted), but request sorting through the
    # `sort` argument instead of calling sort_axis afterwards.
    # The results are asserted here; previously they were overwritten by the
    # next case without ever being checked.
    b_got, a_got = align([b_mess, a_sorted], join="outer", sort=True)
    assert_equal_dimarrays(a_got, a_sorted)
    assert_equal_dimarrays(b_got, b_sorted)

    # two arrays unsorted
    a_got, b_got = align([a_mess, b_mess], join="outer", sort=True)
    assert_equal_dimarrays(a_got, a_sorted)
    assert_equal_dimarrays(b_got, b_sorted)
def concatenate_ds(datasets, axis=0, align=False, **kwargs):
    """ Join several datasets along a dimension they already share

    Each variable is concatenated separately across the datasets and the
    results are collected in a new Dataset.

    Parameters
    ----------
    datasets: sequence of datasets
    axis: axis along which to concatenate
    align, optional: if True, every dimension other than `axis` is aligned
        across the datasets (via reindexing) before concatenating
    **kwargs : optional key-word arguments passed to align, if align is True

    Returns
    -------
    joint Dataset along axis

    NOTE: the datasets must all hold the same variable names (checked with
    an assertion), and an error is raised if some variables do not contain
    the required dimension

    See Also
    --------
    stack_ds, concatenate, sort_axis

    Examples
    --------
    >>> a = da.zeros(axes=[list('abc')], dims=('x0',))  # 1-D DimArray
    >>> b = da.zeros(axes=[list('abc'), [1,2]], dims=('x0','x1')) # 2-D DimArray
    >>> ds = Dataset(a=a,b=b) # dataset of 2 variables from an experiment
    >>> a2 = da.ones(axes=[list('def')], dims=('x0',))
    >>> b2 = da.ones(axes=[list('def'), [1,2]], dims=('x0','x1')) # 2-D DimArray
    >>> ds2 = Dataset(a=a2,b=b2) # dataset of 2 variables from a second experiment
    >>> concatenate_ds([ds, ds2])
    Dataset of 2 variables
    0 / x0 (6): 'a' to 'f'
    1 / x1 (2): 1 to 2
    a: ('x0',)
    b: ('x0', 'x1')
    """
    # The first dataset provides the reference variable names; every
    # following dataset must hold exactly the same ones.
    variables = None
    for ds in datasets:
        if variables is not None:
            assert sorted(ds.keys()) == sorted(variables), "variables differ across datasets"
        else:
            variables = ds.keys()

    if align:
        # Align along every dimension except the one being concatenated.
        axis_nm = datasets[0].axes[axis].name
        other_dims = [name for name in _get_dims(*datasets) if name != axis_nm]
        for name in other_dims:
            datasets = da.align(datasets, axis=name, strict=True, **kwargs)

    # Concatenate variable by variable.
    joined = Dataset()
    for name in variables:
        pieces = [ds[name] for ds in datasets]
        joined[name] = concatenate(pieces, axis=axis, align=False, _no_check=align)

    return joined
def test_align():
    """Check `align` on two sorted 1-D arrays for outer and inner joins."""
    first = DimArray([1, 2, 3, 4], axes=[[1, 2, 3, 4]], dims=['dim0'])
    second = DimArray([0, 2, 4, 6], axes=[[0, 2, 4, 6]], dims=['dim0'])

    # Outer join: both results span the union of the two axes, with nan
    # wherever an array had no value for a label.
    want_first = DimArray([np.nan, 1, 2, 3, 4, np.nan], axes=[[0, 1, 2, 3, 4, 6]], dims=['dim0'])
    want_second = DimArray([0, np.nan, 2, np.nan, 4, 6], axes=[[0, 1, 2, 3, 4, 6]], dims=['dim0'])
    got_first, got_second = align([first, second], join="outer")
    assert_equal_dimarrays(want_first, got_first)
    assert_equal_dimarrays(want_second, got_second)

    # Inner join: only the labels present in both arrays remain.
    want_first = DimArray([2, 4], axes=[[2, 4]], dims=['dim0'])
    want_second = DimArray([2, 4], axes=[[2, 4]], dims=['dim0'])
    got_first, got_second = align([first, second], join="inner")
    assert_equal_dimarrays(want_first, got_first)
    assert_equal_dimarrays(want_second, got_second)
def stack_ds(datasets, axis, keys=None, align=False, **kwargs):
    """ Stack several datasets along a new dimension

    Each variable is stacked separately across the datasets and the results
    are collected in a new Dataset.

    Parameters
    ----------
    datasets: sequence or dict of datasets
    axis: str, new dimension along which to stack the dataset
    keys, optional: stack axis values, useful if dataset is a sequence, or a non-ordered dictionary
    align, optional: if True, align axes (via reindexing) *prior* to stacking
    **kwargs : optional key-word arguments passed to align, if align is True

    Returns
    -------
    stacked dataset

    NOTE: the datasets must all hold the same variable names, and none of
    them may already contain the stacking dimension (both are checked with
    assertions)

    See Also
    --------
    concatenate_ds, stack, sort_axis

    Examples
    --------
    >>> a = DimArray([1,2,3], dims=('dima',))
    >>> b = DimArray([11,22], dims=('dimb',))
    >>> ds = Dataset(a=a,b=b) # dataset of 2 variables from an experiment
    >>> ds2 = Dataset(a=a*2,b=b*2) # dataset of 2 variables from a second experiment
    >>> stack_ds([ds, ds2], axis='stackdim', keys=['exp1','exp2'])
    Dataset of 2 variables
    0 / stackdim (2): 'exp1' to 'exp2'
    1 / dima (3): 0 to 2
    2 / dimb (2): 0 to 1
    a: ('stackdim', 'dima')
    b: ('stackdim', 'dimb')
    """
    # Normalise the input into a sequence of datasets plus matching keys.
    datasets, keys = _check_stack_args(datasets, keys)

    # Validate the stacking dimension against the dimensions already in use.
    axis = _check_stack_axis(axis, _get_dims(*datasets))

    if align:
        datasets = da.align(datasets, strict=True, **kwargs)

    # The first dataset provides the reference variable names; every
    # following dataset must hold exactly the same ones.
    variables = None
    for ds in datasets:
        # the new dimension must not exist yet
        assert axis not in ds.dims, axis+" already exists in the dataset"

        if variables is not None:
            assert sorted(ds.keys()) == sorted(variables), "variables differ across datasets"
        else:
            variables = ds.keys()

    # Stack variable by variable.
    stacked = Dataset()
    for name in variables:
        pieces = [ds[name] for ds in datasets]
        stacked[name] = stack(pieces, axis=axis, keys=keys, align=False)

    return stacked