def assert_allclose(a, b, rtol=1e-05, atol=1e-08, decode_bytes=True):
    """Assert that two xarray objects agree within a numeric tolerance.

    This is the xarray counterpart of
    :py:func:`numpy.testing.assert_allclose`; an AssertionError is raised
    when the two objects differ by more than the requested tolerance.

    Parameters
    ----------
    a : xarray.Dataset, xarray.DataArray or xarray.Variable
        The first object to compare.
    b : xarray.Dataset, xarray.DataArray or xarray.Variable
        The second object to compare. Must have exactly the same type as
        ``a``.
    rtol : float, optional
        Relative tolerance.
    atol : float, optional
        Absolute tolerance.
    decode_bytes : bool, optional
        Whether byte dtypes should be decoded to strings as UTF-8 or not.
        This is useful for testing serialization methods on Python 3 that
        return saved strings as bytes.

    Raises
    ------
    AssertionError
        If the types of ``a`` and ``b`` differ, or if the objects are not
        close.
    TypeError
        If ``a`` is not a Variable, DataArray or Dataset.

    See also
    --------
    assert_identical, assert_equal, numpy.testing.assert_allclose
    """
    __tracebackhide__ = True
    assert type(a) == type(b)

    # Bind the tolerances once; the same callable is used both for the
    # comparison and as the ``compat`` handed to the diff formatters.
    close_enough = functools.partial(
        _data_allclose_or_equiv,
        rtol=rtol,
        atol=atol,
        decode_bytes=decode_bytes,
    )
    close_enough.__name__ = "allclose"

    def variables_match(left, right):
        # Accept either bare variables or objects exposing ``.variable``.
        left = getattr(left, "variable", left)
        right = getattr(right, "variable", right)
        if left.dims != right.dims:
            return False
        # Identity of the underlying data short-circuits the comparison.
        return left._data is right._data or close_enough(left.data, right.data)

    if isinstance(a, Variable):
        matched = variables_match(a, b)
        assert matched, formatting.diff_array_repr(a, b, compat=close_enough)
        return

    if isinstance(a, DataArray):
        # Coordinates are compared first, then the wrapped variable.
        matched = utils.dict_equiv(
            a.coords, b.coords, compat=variables_match
        ) and variables_match(a.variable, b.variable)
        assert matched, formatting.diff_array_repr(a, b, compat=close_enough)
        return

    if isinstance(a, Dataset):
        matched = a._coord_names == b._coord_names and utils.dict_equiv(
            a.variables, b.variables, compat=variables_match
        )
        assert matched, formatting.diff_dataset_repr(a, b, compat=close_enough)
        return

    raise TypeError("{} not supported by assertion comparison".format(type(a)))
def test_to_dataset(self, tmpdir_factory, bout_xyt_example_files):
    """Check that ``da.bout.to_dataset()`` keeps the dataset's attrs and metadata."""
    datapath = bout_xyt_example_files(tmpdir_factory, nxpe=3, nype=4, nt=1)
    source_ds = open_boutdataset(
        datapath=datapath,
        inputfilepath=None,
        keep_xboundaries=False,
    )

    # Pull out a single variable and rebuild a dataset from it.
    rebuilt_ds = source_ds["n"].bout.to_dataset()

    assert dict_equiv(source_ds.attrs, rebuilt_ds.attrs)
    assert dict_equiv(source_ds.metadata, rebuilt_ds.metadata)
def test_to_dataset(self, bout_xyt_example_files):
    """Check that ``da.bout.to_dataset()`` keeps the dataset's attrs and metadata."""
    example_datasets = bout_xyt_example_files(None, nxpe=3, nype=4, nt=1)

    # Opening the example data is expected to emit a UserWarning.
    with pytest.warns(UserWarning):
        source_ds = open_boutdataset(
            datapath=example_datasets,
            inputfilepath=None,
            keep_xboundaries=False,
        )

    # Pull out a single variable and rebuild a dataset from it.
    rebuilt_ds = source_ds["n"].bout.to_dataset()

    assert dict_equiv(source_ds.attrs, rebuilt_ds.attrs)
    assert dict_equiv(source_ds.metadata, rebuilt_ds.metadata)
def test_dict_equiv(self):
    """Exercise ``utils.dict_equiv`` with arrays, lists, NaN/inf and mixed mapping types."""
    left = OrderedDict([("a", 3), ("b", np.array([1, 2, 3]))])
    right = OrderedDict([("b", np.array([1.0, 2.0, 3.0])), ("a", 3)])

    # Integer and float arrays with equal values, keys inserted in a different order.
    self.assertTrue(utils.dict_equiv(left, right))

    # An ndarray on one side, a plain list on the other.
    right["b"] = [1, 2, 3]
    self.assertTrue(utils.dict_equiv(left, right))

    # Plain lists on both sides.
    left["b"] = [1.0, 2.0, 3.0]
    self.assertTrue(utils.dict_equiv(left, right))

    # A key that exists only in the first mapping.
    left["c"] = None
    self.assertFalse(utils.dict_equiv(left, right))

    # NaN is expected to match NaN, and inf to match inf.
    for special in (np.nan, np.inf):
        left["c"] = special
        right["c"] = special
        self.assertTrue(utils.dict_equiv(left, right))

    # An OrderedDict compared against a plain dict.
    right = dict(right)
    self.assertTrue(utils.dict_equiv(left, right))

    # Arrays whose contents differ.
    right["b"] = 3 * np.arange(3)
    self.assertFalse(utils.dict_equiv(left, right))
def assert_variable_attrs_equal(self, ds_a, ds_b):
    """Assert that each variable named in ``ds_a`` has equivalent attrs in ``ds_b``.

    Only the variable names of ``ds_a`` are visited, so this does not test
    whether ``ds_a`` and ``ds_b`` have the same set of variables.
    """
    for name in ds_a.variables.keys():
        left_attrs = ds_a[name].attrs
        right_attrs = ds_b[name].attrs
        attrs_match = dict_equiv(left_attrs, right_attrs)
        # The diff is only rendered when the comparison fails.
        assert attrs_match, diff_attrs_repr(left_attrs, right_attrs, "identical")
def assert_global_attrs_close(self, attrs_a, attrs_b):
    """Assert that two global-attribute mappings are close.

    Copies of both mappings are passed through the time-attribute and
    numerical-attribute checks, after which the copies must be equivalent
    under ``dict_equiv``.
    """
    # Work on copies so the caller's mappings are never touched.
    # NOTE(review): the helper checks presumably strip the attributes they
    # handle from these copies - confirm against their implementations.
    left = attrs_a.copy()
    right = attrs_b.copy()

    self._assert_time_attrs_close(left, right)
    self._assert_numerical_attrs_close(left, right)

    remaining_match = dict_equiv(left, right)
    assert remaining_match, diff_attrs_repr(left, right, "identical")
def test_dict_equiv(self):
    """Walk ``utils.dict_equiv`` through its equal and unequal cases."""
    first = OrderedDict()
    first.update(a=3)
    first.update(b=np.array([1, 2, 3]))
    second = OrderedDict()
    second.update(b=np.array([1.0, 2.0, 3.0]))
    second.update(a=3)

    # Equal-valued int and float arrays, with the keys in a different order.
    assert utils.dict_equiv(first, second)

    # ndarray versus list.
    second["b"] = [1, 2, 3]
    assert utils.dict_equiv(first, second)

    # list versus list.
    first["b"] = [1.0, 2.0, 3.0]
    assert utils.dict_equiv(first, second)

    # An extra key on the first mapping breaks equivalence.
    first["c"] = None
    assert not utils.dict_equiv(first, second)

    # NaN on both sides is treated as equivalent.
    first["c"] = second["c"] = np.nan
    assert utils.dict_equiv(first, second)

    # So is infinity on both sides.
    first["c"] = second["c"] = np.inf
    assert utils.dict_equiv(first, second)

    # The concrete mapping type does not matter.
    second = dict(second)
    assert utils.dict_equiv(first, second)

    # Differing array contents break equivalence.
    second["b"] = 3 * np.arange(3)
    assert not utils.dict_equiv(first, second)
def test_dict_equiv(self):
    """Check ``utils.dict_equiv`` on arrays, lists, special floats and dict types."""
    lhs = OrderedDict([("a", 3), ("b", np.array([1, 2, 3]))])
    rhs = OrderedDict([("b", np.array([1.0, 2.0, 3.0])), ("a", 3)])

    # Two arrays holding the same values (int versus float dtype).
    assert utils.dict_equiv(lhs, rhs)

    # Replace one array with a list of the same values.
    rhs["b"] = [1, 2, 3]
    assert utils.dict_equiv(lhs, rhs)

    # Now both sides hold lists.
    lhs["b"] = [1.0, 2.0, 3.0]
    assert utils.dict_equiv(lhs, rhs)

    # A key missing from the second mapping.
    lhs["c"] = None
    assert not utils.dict_equiv(lhs, rhs)

    # Matching NaN values, then matching infinities.
    for special in (np.nan, np.inf):
        lhs["c"] = special
        rhs["c"] = special
        assert utils.dict_equiv(lhs, rhs)

    # Different dictionary types are fine.
    rhs = dict(rhs)
    assert utils.dict_equiv(lhs, rhs)

    # Not equivalent once the array values differ.
    rhs["b"] = 3 * np.arange(3)
    assert not utils.dict_equiv(lhs, rhs)
def test_dict_equiv(self):
    """Verify ``utils.dict_equiv`` for equal and unequal mapping contents."""
    first = OrderedDict()
    first.update(a=3)
    first.update(b=np.array([1, 2, 3]))
    second = OrderedDict()
    second.update(b=np.array([1.0, 2.0, 3.0]))
    second.update(a=3)

    # Same values stored as int and float arrays, inserted in opposite order.
    self.assertTrue(utils.dict_equiv(first, second))

    # An array compared with a list.
    second["b"] = [1, 2, 3]
    self.assertTrue(utils.dict_equiv(first, second))

    # A list compared with a list.
    first["b"] = [1.0, 2.0, 3.0]
    self.assertTrue(utils.dict_equiv(first, second))

    # A key present on only one side.
    first["c"] = None
    self.assertFalse(utils.dict_equiv(first, second))

    # NaN paired with NaN.
    first["c"] = second["c"] = np.nan
    self.assertTrue(utils.dict_equiv(first, second))

    # inf paired with inf.
    first["c"] = second["c"] = np.inf
    self.assertTrue(utils.dict_equiv(first, second))

    # OrderedDict against a plain dict.
    second = dict(second)
    self.assertTrue(utils.dict_equiv(first, second))

    # Arrays with different contents.
    second["b"] = 3 * np.arange(3)
    self.assertFalse(utils.dict_equiv(first, second))
def _dataset_multi_concat(
    datasets,
    dim,
    data_vars,
    coords,
    compat,
    positions,
    join="outer",
):
    """
    Concatenate a sequence of datasets along a dimension, trying concatenation
    along alternate dimensions when the chosen dimension is not present.

    This function is based on _dataset_concat from xarray.core.concat.py in
    xarray 0.15. It includes a modification to drop mismatched coordinates
    from datasets instead of throwing a ValueError. This drop removes the
    variable from coordinates, but it remains a variable in the dataset.

    Parameters
    ----------
    datasets : sequence of Dataset
        Datasets to concatenate. Each one is copied before use.
    dim
        Preferred concat dimension; handed to ``_find_concat_dims`` to work
        out the full list of candidate dimensions.
    data_vars, coords
        Forwarded unchanged to ``_calc_concat_over``.
    compat
        Forwarded to ``_calc_concat_over`` and ``unique_variable``. When equal
        to ``"identical"``, global attributes must match across all datasets.
    positions
        Forwarded unchanged to ``concat_vars``.
    join : str, optional
        Forwarded to ``align``.

    Returns
    -------
    Dataset
        The combined dataset, carrying the attrs and encoding of the first
        input dataset.

    Raises
    ------
    ValueError
        If ``compat == "identical"`` and global attributes differ, if a
        variable to concatenate is missing from some dataset, or if a
        coordinate name is absent from the combined result.
    """
    # Make sure we're working on a copy (we'll be loading variables)
    datasets = [ds.copy() for ds in datasets]

    # determine what dimensions we will be concatenating over, including the
    # preferred dim and any alternatives when the preferred dim is absent
    dims = _find_concat_dims(datasets, dim)
    dims, coordinates = _calc_concat_dims_coords(dims)
    datasets = align(*datasets, join=join, copy=False, exclude=dims)

    dim_coords, dims_sizes, coord_names, data_names = _parse_datasets(datasets)
    dim_names = set(dim_coords)
    unlabeled_dims = dim_names - coord_names

    both_data_and_coords = coord_names & data_names
    if both_data_and_coords:
        # Instead of throwing a ValueError, make the coordinates match by
        # removing the mismatched coordinate
        for ds in datasets:
            for variable in both_data_and_coords:
                if variable in ds.coords:
                    # This makes the variable no longer a coordinate, but does
                    # not remove it from the dataset entirely
                    ds._coord_names.remove(variable)
                    coord_names.discard(variable)

    # we don't want the concat dimensions in the result dataset yet.
    # A dedicated loop name is used so the ``dim`` parameter is not shadowed,
    # and the expand_dims check is applied to every concat dimension rather
    # than to whichever name the loop happened to finish on.
    for concat_name in dims:
        dim_coords.pop(concat_name, None)
        dims_sizes.pop(concat_name, None)

        # case where concat dimension is a coordinate or data_var but not a
        # dimension
        is_variable = concat_name in coord_names or concat_name in data_names
        if is_variable and concat_name not in dim_names:
            datasets = [ds.expand_dims(concat_name) for ds in datasets]

    # determine which variables to concatenate
    concat_over, equals, concat_dim_lengths = _calc_concat_over(
        datasets, dims, dim_names, data_vars, coords, compat)

    # determine which variables to merge, and then merge them according to
    # compat
    variables_to_merge = (coord_names | data_names) - concat_over - dim_names

    result_vars = {}
    if variables_to_merge:
        to_merge = {var: [] for var in variables_to_merge}

        for ds in datasets:
            for var in variables_to_merge:
                if var in ds:
                    to_merge[var].append(ds.variables[var])

        for var in variables_to_merge:
            result_vars[var] = unique_variable(
                var, to_merge[var], compat=compat,
                equals=equals.get(var, None))
    result_vars.update(dim_coords)

    # assign attrs and encoding from first dataset
    result_attrs = datasets[0].attrs
    result_encoding = datasets[0].encoding

    # check that global attributes are fixed across all datasets if necessary
    for ds in datasets[1:]:
        if compat == "identical" and not utils.dict_equiv(
                ds.attrs, result_attrs):
            raise ValueError("Dataset global attributes not equal.")

    # we've already verified everything is consistent; now, calculate
    # shared dimension sizes so we can expand the necessary variables
    def ensure_common_dims(var_list):
        # ensure each variable with the given name shares the same
        # dimensions and the same shape for all of them except along the
        # concat dimension
        common_dims = tuple(pd.unique([d for v in var_list for d in v.dims]))

        # find the first concat dimension available in var_list; ``next`` with
        # a default avoids the IndexError that indexing an empty list raised,
        # which previously made the fallback below unreachable
        concat_dim = next((x for x in dims if x in common_dims), None)
        if concat_dim is None:
            # none of the concat dims are present - add the first one
            concat_dim = dims[0]
            common_dims = (concat_dim, ) + common_dims

        for var, dim_len in zip(var_list, concat_dim_lengths[concat_dim]):
            if var.dims != common_dims:
                common_shape = tuple(
                    dims_sizes.get(d, dim_len) for d in common_dims)
                var = var.expand_dims(common_dims, common_shape)
            yield var

    # stack up each variable to fill-out the dataset (in order)
    # n.b. this loop preserves variable order, needed for groupby.
    for k in datasets[0].variables:
        if k in concat_over:
            try:
                per_dataset_vars = [ds.variables[k] for ds in datasets]
            except KeyError:
                raise ValueError("%r is not present in all datasets." % k)
            # get the dimension to concatenate this variable on - choose the
            # first applicable dim from dims
            concat_dim = _get_concat_dim(dims, per_dataset_vars)
            combined = concat_vars(
                ensure_common_dims(per_dataset_vars), concat_dim, positions)
            assert isinstance(combined, Variable)
            result_vars[k] = combined

    result = Dataset(result_vars, attrs=result_attrs)
    absent_coord_names = coord_names - set(result.variables)
    if absent_coord_names:
        raise ValueError(
            "Variables %r are coordinates in some datasets but not others." %
            absent_coord_names)

    # current versions of dataset.set_coords and dataset.drop force a
    # _assert_all_in_dataset check that we don't want. xarray 0.15 has the
    # option to disable this via errors='ignore', but for now just apply the
    # underlying logic of set_coords(coord_names) ...
    result._coord_names.update(coord_names)
    result.encoding = result_encoding

    # ... and of drop(unlabeled_dims).
    drop = set(unlabeled_dims)
    variables = OrderedDict(
        (k, v) for k, v in iteritems(result._variables) if k not in drop)
    coord_names = set(k for k in result._coord_names if k in variables)
    # NOTE(review): _replace_vars_and_dims is expected to return the updated
    # Dataset instead of mutating ``result`` (inplace defaults to False in the
    # xarray versions this logic was copied from - confirm for the pinned
    # version). Rebind the result so the drop is not silently discarded, and
    # re-apply the encoding in case the new object does not carry it over.
    result = result._replace_vars_and_dims(variables, coord_names)
    result.encoding = result_encoding

    for coord in coordinates:
        # Compare against None explicitly: taking the truth value of an
        # array-like coordinate can raise for multi-element coordinates.
        if coord is not None:
            # add concat dimension last to ensure that its in the final
            # Dataset
            result[coord.name] = coord

    return result