class TestCombineAuto:
    # Tests for combine_by_coords (automatic ordering by dimension coords)
    # and the join behaviour of combine_nested.

    def test_combine_by_coords(self):
        # Two single-element datasets concatenate along their shared coord.
        objs = [Dataset({'x': [0]}), Dataset({'x': [1]})]
        actual = combine_by_coords(objs)
        expected = Dataset({'x': [0, 1]})
        assert_identical(expected, actual)

        # A single already-combined dataset passes through unchanged.
        actual = combine_by_coords([actual])
        assert_identical(expected, actual)

        objs = [Dataset({'x': [0, 1]}), Dataset({'x': [2]})]
        actual = combine_by_coords(objs)
        expected = Dataset({'x': [0, 1, 2]})
        assert_identical(expected, actual)

        # ensure auto_combine handles non-sorted variables
        objs = [
            Dataset({'x': ('a', [0]), 'y': ('a', [0]), 'a': [0]}),
            Dataset({'x': ('a', [1]), 'y': ('a', [1]), 'a': [1]})
        ]
        actual = combine_by_coords(objs)
        expected = Dataset({'x': ('a', [0, 1]), 'y': ('a', [0, 1]),
                            'a': [0, 1]})
        assert_identical(expected, actual)

        objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'y': [1], 'x': [1]})]
        actual = combine_by_coords(objs)
        expected = Dataset({'x': [0, 1], 'y': [0, 1]})
        assert_equal(actual, expected)

        # Scalar coords leave nothing to concatenate along.
        objs = [Dataset({'x': 0}), Dataset({'x': 1})]
        with raises_regex(ValueError, 'Could not find any dimension '
                          'coordinates'):
            combine_by_coords(objs)

        # A dimension lacking a coordinate cannot be auto-ordered.
        objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'x': [0]})]
        with raises_regex(ValueError, 'Every dimension needs a coordinate'):
            combine_by_coords(objs)

    def test_empty_input(self):
        # An empty list combines to an empty Dataset.
        assert_identical(Dataset(), combine_by_coords([]))

    @pytest.mark.parametrize("join, expected", [
        ('outer', Dataset({'x': [0, 1], 'y': [0, 1]})),
        ('inner', Dataset({'x': [0, 1], 'y': []})),
        ('left', Dataset({'x': [0, 1], 'y': [0]})),
        ('right', Dataset({'x': [0, 1], 'y': [1]})),
    ])
    def test_combine_coords_join(self, join, expected):
        # The requested join method controls how non-concat indexes align.
        objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'x': [1], 'y': [1]})]
        actual = combine_nested(objs, concat_dim='x', join=join)
        assert_identical(expected, actual)

    def test_combine_coords_join_exact(self):
        # join='exact' must raise when the non-concat indexes differ.
        objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'x': [1], 'y': [1]})]
        with raises_regex(ValueError, 'indexes along dimension'):
            combine_nested(objs, concat_dim='x', join='exact')

    def test_infer_order_from_coords(self):
        # Datasets supplied out of order are sorted by their coord values.
        data = create_test_data()
        objs = [data.isel(dim2=slice(4, 9)), data.isel(dim2=slice(4))]
        actual = combine_by_coords(objs)
        expected = data
        assert expected.broadcast_equals(actual)

    def test_combine_leaving_bystander_dimensions(self):
        # Check non-monotonic bystander dimension coord doesn't raise
        # ValueError on combine (https://github.com/pydata/xarray/issues/3150)
        ycoord = ['a', 'c', 'b']
        data = np.random.rand(7, 3)
        ds1 = Dataset(data_vars=dict(data=(['x', 'y'], data[:3, :])),
                      coords=dict(x=[1, 2, 3], y=ycoord))
        ds2 = Dataset(data_vars=dict(data=(['x', 'y'], data[3:, :])),
                      coords=dict(x=[4, 5, 6, 7], y=ycoord))
        expected = Dataset(data_vars=dict(data=(['x', 'y'], data)),
                           coords=dict(x=[1, 2, 3, 4, 5, 6, 7], y=ycoord))
        actual = combine_by_coords((ds1, ds2))
        assert_identical(expected, actual)

    def test_combine_by_coords_previously_failed(self):
        # In the above scenario, one file is missing, containing the data for
        # one year's data for one variable.
        datasets = [
            Dataset({'a': ('x', [0]), 'x': [0]}),
            Dataset({'b': ('x', [0]), 'x': [0]}),
            Dataset({'a': ('x', [1]), 'x': [1]})
        ]
        # The missing (year, variable) combination is filled with NaN.
        expected = Dataset({'a': ('x', [0, 1]), 'b': ('x', [0, np.nan])},
                           {'x': [0, 1]})
        actual = combine_by_coords(datasets)
        assert_identical(expected, actual)

    def test_combine_by_coords_still_fails(self):
        # concat can't handle new variables (yet):
        # https://github.com/pydata/xarray/issues/508
        datasets = [Dataset({'x': 0}, {'y': 0}),
                    Dataset({'x': 1}, {'y': 1, 'z': 1})]
        with pytest.raises(ValueError):
            combine_by_coords(datasets, 'y')

    def test_combine_by_coords_no_concat(self):
        # Distinct variables are merged rather than concatenated.
        objs = [Dataset({'x': 0}), Dataset({'y': 1})]
        actual = combine_by_coords(objs)
        expected = Dataset({'x': 0, 'y': 1})
        assert_identical(expected, actual)

        objs = [Dataset({'x': 0, 'y': 1}), Dataset({'y': np.nan, 'z': 2})]
        actual = combine_by_coords(objs)
        expected = Dataset({'x': 0, 'y': 1, 'z': 2})
        assert_identical(expected, actual)

    def test_check_for_impossible_ordering(self):
        # Interleaved coord values can never form a monotonic global index.
        ds0 = Dataset({'x': [0, 1, 5]})
        ds1 = Dataset({'x': [2, 3]})
        with raises_regex(
                ValueError, "does not have monotonic global indexes"
                " along dimension x"):
            combine_by_coords([ds1, ds0])
def test_invalid_time_units_raises_eagerly(self):
    """Malformed 'units' on a time variable must fail during decode_cf."""
    bad_ds = Dataset({'time': ('time', [0, 1], {'units': 'foobar since 123'})})
    with raises_regex(ValueError, 'unable to decode time'):
        decode_cf(bad_ds)
def test_lazy_dataset(self):
    """A Dataset built from dask-backed data keeps the variable lazy."""
    wrapped = Dataset({'foo': (('x', 'y'), self.data)})
    assert isinstance(wrapped.foo.variable.data, da.Array)
def test_combine_coords_join(self, join, expected):
    """combine_nested must honour the requested join method."""
    parts = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})]
    result = combine_nested(parts, concat_dim="x", join=join)
    assert_identical(expected, result)
def merge_datasets(
    datasets: Iterable[Dataset],
    bounds: Union[Tuple, None] = None,
    res: Union[Tuple, None] = None,
    nodata: Union[float, None] = None,
    precision: Union[float, None] = None,
    method: Union[str, Callable, None] = None,
) -> Dataset:
    """
    Merge datasets geospatially.

    Uses rasterio.merge.merge:
    https://rasterio.readthedocs.io/en/stable/api/rasterio.merge.html#rasterio.merge.merge

    Parameters
    ----------
    datasets: list
        List of xarray.Dataset's with all geo attributes.
        The first one is assumed to have the same
        CRS, dtype, dimensions, and data_vars as the others in the array.
    bounds: tuple, optional
        Bounds of the output image (left, bottom, right, top).
        If not set, bounds are determined from bounds of input Dataset.
    res: tuple, optional
        Output resolution in units of coordinate reference system.
        If not set, the resolution of the first Dataset is used.
        If a single value is passed, output pixels will be square.
    nodata: float, optional
        nodata value to use in output file.
        If not set, uses the nodata value in the first input Dataset.
    precision: float, optional
        Number of decimal points of precision when computing inverse
        transform.
    method: str or callable, optional
        See rasterio docs.

    Returns
    -------
    :obj:`xarray.Dataset`:
        The geospatially merged data.
    """
    # Materialize so that generators are supported too: the body indexes
    # datasets[0] and iterates the collection once per data variable.
    datasets = list(datasets)
    representative_ds = datasets[0]
    merged_data = {}
    # Merge each data variable independently across all input datasets;
    # coordinates are rebuilt once below from the merged transform.
    for data_var in representative_ds.data_vars:
        merged_data[data_var] = merge_arrays(
            [dataset[data_var] for dataset in datasets],
            bounds=bounds,
            res=res,
            nodata=nodata,
            precision=precision,
            method=method,
            parse_coordinates=False,
        )
    # Any merged variable carries the shared grid; use the first one to
    # derive the output coordinates.
    data_var = next(iter(representative_ds.data_vars))
    xds = Dataset(
        merged_data,
        coords=_make_coords(
            merged_data[data_var],
            merged_data[data_var].rio.transform(),
            merged_data[data_var].shape[-1],
            merged_data[data_var].shape[-2],
        ),
        attrs=representative_ds.attrs,
    )
    # Stamp the representative CRS onto the merged result in place.
    xds.rio.write_crs(representative_ds.rio.crs, inplace=True)
    return xds
def test_nested_concat_too_many_dims_at_once(self):
    """Concatenating along one dim while two dims differ should fail."""
    parts = [Dataset({"x": [0], "y": [1]}), Dataset({"y": [0], "x": [1]})]
    with pytest.raises(ValueError, match="not equal across datasets"):
        combine_nested(parts, concat_dim="x", coords="minimal")
def test_combine_by_coords(self):
    """combine_by_coords orders and concatenates by dimension coordinates."""
    # Simple one-element concatenation along "x".
    pair = [Dataset({"x": [0]}), Dataset({"x": [1]})]
    combined = combine_by_coords(pair)
    target = Dataset({"x": [0, 1]})
    assert_identical(target, combined)

    # Idempotent on a single already-combined input.
    combined = combine_by_coords([combined])
    assert_identical(target, combined)

    # Unequal-length pieces still concatenate in coordinate order.
    pair = [Dataset({"x": [0, 1]}), Dataset({"x": [2]})]
    combined = combine_by_coords(pair)
    assert_identical(Dataset({"x": [0, 1, 2]}), combined)

    # ensure auto_combine handles non-sorted variables
    pair = [
        Dataset({"x": ("a", [0]), "y": ("a", [0]), "a": [0]}),
        Dataset({"x": ("a", [1]), "y": ("a", [1]), "a": [1]}),
    ]
    combined = combine_by_coords(pair)
    target = Dataset({"x": ("a", [0, 1]), "y": ("a", [0, 1]), "a": [0, 1]})
    assert_identical(target, combined)

    pair = [Dataset({"x": [0], "y": [0]}), Dataset({"y": [1], "x": [1]})]
    combined = combine_by_coords(pair)
    assert_equal(combined, Dataset({"x": [0, 1], "y": [0, 1]}))

    # Scalar coords leave nothing to concatenate along.
    pair = [Dataset({"x": 0}), Dataset({"x": 1})]
    with raises_regex(ValueError, "Could not find any dimension coordinates"):
        combine_by_coords(pair)

    # A dimension missing its coordinate cannot be ordered.
    pair = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})]
    with raises_regex(ValueError, "Every dimension needs a coordinate"):
        combine_by_coords(pair)

def test_empty_input(self):
    """An empty list combines to an empty Dataset."""
    assert_identical(Dataset(), combine_by_coords([]))
def test_auto_combine_without_coords(self):
    """auto_combine warns when inputs lack global dimension coordinates."""
    pieces = [Dataset({'foo': ('x', [0])}), Dataset({'foo': ('x', [1])})]
    with pytest.warns(FutureWarning, match="supplied do not have global"):
        auto_combine(pieces)
def test_min_count_dataset(func):
    """Dataset-level reductions must honour min_count like DataArray ones."""
    da = construct_dataarray(2, dtype=float, contains_nan=True, dask=False)
    ds = Dataset({"var1": da}, coords={"scalar": 0})

    def _reduce(obj):
        # Apply the named reduction with a min_count that forces masking.
        return getattr(obj, func)(dim="x", skipna=True, min_count=3)

    actual = _reduce(ds)["var1"]
    expected = _reduce(ds["var1"])
    assert_allclose(actual, expected)
def test_auto_combine_no_concat(self):
    """auto_combine merges variables when there is nothing to concatenate."""
    merged = auto_combine([Dataset({'x': 0}), Dataset({'y': 1})])
    assert_identical(Dataset({'x': 0, 'y': 1}), merged)

    merged = auto_combine([Dataset({'x': 0, 'y': 1}),
                           Dataset({'y': np.nan, 'z': 2})])
    assert_identical(Dataset({'x': 0, 'y': 1, 'z': 2}), merged)

    data = Dataset({'x': 0})
    assert_identical(data, auto_combine([data, data, data], concat_dim=None))

    # Single object, with a concat_dim explicitly provided
    # Test the issue reported in GH #1988
    new_dim = DataArray([100], name='baz', dims='baz')
    merged = auto_combine([Dataset({'x': 0, 'y': 1})], concat_dim=new_dim)
    assert_identical(
        Dataset({'x': ('baz', [0]), 'y': ('baz', [1])}, {'baz': [100]}),
        merged)

    # Just making sure that auto_combine is doing what is
    # expected for non-scalar values, too.
    new_dim = DataArray([100], name='baz', dims='baz')
    merged = auto_combine([Dataset({'x': ('z', [0, 1]), 'y': ('z', [1, 2])})],
                          concat_dim=new_dim)
    assert_identical(
        Dataset({'x': (('baz', 'z'), [[0, 1]]),
                 'y': (('baz', 'z'), [[1, 2]])}, {'baz': [100]}),
        merged)
def test_auto_combine_with_concat_dim(self):
    """Passing concat_dim to auto_combine is deprecated and must warn."""
    pieces = [Dataset({'x': [0]}), Dataset({'x': [1]})]
    with pytest.warns(FutureWarning, match="`concat_dim`"):
        auto_combine(pieces, concat_dim='x')
def test_auto_combine(self):
    """auto_combine infers the concat dim and rejects ambiguous inputs."""
    pieces = [Dataset({'x': [0]}), Dataset({'x': [1]})]
    combined = auto_combine(pieces)
    assert_identical(Dataset({'x': [0, 1]}), combined)

    # Re-combining a single combined dataset is a no-op.
    assert_identical(Dataset({'x': [0, 1]}), auto_combine([combined]))

    pieces = [Dataset({'x': [0, 1]}), Dataset({'x': [2]})]
    assert_identical(Dataset({'x': [0, 1, 2]}), auto_combine(pieces))

    # ensure auto_combine handles non-sorted variables
    pieces = [
        Dataset(OrderedDict([('x', ('a', [0])), ('y', ('a', [0]))])),
        Dataset(OrderedDict([('y', ('a', [1])), ('x', ('a', [1]))])),
    ]
    combined = auto_combine(pieces)
    assert_identical(Dataset({'x': ('a', [0, 1]), 'y': ('a', [0, 1])}),
                     combined)

    # Two candidate dims make the concat dimension ambiguous.
    pieces = [Dataset({'x': [0], 'y': [0]}), Dataset({'y': [1], 'x': [1]})]
    with raises_regex(ValueError, 'too many .* dimensions'):
        auto_combine(pieces)

    # Scalar variables: no dimension to infer at all.
    pieces = [Dataset({'x': 0}), Dataset({'x': 1})]
    with raises_regex(ValueError, 'cannot infer dimension'):
        auto_combine(pieces)

    pieces = [Dataset({'x': [0], 'y': [0]}), Dataset({'x': [0]})]
    with pytest.raises(KeyError):
        auto_combine(pieces)
def test_combine_coords_join_exact(self):
    """join='exact' raises when the non-concat indexes differ."""
    pieces = [Dataset({'x': [0], 'y': [0]}), Dataset({'x': [1], 'y': [1]})]
    with raises_regex(ValueError, 'indexes along dimension'):
        combine_nested(pieces, concat_dim='x', join='exact')
def test_combine_by_coords(self):
    """combine_by_coords orders datasets using their dimension coordinates."""
    pair = [Dataset({'x': [0]}), Dataset({'x': [1]})]
    combined = combine_by_coords(pair)
    assert_identical(Dataset({'x': [0, 1]}), combined)

    # Idempotent on an already-combined input.
    assert_identical(Dataset({'x': [0, 1]}), combine_by_coords([combined]))

    pair = [Dataset({'x': [0, 1]}), Dataset({'x': [2]})]
    assert_identical(Dataset({'x': [0, 1, 2]}), combine_by_coords(pair))

    # ensure auto_combine handles non-sorted variables
    pair = [
        Dataset({'x': ('a', [0]), 'y': ('a', [0]), 'a': [0]}),
        Dataset({'x': ('a', [1]), 'y': ('a', [1]), 'a': [1]}),
    ]
    combined = combine_by_coords(pair)
    assert_identical(
        Dataset({'x': ('a', [0, 1]), 'y': ('a', [0, 1]), 'a': [0, 1]}),
        combined)

    pair = [Dataset({'x': [0], 'y': [0]}), Dataset({'y': [1], 'x': [1]})]
    assert_equal(combine_by_coords(pair), Dataset({'x': [0, 1], 'y': [0, 1]}))

    # Scalar coords leave nothing to concatenate along.
    pair = [Dataset({'x': 0}), Dataset({'x': 1})]
    with raises_regex(ValueError, 'Could not find any dimension coordinates'):
        combine_by_coords(pair)

    # A dimension missing its coordinate cannot be ordered.
    pair = [Dataset({'x': [0], 'y': [0]}), Dataset({'x': [0]})]
    with raises_regex(ValueError, 'Every dimension needs a coordinate'):
        combine_by_coords(pair)

def test_empty_input(self):
    """Empty input yields an empty Dataset."""
    assert_identical(Dataset(), combine_by_coords([]))
def test_nested_concat(self):
    """combine_nested concatenates along explicitly supplied dims."""
    pieces = [Dataset({"x": [0]}), Dataset({"x": [1]})]
    target = Dataset({"x": [0, 1]})
    # Both a bare dim name and a one-element list are accepted.
    for dim_spec in ("x", ["x"]):
        assert_identical(target, combine_nested(pieces, concat_dim=dim_spec))

    # A single, already-combined dataset passes through for either spec.
    combined = combine_nested(pieces, concat_dim="x")
    assert_identical(target, combine_nested([combined], concat_dim=None))
    assert_identical(target, combine_nested([combined], concat_dim="x"))

    pieces = [Dataset({"x": [0, 1]}), Dataset({"x": [2]})]
    assert_identical(Dataset({"x": [0, 1, 2]}),
                     combine_nested(pieces, concat_dim="x"))

    # ensure combine_nested handles non-sorted variables
    pieces = [
        Dataset({"x": ("a", [0]), "y": ("a", [0])}),
        Dataset({"y": ("a", [1]), "x": ("a", [1])}),
    ]
    assert_identical(Dataset({"x": ("a", [0, 1]), "y": ("a", [0, 1])}),
                     combine_nested(pieces, concat_dim="a"))

    pieces = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1]})]
    assert_identical(Dataset({"x": [0, 1], "y": [0]}),
                     combine_nested(pieces, concat_dim="x"))
def test_invalid_coordinates(self):
    # regression test for GH308
    source = Dataset({"foo": ("t", [1, 2], {"coordinates": "invalid"})})
    decoded = conventions.decode_cf(source)
    # The bogus "coordinates" attribute must be preserved, not interpreted.
    assert_identical(source, decoded)
def test_empty_input(self):
    """combine_nested of an empty list gives an empty Dataset."""
    assert_identical(Dataset(), combine_nested([], concat_dim="x"))
def test_invalid_time_units_raises_eagerly(self):
    """Bad time 'units' must raise during decode_cf, not lazily later."""
    broken = Dataset({"time": ("time", [0, 1], {"units": "foobar since 123"})})
    with raises_regex(ValueError, "unable to decode time"):
        decode_cf(broken)
class TestCombineAuto:
    # Tests for combine_by_coords (automatic ordering by dimension coords)
    # and the join / combine_attrs options of combine_nested.

    def test_combine_by_coords(self):
        objs = [Dataset({"x": [0]}), Dataset({"x": [1]})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": [0, 1]})
        assert_identical(expected, actual)

        # A single already-combined dataset is returned unchanged.
        actual = combine_by_coords([actual])
        assert_identical(expected, actual)

        objs = [Dataset({"x": [0, 1]}), Dataset({"x": [2]})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": [0, 1, 2]})
        assert_identical(expected, actual)

        # ensure auto_combine handles non-sorted variables
        objs = [
            Dataset({"x": ("a", [0]), "y": ("a", [0]), "a": [0]}),
            Dataset({"x": ("a", [1]), "y": ("a", [1]), "a": [1]}),
        ]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": ("a", [0, 1]), "y": ("a", [0, 1]),
                            "a": [0, 1]})
        assert_identical(expected, actual)

        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"y": [1], "x": [1]})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": [0, 1], "y": [0, 1]})
        assert_equal(actual, expected)

        # Scalar coords leave nothing to concatenate along.
        objs = [Dataset({"x": 0}), Dataset({"x": 1})]
        with raises_regex(ValueError,
                          "Could not find any dimension coordinates"):
            combine_by_coords(objs)

        # A dimension lacking a coordinate cannot be auto-ordered.
        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})]
        with raises_regex(ValueError, "Every dimension needs a coordinate"):
            combine_by_coords(objs)

    def test_empty_input(self):
        # An empty list combines to an empty Dataset.
        assert_identical(Dataset(), combine_by_coords([]))

    @pytest.mark.parametrize(
        "join, expected",
        [
            ("outer", Dataset({"x": [0, 1], "y": [0, 1]})),
            ("inner", Dataset({"x": [0, 1], "y": []})),
            ("left", Dataset({"x": [0, 1], "y": [0]})),
            ("right", Dataset({"x": [0, 1], "y": [1]})),
        ],
    )
    def test_combine_coords_join(self, join, expected):
        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})]
        actual = combine_nested(objs, concat_dim="x", join=join)
        assert_identical(expected, actual)

    def test_combine_coords_join_exact(self):
        # join="exact" must raise when the non-concat indexes differ.
        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})]
        with raises_regex(ValueError, "indexes along dimension"):
            combine_nested(objs, concat_dim="x", join="exact")

    @pytest.mark.parametrize(
        "combine_attrs, expected",
        [
            ("drop", Dataset({"x": [0, 1], "y": [0, 1]}, attrs={})),
            (
                "no_conflicts",
                Dataset({"x": [0, 1], "y": [0, 1]}, attrs={"a": 1, "b": 2}),
            ),
            ("override", Dataset({"x": [0, 1], "y": [0, 1]}, attrs={"a": 1})),
        ],
    )
    def test_combine_coords_combine_attrs(self, combine_attrs, expected):
        objs = [
            Dataset({"x": [0], "y": [0]}, attrs={"a": 1}),
            Dataset({"x": [1], "y": [1]}, attrs={"a": 1, "b": 2}),
        ]
        actual = combine_nested(objs, concat_dim="x", join="outer",
                                combine_attrs=combine_attrs)
        assert_identical(expected, actual)

        if combine_attrs == "no_conflicts":
            # Conflicting attr values must raise under "no_conflicts".
            objs[1].attrs["a"] = 2
            with raises_regex(ValueError, "combine_attrs='no_conflicts'"):
                actual = combine_nested(objs, concat_dim="x", join="outer",
                                        combine_attrs=combine_attrs)

    def test_combine_coords_combine_attrs_identical(self):
        objs = [
            Dataset({"x": [0], "y": [0]}, attrs={"a": 1}),
            Dataset({"x": [1], "y": [1]}, attrs={"a": 1}),
        ]
        expected = Dataset({"x": [0, 1], "y": [0, 1]}, attrs={"a": 1})
        actual = combine_nested(objs, concat_dim="x", join="outer",
                                combine_attrs="identical")
        assert_identical(expected, actual)

        # Any attr difference must raise under "identical".
        objs[1].attrs["b"] = 2
        with raises_regex(ValueError, "combine_attrs='identical'"):
            actual = combine_nested(objs, concat_dim="x", join="outer",
                                    combine_attrs="identical")

    def test_infer_order_from_coords(self):
        # Out-of-order inputs are sorted by their coordinate values.
        data = create_test_data()
        objs = [data.isel(dim2=slice(4, 9)), data.isel(dim2=slice(4))]
        actual = combine_by_coords(objs)
        expected = data
        assert expected.broadcast_equals(actual)

    def test_combine_leaving_bystander_dimensions(self):
        # Check non-monotonic bystander dimension coord doesn't raise
        # ValueError on combine (https://github.com/pydata/xarray/issues/3150)
        ycoord = ["a", "c", "b"]
        data = np.random.rand(7, 3)
        ds1 = Dataset(
            data_vars=dict(data=(["x", "y"], data[:3, :])),
            coords=dict(x=[1, 2, 3], y=ycoord),
        )
        ds2 = Dataset(
            data_vars=dict(data=(["x", "y"], data[3:, :])),
            coords=dict(x=[4, 5, 6, 7], y=ycoord),
        )
        expected = Dataset(
            data_vars=dict(data=(["x", "y"], data)),
            coords=dict(x=[1, 2, 3, 4, 5, 6, 7], y=ycoord),
        )
        actual = combine_by_coords((ds1, ds2))
        assert_identical(expected, actual)

    def test_combine_by_coords_previously_failed(self):
        # In the above scenario, one file is missing, containing the data for
        # one year's data for one variable.
        datasets = [
            Dataset({"a": ("x", [0]), "x": [0]}),
            Dataset({"b": ("x", [0]), "x": [0]}),
            Dataset({"a": ("x", [1]), "x": [1]}),
        ]
        # The missing (year, variable) combination is filled with NaN.
        expected = Dataset({"a": ("x", [0, 1]), "b": ("x", [0, np.nan])},
                           {"x": [0, 1]})
        actual = combine_by_coords(datasets)
        assert_identical(expected, actual)

    def test_combine_by_coords_still_fails(self):
        # concat can't handle new variables (yet):
        # https://github.com/pydata/xarray/issues/508
        datasets = [Dataset({"x": 0}, {"y": 0}),
                    Dataset({"x": 1}, {"y": 1, "z": 1})]
        with pytest.raises(ValueError):
            combine_by_coords(datasets, "y")

    def test_combine_by_coords_no_concat(self):
        # Distinct variables are merged rather than concatenated.
        objs = [Dataset({"x": 0}), Dataset({"y": 1})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": 0, "y": 1})
        assert_identical(expected, actual)

        objs = [Dataset({"x": 0, "y": 1}), Dataset({"y": np.nan, "z": 2})]
        actual = combine_by_coords(objs)
        expected = Dataset({"x": 0, "y": 1, "z": 2})
        assert_identical(expected, actual)

    def test_check_for_impossible_ordering(self):
        # Interleaved coord values can never form a monotonic global index.
        ds0 = Dataset({"x": [0, 1, 5]})
        ds1 = Dataset({"x": [2, 3]})
        with raises_regex(
                ValueError, "does not have monotonic global indexes"
                " along dimension x"):
            combine_by_coords([ds1, ds0])

    def test_combine_by_coords_incomplete_hypercube(self):
        # test that this succeeds with default fill_value
        x1 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [0]})
        x2 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [1], "x": [0]})
        x3 = Dataset({"a": (("y", "x"), [[1]])}, coords={"y": [0], "x": [1]})
        actual = combine_by_coords([x1, x2, x3])
        expected = Dataset(
            {"a": (("y", "x"), [[1, 1], [1, np.nan]])},
            coords={"y": [0, 1], "x": [0, 1]},
        )
        assert_identical(expected, actual)

        # test that this fails if fill_value is None
        with pytest.raises(ValueError):
            combine_by_coords([x1, x2, x3], fill_value=None)
def test_lazy_dataset(self):
    """A Dataset built from dask-backed data stays dask-backed."""
    built = Dataset({"foo": (("x", "y"), self.data)})
    assert isinstance(built.foo.variable.data, da.Array)
def test_empty_input(self):
    """With no inputs, combine_by_coords returns an empty Dataset."""
    result = combine_by_coords([])
    assert_identical(Dataset(), result)
def _write_netcdf(self, ):
    '''Assemble the computed cloud-object fields into an xarray Dataset,
    attach per-variable attributes, and write the result to
    ``self._output_file`` as NETCDF4 with explicit dtype/fill encodings.
    '''
    # Data variables keyed by output name; the "stats" axis columns are
    # min/mean/median/max (see the attrs set on `stats` below).
    data_dict = {
        'sparce_objects': (('sparce_1d_indx', ), self.csr_data),
        'single_layer_flag': (('allObjects_unq', ), self.single_layer_clouds),
        'extent': (
            ('allObjects_unq', 'stats'),
            self.objStatistics.extent_stats,
        ),
        # Heights divided by 1000 — presumably metres -> km; TODO confirm
        # the source units of objStatistics.
        'top': (
            ('allObjects_unq', 'stats'),
            self.objStatistics.topHeight[self.unq_msk].astype(float32) /
            1000.,
        ),
        'base': (
            ('allObjects_unq', 'stats'),
            self.objStatistics.baseHeight[self.unq_msk].astype(float32) /
            1000.,
        ),
        'thickness': (
            ('allObjects_unq', 'stats'),
            self.objStatistics.thickness[self.unq_msk].astype(float32) /
            1000.,
        ),
        'lat_bounds': (('allObjects_unq', 'geo_bounds'), self.lat_bounds),
        'lon_bounds': (('allObjects_unq', 'geo_bounds'), self.lon_bounds),
    }
    coords_dict = {
        'sparce_1d_indx': self.sparce_flat_indx.astype(int32),
        'allObjects_unq': self.unq_clds,
        'cloudSat_shape': array(self.cloudObjects.shape),
        'stats': [1, 2, 3, 4],
    }
    _outData = Dataset(data_vars=data_dict, coords=coords_dict)
    # Scale/offset packing is disabled for now; the encodings below write
    # raw float32 values with a -999. fill value instead.
    #_extent_offset,_extent_scale = self._add_scale_and_offset(_outData.extent.values)
    #_top_offset,_top_scale = self._add_scale_and_offset(_outData.top.values)
    #_base_offset,_base_scale = self._add_scale_and_offset(_outData.base.values)
    #_thick_offset,_thick_scale = self._add_scale_and_offset(_outData.thickness.values)
    _outData.cloudSat_shape.attrs = self.set_var_attributes(
        _outData.cloudSat_shape,
        description='Shape of cloudsat orbit',
    )
    _outData.sparce_1d_indx.attrs = self.set_var_attributes(
        _outData.sparce_1d_indx,
        description='flattend indices of sparce_objects variable',
    )
    _outData.sparce_objects.attrs = self.set_var_attributes(
        _outData.sparce_objects,
        description=
        'cloud objects corresponding to 1d coordinates from 2d cloudsat field',
    )
    _outData.allObjects_unq.attrs = self.set_var_attributes(
        _outData.allObjects_unq,
        description='unique list of cloud objects',
    )
    # Label the four statistics columns.
    _outData.stats.attrs = self.set_var_attributes(
        _outData.stats,
        col_1='min',
        col_2='mean',
        col_3='median',
        col_4='max',
    )
    _outData.extent.attrs = self.set_var_attributes(
        _outData.extent,
        description='cloud object along-track extent',
    )
    _outData.top.attrs = self.set_var_attributes(
        _outData.top,
        description='cloud object top height',
    )
    _outData.base.attrs = self.set_var_attributes(
        _outData.base,
        description='cloud object base height',
    )
    _outData.thickness.attrs = self.set_var_attributes(
        _outData.thickness,
        description='cloud object thickness',
    )
    _outData.lat_bounds.attrs = self.set_var_attributes(
        _outData.lat_bounds,
        long_name='cloud object latitude bounds',
        units='degrees north',
    )
    _outData.lon_bounds.attrs = self.set_var_attributes(
        _outData.lon_bounds,
        long_name='cloud object longitude bounds',
        units='degrees east',
    )
    # Write with explicit per-variable dtypes; all float variables share
    # the -999. fill value.
    _outData.to_netcdf(
        self._output_file,
        format='NETCDF4',
        encoding={
            'sparce_objects': {
                'dtype': 'int32'
            },
            'single_layer_flag': {
                'dtype': 'uint8'
            },
            'stats': {
                'dtype': 'uint8'
            },
            'cloudSat_shape': {
                'dtype': 'uint16'
            },
            'extent': {
                'dtype': 'float32',
                '_FillValue': -999.,
                #'scale_factor' : _extent_scale,
                #'add_offset' : _extent_offset,
            },
            'top': {
                'dtype': 'float32',
                '_FillValue': -999.,
                #'scale_factor' : _top_scale,
                #'add_offset' : _top_offset,
            },
            'base': {
                'dtype': 'float32',
                '_FillValue': -999.,
                #'scale_factor' : _base_scale,
                #'add_offset' : _base_offset,
            },
            'thickness': {
                'dtype': 'float32',
                '_FillValue': -999.,
                #'scale_factor' : _thick_scale,
                #'add_offset' : _thick_offset,
            },
            'lon_bounds': {
                'dtype': 'float32',
                '_FillValue': -999.,
                #'scale_factor' : _thick_scale,
                #'add_offset' : _thick_offset,
            },
            'lat_bounds': {
                'dtype': 'float32',
                '_FillValue': -999.,
                #'scale_factor' : _thick_scale,
                #'add_offset' : _thick_offset,
            },
        })
def test_combine_coords_join_exact(self):
    """join="exact" must refuse datasets whose other indexes differ."""
    pieces = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})]
    with raises_regex(ValueError, "indexes along dimension"):
        combine_nested(pieces, concat_dim="x", join="exact")
def test_no_dimension_coords(self):
    """Ordering inference fails when no dimension coordinates exist."""
    first = Dataset({"foo": ("x", [0, 1])})
    second = Dataset({"foo": ("x", [2, 3])})
    with raises_regex(ValueError, "Could not find any dimension"):
        _infer_concat_order_from_coords([second, first])
def test_invalid_coordinates(self):
    # regression test for GH308
    source = Dataset({'foo': ('t', [1, 2], {'coordinates': 'invalid'})})
    roundtripped = conventions.decode_cf(source)
    # The bogus attribute must survive decoding untouched.
    assert_identical(source, roundtripped)
def test_no_concatenation_needed(self):
    """A single dataset maps to the empty tile id with no concat dims."""
    ds = Dataset({"foo": ("x", [0, 1])})
    actual, concat_dims = _infer_concat_order_from_coords([ds])
    assert_combined_tile_ids_equal({(): ds}, actual)
    assert concat_dims == []
def test_remap_label_indexers(self):
    # Exercise indexing.remap_label_indexers for both a plain index and a
    # pandas MultiIndex, checking positional results and residual indexes.
    def test_indexer(data, x, expected_pos, expected_idx=None):
        # Helper: remap a single label indexer for dim "x" and compare.
        pos, idx = indexing.remap_label_indexers(data, {"x": x})
        assert_array_equal(pos.get("x"), expected_pos)
        assert_array_equal(idx.get("x"), expected_idx)

    data = Dataset({"x": ("x", [1, 2, 3])})
    mindex = pd.MultiIndex.from_product([["a", "b"], [1, 2], [-1, -2]],
                                        names=("one", "two", "three"))
    mdata = DataArray(range(8), [("x", mindex)])

    # Scalar lookups on a flat index.
    test_indexer(data, 1, 0)
    test_indexer(data, np.int32(1), 0)
    test_indexer(data, Variable([], 1), 0)
    # Full-tuple lookup selects a single element of the MultiIndex.
    test_indexer(mdata, ("a", 1, -1), 0)
    # Partial-tuple lookups leave a residual index over the free levels.
    test_indexer(
        mdata,
        ("a", 1),
        [True, True, False, False, False, False, False, False],
        [-1, -2],
    )
    test_indexer(
        mdata,
        "a",
        slice(0, 4, None),
        pd.MultiIndex.from_product([[1, 2], [-1, -2]]),
    )
    test_indexer(
        mdata,
        ("a", ),
        [True, True, True, True, False, False, False, False],
        pd.MultiIndex.from_product([[1, 2], [-1, -2]]),
    )
    # Lists of tuples and label slices.
    test_indexer(mdata, [("a", 1, -1), ("b", 2, -2)], [0, 7])
    test_indexer(mdata, slice("a", "b"), slice(0, 8, None))
    test_indexer(mdata, slice(("a", 1), ("b", 1)), slice(0, 6, None))
    # Dict-style selection by MultiIndex level name.
    test_indexer(mdata, {"one": "a", "two": 1, "three": -1}, 0)
    test_indexer(
        mdata,
        {"one": "a", "two": 1},
        [True, True, False, False, False, False, False, False],
        [-1, -2],
    )
    test_indexer(
        mdata,
        {"one": "a", "three": -1},
        [True, False, True, False, False, False, False, False],
        [1, 2],
    )
    test_indexer(
        mdata,
        {"one": "a"},
        [True, True, True, True, False, False, False, False],
        pd.MultiIndex.from_product([[1, 2], [-1, -2]]),
    )
class TestNestedCombine:
    """Tests for combine_nested, where the user supplies the layout."""

    def test_nested_concat(self):
        # Basic 1D concatenation along an existing dimension coordinate.
        objs = [Dataset({"x": [0]}), Dataset({"x": [1]})]
        expected = Dataset({"x": [0, 1]})
        actual = combine_nested(objs, concat_dim="x")
        assert_identical(expected, actual)
        actual = combine_nested(objs, concat_dim=["x"])
        assert_identical(expected, actual)

        # A singleton list is a no-op regardless of concat_dim.
        actual = combine_nested([actual], concat_dim=None)
        assert_identical(expected, actual)
        actual = combine_nested([actual], concat_dim="x")
        assert_identical(expected, actual)

        # Unequal-length inputs along the concat dim.
        objs = [Dataset({"x": [0, 1]}), Dataset({"x": [2]})]
        actual = combine_nested(objs, concat_dim="x")
        expected = Dataset({"x": [0, 1, 2]})
        assert_identical(expected, actual)

        # ensure combine_nested handles non-sorted variables
        objs = [
            Dataset({
                "x": ("a", [0]),
                "y": ("a", [0])
            }),
            Dataset({
                "y": ("a", [1]),
                "x": ("a", [1])
            }),
        ]
        actual = combine_nested(objs, concat_dim="a")
        expected = Dataset({"x": ("a", [0, 1]), "y": ("a", [0, 1])})
        assert_identical(expected, actual)

        # A variable present in only one input is carried through.
        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1]})]
        actual = combine_nested(objs, concat_dim="x")
        expected = Dataset({"x": [0, 1], "y": [0]})
        assert_identical(expected, actual)

    @pytest.mark.parametrize(
        "join, expected",
        [
            ("outer", Dataset({
                "x": [0, 1],
                "y": [0, 1]
            })),
            ("inner", Dataset({
                "x": [0, 1],
                "y": []
            })),
            ("left", Dataset({
                "x": [0, 1],
                "y": [0]
            })),
            ("right", Dataset({
                "x": [0, 1],
                "y": [1]
            })),
        ],
    )
    def test_combine_nested_join(self, join, expected):
        # The 'join' kwarg controls how the non-concatenated "y" index is
        # aligned across inputs.
        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})]
        actual = combine_nested(objs, concat_dim="x", join=join)
        assert_identical(expected, actual)

    def test_combine_nested_join_exact(self):
        # join="exact" refuses to align differing "y" indexes.
        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1], "y": [1]})]
        with raises_regex(ValueError, "indexes along dimension"):
            combine_nested(objs, concat_dim="x", join="exact")

    def test_empty_input(self):
        # An empty input list combines to an empty Dataset.
        assert_identical(Dataset(), combine_nested([], concat_dim="x"))

    # Fails because of concat's weird treatment of dimension coords, see #2975
    @pytest.mark.xfail
    def test_nested_concat_too_many_dims_at_once(self):
        objs = [Dataset({"x": [0], "y": [1]}), Dataset({"y": [0], "x": [1]})]
        with pytest.raises(ValueError, match="not equal across datasets"):
            combine_nested(objs, concat_dim="x", coords="minimal")

    def test_nested_concat_along_new_dim(self):
        # Concatenating along a dim not present in the inputs creates it.
        objs = [
            Dataset({
                "a": ("x", [10]),
                "x": [0]
            }),
            Dataset({
                "a": ("x", [20]),
                "x": [0]
            }),
        ]
        expected = Dataset({"a": (("t", "x"), [[10], [20]]), "x": [0]})
        actual = combine_nested(objs, concat_dim="t")
        assert_identical(expected, actual)

        # Same but with a DataArray as new dim, see GH #1988 and #2647
        dim = DataArray([100, 150], name="baz", dims="baz")
        expected = Dataset({
            "a": (("baz", "x"), [[10], [20]]),
            "x": [0],
            "baz": [100, 150]
        })
        actual = combine_nested(objs, concat_dim=dim)
        assert_identical(expected, actual)

    def test_nested_merge(self):
        # concat_dim=None (or [None]) means merge instead of concatenate.
        data = Dataset({"x": 0})
        actual = combine_nested([data, data, data], concat_dim=None)
        assert_identical(data, actual)

        # Merging overlapping coordinates takes their union.
        ds1 = Dataset({"a": ("x", [1, 2]), "x": [0, 1]})
        ds2 = Dataset({"a": ("x", [2, 3]), "x": [1, 2]})
        expected = Dataset({"a": ("x", [1, 2, 3]), "x": [0, 1, 2]})
        actual = combine_nested([ds1, ds2], concat_dim=None)
        assert_identical(expected, actual)
        actual = combine_nested([ds1, ds2], concat_dim=[None])
        assert_identical(expected, actual)

        # NaN is compatible with a concrete value under merge.
        tmp1 = Dataset({"x": 0})
        tmp2 = Dataset({"x": np.nan})
        actual = combine_nested([tmp1, tmp2], concat_dim=None)
        assert_identical(tmp1, actual)
        actual = combine_nested([tmp1, tmp2], concat_dim=[None])
        assert_identical(tmp1, actual)

        # Single object, with a concat_dim explicitly provided
        # Test the issue reported in GH #1988
        objs = [Dataset({"x": 0, "y": 1})]
        dim = DataArray([100], name="baz", dims="baz")
        actual = combine_nested(objs, concat_dim=[dim])
        expected = Dataset({
            "x": ("baz", [0]),
            "y": ("baz", [1])
        }, {"baz": [100]})
        assert_identical(expected, actual)

        # Just making sure that auto_combine is doing what is
        # expected for non-scalar values, too.
        objs = [Dataset({"x": ("z", [0, 1]), "y": ("z", [1, 2])})]
        dim = DataArray([100], name="baz", dims="baz")
        actual = combine_nested(objs, concat_dim=[dim])
        expected = Dataset(
            {
                "x": (("baz", "z"), [[0, 1]]),
                "y": (("baz", "z"), [[1, 2]])
            },
            {"baz": [100]},
        )
        assert_identical(expected, actual)

    def test_concat_multiple_dims(self):
        # A nested 2x2 list concatenates along two dimensions at once.
        objs = [
            [
                Dataset({"a": (("x", "y"), [[0]])}),
                Dataset({"a": (("x", "y"), [[1]])})
            ],
            [
                Dataset({"a": (("x", "y"), [[2]])}),
                Dataset({"a": (("x", "y"), [[3]])})
            ],
        ]
        actual = combine_nested(objs, concat_dim=["x", "y"])
        expected = Dataset({"a": (("x", "y"), [[0, 1], [2, 3]])})
        assert_identical(expected, actual)

    def test_concat_name_symmetry(self):
        """Inspired by the discussion on GH issue #2777"""
        # Combining x-then-y must give the same result as y-then-x.
        da1 = DataArray(name="a", data=[[0]], dims=["x", "y"])
        da2 = DataArray(name="b", data=[[1]], dims=["x", "y"])
        da3 = DataArray(name="a", data=[[2]], dims=["x", "y"])
        da4 = DataArray(name="b", data=[[3]], dims=["x", "y"])
        x_first = combine_nested([[da1, da2], [da3, da4]],
                                 concat_dim=["x", "y"])
        y_first = combine_nested([[da1, da3], [da2, da4]],
                                 concat_dim=["y", "x"])
        assert_identical(x_first, y_first)

    def test_concat_one_dim_merge_another(self):
        # Outer level merges (None), inner level concatenates along dim2.
        data = create_test_data()
        data1 = data.copy(deep=True)
        data2 = data.copy(deep=True)
        objs = [
            [
                data1.var1.isel(dim2=slice(4)),
                data2.var1.isel(dim2=slice(4, 9))
            ],
            [
                data1.var2.isel(dim2=slice(4)),
                data2.var2.isel(dim2=slice(4, 9))
            ],
        ]
        expected = data[["var1", "var2"]]
        actual = combine_nested(objs, concat_dim=[None, "dim2"])
        assert expected.identical(actual)

    def test_auto_combine_2d(self):
        # NOTE: ds is the factory function itself, called with a seed below.
        ds = create_test_data
        partway1 = concat([ds(0), ds(3)], dim="dim1")
        partway2 = concat([ds(1), ds(4)], dim="dim1")
        partway3 = concat([ds(2), ds(5)], dim="dim1")
        expected = concat([partway1, partway2, partway3], dim="dim2")
        datasets = [[ds(0), ds(1), ds(2)], [ds(3), ds(4), ds(5)]]
        result = combine_nested(datasets, concat_dim=["dim1", "dim2"])
        assert_equal(result, expected)

    def test_auto_combine_2d_combine_attrs_kwarg(self):
        # Build the same 2x3 hypercube as test_auto_combine_2d, then check
        # each combine_attrs mode produces the expected merged attrs.
        ds = create_test_data
        partway1 = concat([ds(0), ds(3)], dim="dim1")
        partway2 = concat([ds(1), ds(4)], dim="dim1")
        partway3 = concat([ds(2), ds(5)], dim="dim1")
        expected = concat([partway1, partway2, partway3], dim="dim2")

        expected_dict = {}
        expected_dict["drop"] = expected.copy(deep=True)
        expected_dict["drop"].attrs = {}
        expected_dict["no_conflicts"] = expected.copy(deep=True)
        expected_dict["no_conflicts"].attrs = {
            "a": 1,
            "b": 2,
            "c": 3,
            "d": 4,
            "e": 5,
            "f": 6,
        }
        expected_dict["override"] = expected.copy(deep=True)
        expected_dict["override"].attrs = {"a": 1}

        datasets = [[ds(0), ds(1), ds(2)], [ds(3), ds(4), ds(5)]]
        # Every input shares "a"; each additionally carries a unique key.
        datasets[0][0].attrs = {"a": 1}
        datasets[0][1].attrs = {"a": 1, "b": 2}
        datasets[0][2].attrs = {"a": 1, "c": 3}
        datasets[1][0].attrs = {"a": 1, "d": 4}
        datasets[1][1].attrs = {"a": 1, "e": 5}
        datasets[1][2].attrs = {"a": 1, "f": 6}

        # "identical" must fail, since the attrs differ across inputs.
        with raises_regex(ValueError, "combine_attrs='identical'"):
            result = combine_nested(datasets, concat_dim=["dim1", "dim2"],
                                    combine_attrs="identical")
        for combine_attrs in expected_dict:
            result = combine_nested(datasets, concat_dim=["dim1", "dim2"],
                                    combine_attrs=combine_attrs)
            assert_identical(result, expected_dict[combine_attrs])

    def test_combine_nested_missing_data_new_dim(self):
        # Your data includes "time" and "station" dimensions, and each year's
        # data has a different set of stations.
        datasets = [
            Dataset({
                "a": ("x", [2, 3]),
                "x": [1, 2]
            }),
            Dataset({
                "a": ("x", [1, 2]),
                "x": [0, 1]
            }),
        ]
        expected = Dataset(
            {"a": (("t", "x"), [[np.nan, 2, 3], [1, 2, np.nan]])},
            {"x": [0, 1, 2]})
        actual = combine_nested(datasets, concat_dim="t")
        assert_identical(expected, actual)

    def test_invalid_hypercube_input(self):
        # NOTE: ds is the factory function itself, called with a seed below.
        ds = create_test_data
        # Ragged sub-list lengths are rejected.
        datasets = [[ds(0), ds(1), ds(2)], [ds(3), ds(4)]]
        with raises_regex(ValueError, "sub-lists do not have "
                          "consistent lengths"):
            combine_nested(datasets, concat_dim=["dim1", "dim2"])
        # Inconsistent nesting depth is rejected.
        datasets = [[ds(0), ds(1)], [[ds(3), ds(4)]]]
        with raises_regex(ValueError, "sub-lists do not have "
                          "consistent depths"):
            combine_nested(datasets, concat_dim=["dim1", "dim2"])
        # concat_dim length must match the nesting depth.
        datasets = [[ds(0), ds(1)], [ds(3), ds(4)]]
        with raises_regex(ValueError, "concat_dims has length"):
            combine_nested(datasets, concat_dim=["dim1"])

    def test_merge_one_dim_concat_another(self):
        # Concatenate along "x" at one nesting level, merge at the other.
        objs = [
            [
                Dataset({"foo": ("x", [0, 1])}),
                Dataset({"bar": ("x", [10, 20])})
            ],
            [
                Dataset({"foo": ("x", [2, 3])}),
                Dataset({"bar": ("x", [30, 40])})
            ],
        ]
        expected = Dataset({
            "foo": ("x", [0, 1, 2, 3]),
            "bar": ("x", [10, 20, 30, 40])
        })
        actual = combine_nested(objs, concat_dim=["x", None], compat="equals")
        assert_identical(expected, actual)

        # Proving it works symmetrically
        objs = [
            [Dataset({"foo": ("x", [0, 1])}),
             Dataset({"foo": ("x", [2, 3])})],
            [
                Dataset({"bar": ("x", [10, 20])}),
                Dataset({"bar": ("x", [30, 40])})
            ],
        ]
        actual = combine_nested(objs, concat_dim=[None, "x"], compat="equals")
        assert_identical(expected, actual)

    def test_combine_concat_over_redundant_nesting(self):
        # A level of nesting with only one element along it is harmless,
        # whichever axis it sits on.
        objs = [[Dataset({"x": [0]}), Dataset({"x": [1]})]]
        actual = combine_nested(objs, concat_dim=[None, "x"])
        expected = Dataset({"x": [0, 1]})
        assert_identical(expected, actual)

        objs = [[Dataset({"x": [0]})], [Dataset({"x": [1]})]]
        actual = combine_nested(objs, concat_dim=["x", None])
        expected = Dataset({"x": [0, 1]})
        assert_identical(expected, actual)

        objs = [[Dataset({"x": [0]})]]
        actual = combine_nested(objs, concat_dim=[None, None])
        expected = Dataset({"x": [0]})
        assert_identical(expected, actual)

    @pytest.mark.parametrize("fill_value", [dtypes.NA, 2, 2.0, {
        "a": 2,
        "b": 1
    }])
    def test_combine_nested_fill_value(self, fill_value):
        # fill_value may be a scalar or a per-variable dict; either way it
        # fills the positions missing after aligning on "x".
        datasets = [
            Dataset({
                "a": ("x", [2, 3]),
                "b": ("x", [-2, 1]),
                "x": [1, 2]
            }),
            Dataset({
                "a": ("x", [1, 2]),
                "b": ("x", [3, -1]),
                "x": [0, 1]
            }),
        ]
        if fill_value == dtypes.NA:
            # if we supply the default, we expect the missing value for a
            # float array
            fill_value_a = fill_value_b = np.nan
        elif isinstance(fill_value, dict):
            fill_value_a = fill_value["a"]
            fill_value_b = fill_value["b"]
        else:
            fill_value_a = fill_value_b = fill_value
        expected = Dataset(
            {
                "a": (("t", "x"), [[fill_value_a, 2, 3],
                                   [1, 2, fill_value_a]]),
                "b": (("t", "x"), [[fill_value_b, -2, 1],
                                   [3, -1, fill_value_b]]),
            },
            {"x": [0, 1, 2]},
        )
        actual = combine_nested(datasets, concat_dim="t",
                                fill_value=fill_value)
        assert_identical(expected, actual)
def test_concat_loads_variables(self):
    # Test that concat() computes not-in-memory variables at most once
    # and loads them in the output, while leaving the input unaltered.
    d1 = build_dask_array('d1')
    c1 = build_dask_array('c1')
    d2 = build_dask_array('d2')
    c2 = build_dask_array('c2')
    d3 = build_dask_array('d3')
    c3 = build_dask_array('c3')
    # Note: c is a non-index coord.
    # Index coords are loaded by IndexVariable.__init__.
    ds1 = Dataset(data_vars={'d': ('x', d1)}, coords={'c': ('x', c1)})
    ds2 = Dataset(data_vars={'d': ('x', d2)}, coords={'c': ('x', c2)})
    ds3 = Dataset(data_vars={'d': ('x', d3)}, coords={'c': ('x', c3)})

    # Nothing has been computed yet.
    assert kernel_call_count == 0
    out = xr.concat([ds1, ds2, ds3],
                    dim='n',
                    data_vars='different',
                    coords='different')
    # each kernel is computed exactly once
    assert kernel_call_count == 6
    # variables are loaded in the output
    assert isinstance(out['d'].data, np.ndarray)
    assert isinstance(out['c'].data, np.ndarray)

    out = xr.concat([ds1, ds2, ds3], dim='n', data_vars='all', coords='all')
    # no extra kernel calls
    assert kernel_call_count == 6
    assert isinstance(out['d'].data, dask.array.Array)
    assert isinstance(out['c'].data, dask.array.Array)

    out = xr.concat([ds1, ds2, ds3], dim='n', data_vars=['d'], coords=['c'])
    # no extra kernel calls
    assert kernel_call_count == 6
    assert isinstance(out['d'].data, dask.array.Array)
    assert isinstance(out['c'].data, dask.array.Array)

    out = xr.concat([ds1, ds2, ds3], dim='n', data_vars=[], coords=[])
    # variables are loaded once as we are validating that they're identical
    assert kernel_call_count == 12
    assert isinstance(out['d'].data, np.ndarray)
    assert isinstance(out['c'].data, np.ndarray)

    out = xr.concat([ds1, ds2, ds3],
                    dim='n',
                    data_vars='different',
                    coords='different',
                    compat='identical')
    # compat=identical doesn't do any more kernel calls than compat=equals
    assert kernel_call_count == 18
    assert isinstance(out['d'].data, np.ndarray)
    assert isinstance(out['c'].data, np.ndarray)

    # When the test for different turns true halfway through,
    # stop computing variables as it would not have any benefit
    ds4 = Dataset(data_vars={'d': ('x', [2.0])},
                  coords={'c': ('x', [2.0])})
    out = xr.concat([ds1, ds2, ds4, ds3],
                    dim='n',
                    data_vars='different',
                    coords='different')
    # the variables of ds1 and ds2 were computed, but those of ds3 didn't
    assert kernel_call_count == 22
    assert isinstance(out['d'].data, dask.array.Array)
    assert isinstance(out['c'].data, dask.array.Array)
    # the data of ds1 and ds2 was loaded into numpy and then
    # concatenated to the data of ds3. Thus, only ds3 is computed now.
    out.compute()
    assert kernel_call_count == 24

    # Finally, test that originals are unaltered
    assert ds1['d'].data is d1
    assert ds1['c'].data is c1
    assert ds2['d'].data is d2
    assert ds2['c'].data is c2
    assert ds3['d'].data is d3
    assert ds3['c'].data is c3
def test_combine_nested_but_need_auto_combine(self):
    """Inputs with mismatched variable sets cannot be nested-combined."""
    first = Dataset({'x': [0, 1]})
    second = Dataset({'x': [2], 'wall': [0]})
    with raises_regex(ValueError, 'cannot be combined'):
        combine_nested([first, second], concat_dim='x')