def test_infer_from_datasets(self):
    """Infer tile ids positionally from a flat 1-D list of datasets.

    Also checks that supplying more concat dims than the nesting depth
    of the input raises ``ValueError``.
    """
    ds = create_test_data
    # Renamed from ``input`` to avoid shadowing the builtin.
    datasets = [ds(0), ds(1)]
    expected = {(0,): ds(0), (1,): ds(1)}
    actual, concat_dims = _infer_concat_order_from_positions(
        datasets, ['dim1'])
    assert_combined_tile_ids_equal(expected, actual)

    # Two concat dims for a 1-deep nesting is invalid.
    datasets = [ds(0), ds(1)]
    with pytest.raises(ValueError):
        _infer_concat_order_from_positions(datasets, ['dim1', 'extra_dim'])
def test_infer_from_datasets(self):
    """Infer tile ids positionally from a flat 1-D list of datasets.

    NOTE(review): this method has the same name as an earlier
    ``test_infer_from_datasets`` in this file; if both are defined in
    the same class, this one shadows the first and only one of the two
    ever runs — confirm and remove/rename the duplicate.
    """
    ds = create_test_data
    # Renamed from ``input`` to avoid shadowing the builtin.
    datasets = [ds(0), ds(1)]
    expected = {(0,): ds(0), (1,): ds(1)}
    actual, concat_dims = _infer_concat_order_from_positions(
        datasets, ['dim1'])
    assert_combined_tile_ids_equal(expected, actual)

    # Two concat dims for a 1-deep nesting is invalid.
    datasets = [ds(0), ds(1)]
    with pytest.raises(ValueError):
        _infer_concat_order_from_positions(datasets, ['dim1', 'extra_dim'])
def test_redundant_nesting(self):
    """A redundantly-nested (Nx1) list still yields 2-tuple tile ids."""
    ds = create_test_data
    nested = [[ds(0)], [ds(1)]]
    expected = {(0, 0): ds(0), (1, 0): ds(1)}
    actual = _infer_concat_order_from_positions(nested)
    assert_combined_tile_ids_equal(expected, actual)
def test_single_dataset(self):
    """A one-element list maps to the single tile id ``(0,)``."""
    ds = create_test_data(0)
    # Renamed from ``input`` to avoid shadowing the builtin.
    datasets = [ds]
    expected = {(0,): ds}
    actual = _infer_concat_order_from_positions(datasets)
    assert_combined_tile_ids_equal(expected, actual)
def test_1d(self):
    """A flat list of two datasets yields 1-tuple tile ids in order."""
    ds = create_test_data
    # Renamed from ``input`` to avoid shadowing the builtin.
    datasets = [ds(0), ds(1)]
    expected = {(0,): ds(0), (1,): ds(1)}
    actual = _infer_concat_order_from_positions(datasets)
    assert_combined_tile_ids_equal(expected, actual)
def test_uneven_length_input(self):
    """Ragged nested input still gets positional tile ids assigned.

    Auto_combine won't work on ragged input, but this is just to
    increase test coverage of the id-inference step.
    """
    ds = create_test_data
    # Renamed from ``input`` to avoid shadowing the builtin.
    datasets = [[ds(0)], [ds(1), ds(2)]]
    expected = {(0, 0): ds(0), (1, 0): ds(1), (1, 1): ds(2)}
    actual = _infer_concat_order_from_positions(datasets)
    assert_combined_tile_ids_equal(expected, actual)
def test_2d(self):
    """A 3x2 nested list yields row-major 2-tuple tile ids."""
    ds = create_test_data
    # Renamed from ``input`` to avoid shadowing the builtin.
    datasets = [[ds(0), ds(1)], [ds(2), ds(3)], [ds(4), ds(5)]]
    expected = {
        (0, 0): ds(0), (0, 1): ds(1),
        (1, 0): ds(2), (1, 1): ds(3),
        (2, 0): ds(4), (2, 1): ds(5),
    }
    actual = _infer_concat_order_from_positions(datasets)
    assert_combined_tile_ids_equal(expected, actual)
def test_ignore_empty_list(self):
    """An empty sub-list contributes no tile ids."""
    ds = create_test_data(0)
    # Renamed from ``input`` to avoid shadowing the builtin.
    datasets = [ds, []]
    expected = {(0,): ds}
    actual = _infer_concat_order_from_positions(datasets)
    assert_combined_tile_ids_equal(expected, actual)
def my_open_mfdataset(paths, chnks=None, concat_dim='time',
                      compat='no_conflicts', data_vars='all',
                      coords='different', join='outer'):
    """Open and combine multiple netCDF files along ``concat_dim``.

    Trying to address the limitations of the existing xr.open_mfdataset
    function.  This is my modification using the existing function and
    tweaking it to resolve the issues I've found.
    (see https://github.com/pydata/xarray/blob/master/xarray/backends/api.py)

    Current issues with open_mfdataset (1/8/2020):
    1. open_mfdataset only uses the attrs from the first nc file
    2. open_mfdataset will not run with parallel=True or with the
       distributed.LocalCluster running
    3. open_mfdataset infers time order from position.  (I could just
       sort outside of the function, but I kinda like it this way
       anyway.  Also a re-indexing would probably resolve this.)

    Only resolved item 1 so far.
    See https://github.com/pydata/xarray/issues/3684

    Parameters
    ----------
    paths : list of str
        Paths to netCDF files named with a trailing integer index,
        e.g. ``rangeangle_0.nc``, ``rangeangle_1.nc``, ...
    chnks : dict, optional
        Chunking passed to ``xr.open_dataset``; defaults to ``{}``.
    concat_dim, compat, data_vars, coords, join
        Forwarded to the nested combine (same meaning as in xarray).

    Returns
    -------
    combined : xarray Dataset
        With attributes, variables, dimensions of combined netCDF files.
        Returns dask arrays; compute to access a local numpy array.

    Raises
    ------
    ValueError
        If any path in ``paths`` does not exist.
    """
    # Ensure file paths are valid (builtin all() short-circuits; no need
    # to materialize a list for np.all).
    if not all(os.path.exists(x) for x in paths):
        raise ValueError(
            'Check paths supplied to function. Some/all files do not exist.')

    def _file_index(path):
        # 'rangeangle_12.nc' -> 12; assumes exactly one '_' before the
        # integer index in the filename.
        return int(os.path.splitext(os.path.split(path)[1])[0].split('_')[1])

    # Sort paths by their filename index (stable sort, same ordering as
    # the previous argsort-then-reindex approach).
    sort_paths = sorted(paths, key=_file_index)

    # Build out the arguments for the nested combine.
    if isinstance(concat_dim, (str, xr.DataArray)) or concat_dim is None:
        concat_dim = [concat_dim]
    combined_ids_paths = _infer_concat_order_from_positions(sort_paths)
    ids, paths = (list(combined_ids_paths.keys()),
                  list(combined_ids_paths.values()))

    if chnks is None:
        chnks = {}
    datasets = [
        xr.open_dataset(p, engine='netcdf4', chunks=chnks, lock=None,
                        autoclose=None)
        for p in paths
    ]

    combined = _nested_combine(datasets, concat_dims=concat_dim,
                               compat=compat, data_vars=data_vars,
                               coords=coords, ids=ids, join=join)
    # Merge attrs from ALL files, not just the first (issue 1 above).
    combined.attrs = combine_xr_attributes(datasets)
    return combined