Example #1
    def test_infer_from_datasets(self):
        ds = create_test_data
        input = [ds(0), ds(1)]

        expected = {(0, ): ds(0), (1, ): ds(1)}
        actual, concat_dims = _infer_concat_order_from_positions(
            input, ['dim1'])
        assert_combined_tile_ids_equal(expected, actual)

        input = [ds(0), ds(1)]
        with pytest.raises(ValueError):
            _infer_concat_order_from_positions(input, ['dim1', 'extra_dim'])
Example #2
    def test_infer_from_datasets(self):
        ds = create_test_data
        input = [ds(0), ds(1)]

        expected = {(0,): ds(0), (1,): ds(1)}
        actual, concat_dims = _infer_concat_order_from_positions(
            input, ['dim1'])
        assert_combined_tile_ids_equal(expected, actual)

        input = [ds(0), ds(1)]
        with pytest.raises(ValueError):
            _infer_concat_order_from_positions(input, ['dim1', 'extra_dim'])
Example #3
    def test_redundant_nesting(self):
        ds = create_test_data
        input = [[ds(0)], [ds(1)]]

        expected = {(0, 0): ds(0), (1, 0): ds(1)}
        actual = _infer_concat_order_from_positions(input)
        assert_combined_tile_ids_equal(expected, actual)
Example #4
    def test_single_dataset(self):
        ds = create_test_data(0)
        input = [ds]

        expected = {(0, ): ds}
        actual = _infer_concat_order_from_positions(input)
        assert_combined_tile_ids_equal(expected, actual)
Example #5
    def test_1d(self):
        ds = create_test_data
        input = [ds(0), ds(1)]

        expected = {(0, ): ds(0), (1, ): ds(1)}
        actual = _infer_concat_order_from_positions(input)
        assert_combined_tile_ids_equal(expected, actual)
Example #6
    def test_uneven_length_input(self):
        # Auto_combine won't work on ragged input
        # but this is just to increase test coverage
        ds = create_test_data
        input = [[ds(0)], [ds(1), ds(2)]]

        expected = {(0, 0): ds(0), (1, 0): ds(1), (1, 1): ds(2)}
        actual = _infer_concat_order_from_positions(input)
        assert_combined_tile_ids_equal(expected, actual)
Example #7
    def test_2d(self):
        ds = create_test_data
        input = [[ds(0), ds(1)], [ds(2), ds(3)], [ds(4), ds(5)]]

        expected = {
            (0, 0): ds(0),
            (0, 1): ds(1),
            (1, 0): ds(2),
            (1, 1): ds(3),
            (2, 0): ds(4),
            (2, 1): ds(5),
        }
        actual = _infer_concat_order_from_positions(input)
        assert_combined_tile_ids_equal(expected, actual)
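
The 2D test above shows most clearly what _infer_concat_order_from_positions does: it walks the nested input list and tags each element with a tile-ID tuple of positions, one entry per nesting level, without inspecting the elements themselves. Below is a minimal sketch of that behaviour, assuming a recent xarray release where this private helper takes a single argument and returns a dict (older releases, as in Example #1, also accepted a list of concat dims and returned the inferred dims as a second value). Plain strings stand in for datasets.

# Sketch only: _infer_concat_order_from_positions is a private xarray helper,
# so its signature and return value can differ between versions.
from xarray.core.combine import _infer_concat_order_from_positions

# Strings stand in for datasets; the helper only records nested positions.
nested = [['a', 'b'], ['c', 'd'], ['e', 'f']]

tile_ids = _infer_concat_order_from_positions(nested)
print(tile_ids)
# {(0, 0): 'a', (0, 1): 'b', (1, 0): 'c', (1, 1): 'd', (2, 0): 'e', (2, 1): 'f'}
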
Example #8
    def test_ignore_empty_list(self):
        ds = create_test_data(0)
        input = [ds, []]
        expected = {(0, ): ds}
        actual = _infer_concat_order_from_positions(input)
        assert_combined_tile_ids_equal(expected, actual)
Example #9
import os

import numpy as np
import xarray as xr
from xarray.core.combine import _infer_concat_order_from_positions, _nested_combine

# combine_xr_attributes is assumed to be a helper defined alongside this function

def my_open_mfdataset(paths,
                      chnks=None,
                      concat_dim='time',
                      compat='no_conflicts',
                      data_vars='all',
                      coords='different',
                      join='outer'):
    """
    Tries to address the limitations of the existing xr.open_mfdataset function.  This is my modification of the
    existing function, tweaked to resolve the issues I've found.

    (see https://github.com/pydata/xarray/blob/master/xarray/backends/api.py)

    Current issues with open_mfdataset (1/8/2020):
    1. open_mfdataset only uses the attrs from the first nc file
    2. open_mfdataset will not run with parallel=True or with the distributed.LocalCluster running
    3. open_mfdataset infers time order from position.  (I could just sort outside of the function, but I kind of
         like it this way anyway.  A re-indexing would probably also resolve this.)

    Only item 1 is resolved so far.  See https://github.com/pydata/xarray/issues/3684

    Returns
    -------
    combined: xarray Dataset with the attributes, variables, and dimensions of the combined netCDF files.  Returns
            dask arrays; call compute() to access local numpy arrays.

    """
    # ensure file paths are valid
    pth_chk = np.all([os.path.exists(x) for x in paths])
    if not pth_chk:
        raise ValueError(
            'Check paths supplied to function.  Some/all files do not exist.')

    # sort by filename index, e.g. rangeangle_0.nc, rangeangle_1.nc, rangeangle_2.nc, etc.
    idxs = [
        int(os.path.splitext(os.path.split(x)[1])[0].split('_')[1])
        for x in paths
    ]
    sortorder = sorted(range(len(idxs)), key=lambda k: idxs[k])

    # sort_paths are the paths in sorted order by the filename index
    sort_paths = [paths[p] for p in sortorder]

    # build out the arguments for the nested combine
    if isinstance(concat_dim, (str, xr.DataArray)) or concat_dim is None:
        concat_dim = [concat_dim]
    combined_ids_paths = _infer_concat_order_from_positions(sort_paths)
    ids, paths = (list(combined_ids_paths.keys()),
                  list(combined_ids_paths.values()))
    if chnks is None:
        chnks = {}

    datasets = [
        xr.open_dataset(p,
                        engine='netcdf4',
                        chunks=chnks,
                        lock=None,
                        autoclose=None) for p in paths
    ]

    combined = _nested_combine(datasets,
                               concat_dims=concat_dim,
                               compat=compat,
                               data_vars=data_vars,
                               coords=coords,
                               ids=ids,
                               join=join)
    combined.attrs = combine_xr_attributes(datasets)
    return combined
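
Assuming a directory of netCDF files whose names end in an underscore-separated index, as the sorting logic above expects (e.g. rangeangle_0.nc, rangeangle_1.nc), usage might look like the sketch below; the directory and glob pattern are hypothetical, not part of the function.

# Hypothetical usage sketch: the directory and file names are assumptions.
from glob import glob

paths = glob('/data/converted/rangeangle_*.nc')
combined = my_open_mfdataset(paths, concat_dim='time')

# combined is a dask-backed xarray.Dataset whose attrs were merged from all of
# the input files (issue 1 in the docstring); call .compute() to load into memory.
print(combined)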