Example 1
 def test_auto_combine_with_coords(self):
     objs = [
         Dataset({"foo": ("x", [0])}, coords={"x": ("x", [0])}),
         Dataset({"foo": ("x", [1])}, coords={"x": ("x", [1])}),
     ]
     with pytest.warns(FutureWarning, match="supplied have global"):
         auto_combine(objs)
Example 2
 def test_auto_combine_with_coords(self):
     objs = [
         Dataset({'foo': ('x', [0])}, coords={'x': ('x', [0])}),
         Dataset({'foo': ('x', [1])}, coords={'x': ('x', [1])})
     ]
     with pytest.warns(FutureWarning, match="supplied have global"):
         auto_combine(objs)
Example 3
    def test_merge_one_dim_concat_another(self):
        objs = [[
            Dataset({'foo': ('x', [0, 1])}),
            Dataset({'bar': ('x', [10, 20])})
        ], [
            Dataset({'foo': ('x', [2, 3])}),
            Dataset({'bar': ('x', [30, 40])})
        ]]
        expected = Dataset({
            'foo': ('x', [0, 1, 2, 3]),
            'bar': ('x', [10, 20, 30, 40])
        })

        actual = auto_combine(objs, concat_dim=['x', None], compat='equals')
        assert_identical(expected, actual)

        actual = auto_combine(objs)
        assert_identical(expected, actual)

        # Proving it works symmetrically
        objs = [[
            Dataset({'foo': ('x', [0, 1])}),
            Dataset({'foo': ('x', [2, 3])})
        ],
                [
                    Dataset({'bar': ('x', [10, 20])}),
                    Dataset({'bar': ('x', [30, 40])})
                ]]
        actual = auto_combine(objs, concat_dim=[None, 'x'], compat='equals')
        assert_identical(expected, actual)
Example 4
    def test_auto_combine(self):
        objs = [Dataset({'x': [0]}), Dataset({'x': [1]})]
        actual = auto_combine(objs)
        expected = Dataset({'x': [0, 1]})
        self.assertDatasetIdentical(expected, actual)

        actual = auto_combine([actual])
        self.assertDatasetIdentical(expected, actual)

        objs = [Dataset({'x': [0, 1]}), Dataset({'x': [2]})]
        actual = auto_combine(objs)
        expected = Dataset({'x': [0, 1, 2]})
        self.assertDatasetIdentical(expected, actual)

        # ensure auto_combine handles non-sorted variables
        objs = [Dataset(OrderedDict([('x', ('a', [0])), ('y', ('a', [0]))])),
                Dataset(OrderedDict([('y', ('a', [1])), ('x', ('a', [1]))]))]
        actual = auto_combine(objs)
        expected = Dataset({'x': ('a', [0, 1]), 'y': ('a', [0, 1])})
        self.assertDatasetIdentical(expected, actual)

        objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'y': [1], 'x': [1]})]
        with self.assertRaisesRegexp(ValueError, 'too many .* dimensions'):
            auto_combine(objs)

        objs = [Dataset({'x': 0}), Dataset({'x': 1})]
        with self.assertRaisesRegexp(ValueError, 'cannot infer dimension'):
            auto_combine(objs)

        objs = [Dataset({'x': [0], 'y': [0]}), Dataset({'x': [0]})]
        with self.assertRaises(KeyError):
            auto_combine(objs)
Example 5
    def test_auto_combine(self):
        objs = [Dataset({"x": [0]}), Dataset({"x": [1]})]
        actual = auto_combine(objs)
        expected = Dataset({"x": [0, 1]})
        assert_identical(expected, actual)

        actual = auto_combine([actual])
        assert_identical(expected, actual)

        objs = [Dataset({"x": [0, 1]}), Dataset({"x": [2]})]
        actual = auto_combine(objs)
        expected = Dataset({"x": [0, 1, 2]})
        assert_identical(expected, actual)

        # ensure auto_combine handles non-sorted variables
        objs = [
            Dataset(OrderedDict([("x", ("a", [0])), ("y", ("a", [0]))])),
            Dataset(OrderedDict([("y", ("a", [1])), ("x", ("a", [1]))])),
        ]
        actual = auto_combine(objs)
        expected = Dataset({"x": ("a", [0, 1]), "y": ("a", [0, 1])})
        assert_identical(expected, actual)

        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"y": [1], "x": [1]})]
        with raises_regex(ValueError, "too many .* dimensions"):
            auto_combine(objs)

        objs = [Dataset({"x": 0}), Dataset({"x": 1})]
        with raises_regex(ValueError, "cannot infer dimension"):
            auto_combine(objs)

        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})]
        with pytest.raises(KeyError):
            auto_combine(objs)
Example 6
    def test_auto_combine_no_concat(self):
        objs = [Dataset({'x': 0}), Dataset({'y': 1})]
        actual = auto_combine(objs)
        expected = Dataset({'x': 0, 'y': 1})
        assert_identical(expected, actual)

        objs = [Dataset({'x': 0, 'y': 1}), Dataset({'y': np.nan, 'z': 2})]
        actual = auto_combine(objs)
        expected = Dataset({'x': 0, 'y': 1, 'z': 2})
        assert_identical(expected, actual)

        data = Dataset({'x': 0})
        actual = auto_combine([data, data, data], concat_dim=None)
        assert_identical(data, actual)

        # Single object, with a concat_dim explicitly provided
        # Test the issue reported in GH #1988
        objs = [Dataset({'x': 0, 'y': 1})]
        dim = DataArray([100], name='baz', dims='baz')
        actual = auto_combine(objs, concat_dim=dim)
        expected = Dataset({'x': ('baz', [0]), 'y': ('baz', [1])},
                           {'baz': [100]})
        assert_identical(expected, actual)

        # Just making sure that auto_combine is doing what is
        # expected for non-scalar values, too.
        objs = [Dataset({'x': ('z', [0, 1]), 'y': ('z', [1, 2])})]
        dim = DataArray([100], name='baz', dims='baz')
        actual = auto_combine(objs, concat_dim=dim)
        expected = Dataset({'x': (('baz', 'z'), [[0, 1]]),
                            'y': (('baz', 'z'), [[1, 2]])},
                           {'baz': [100]})
        assert_identical(expected, actual)
Example 7
 def test_auto_combine_still_fails(self):
     # concat can't handle new variables (yet):
     # https://github.com/pydata/xarray/issues/508
     datasets = [Dataset({'x': 0}, {'y': 0}),
                 Dataset({'x': 1}, {'y': 1, 'z': 1})]
     with self.assertRaises(ValueError):
         auto_combine(datasets, 'y')
Example 8
    def test_auto_combine(self):
        objs = [Dataset({"x": [0]}), Dataset({"x": [1]})]
        actual = auto_combine(objs)
        expected = Dataset({"x": [0, 1]})
        self.assertDatasetIdentical(expected, actual)

        actual = auto_combine([actual])
        self.assertDatasetIdentical(expected, actual)

        objs = [Dataset({"x": [0, 1]}), Dataset({"x": [2]})]
        actual = auto_combine(objs)
        expected = Dataset({"x": [0, 1, 2]})
        self.assertDatasetIdentical(expected, actual)

        # ensure auto_combine handles non-sorted dimensions
        objs = [
            Dataset(OrderedDict([("x", ("a", [0])), ("y", ("a", [0]))])),
            Dataset(OrderedDict([("y", ("a", [1])), ("x", ("a", [1]))])),
        ]
        actual = auto_combine(objs)
        expected = Dataset({"x": ("a", [0, 1]), "y": ("a", [0, 1]), "a": [0, 0]})
        self.assertDatasetIdentical(expected, actual)

        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"y": [1], "x": [1]})]
        with self.assertRaisesRegexp(ValueError, "too many .* dimensions"):
            auto_combine(objs)

        objs = [Dataset({"x": 0}), Dataset({"x": 1})]
        with self.assertRaisesRegexp(ValueError, "cannot infer dimension"):
            auto_combine(objs)

        objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})]
        with self.assertRaises(KeyError):
            auto_combine(objs)
Example 9
    def test_auto_combine_no_concat(self):
        objs = [Dataset({"x": 0}), Dataset({"y": 1})]
        actual = auto_combine(objs)
        expected = Dataset({"x": 0, "y": 1})
        assert_identical(expected, actual)

        objs = [Dataset({"x": 0, "y": 1}), Dataset({"y": np.nan, "z": 2})]
        actual = auto_combine(objs)
        expected = Dataset({"x": 0, "y": 1, "z": 2})
        assert_identical(expected, actual)

        data = Dataset({"x": 0})
        actual = auto_combine([data, data, data], concat_dim=None)
        assert_identical(data, actual)

        # Single object, with a concat_dim explicitly provided
        # Test the issue reported in GH #1988
        objs = [Dataset({"x": 0, "y": 1})]
        dim = DataArray([100], name="baz", dims="baz")
        actual = auto_combine(objs, concat_dim=dim)
        expected = Dataset({"x": ("baz", [0]), "y": ("baz", [1])}, {"baz": [100]})
        assert_identical(expected, actual)

        # Just making sure that auto_combine is doing what is
        # expected for non-scalar values, too.
        objs = [Dataset({"x": ("z", [0, 1]), "y": ("z", [1, 2])})]
        dim = DataArray([100], name="baz", dims="baz")
        actual = auto_combine(objs, concat_dim=dim)
        expected = Dataset(
            {"x": (("baz", "z"), [[0, 1]]), "y": (("baz", "z"), [[1, 2]])},
            {"baz": [100]},
        )
        assert_identical(expected, actual)
Example 10
 def test_auto_combine_with_merge_and_concat(self):
     objs = [
         Dataset({"x": [0]}),
         Dataset({"x": [1]}),
         Dataset({"z": ((), 99)})
     ]
     with pytest.warns(FutureWarning, match="require both concatenation"):
         auto_combine(objs)
Example 11
    def test_single_dataset(self):
        objs = [Dataset({'x': [0]}), Dataset({'x': [1]})]
        actual = auto_combine(objs)
        expected = Dataset({'x': [0, 1]})
        assert_identical(expected, actual)

        actual = auto_combine(actual)
        assert_identical(expected, actual)
Example 12
    def test_single_dataset(self):
        objs = [Dataset({'x': [0]}), Dataset({'x': [1]})]
        actual = auto_combine(objs)
        expected = Dataset({'x': [0, 1]})
        assert_identical(expected, actual)

        actual = auto_combine(actual)
        assert_identical(expected, actual)
Example 13
    def calculate_static_means_ds(
        self,
        static_ds: xr.Dataset,
        test_year: Union[int, List[int]],
        global_means_bool: bool = False,
        pixel_means_bool: bool = True,
    ) -> xr.Dataset:
        dynamic_ds = self._make_dataset(static=False, overwrite_dims=False)

        # ignore test years in calculation of means
        min_year = dynamic_ds.time.min()
        test_year = ([test_year]
                     if not isinstance(test_year, Iterable) else test_year)
        dynamic_ds = dynamic_ds.sel(
            time=slice(min_year, str(min(test_year) - 1))  # type: ignore
        )
        assert all(
            int(yr) not in np.unique(dynamic_ds["time.year"].values)
            for yr in test_year)

        ones = xr.ones_like(static_ds)
        ones_da = ones[[v for v in ones.data_vars][0]]

        if global_means_bool:
            # 1. create global means ds
            global_means = dynamic_ds.mean(dim=["lat", "lon", "time"])
            global_means = ones_da * global_means
            # rename variables
            rename_map = {
                v: f"{v}_global_mean"
                for v in global_means.data_vars
            }
            global_means = global_means.rename(rename_map)

        if pixel_means_bool:
            # 2. create pixel means ds
            pixel_means = dynamic_ds.mean(dim=["time"])
            pixel_means = ones_da * pixel_means
            # rename variables
            rename_map = {v: f"{v}_pixel_mean" for v in pixel_means.data_vars}
            pixel_means = pixel_means.rename(rename_map)

        # TODO: this can be cleaned
        if global_means_bool and pixel_means_bool:
            static_mean_ds = xr.auto_combine([global_means, pixel_means])
        elif global_means_bool and not pixel_means_bool:
            static_mean_ds = xr.auto_combine([global_means])
        elif not global_means_bool and pixel_means_bool:
            static_mean_ds = xr.auto_combine([pixel_means])
        else:
            # return an empty dataset
            static_mean_ds = xr.Dataset()

        return static_mean_ds
Example 14
 def test_auto_combine_still_fails(self):
     # concat can't handle new variables (yet):
     # https://github.com/pydata/xarray/issues/508
     datasets = [
         Dataset({"x": 0}, {"y": 0}),
         Dataset({"x": 1}, {
             "y": 1,
             "z": 1
         })
     ]
     with pytest.raises(ValueError):
         auto_combine(datasets, "y")
Example 15
    def test_auto_combine_no_concat(self):
        objs = [Dataset({'x': 0}), Dataset({'y': 1})]
        actual = auto_combine(objs)
        expected = Dataset({'x': 0, 'y': 1})
        self.assertDatasetIdentical(expected, actual)

        objs = [Dataset({'x': 0, 'y': 1}), Dataset({'y': np.nan, 'z': 2})]
        actual = auto_combine(objs)
        expected = Dataset({'x': 0, 'y': 1, 'z': 2})
        self.assertDatasetIdentical(expected, actual)

        data = Dataset({'x': 0})
        actual = auto_combine([data, data, data], concat_dim=None)
        self.assertDatasetIdentical(data, actual)
Example 16
def convert_clusters_to_ds(
    ks: List[int],
    static_clusters: Dict[int, np.array],
    pixels: np.ndarray,
    latitudes: np.ndarray,
    longitudes: np.ndarray,
    time: Union[pd.Timestamp, int] = 1,
) -> xr.Dataset:
    """calculate a spatial xr.Dataset object with the lat, lon, time
    coordinates restored (for plotting KMeans results on a map).
    """
    out = []
    for k in ks:
        cluster = np.array([v for v in static_clusters[k].values()])
        coords = {"pixel": pixels}
        dims = ["pixel"]
        cluster_ds = xr.Dataset({
            f"cluster_{k}": (dims, cluster),
            "lat": (dims, latitudes),
            "lon": (dims, longitudes),
            "time": (dims, [time for _ in range(len(latitudes))]),
        })
        out.append(cluster_ds)

    static_cluster_ds = xr.auto_combine(out)
    static_cluster_ds = (static_cluster_ds.to_dataframe().set_index(
        ["time", "lat", "lon"]).to_xarray())

    return static_cluster_ds
Example 17
 def test_infer_order_from_coords(self):
     # Should pass once inferring order from coords is implemented
     data = create_test_data()
     objs = [data.isel(dim2=slice(4, 9)), data.isel(dim2=slice(4))]
     actual = auto_combine(objs)  # but with infer_order_from_coords=True
     expected = data
     assert_identical(expected, actual)
Example 18
def select(data, names, **kwargs):
    """Select only *names* from *data*

    Returns an xarray.Dataset containing the variables *names*. *data* is a
    dict of {level: xr.Dataset}, such as returned by load_ceic(). Optional
    *kwargs* are used to further subset the data.

    Data at the national level is assigned 'gbcode' 0.
    """
    # TODO merge metadata in Dataset- and variable-level attrs
    #      - can use logic like in _make_attrs()

    # Process arguments
    if isinstance(names, str):
        names = [names]

    to_combine = []

    for level in sorted(data.keys()):
        # Select only the variable of interest
        level_vars = list(filter(lambda v: v in data[level].data_vars, names))
        ds = data[level][level_vars]

        # Add the 'gbcode' dimension to data at the national level
        #   https://github.com/pydata/xarray/issues/170
        if level == 0:
            ds = xr.concat([ds], dim=pd.Index([0], name='gbcode'))

        to_combine.append(ds)

    return xr.auto_combine(to_combine, concat_dim='gbcode').sel(**kwargs)
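A minimal usage sketch for select(), assuming load_ceic() returns the {level: xr.Dataset} dict described in the docstring; the variable name and the gbcode value are hypothetical:

# Hypothetical: select the 'gdp' variable for a single region code.
ceic = load_ceic()
gdp = select(ceic, 'gdp', gbcode=110000)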
Example 19
    def create_variables_for_n_timesteps_predictions(
            self,
            ds: xr.Dataset,
            tstep_coord_name: str = "months_ahead") -> xr.Dataset:
        """Drop the forecast_horizon & initialisation_date variables"""
        assert all(
            np.isin(
                ["initialisation_date", "forecast_horizon", tstep_coord_name],
                [c for c in ds.coords],
            )
        ), ("Expecting to have "
            f"initialisation_date forecast_horizon {tstep_coord_name} in ds.coords"
            f"currently: {[c for c in ds.coords]}")

        timesteps = np.unique(ds[tstep_coord_name])
        variables = [v for v in ds.data_vars]

        all_timesteps = []
        for step in timesteps:
            d = self.get_n_timestep_ahead_data(
                ds, step, tstep_coord_name=tstep_coord_name)
            d = d.drop(
                ["initialisation_date", "forecast_horizon", tstep_coord_name])
            # drop the old variables too (so not duplicated)
            d = d.drop(variables)
            all_timesteps.append(d)

        return xr.auto_combine(all_timesteps)
Example 20
def convert_clusters_to_ds(
    ks: List[int],
    static_clusters: Dict[int, np.array],
    pixels: np.ndarray,
    latitudes: np.ndarray,
    longitudes: np.ndarray,
    time: Union[pd.Timestamp, int] = 1,
) -> xr.Dataset:
    """Create an xr.Dataset object from the output of the static
    embedding clustering. Allows for easy plotting, subsetting
    and all the other goodness of xarray objects.
    """
    out = []
    for k in ks:
        cluster = np.array([v for v in static_clusters[k].values()])
        coords = {"pixel": pixels}
        dims = ["pixel"]
        cluster_ds = xr.Dataset({
            f"cluster_{k}": (dims, cluster),
            "lat": (dims, latitudes),
            "lon": (dims, longitudes),
            "time": (dims, [time for _ in range(len(latitudes))]),
        })
        out.append(cluster_ds)

    static_cluster_ds = xr.auto_combine(out)
    static_cluster_ds = (static_cluster_ds.to_dataframe().set_index(
        ["time", "lat", "lon"]).to_xarray())

    return static_cluster_ds
Example 21
 def wind_composite(self, uv_files):
     ini_time = self.time['ini'].format('YYYYMMDDHH')
     shift_time = str(self.time['shift']).zfill(2)
     composite_name = 'ws_{}_{}.nc'.format(ini_time, shift_time)
     composite_file = self.data_path + self.base_path + composite_name
     # os.system('rm {}'.format(composite_file))
     dataset = []
     if not os.path.exists(composite_file):
         for uv_file in uv_files:
             try:
                 uv = xr.open_dataset(uv_file,
                                      engine='cfgrib',
                                      backend_kwargs={
                                          'filter_by_keys': {
                                              'typeOfLevel':
                                              'heightAboveGround',
                                              'level': 10
                                          }
                                      })
             except Exception as e:
                 print(
                     'GEFS fcst Wind Composite Failed: no uv files {}: {}'.
                     format(uv_file, e))
                 return
             else:
                 print('GEFS fcst wind composite: {}'.format(uv_file))
                 ws = xr.Dataset({'ws': (uv["u10"]**2 + uv["v10"]**2)**0.5})
                 ws = ws.expand_dims(['valid_time', 'number']).drop(
                     ['time', 'step']).rename({'valid_time': 'time'})
                 dataset.append(ws)
         ws_ens = xr.auto_combine(dataset)
         ws_ens.to_netcdf(composite_file)
     idx_files = glob.glob(self.data_path + self.base_path + '*.idx')
     for file in idx_files:
         os.remove(file)
Example 22
    def _process_static(
        self,
        test_year: Union[int, List[int]],
        global_means: bool = True,
        pixel_means: bool = True,
    ) -> None:
        """
        Note:
        requires `test_year` so that test years can be ignored in the
        calculation of spatial means and global means of dynamic variables.
        """
        # this function assumes the static data has only two dimensions,
        # lat and lon

        output_file = self.static_output_folder / "data.nc"
        if output_file.exists():
            warnings.warn("A static data file already exists!")
            return None

        # here, we overwrite the dims because topography (a static variable)
        # uses CDO for regridding, which yields very slightly different
        # coordinates (it seems from rounding)
        try:
            static_ds = self._make_dataset(static=True, overwrite_dims=True)
        except ValueError:
            print("No static data features included! Creating static ds")
            static_ds = None
        # create dynamic_variable means for input to static data
        static_mean_ds = self.calculate_static_means_ds(
            static_ds=static_ds,
            test_year=test_year,
            global_means_bool=global_means,
            pixel_means_bool=pixel_means,
        )

        if static_ds is None:
            static_ds = static_mean_ds
        else:
            static_ds = xr.auto_combine([static_ds, static_mean_ds])

        normalization_values: DefaultDict[str, Dict[str,
                                                    float]] = defaultdict(dict)

        for var in static_ds.data_vars:
            if var.endswith("one_hot"):
                mean = 0.0
                std = 1.0
            else:
                mean = float(static_ds[var].mean(dim=["lat", "lon"],
                                                 skipna=True).values)
                std = float(static_ds[var].std(dim=["lat", "lon"],
                                               skipna=True).values)

            normalization_values[var]["mean"] = mean
            normalization_values[var]["std"] = std

        static_ds.to_netcdf(self.static_output_folder / "data.nc")
        savepath = self.static_output_folder / "normalizing_dict.pkl"
        with savepath.open("wb") as f:
            pickle.dump(normalization_values, f)
Example 23
 def test_infer_order_from_coords(self):
     # Should pass once inferring order from coords is implemented
     data = create_test_data()
     objs = [data.isel(dim2=slice(4, 9)), data.isel(dim2=slice(4))]
     actual = auto_combine(objs)  # but with infer_order_from_coords=True
     expected = data
     assert_identical(expected, actual)
Example 24
 def test_auto_combine_order_by_appearance_not_coords(self):
     objs = [
         Dataset({"foo": ("x", [0])}, coords={"x": ("x", [1])}),
         Dataset({"foo": ("x", [1])}, coords={"x": ("x", [0])}),
     ]
     actual = auto_combine(objs)
     expected = Dataset({"foo": ("x", [0, 1])}, coords={"x": ("x", [1, 0])})
     assert_identical(expected, actual)
Example 25
def ds_date_range(t_start, t_end, datadir=DATA_DIR, product='gdas1', **kws):
    """Load model data from a date range as Dataset."""
    filename_fmt = FILENAME_FMT.format(product=product)
    filepath_fmt = path.join(datadir, product, filename_fmt)
    filepaths = strftime_date_range(t_start, t_end, filepath_fmt)
    ds = xr.auto_combine([xr.open_dataset(fp) for fp in filepaths], **kws)
    _, i = np.unique(ds.time, return_index=True)
    return ds.isel(time=i)
Example 26
    def test_merge_one_dim_concat_another(self):
        objs = [[Dataset({'foo': ('x', [0, 1])}), Dataset({'bar': ('x', [10, 20])})],
                [Dataset({'foo': ('x', [2, 3])}), Dataset({'bar': ('x', [30, 40])})]]
        expected = Dataset({'foo': ('x', [0, 1, 2, 3]),
                            'bar': ('x', [10, 20, 30, 40])})

        actual = auto_combine(objs, concat_dim=['x', None])
        assert_identical(expected, actual)

        actual = auto_combine(objs)
        assert_identical(expected, actual)

        # Proving it works symmetrically
        objs = [[Dataset({'foo': ('x', [0, 1])}), Dataset({'foo': ('x', [2, 3])})],
                [Dataset({'bar': ('x', [10, 20])}), Dataset({'bar': ('x', [30, 40])})]]
        actual = auto_combine(objs, concat_dim=[None, 'x'])
        assert_identical(expected, actual)
Example 27
    def test_auto_combine_previously_failed(self):
        # In the above scenario, one file is missing, containing one year's
        # data for one variable.
        datasets = [Dataset({'a': ('x', [0]), 'x': [0]}),
                    Dataset({'b': ('x', [0]), 'x': [0]}),
                    Dataset({'a': ('x', [1]), 'x': [1]})]
        expected = Dataset({'a': ('x', [0, 1]), 'b': ('x', [0, np.nan])})
        actual = auto_combine(datasets)
        self.assertDatasetIdentical(expected, actual)

        # Your data includes "time" and "station" dimensions, and each year's
        # data has a different set of stations.
        datasets = [Dataset({'a': ('x', [2, 3]), 'x': [1, 2]}),
                    Dataset({'a': ('x', [1, 2]), 'x': [0, 1]})]
        expected = Dataset({'a': (('t', 'x'),
                                  [[np.nan, 2, 3], [1, 2, np.nan]])})
        actual = auto_combine(datasets, concat_dim='t')
        self.assertDatasetIdentical(expected, actual)
Example 28
def read_hotswan(fileglob, dirorder=True):
    """Read multiple swan hotfiles into single gridded Dataset.

    Args:
        - fileglob (str, list): glob pattern specifying hotfiles to read and
          merge.
        - dirorder (bool): if True ensures directions are sorted.

    Returns:
        - dset (SpecDataset): spectra dataset object with different grid parts
          merged.

    Note:
        - SWAN hotfiles from MPI runs are split by the number of cores over the
          largest dim of (lat, lon), with overlapping rows or columns that are
          computed in only one of the split hotfiles. Here overlaps are merged
          so that those with higher values are kept, which assumes non-computed
          overlapping rows or columns are filled with zeros.

    """
    hotfiles = sorted(fileglob) if isinstance(fileglob, list) else sorted(
        glob.glob(fileglob))
    assert hotfiles, 'No SWAN file identified with fileglob %s' % (fileglob)

    dsets = [read_swan(hotfiles[0])]
    for hotfile in hotfiles[1:]:
        dset = read_swan(hotfile)
        # Ensure we keep non-zeros in overlapping rows or columns
        overlap = {
            attrs.LONNAME:
            set(dsets[-1].lon.values).intersection(dset.lon.values),
            attrs.LATNAME:
            set(dsets[-1].lat.values).intersection(dset.lat.values)
        }
        concat_dim = min(overlap, key=lambda x: len(overlap[x]))
        for concat_val in overlap[concat_dim]:
            slc = {concat_dim: [concat_val]}
            if dsets[-1].efth.loc[slc].sum() > dset.efth.loc[slc].sum():
                dset.efth.loc[slc] = dsets[-1].efth.loc[slc]
            else:
                dsets[-1].efth.loc[slc] = dset.efth.loc[slc]
        dsets.append(dset)
    dset = xr.auto_combine(dsets)
    set_spec_attributes(dset)
    if attrs.DIRNAME in dset and len(dset.dir) > 1:
        dset[attrs.SPECNAME].attrs.update({
            '_units': 'm^{2}.s.degree^{-1}',
            '_variable_name': 'VaDens'
        })
    else:
        dset[attrs.SPECNAME].attrs.update({
            'units': 'm^{2}.s',
            '_units': 'm^{2}.s',
            '_variable_name': 'VaDens'
        })

    return dset
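A minimal usage sketch for read_hotswan(); the glob pattern is hypothetical, and each matched file must be readable by read_swan():

# Hypothetical glob of SWAN hotfiles split across MPI ranks.
dset = read_hotswan('./swan_run/hotfile.*')
print(dset.efth)  # merged spectral variable, as accessed in the function above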
Example 29
 def merge_time(self, date_list):
     merge_name = 'ws_{}_{}-{}_{}.nc'.format(
         date_list[0]['ini'].format('YYYYMMDD'),
         str(date_list[0]['shift']).zfill(2),
         date_list[-1]['ini'].format('YYYYMMDD'),
         str(date_list[-1]['shift']).zfill(2))
     merge_file = self.data_path + merge_name
     for time in date_list[:]:
         ini_time = time['ini'].format('YYYYMMDDHH')
         shift_time = str(time['shift']).zfill(2)
         single_file = self.data_path + ini_time + '/' + 'ws_{}_{}.nc'.format(
             ini_time, shift_time)
         if os.path.exists(single_file):
             ws_exist = xr.open_dataset(single_file)
             break
         else:
             ws_exist = None
             continue
     dataset = []
     for time in date_list[:]:
         ini_time = time['ini'].format('YYYYMMDDHH')
         shift_time = str(time['shift']).zfill(2)
         single_file = self.data_path + ini_time + '/' + 'ws_{}_{}.nc'.format(
             ini_time, shift_time)
         try:
             ws = xr.open_dataset(single_file)
         except IOError as e:
             if self.label == 'enforced':
                 try:
                     ws = ws_exist.isel(time=0, drop=True)
                 except Exception as e:
                     Logger(_log, level='debug').logger.warning(
                         'merge file failed {} -> {}'.format(merge_file, e))
                     return None, None
                 else:
                     ws_time = pd.to_datetime(time['ini'].shift(
                         hours=time['shift']).format('YYYYMMDDHH'),
                                              format='%Y%m%d%H')
                     ws.coords['time'] = ws_time
                     ws = ws.expand_dims('time', 0)
                     ws['ws'].values[:, :, :, :] = 9999.
             else:
                 Logger(_log,
                        level='debug').logger.info('{}: {}'.format(time, e))
                 merge_file = None
                 ens_num = 0
                 return merge_file, ens_num
         dataset.append(ws)
     # Logger(_log, level='debug').logger.info('merge time: {}'.format(merge_file))
     ws_all = xr.auto_combine(dataset)
     for key in ws_all.dims.keys():
         if key not in ['latitude', 'longitude', 'time', 'number']:
             ws_all = ws_all.squeeze(key)
     ens_num = ws_all['number'].shape[0]
     ws_all.to_netcdf(merge_file)
     return merge_file, ens_num
Example 30
def main(torch_file, input_path, forcing_path, output):
    model = ForcedStepper.load_from_saved(torch.load(torch_file))
    inputs = xr.open_dataset(input_path).pipe(sel)
    forcings = xr.open_dataset(forcing_path).pipe(sel)
    test_error = get_test_error(model, inputs, forcings)
    src_error = get_src_error(model, inputs, forcings)

    return xr.auto_combine((test_error, src_error))\
             .assign(p=inputs.p, w=inputs.w)\
             .to_netcdf(output)
Example 31
 def test_internal_ordering(self):
     # This gives a MergeError if _auto_combine_1d is not sorting by
     # data_vars correctly, see GH #2662
     objs = [Dataset({'foo': ('x', [0, 1])}),
             Dataset({'bar': ('x', [10, 20])}),
             Dataset({'foo': ('x', [2, 3])}),
             Dataset({'bar': ('x', [30, 40])})]
     actual = auto_combine(objs, concat_dim='x', compat='equals')
     expected = Dataset({'foo': ('x', [0, 1, 2, 3]),
                         'bar': ('x', [10, 20, 30, 40])})
     assert_identical(expected, actual)
Example 32
def pp_cell(cell, timestamp, coordinate_templates, drop_list, config):
    """
    Post-process an individual cell.

    **Arguments:**

    * cell
        A `~scmtiles.grid_manager.Cell` instance identifying the cell.

    * timestamp
        A string timestamp used as part of the filename for the cell output
        file.

    * coordinate_templates
        A dictionary mapping coordinate names to xarray coordinate objects, as
        returned from `load_coorindate_templates`. This is used to look up the
        latitude and longitude of the cell from its indices.

    * config
        A `~scmtiles.config.SCMTilesConfig` instance describing the run being
        post-processed.

    **Returns:**

    * (cell_ds, filepath)
        A 2-tuple containing the cell data in an `xarray.Dataset` and the full
        path to the file the cell data were loaded from.

    """
    cell_id = 'y{:04d}x{:04d}'.format(cell.y_global, cell.x_global)
    dirname = '{}.{}'.format(timestamp, cell_id)
    dirpath = os.path.join(config.output_directory, dirname)
    filepaths = [os.path.join(dirpath, filename) for filename in SCM_OUT_FILES]
    # Load the cell dataset from file into memory, then close the input
    # file to free the file handle.
    try:
        # Work-around for problem using open_mfdataset inside a
        # multiprocessing pool where the load just waits indefinitely.
        ds_list = [xr.open_dataset(fp, drop_variables=drop_list)
                   for fp in filepaths]
        cell_ds = xr.auto_combine(ds_list)
        cell_ds.load()
        cell_ds.close()
        for ds in ds_list:
            ds.close()
    except (OSError, RuntimeError):
        msg = 'The input files "{!s}" cannot be read, do they exist?'
        raise Error(msg.format(filepaths))
    # Add scalar latitude and longitude coordinates and return the
    # modified cell dataset:
    x_value = coordinate_templates[config.xname][cell.x_global]
    y_value = coordinate_templates[config.yname][cell.y_global]
    cell_ds.coords.update({config.yname: y_value, config.xname: x_value})
    return cell_ds, dirpath
Example 33
    def test_auto_combine_previously_failed(self):
        # In the above scenario, one file is missing, containing one year's
        # data for one variable.
        datasets = [Dataset({'a': ('x', [0]), 'x': [0]}),
                    Dataset({'b': ('x', [0]), 'x': [0]}),
                    Dataset({'a': ('x', [1]), 'x': [1]})]
        expected = Dataset({'a': ('x', [0, 1]), 'b': ('x', [0, np.nan])},
                           {'x': [0, 1]})
        actual = auto_combine(datasets)
        self.assertDatasetIdentical(expected, actual)

        # Your data includes "time" and "station" dimensions, and each year's
        # data has a different set of stations.
        datasets = [Dataset({'a': ('x', [2, 3]), 'x': [1, 2]}),
                    Dataset({'a': ('x', [1, 2]), 'x': [0, 1]})]
        expected = Dataset({'a': (('t', 'x'),
                                  [[np.nan, 2, 3], [1, 2, np.nan]])},
                           {'x': [0, 1, 2]})
        actual = auto_combine(datasets, concat_dim='t')
        self.assertDatasetIdentical(expected, actual)
Example 34
 def test_auto_combine_with_new_variables(self):
     datasets = [
         Dataset({"x": 0}, {"y": 0}),
         Dataset({"x": 1}, {
             "y": 1,
             "z": 1
         })
     ]
     actual = auto_combine(datasets, "y")
     expected = Dataset({"x": ("y", [0, 1])}, {"y": [0, 1], "z": 1})
     assert_identical(expected, actual)
Example 35
 def test_internal_ordering(self):
     # This gives a MergeError if _auto_combine_1d is not sorting by
     # data_vars correctly, see GH #2662
     objs = [Dataset({'foo': ('x', [0, 1])}),
             Dataset({'bar': ('x', [10, 20])}),
             Dataset({'foo': ('x', [2, 3])}),
             Dataset({'bar': ('x', [30, 40])})]
     actual = auto_combine(objs, concat_dim='x', compat='equals')
     expected = Dataset({'foo': ('x', [0, 1, 2, 3]),
                         'bar': ('x', [10, 20, 30, 40])})
     assert_identical(expected, actual)
Example 36
    def test_combine_concat_over_redundant_nesting(self):
        objs = [[Dataset({'x': [0]}), Dataset({'x': [1]})]]
        actual = auto_combine(objs, concat_dim=[None, 'x'])
        expected = Dataset({'x': [0, 1]})
        assert_identical(expected, actual)

        objs = [[Dataset({'x': [0]})], [Dataset({'x': [1]})]]
        actual = auto_combine(objs, concat_dim=['x', None])
        expected = Dataset({'x': [0, 1]})
        assert_identical(expected, actual)

        objs = [[Dataset({'x': [0]})]]
        actual = auto_combine(objs, concat_dim=[None, None])
        expected = Dataset({'x': [0]})
        assert_identical(expected, actual)

        objs = [[Dataset({'x': [0]})]]
        actual = auto_combine(objs, concat_dim=None)
        expected = Dataset({'x': [0]})
        assert_identical(expected, actual)
Example 37
    def test_combine_concat_over_redundant_nesting(self):
        objs = [[Dataset({'x': [0]}), Dataset({'x': [1]})]]
        actual = auto_combine(objs, concat_dim=[None, 'x'])
        expected = Dataset({'x': [0, 1]})
        assert_identical(expected, actual)

        objs = [[Dataset({'x': [0]})], [Dataset({'x': [1]})]]
        actual = auto_combine(objs, concat_dim=['x', None])
        expected = Dataset({'x': [0, 1]})
        assert_identical(expected, actual)

        objs = [[Dataset({'x': [0]})]]
        actual = auto_combine(objs, concat_dim=[None, None])
        expected = Dataset({'x': [0]})
        assert_identical(expected, actual)

        objs = [[Dataset({'x': [0]})]]
        actual = auto_combine(objs, concat_dim=None)
        expected = Dataset({'x': [0]})
        assert_identical(expected, actual)
Example 38
    def test_auto_combine_previously_failed(self):
        # In the above scenario, one file is missing, containing one year's
        # data for one variable.
        datasets = [
            Dataset({
                "a": ("x", [0]),
                "x": [0]
            }),
            Dataset({
                "b": ("x", [0]),
                "x": [0]
            }),
            Dataset({
                "a": ("x", [1]),
                "x": [1]
            }),
        ]
        expected = Dataset({
            "a": ("x", [0, 1]),
            "b": ("x", [0, np.nan])
        }, {"x": [0, 1]})
        actual = auto_combine(datasets)
        assert_identical(expected, actual)

        # Your data includes "time" and "station" dimensions, and each year's
        # data has a different set of stations.
        datasets = [
            Dataset({
                "a": ("x", [2, 3]),
                "x": [1, 2]
            }),
            Dataset({
                "a": ("x", [1, 2]),
                "x": [0, 1]
            }),
        ]
        expected = Dataset(
            {"a": (("t", "x"), [[np.nan, 2, 3], [1, 2, np.nan]])},
            {"x": [0, 1, 2]})
        actual = auto_combine(datasets, concat_dim="t")
        assert_identical(expected, actual)
Example 39
    def test_auto_combine_2d(self):
        ds = create_test_data

        partway1 = concat([ds(0), ds(3)], dim='dim1')
        partway2 = concat([ds(1), ds(4)], dim='dim1')
        partway3 = concat([ds(2), ds(5)], dim='dim1')
        expected = concat([partway1, partway2, partway3], dim='dim2')

        datasets = [[ds(0), ds(1), ds(2)], [ds(3), ds(4), ds(5)]]
        result = auto_combine(datasets, concat_dim=['dim1', 'dim2'])

        assert_equal(result, expected)
Example 40
    def test_auto_combine_2d(self):
        ds = create_test_data

        partway1 = concat([ds(0), ds(3)], dim='dim1')
        partway2 = concat([ds(1), ds(4)], dim='dim1')
        partway3 = concat([ds(2), ds(5)], dim='dim1')
        expected = concat([partway1, partway2, partway3], dim='dim2')

        datasets = [[ds(0), ds(1), ds(2)], [ds(3), ds(4), ds(5)]]
        result = auto_combine(datasets, concat_dim=['dim1', 'dim2'])

        assert_equal(result, expected)
Example 41
    def test_invalid_hypercube_input(self):
        ds = create_test_data

        datasets = [[ds(0), ds(1), ds(2)], [ds(3), ds(4)]]
        with raises_regex(ValueError, 'sub-lists do not have '
                                      'consistent lengths'):
            auto_combine(datasets, concat_dim=['dim1', 'dim2'])

        datasets = [[ds(0), ds(1)], [[ds(3), ds(4)]]]
        with raises_regex(ValueError, 'sub-lists do not have '
                                      'consistent depths'):
            auto_combine(datasets, concat_dim=['dim1', 'dim2'])

        datasets = [[ds(0), ds(1)], [ds(3), ds(4)]]
        with raises_regex(ValueError, 'concat_dims has length'):
            auto_combine(datasets, concat_dim=['dim1'])
Example 42
def open_mdsdataset(dirname, iters='all', prefix=None, read_grid=True,
                    delta_t=1, ref_date=None, calendar='gregorian',
                    geometry='sphericalpolar',
                    grid_vars_to_coords=True, swap_dims=False,
                    endian=">", chunks=None,
                    ignore_unknown_vars=False,):
    """Open MITgcm-style mds (.data / .meta) file output as xarray datset.

    Parameters
    ----------
    dirname : string
        Path to the directory where the mds .data and .meta files are stored
    iters : list, optional
        The iteration numbers of the files to be read. If `None`, no data
        files will be read.
    prefix : list, optional
        List of different filename prefixes to read. Default is to read all
        available files.
    read_grid : bool, optional
        Whether to read the grid data
    delta_t : number, optional
        The timestep used in the model. (Can't be inferred.)
    ref_date : string, optional
        A date string corresponding to the zero timestep. E.g. "1990-1-1 0:0:0".
        See CF conventions [1]_
    calendar : string, optional
        A calendar allowed by CF conventions [1]_
    geometry : {'sphericalpolar', 'cartesian', 'llc'}
        MITgcm grid geometry specifier.
    swap_dims : boolean, optional
        Whether to swap the logical dimensions for physical ones.
    endian : {'=', '>', '<'}, optional
        Endianness of variables. Default for MITgcm is ">" (big endian)
    chunks : int or dict, optional
        If chunks is provided, it is used to load the new dataset into dask arrays.
    ignore_unknown_vars : boolean, optional
        Don't raise an error if unknown variables are encountered while reading
        the dataset.

    Returns
    -------
    dset : xarray.Dataset
        Dataset object containing all coordinates and variables.

    References
    ----------
    .. [1] http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/build/ch04s04.html
    """

    # get frame info for history
    frame = inspect.currentframe()
    _, _, _, arg_values = inspect.getargvalues(frame)
    del arg_values['frame']
    function_name = inspect.getframeinfo(frame)[2]

    # some checks for argument consistency
    if swap_dims and not read_grid:
        raise ValueError("If swap_dims==True, read_grid must be True.")

    # We either have a single iter, in which case we create a fresh store,
    # or a list of iters, in which case we combine.
    if iters == 'all':
        iters = _get_all_iternums(dirname, file_prefixes=prefix)
    if iters is None:
        iternum = None
    else:
        try:
            iternum = int(iters)
        # if not we probably have some kind of list
        except TypeError:
            if len(iters) == 1:
                iternum = int(iters[0])
            else:
                # We have to check to make sure we have the same prefixes at
                # each timestep...otherwise we can't combine the datasets.
                first_prefixes = prefix or _get_all_matching_prefixes(
                                                        dirname, iters[0])
                for iternum in iters:
                    these_prefixes = _get_all_matching_prefixes(
                        dirname, iternum, prefix
                    )
                    # don't care about order
                    if set(these_prefixes) != set(first_prefixes):
                        raise IOError("Could not find the expected file "
                                      "prefixes %s at iternum %g. (Instead "
                                      "found %s)" % (repr(first_prefixes),
                                                     iternum,
                                                     repr(these_prefixes)))

                # chunk at least by time
                chunks = chunks or {}

                # recursively open each dataset at a time
                datasets = [open_mdsdataset(
                        dirname, iters=iternum, delta_t=delta_t,
                        read_grid=False, swap_dims=False,
                        prefix=prefix, ref_date=ref_date, calendar=calendar,
                        geometry=geometry,
                        grid_vars_to_coords=grid_vars_to_coords,
                        endian=endian, chunks=chunks,
                        ignore_unknown_vars=ignore_unknown_vars)
                    for iternum in iters]
                # now add the grid
                if read_grid:
                    datasets.insert(0, open_mdsdataset(
                        dirname, iters=None, delta_t=delta_t,
                        read_grid=True, swap_dims=False,
                        prefix=prefix, ref_date=ref_date, calendar=calendar,
                        geometry=geometry,
                        grid_vars_to_coords=grid_vars_to_coords,
                        endian=endian, chunks=chunks,
                        ignore_unknown_vars=ignore_unknown_vars))
                # apply chunking
                ds = xr.auto_combine(datasets)
                if swap_dims:
                    ds = _swap_dimensions(ds, geometry)
                return ds

    store = _MDSDataStore(dirname, iternum, delta_t, read_grid,
                          prefix, ref_date, calendar,
                          geometry, endian,
                          ignore_unknown_vars=ignore_unknown_vars)
    ds = xr.Dataset.load_store(store)

    if swap_dims:
        ds = _swap_dimensions(ds, geometry)

    if grid_vars_to_coords:
        ds = _set_coords(ds)

    # turn all the auxiliary grid variables into coordinates
    # if grid_vars_to_coords:
    #     for k in _grid_variables:
    #         ds.set_coords(k, inplace=True)
    #     ds.set_coords('iter', inplace=True)

    if ref_date:
        ds = xr.decode_cf(ds)

    # do we need more fancy logic (like open_dataset), or is this enough
    if chunks is not None:
        ds = ds.chunk(chunks)

    # set attributes for CF conventions
    ds.attrs['Conventions'] = "CF-1.6"
    ds.attrs['title'] = "netCDF wrapper of MITgcm MDS binary data"
    ds.attrs['source'] = "MITgcm"
    arg_string = ', '.join(['%s=%s' % (str(k), repr(v))
                            for (k, v) in arg_values.items()])
    ds.attrs['history'] = ('Created by calling '
                           '`%s(%s)`'% (function_name, arg_string))

    return ds
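A minimal call sketch for open_mdsdataset(), using only parameters documented above; the run directory, prefixes, timestep and reference date are hypothetical:

# Hypothetical MITgcm run directory and file prefixes.
ds = open_mdsdataset('./run', prefix=['T', 'S'], delta_t=900,
                     ref_date='1990-1-1 0:0:0', geometry='sphericalpolar')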
Example 43
land_10m = cfeature.NaturalEarthFeature('physical', 'land', '10m',
                                        edgecolor='face',
                                        facecolor=cfeature.COLORS['land'])
states_provinces = cfeature.NaturalEarthFeature(
    category='cultural',
    name='admin_1_states_provinces_lines',
    scale='50m',
    facecolor='none')

## River forcing ##
Files = sorted(glob('/copano/d1/shared/TXLA_ROMS/inputs/rivers/txla2_river_????_AR_newT_SWpass_weekly.nc'))
ds = [xr.open_dataset(File) for File in Files]
# need to drop extra variable from 2016:
ds[-1] = ds[-1].drop('river_flag')
rds = xr.auto_combine(ds)  # all output here
# take 2/3 of total river inflow as mississippi river discharge
r = (np.abs(rds['river_transport']).sum(axis=1)*2.0/3.0).to_pandas()

# for calculating vorticity
pm = resize(resize(m.pm.data, 1), 0)
pn = resize(resize(m.pn.data, 1), 0)
f = resize(resize(m.f.data, 1), 0)

base = 'figures/' + var + '/movies/'
years = np.arange(1993, 2017)

for year in years:

    # Time period to use
    plotdates = m['ocean_time'].sel(ocean_time=str(year))
Example 44
def open_mf_wrf_dataset(paths, chunks=None,  compat='no_conflicts', lock=None,
                        preprocess=None):
    """Open multiple WRF files as a single WRF dataset.

    Requires dask to be installed. Note that if your files are sliced by time,
    certain diagnostic variables computed from accumulated variables (e.g.
    PRCP) won't be available, because they are not computable lazily.

    This code is adapted from xarray's open_mfdataset function. The xarray
    license is reproduced in the salem/licenses directory.

    Parameters
    ----------
    paths : str or sequence
        Either a string glob in the form "path/to/my/files/*.nc" or an explicit
        list of files to open.
    chunks : int or dict, optional
        Dictionary with keys given by dimension names and values given by chunk
        sizes. In general, these should divide the dimensions of each dataset.
        If int, chunk each dimension by ``chunks``.
        By default, chunks will be chosen to load entire input files into
        memory at once. This has a major impact on performance: please see
        xarray's full documentation for more details.
    compat : {'identical', 'equals', 'broadcast_equals',
              'no_conflicts'}, optional
        String indicating how to compare variables of the same name for
        potential conflicts when merging:

        - 'broadcast_equals': all values must be equal when variables are
          broadcast against each other to ensure common dimensions.
        - 'equals': all values and dimensions must be the same.
        - 'identical': all values, dimensions and attributes must be the
          same.
        - 'no_conflicts': only values which are not null in both datasets
          must be equal. The returned dataset then contains the combination
          of all non-null values.
    preprocess : callable, optional
        If provided, call this function on each dataset prior to concatenation.
    lock : False, True or threading.Lock, optional
        This argument is passed on to :py:func:`dask.array.from_array`. By
        default, a per-variable lock is used when reading data from netCDF
        files with the netcdf4 and h5netcdf engines to avoid issues with
        concurrent access when using dask's multithreaded backend.

    Returns
    -------
    xarray.Dataset

    """
    if isinstance(paths, basestring):
        paths = sorted(glob(paths))
    if not paths:
        raise IOError('no files to open')

    # TODO: current workaround to dask thread problems
    dask.set_options(get=dask.async.get_sync)

    if lock is None:
        lock = _default_lock(paths[0], 'netcdf4')
    datasets = [open_wrf_dataset(p, chunks=chunks or {}, lock=lock)
                for p in paths]
    file_objs = [ds._file_obj for ds in datasets]

    if preprocess is not None:
        datasets = [preprocess(ds) for ds in datasets]

    # TODO: add compat=compat when xarray 9.0 is out
    combined = xr.auto_combine(datasets, concat_dim='time')
    combined._file_obj = _MultiFileCloser(file_objs)
    combined.attrs = datasets[0].attrs

    # drop accumulated vars if needed (TODO: make this not hard coded)
    vns = ['PRCP', 'PRCP_C', 'PRCP_NC']
    vns = [vn for vn in vns if vn in combined.variables]
    combined = combined.drop(vns)

    return combined
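A minimal call sketch for open_mf_wrf_dataset(); the glob and the chunk sizes are hypothetical:

# Hypothetical glob of WRF output files, chunked along time.
ds = open_mf_wrf_dataset('wrfout_d01_*.nc', chunks={'time': 10})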
Example 45
def concat(xs):
    print("concatenating")
    len(xs)
    xr.auto_combine(xs, concat_dim='time').sortby('time').to_netcdf(snakemake.output[0])