Example #1
def open_files(ncfiles, return_dsvar=False):
    """Open netCDF files, either with xray or netCDF4"""
    try:
        if _ncmodule == 'xray':
            # open files with xray
            try:
                ds = xray.open_mfdataset(ncfiles)
            except ValueError:
                ds = xray.open_mfdataset(ncfiles, decode_times=False)
                print('Warning: Using decode_times=False')
            dsvar = ds
        else:
            # open files with netCDF4
            if len(ncfiles) > 1:
                ds = netCDF4.MFDataset(ncfiles)
            else:
                ds = netCDF4.Dataset(ncfiles[0])
            dsvar = ds.variables
    except RuntimeError:
        traceback.print_exc()
        print('Warning: File(s) could not be opened: {}'.format(ncfiles))
        ds = None
        dsvar = None
    if return_dsvar:
        return ds, dsvar
    else:
        return ds
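A minimal usage sketch for the helper above (the file pattern is a placeholder, and the module-level _ncmodule switch is assumed to be set elsewhere in the source module):

import glob

# Hypothetical call; 'output*.nc' is a placeholder pattern.
ncfiles = sorted(glob.glob('output*.nc'))
ds, dsvar = open_files(ncfiles, return_dsvar=True)
if dsvar is not None:
    print(list(dsvar))  # variable names under either backend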
Example #2
 def test_deterministic_names(self):
     with create_tmp_file() as tmp:
         data = create_test_data()
         data.to_netcdf(tmp)
         with open_mfdataset(tmp) as ds:
             original_names = dict((k, v.data.name) for k, v in ds.items())
         with open_mfdataset(tmp) as ds:
             repeat_names = dict((k, v.data.name) for k, v in ds.items())
         for var_name, dask_name in original_names.items():
             self.assertIn(var_name, dask_name)
             self.assertIn(tmp, dask_name)
         self.assertEqual(original_names, repeat_names)
Example #3
 def test_lock(self):
     original = Dataset({'foo': ('x', np.random.randn(10))})
     with create_tmp_file() as tmp:
         original.to_netcdf(tmp, format='NETCDF3_CLASSIC')
         with open_dataset(tmp, chunks=10) as ds:
             task = ds.foo.data.dask[ds.foo.data.name, 0]
             self.assertIsInstance(task[-1], type(Lock()))
         with open_mfdataset(tmp) as ds:
             task = ds.foo.data.dask[ds.foo.data.name, 0]
             self.assertIsInstance(task[-1], type(Lock()))
         with open_mfdataset(tmp, engine='scipy') as ds:
             task = ds.foo.data.dask[ds.foo.data.name, 0]
             self.assertNotIsInstance(task[-1], type(Lock()))
Example #4
 def test_open_and_do_math(self):
     original = Dataset({'foo': ('x', np.random.randn(10))})
     with create_tmp_file() as tmp:
         original.to_netcdf(tmp)
         with open_mfdataset(tmp) as ds:
             actual = 1.0 * ds
             self.assertDatasetAllClose(original, actual)
Example #5
 def test_preprocess_mfdataset(self):
     original = Dataset({'foo': ('x', np.random.randn(10))})
     with create_tmp_file() as tmp:
         original.to_netcdf(tmp)
         preprocess = lambda ds: ds.assign_coords(z=0)
         expected = preprocess(original)
         with open_mfdataset(tmp, preprocess=preprocess) as actual:
             self.assertDatasetIdentical(expected, actual)
Example #6
    def test_open_mfdataset(self):
        original = Dataset({'foo': ('x', np.random.randn(10))})
        with create_tmp_file() as tmp1:
            with create_tmp_file() as tmp2:
                original.isel(x=slice(5)).to_netcdf(tmp1)
                original.isel(x=slice(5, 10)).to_netcdf(tmp2)
                with open_mfdataset([tmp1, tmp2]) as actual:
                    self.assertIsInstance(actual.foo.variable.data, da.Array)
                    self.assertEqual(actual.foo.variable.data.chunks,
                                     ((5, 5),))
                    self.assertDatasetAllClose(original, actual)
                with open_mfdataset([tmp1, tmp2], chunks={'x': 3}) as actual:
                    self.assertEqual(actual.foo.variable.data.chunks,
                                     ((3, 2, 3, 2),))

        with self.assertRaisesRegexp(IOError, 'no files to open'):
            open_mfdataset('foo-bar-baz-*.nc')
Example #7
 def test_save_mfdataset_roundtrip(self):
     original = Dataset({'foo': ('x', np.random.randn(10))})
     datasets = [original.isel(x=slice(5)), original.isel(x=slice(5, 10))]
     with create_tmp_file() as tmp1:
         with create_tmp_file() as tmp2:
             save_mfdataset(datasets, [tmp1, tmp2])
             with open_mfdataset([tmp1, tmp2]) as actual:
                 self.assertDatasetIdentical(actual, original)
Example #8
 def _get_grid_files(self):
     """Get the files holding grid data for an aospy object."""
     datasets = []
     for path in self.grid_file_paths:
         try:
             ds = xray.open_dataset(path, decode_times=False)
         except TypeError:
             ds = xray.open_mfdataset(path, decode_times=False)
         datasets.append(ds)
     return tuple(datasets)
Example #9
def load_experiment(exp_dir, name="single_timestep", format='csv'):
    """ Load the results from a complete experiment into a DataFrame.

    Parameters
    ----------
    exp_dir : str
        The path to the directory containing the output CSV files
        from the simulations.
    name : str
        The name of the experiment files; default is "single_timestep"
    format : str
        Either "csv" or "nc" for loading the correct input

    """

    fns = sorted(glob.glob(os.path.join(exp_dir, "%s*.%s" % (name, format))))
    print("Found %d files" % len(fns))

    if format == 'csv':

        dfs = []
        print("Reading...")
        for fn in fns:
            print("   ", fn)
            dfs.append(pd.read_csv(fn, index_col=0))

        df = pd.concat(dfs, ignore_index=True)

        return df

    elif format == 'nc':

        print("Reading...")
        ds = xray.open_mfdataset(fns)
        ds.set_coords(["lat", "lon", "lev"], inplace=True)
        return ds

    else:
        raise ValueError("Format should either be 'nc' or 'csv'.")
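A hedged usage sketch for load_experiment (the directory and experiment name are placeholder assumptions, not from the source):

# Hypothetical paths; adjust to the actual experiment layout.
df = load_experiment('experiments/run01', name='single_timestep', format='csv')
print(df.shape)

ds = load_experiment('experiments/run01', name='single_timestep', format='nc')
print(ds)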
Example #10
    out_tuple = In_tup(**the_dict)
    return out_tuple


if __name__ == "__main__":

    the_files = glob.glob('mar*nc')
    the_files.sort(key=sort_name)

    #
    #  put the 10 ensembles together along a new "ens" dimension
    #  checkpoint the output fields for future runs
    #
    firstrun = False
    if firstrun:
        ds = xray.open_mfdataset(the_files, engine='netcdf4', concat_dim='ens')

        # dump the structure
        print(ds)
        #
        #  3-d ensemble average for temp
        #
        x = ds['x']
        y = ds['y']
        z = ds['z']
        temp = ds['TABS']
        mean_temp = temp[:, 0, :, :, :].mean(dim='ens')
        #
        # same for velocity
        #
        wvel = ds['W']
Example #11
def get_timeseries(ncfiles, varn, grid,
                   reducefunc=np.nanmean,
                   latlim=None, lonlim=None, k=0):
    """Get time series of any 2D POP field reduced by a numpy function
    
    Parameters
    ----------
    ncfiles : list of str
        paths to input files
    varn : str
        variable name
    grid : str ('T' or 'U')
        which grid the variable is on
    reducefunc : function
        function to reduce the selected region
        NOTE: must be NaN-aware
    latlim : tup
        latitude limits for maximum
    lonlim : tup
        longitude limits for maximum
    k : int
        layer
    """
    n = len(ncfiles)
    _nfiles_diag(n)
    maxn = get_ulimitn()

    # get mask
    with xray.open_dataset(ncfiles[0], decode_times=False) as ds:
        if latlim is None and lonlim is None:
            mask = None
        else:
            mask = poppygrid.get_grid_mask(
                    lon=ds[grid + 'LONG'],
                    lat=ds[grid + 'LAT'],
                    lonlim=lonlim, latlim=latlim)
            mask &= ds.variables['KM' + grid][:] > 0

    # read data
    if n <= maxn:
        with xray.open_mfdataset(ncfiles, decode_times=False) as ds:
            # select variable
            ds = ds[varn]
            # select level
            try:
                ds = ds.isel(z_t=k)
            except ValueError:
                pass
            # apply mask
            if mask is not None:
                ds = ds.where(mask)
            tseries = ds.reduce(reducefunc, ['nlon', 'nlat']).values
            timevar = ds['time']
            timeax = utils.get_time_decimal_year(timevar)
    else:
        timeax = np.zeros(n)
        tseries = np.zeros(n)
        for i, fname in enumerate(ncfiles):
            with xray.open_dataset(fname, decode_times=False) as ds:
                # select variable
                ds = ds[varn]
                # select level
                try:
                    ds = ds.isel(z_t=k)
                except ValueError:
                    pass
                # apply mask
                if mask is not None:
                    ds = ds.where(mask)
                tseries[i] = ds.reduce(reducefunc, ['nlon', 'nlat']).values
                timevar = ds['time']
                timeax[i] = utils.get_time_decimal_year(timevar)

    # output
    if use_pandas:
        index = pd.Index(timeax, name='ModelYear')
        ts = pd.Series(tseries, index=index, name=varn)
        _pandas_add_meta_data(ts, meta=dict(
            latlim=latlim,
            lonlim=lonlim,
            varn=varn,
            reducefunc=str(reducefunc),
            k=k,
            grid=grid,
            ))
        return ts
    else:
        return tseries, timeax
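A hypothetical call sketch for get_timeseries (the file pattern, variable name, and region limits are placeholder assumptions, not from the source):

import glob
import numpy as np

# e.g. a region-mean temperature series from monthly POP output
ncfiles = sorted(glob.glob('pop_monthly_*.nc'))
ts = get_timeseries(ncfiles, 'TEMP', 'T',
                    reducefunc=np.nanmean,
                    latlim=(-5, 5), lonlim=(190, 240), k=0)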
Example #12
import numpy as np
import os, sys, re, xray
from rasterio import Affine as A
from rasterio.warp import reproject, RESAMPLING
from osgeo import gdal
from mpl_toolkits.basemap import Basemap, addcyclic, shiftgrid

# some setup pathing
input_dir = '~/Documents/hur'
os.chdir( os.path.expanduser( input_dir ) )  # expand '~' so chdir resolves the path

# the level of the atmosphere we want to use
atmos_level = 11

# open multiple datasets as a single file
xds = xray.open_mfdataset( 'hur_Amon_GFDL-CM3_historical_r1i1p1_*.nc' )
xds_hur = xds.hur.loc['1900-01-01':'2005-12-12'] # slice the dataset using the time variable in xray object
hur_lev = xds_hur[ :, atmos_level, ... ]

# calculate climatology and anomalies
climatology = hur_lev.loc[ '1961-01-01':'1990-12-31' ].groupby( 'time.month' ).mean( 'time' )
anomalies = hur_lev.groupby( 'time.month' ) - climatology

# # # REPROJECT AND CROP EXTENT
# what do we need to do to properly resample the data
time_len, rows, cols = hur_lev.shape
# NOTE: geotransform = [left, res, 0.0, top, 0.0, res]
height = rows
width = cols
crs = 'epsg:4326'
affine = A( *[np.diff( xds.lon )[ 0 ], 0.0, -180.0, 0.0, -np.diff( xds.lat )[ 0 ], 90.0] )
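The snippet stops after building the source affine; a hedged sketch of the reproject call this setup points toward (the destination grid here simply reuses the source grid, which is an assumption, not the author's choice):

# Hypothetical continuation: warp one time slice; the dst grid is a placeholder.
dst = np.empty( (height, width), dtype=np.float32 )
reproject( np.asarray( hur_lev[ 0 ] ), dst,
           src_transform=affine, src_crs={ 'init': crs },
           dst_transform=affine, dst_crs={ 'init': crs },
           resampling=RESAMPLING.bilinear )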
Example #13
#import Ngl
#import math
from scipy import stats
from rhwhitepackages.readwrite import shiftlons
from rhwhitepackages.readwrite import xrayOpen
from rhwhitepackages.stats import regressmaps
from rhwhitepackages.readwrite import getdenfilename

# plotting
import xray as xr
import xray.plot as xplt

uvindir = '/home/disk/eos4/rachel/Obs/ERAI/uv'
startyr = 1998
endyr = 2015
for iyear in range(startyr, endyr):
    uvfile = xr.open_mfdataset(uvindir + '/interim_daily_' + str(iyear) +
                               '*.grb')
    ulev, vlev = uvfile['u'], uvfile['v']
    udash = ulev - ulev.mean(dim='longitude')
    vdash = vlev - vlev.mean(dim='longitude')
    EKEall = 0.5 * ((udash * udash) + (vdash * vdash))  # EKE from the eddy (zonal-anomaly) winds
    EKEyears = EKEall.groupby('time.month').sum(dim='time')
    EKEyears = EKEyears.rename({'month': 'time'})
    EKEyears = EKEyears.rename({'latitude': 'lat'})
    EKEyears = EKEyears.rename({'longitude': 'lon'})
    EKEds = xr.Dataset({'EKE': EKEyears})
    EKEds.to_netcdf(uvindir + '/EKE_' + str(iyear) + '.nc', mode='w')
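The zonal-anomaly step is the core of the EKE calculation; a minimal self-contained sketch on synthetic data (array shapes and dimension names are assumptions):

import numpy as np
import xray

# toy (time, latitude, longitude) wind fields
u = xray.DataArray(np.random.randn(4, 8, 16),
                   dims=['time', 'latitude', 'longitude'])
v = xray.DataArray(np.random.randn(4, 8, 16),
                   dims=['time', 'latitude', 'longitude'])
udash = u - u.mean(dim='longitude')  # deviation from the zonal mean
vdash = v - v.mean(dim='longitude')
eke = 0.5 * (udash ** 2 + vdash ** 2)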
