Example #1
def test_decode_cf_time_bounds():

    da = DataArray(np.arange(6, dtype='int64').reshape((3, 2)),
                   coords={'time': [1, 2, 3]},
                   dims=('time', 'nbnd'), name='time_bnds')

    attrs = {'units': 'days since 2001-01',
             'calendar': 'standard',
             'bounds': 'time_bnds'}

    ds = da.to_dataset()
    ds['time'].attrs.update(attrs)
    _update_bounds_attributes(ds.variables)
    assert ds.variables['time_bnds'].attrs == {'units': 'days since 2001-01',
                                               'calendar': 'standard'}
    dsc = decode_cf(ds)
    assert dsc.time_bnds.dtype == np.dtype('M8[ns]')
    dsc = decode_cf(ds, decode_times=False)
    assert dsc.time_bnds.dtype == np.dtype('int64')

    # Do not overwrite existing attrs
    ds = da.to_dataset()
    ds['time'].attrs.update(attrs)
    bnd_attr = {'units': 'hours since 2001-01', 'calendar': 'noleap'}
    ds['time_bnds'].attrs.update(bnd_attr)
    _update_bounds_attributes(ds.variables)
    assert ds.variables['time_bnds'].attrs == bnd_attr

    # If bounds variable not available do not complain
    ds = da.to_dataset()
    ds['time'].attrs.update(attrs)
    ds['time'].attrs['bounds'] = 'fake_var'
    _update_bounds_attributes(ds.variables)
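A minimal, self-contained sketch of the behaviour this test builds on (illustrative only, not part of the test suite): decode_cf turns a numeric time coordinate carrying CF 'units'/'calendar' attributes into datetime64 values, which is why copying those attributes onto the bounds variable makes it decode the same way.

import numpy as np
import xarray as xr

# Integer time axis plus CF metadata attached to the coordinate.
ds = xr.Dataset(
    {'temperature': ('time', np.array([10.0, 11.5, 12.0]))},
    coords={'time': ('time', np.array([1, 2, 3], dtype='int64'),
                     {'units': 'days since 2001-01-01', 'calendar': 'standard'})},
)

decoded = xr.decode_cf(ds)
print(decoded['time'].dtype)   # datetime64[ns]
print(decoded['time'].values)  # 2001-01-02, 2001-01-03, 2001-01-04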
Example #2
 def test_sweep_data(self, get_loader):
     if isinstance(self, MeasuredDataVolume):
         pytest.skip("requires synthetic data")
     if get_loader == "netcdf4" and self.format == "GAMIC":
         pytest.skip("gamic needs hdf-based loader")
     with self.get_volume_data(
             get_loader,
             decode_coords=False,
             mask_and_scale=False,
             decode_times=False,
             chunks=None,
             parallel=False,
     ) as vol:
         for i, ts in enumerate(vol):
             if "02" in self.name:
                 ds = create_dataset(i, nrays=361)
             else:
                 ds = create_dataset(i)
             for j, swp in enumerate(ts):
                 xr.testing.assert_equal(swp.data, ds)
     with self.get_volume_data(
             get_loader,
             decode_coords=True,
             mask_and_scale=False,
             decode_times=True,
             chunks=None,
             parallel=False,
     ) as vol:
         for i, ts in enumerate(vol):
             for j, swp in enumerate(ts):
                 data = create_dataset(i)
                 data = data.assign_coords(create_coords(i).coords)
                 data = data.assign_coords(
                     create_site(self.data["where"]["attrs"]).coords)
                 data = data.assign_coords(
                     {"sweep_mode": "azimuth_surveillance"})
                 data = xr.decode_cf(data, mask_and_scale=False)
                 xr.testing.assert_equal(swp.data, data)
     with self.get_volume_data(
             get_loader,
             decode_coords=True,
             mask_and_scale=True,
             decode_times=True,
             chunks=None,
             parallel=False,
     ) as vol:
         for i, ts in enumerate(vol):
             for j, swp in enumerate(ts):
                 data = create_dataset(i, type=self.format)
                 data = data.assign_coords(create_coords(i).coords)
                 data = data.assign_coords(
                     create_site(self.data["where"]["attrs"]).coords)
                 data = data.assign_coords(
                     {"sweep_mode": "azimuth_surveillance"})
                 data = xr.decode_cf(data)
                 xr.testing.assert_equal(swp.data, data)
     del swp
     del ts
     del vol
     gc.collect()
Example #4
    async def get_shaped_resultcube(self,
                                    shape_query: ShapeQuery) -> xr.DataArray:
        fs = set()
        ps = await asyncio.gather(*[
            self.dataset_files(when) for when in shape_query.temporal.dates()
        ])
        for f in ps:
            if f is not None and Path(f).is_file():
                fs.add(f)

        for f in fs:
            logger.debug("Confirmed: %s" % f)

        if len(fs) == 1:
            with xr.open_dataset(*fs) as ds:
                ds = xr.decode_cf(ds)
                ds.attrs['var_name'] = "fmc_mean"
                tr = ds.sel(time=slice(
                    shape_query.temporal.start.strftime("%Y-%m-%d"),
                    shape_query.temporal.finish.strftime("%Y-%m-%d")))
                return tr

        elif len(fs) > 1:
            fs = list(set(fs))
            # open_mfdataset expects the list of paths as a single argument
            with xr.open_mfdataset(fs) as ds:
                ds = xr.decode_cf(ds)
                ds.attrs['var_name'] = "fmc_mean"
                ts = ds.sel(time=slice(
                    shape_query.temporal.start.strftime("%Y-%m-%d"),
                    shape_query.temporal.finish.strftime("%Y-%m-%d")))

                return ts
        else:
            logger.debug("No files available/gathered for that space/time.")

            return xr.DataArray([])
Example #5
def time_decoder(xds):
    if 'time' not in xds:
        return xds

    if xds.time.attrs.get('units', None):
        xds = xr.decode_cf(xds)
        xds['time'] = xr.DataArray(xds.time.values.astype('datetime64[D]'),
                                   dims=('time', ))
        return xds

    t = xds.time.values.astype(int)

    # nies workaround (they use seconds since 1980)
    if t[0] > 10000:
        t = (t / 86400).astype(int)
        add_processing(xds, 'time units converted from seconds to days')

    xds['time'] = xr.DataArray(data=t,
                               dims=('time', ),
                               coords={'time': t},
                               attrs=dict(units='days since 1980',
                                          calendar='gregorian'))
    xds = xr.decode_cf(xds)

    return xds
Example #6
    def test_moment_data(self, get_loader):
        if isinstance(self, MeasuredDataVolume):
            pytest.skip("requires synthetic data")
        if get_loader == 'netcdf4' and self.format == 'GAMIC':
            pytest.skip("gamic needs hdf-based loader")
        with self.get_volume_data(get_loader,
                                  decode_coords=False,
                                  mask_and_scale=False,
                                  decode_times=False,
                                  chunks=None,
                                  parallel=False) as vol:
            for i, ts in enumerate(vol):
                if '02' in self.name:
                    ds = create_dataset(i, nrays=361)['DBZH']
                else:
                    ds = create_dataset(i)['DBZH']
                for j, swp in enumerate(ts):
                    for k, mom in enumerate(swp):
                        xr.testing.assert_equal(mom.data, ds)
        with self.get_volume_data(get_loader,
                                  decode_coords=True,
                                  mask_and_scale=False,
                                  decode_times=True,
                                  chunks=None,
                                  parallel=False) as vol:
            for i, ts in enumerate(vol):
                for j, swp in enumerate(ts):
                    for k, mom in enumerate(swp):
                        data = create_dataset(i)
                        data = data.assign_coords(create_coords(i).coords)
                        data = data.assign_coords(
                            create_site(self.data['where']['attrs']).coords)
                        data = data.assign_coords(
                            {'sweep_mode': 'azimuth_surveillance'})
                        data = xr.decode_cf(data, mask_and_scale=False)
                        xr.testing.assert_equal(mom.data, data['DBZH'])

        with self.get_volume_data(get_loader,
                                  decode_coords=True,
                                  mask_and_scale=True,
                                  decode_times=True,
                                  chunks=None,
                                  parallel=False) as vol:
            for i, ts in enumerate(vol):
                for j, swp in enumerate(ts):
                    for k, mom in enumerate(swp):
                        data = create_dataset(i, type=self.format)
                        data = data.assign_coords(create_coords(i).coords)
                        data = data.assign_coords(
                            create_site(self.data['where']['attrs']).coords)
                        data = data.assign_coords(
                            {'sweep_mode': 'azimuth_surveillance'})
                        data = xr.decode_cf(data)
                        xr.testing.assert_equal(mom.data, data['DBZH'])
        del mom
        del swp
        del ts
        del vol
        gc.collect()
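The decode_coords switch toggled in these calls can be seen in isolation on a toy dataset (a generic sketch, not wradlib test data): with decode_coords=True, decode_cf reads a data variable's 'coordinates' attribute and promotes the listed auxiliary variables to coordinates.

import numpy as np
import xarray as xr

ds = xr.Dataset({
    'reflectivity': (('azimuth',), np.zeros(4), {'coordinates': 'elevation'}),
    'elevation': (('azimuth',), np.full(4, 0.5)),
})

with_coords = xr.decode_cf(ds, decode_coords=True)
print('elevation' in with_coords.coords)     # True

without_coords = xr.decode_cf(ds, decode_coords=False)
print('elevation' in without_coords.coords)  # False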
Example #7
 def open(self, *args, **kwargs):
     kwargs["decode_times"] = False
     da = super().open(*args, **kwargs)
     da["time"], _ = fix_time_units(da["time"])
     if hasattr(da, "to_dataset"):
         return xr.decode_cf(da.to_dataset())
     else:
         return xr.decode_cf(da)
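A self-contained sketch of the same open-then-fix-then-decode pattern. The real fix_time_units above is project-specific; the stand-in below is purely hypothetical and just patches a units string that decode_cf could not otherwise parse.

import numpy as np
import xarray as xr

def fix_time_units_sketch(time_var):
    # Hypothetical repair: assume a Unix epoch when the units string
    # lacks a "since <date>" part (the real helper may do more).
    original = time_var.attrs.get("units", "")
    if "since" not in original:
        time_var.attrs["units"] = "days since 1970-01-01"
    return time_var, original

ds = xr.Dataset(coords={"time": ("time", np.arange(3), {"units": "days"})})
ds["time"], _ = fix_time_units_sketch(ds["time"])
ds = xr.decode_cf(ds)
print(ds["time"].values)   # 1970-01-01, 1970-01-02, 1970-01-03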
Example #8
def _load(f, v):
    dataset = xr.open_dataset(f, decode_cf=False)
    if "time_bnds" in dataset:
        tb = dataset['time_bnds'].mean(axis=1)
        dataset['time'].values = tb
        dataset = xr.decode_cf(dataset)
    else:
        dataset = xr.decode_cf(dataset)
    return dataset[v]
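The same idea on a toy dataset (a generic sketch, not tied to the file read above): move each timestamp to the midpoint of its bounds, then decode. assign_coords is used here instead of writing .values in place so the CF attributes on 'time' are preserved; decode_cf also copies the time units onto 'time_bnds' before decoding, as exercised in Example #1.

import numpy as np
import xarray as xr

ds = xr.Dataset(
    {'time_bnds': (('time', 'bnds'), np.array([[0, 31], [31, 59], [59, 90]]))},
    coords={'time': ('time', np.array([0, 31, 59]),
                     {'units': 'days since 2000-01-01', 'bounds': 'time_bnds'})},
)
midpoints = ds['time_bnds'].mean(axis=1).values
ds = ds.assign_coords(time=('time', midpoints, ds['time'].attrs))
decoded = xr.decode_cf(ds)
print(decoded['time'].values)   # mid-January, mid-February, mid-March 2000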
Example #9
    def read(self, file_info, fields=None, mapping=None, **kwargs):
        """Read SEVIRI HDF5 files and load them into an xarray.Dataset

        Args:
            file_info: Path and name of the file as string or FileInfo object.
                This can also be a tuple/list of file names or a path with
                asterisk.
            fields: ...
            **kwargs: Additional keyword arguments that are valid for
                :class:`typhon.files.handlers.common.NetCDF4`.

        Returns:
            An xarray.Dataset object.
        """

        self._ensure_local_filesystem(file_info)
        # Here, the user fields overwrite the standard fields:
        if fields is None:
            raise NotImplementedError(
                "Loading complete HDF5 files without giving explicit field "
                "names is not yet implemented!")

        # keys are dimension size, values are dimension names
        dim_dict = {}

        # Load the dataset from the file:
        with h5py.File(file_info.path, 'r') as file:
            dataset = xr.Dataset()

            for field in fields:
                if field not in file:
                    raise KeyError(f"No field named '{field}'!")

                dims = []
                for dim_size in file[field].shape:
                    dim_name = dim_dict.get(dim_size, None)
                    if dim_name is None:
                        dim_name = f"dim_{len(dim_dict)}"
                        dim_dict[dim_size] = dim_name

                    dims.append(dim_name)

                dataset[field] = xr.DataArray(
                    file[field],
                    dims=dims,
                    # Currently, some attributes may contain byte-strings that
                    # are not nice for further processing
                    attrs={},  #dict(file[field].attrs)
                )

            # decode_cf returns a new dataset; it does not modify in place
            dataset = xr.decode_cf(dataset, **kwargs)
            dataset.load()

        return _xarray_rename_fields(dataset, mapping)
Example #10
    def decode_cf(self, dataset: xr.Dataset) -> xr.Dataset:
        """------------------------------------------------------------------------------------
        Decodes the dataset according to CF conventions. This helps ensure that the dataset
        is formatted correctly after it has been constructed from unstandardized sources or
        heavily modified.

        Args:
            dataset (xr.Dataset): The dataset to decode.

        Returns:
            xr.Dataset: The decoded dataset.

        ------------------------------------------------------------------------------------"""
        # We have to make sure that time variables do not have units set as attrs, and
        # instead have units set on the encoding or else xarray will crash when trying
        # to save: https://github.com/pydata/xarray/issues/3739
        for variable in dataset.variables.values():
            if variable.data.dtype.type == np.datetime64 and "units" in variable.attrs:
                units = variable.attrs["units"]
                del variable.attrs["units"]
                variable.encoding["units"] = units

        # Leaving the "dtype" entry in the encoding causes a crash when calling
        # `dataset.to_netcdf`. Related to but not fixed by https://github.com/pydata/xarray/pull/4684
        ds = xr.decode_cf(dataset)
        for variable in ds.variables.values():
            if variable.data.dtype.type == np.datetime64:
                if "dtype" in variable.encoding:
                    del variable.encoding["dtype"]
        return ds
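A stand-alone illustration of the attrs-versus-encoding handling above (a generic sketch, not the original pipeline): once a variable already holds datetime64 values, a leftover "units" attribute conflicts with the units xarray itself writes on save, so it is moved into .encoding before calling to_netcdf. The output filename is illustrative.

import numpy as np
import xarray as xr

ds = xr.Dataset(coords={"time": ("time",
                                 np.array(["2020-01-01", "2020-01-02"],
                                          dtype="datetime64[ns]"))})
ds["time"].attrs["units"] = "days since 2020-01-01"   # would clash on save

for variable in ds.variables.values():
    if variable.dtype.type == np.datetime64 and "units" in variable.attrs:
        variable.encoding["units"] = variable.attrs.pop("units")

ds.to_netcdf("times_sketch.nc")   # now saves cleanly, units kept in encoding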
Example #11
def test_maybe_apply_time_shift_ts(gfdl_data_loader, ds, var_name,
                                   generate_file_set_args):
    ds = xr.decode_cf(ds)
    da = ds[var_name]
    result = gfdl_data_loader._maybe_apply_time_shift(
        da.copy(), **generate_file_set_args)[TIME_STR]
    assert result.identical(da[TIME_STR])
Example #12
def test_sel_time():
    time_bounds = np.array([[0, 31], [31, 59], [59, 90]])
    nv = np.array([0, 1])
    time = np.array([15, 46, 74])
    data = np.zeros((3))
    var_name = 'a'
    ds = xr.DataArray(data,
                      coords=[time],
                      dims=[TIME_STR],
                      name=var_name).to_dataset()
    ds[TIME_BOUNDS_STR] = xr.DataArray(time_bounds,
                                       coords=[time, nv],
                                       dims=[TIME_STR, BOUNDS_STR],
                                       name=TIME_BOUNDS_STR)
    units_str = 'days since 2000-01-01 00:00:00'
    ds[TIME_STR].attrs['units'] = units_str
    ds = ensure_time_avg_has_cf_metadata(ds)
    ds = set_grid_attrs_as_coords(ds)
    ds = xr.decode_cf(ds)
    da = ds[var_name]

    start_date = np.datetime64('2000-02-01')
    end_date = np.datetime64('2000-03-31')
    result = sel_time(da, start_date, end_date)
    assert result[SUBSET_START_DATE_STR].values == start_date
    assert result[SUBSET_END_DATE_STR].values == end_date
Example #13
def test_assert_has_data_for_time_str_input():
    time_bounds = np.array([[0, 31], [31, 59], [59, 90]])
    nv = np.array([0, 1])
    time = np.array([15, 46, 74])
    data = np.zeros((3))
    var_name = 'a'
    ds = xr.DataArray(data,
                      coords=[time],
                      dims=[TIME_STR],
                      name=var_name).to_dataset()
    ds[TIME_BOUNDS_STR] = xr.DataArray(time_bounds,
                                       coords=[time, nv],
                                       dims=[TIME_STR, BOUNDS_STR],
                                       name=TIME_BOUNDS_STR)
    units_str = 'days since 2000-01-01 00:00:00'
    ds[TIME_STR].attrs['units'] = units_str
    ds = ensure_time_avg_has_cf_metadata(ds)
    ds = set_grid_attrs_as_coords(ds)
    ds = xr.decode_cf(ds)
    da = ds[var_name]

    start_date = '2000-01-01'
    end_date = '2000-03-31'
    _assert_has_data_for_time(da, start_date, end_date)

    start_date_bad = '1999-12-31'
    end_date_bad = '2000-04-01'

    # With strings these checks are disabled
    _assert_has_data_for_time(da, start_date_bad, end_date)
    _assert_has_data_for_time(da, start_date, end_date_bad)
    _assert_has_data_for_time(da, start_date_bad, end_date_bad)
Example #14
def preprocess_time(x):
    # TODO: if monthly, use beginning of period time step
    ''' Convert time to initialization and forecast lead time (to fit into orthogonal matrices)
    Input Dims: lat x lon x Time
    Output Dims: lat x lon x init_time x fore_time_i'''
    
    # Set record dimension of 'time' to the beginning of averaging period 'average_T1'
    x['time'] = x.average_T1

    # Grab forecast times
    xtimes = xr.decode_cf(x).time.values
    
    # Get initialization time
    x.coords['init_time'] = xtimes[0] # get first one
    x.coords['init_time'].attrs['comments'] = 'Initialization time of forecast'

    # Get forecast time in days from initialization
    x.rename({'time': 'fore_time_i'}, inplace=True)
    x.coords['fore_time_i'] = np.arange(0,12,1)
    x.fore_time_i.attrs['units'] = 'Index of forecast dates'
    
    # Store actual forecast dates
    x.coords['fore_time'] = xr.DataArray(xtimes, dims=('fore_time_i'), coords={'fore_time_i':x.fore_time_i})
    x.fore_time.attrs['comments'] = 'Date of forecast'
    
    return x
Example #15
    def to_xarray(self, enhance=False):
        """
        Convert Wave data from a Pandas DataFrame to an xarray Dataset.

        Args:
            enhance (bool, optional): Rename variables to something meaningful and add useful attributes. Defaults to False.

        Returns:
            xarray.Dataset: xarray dataset containing converted wave data.
        """
        logging.info("Converting wave data to xarray dataset")

        # Set dataframe to indexes defined during class initialization
        tdf = self.data.set_index(self.df_index).drop(
            ["TIME", "TYRS", "TMON", "TDAY", "THRS", "TMIN", "TSEC"], axis=1)

        # Initialize xarray dataset
        ds = tdf.to_xarray()

        # Assign header data to global attributes
        ds = ds.assign_attrs(self.metadata)

        if enhance is True:
            # global_attr = required_global_attributes(required_attributes, time_start, time_end)
            ds = self.enhance_xarray(ds)
            ds = xr.decode_cf(ds)

        return ds
Example #16
    def read(self, filename, **kwargs):
        """Read and parse a NetCDF file and load it into an xarray.Dataset

        Args:
            filename: Path and name of the file as string or FileInfo object.
            **kwargs: Additional keyword arguments that are allowed for the
                :class:`~typhon.files.handlers.common.NetCDF4` class.

        Returns:
            A xarray.Dataset object.
        """

        # Make sure that the standard fields are always imported:
        fields = kwargs.pop("fields", None)
        if fields is not None:
            fields = {"time", "lat", "lon"} | set(fields)

        # xarray has problems with decoding the time variable correctly. Hence,
        # we disable it here:
        decode_cf = kwargs.pop("decode_cf", True)

        data = super().read(filename, fields=fields, decode_cf=False, **kwargs)

        # Then we fix the problem (we need int64 instead of int32):
        attrs = data["time"].attrs.copy()
        data["time"] = data["time"].astype(int)
        data["time"].attrs = attrs

        # Do the decoding now (only if the user wanted it):
        if decode_cf:
            return xr.decode_cf(data)

        return data
Example #17
def diagnosis(name, path_in, path_out, varname):
    def decode_month_since(time):
        start = time.attrs['units'].split(' ')[2]
        return pd.date_range(start, periods=len(time), freq='1M')

    hr = xr.open_mfdataset(path_in, decode_times=False)
    var = hr[varname].copy(deep=True)
    if 'month' in hr['time'].attrs['units']:
        var['time'] = decode_month_since(hr['time'])
    else:
        var['time'] = xr.decode_cf(hr)['time'].to_index()
    hr.close()

    fig = plt.figure(figsize=(6.5, 8))
    gs = gridspec.GridSpec(2, 1, hspace=0.2, height_ratios=[0.8, 1.2])

    # Time series
    ax = plt.subplot(gs[0])
    ax.plot(var['time'].to_index(), var.mean(dim=['lat', 'lon']).values)
    ax.set_title(name + ' time series')

    # Map
    ax = plt.subplot(gs[1], projection=ccrs.PlateCarree())
    ax.coastlines()
    ax.gridlines()
    cf = ax.contourf(var.lon, var.lat, var.mean(dim='time'), cmap='Spectral')
    plt.colorbar(cf, ax=ax, orientation='horizontal', pad=0.05)
    ax.set_title(name + ' climatology')

    fig.savefig(path_out, dpi=600., bbox_inches='tight')
    plt.close(fig)
Example #18
def prep_time_data(ds):
    """Prepare time coordinate information in Dataset for use in aospy.

    1. If the Dataset contains a time bounds coordinate, add attributes
       representing the true beginning and end dates of the time interval used
       to construct the Dataset
    2. If the Dataset contains a time bounds coordinate, overwrite the time
       coordinate values with the averages of the time bounds at each timestep
    3. Decode the times into np.datetime64 objects for time indexing

    Parameters
    ----------
    ds : Dataset
        Pre-processed Dataset with time coordinate renamed to
        internal_names.TIME_STR

    Returns
    -------
    Dataset
        The processed Dataset

    """
    ds = ensure_time_as_index(ds)
    if TIME_BOUNDS_STR in ds:
        ds = ensure_time_avg_has_cf_metadata(ds)
        ds[TIME_STR] = average_time_bounds(ds)
    else:
        logging.warning("dt array not found.  Assuming equally spaced "
                        "values in time, even though this may not be "
                        "the case")
        ds = add_uniform_time_weights(ds)
    return xr.decode_cf(ds,
                        decode_times=True,
                        decode_coords=False,
                        mask_and_scale=True)
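A generic illustration of the keyword switches passed above (not aospy code): mask_and_scale applies _FillValue and scale_factor, decode_times converts the CF time axis, and each can be turned off independently.

import numpy as np
import xarray as xr

raw = xr.Dataset(
    {'t2m': ('time', np.array([2731, -9999, 2751], dtype='int16'),
             {'scale_factor': 0.1, '_FillValue': -9999})},
    coords={'time': ('time', [0, 1, 2], {'units': 'hours since 2000-01-01'})},
)

decoded = xr.decode_cf(raw, decode_times=True, decode_coords=False,
                       mask_and_scale=True)
print(decoded['t2m'].values)    # approximately [273.1, nan, 275.1]
print(decoded['time'].dtype)    # datetime64[ns]

numeric = xr.decode_cf(raw, decode_times=False)
print(numeric['time'].values)   # [0, 1, 2] left as plain integers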
Example #19
def read_mls_o3(year='2005', day='0*', min_lat=-10, max_lat=10):
    """
    Load several days of MLS O3 data...files are large. A better computer might be able to handle a year.
     Filter to input latitude band, remove low quality data (re. MLS documentation).
     **** Monthly mean is not calculated because full months are not necessarily loaded. ****
    :param year: string. Default is '2005'.
    :param day: string. Default is '0*'  which loads data for days 1 to 99 of year.
    :param min_lat: minimum latitude to include in means. Default is -10.
    :param max_lat: maximum latitude to include in means. Default is 10.
    :return: None. Saves a new netCDF file that is smaller and quicker to load.
    """
    o3 = xr.open_mfdataset(
        r'/home/kimberlee/ValhallaData/MLS/L2O3v04-20/MLS-Aura_L2GP-O3_v04-20-c01_%sd%s.he5' % (year, day),
        group='HDFEOS/SWATHS/O3/Data Fields/', concat_dim='nTimes')
    geo = xr.open_mfdataset(
        r'/home/kimberlee/ValhallaData/MLS/L2O3v04-20/MLS-Aura_L2GP-O3_v04-20-c01_%sd%s.he5' % (year, day),
        group='HDFEOS/SWATHS/O3/Geolocation Fields/', concat_dim='nTimes')

    mls = xr.merge([o3, geo])
    mls = mls.drop(['AscDescMode', 'L2gpPrecision', 'L2gpValue', 'ChunkNumber', 'LineOfSightAngle', 'LocalSolarTime',
                    'OrbitGeodeticAngle', 'SolarZenithAngle'])

    mls.Time.attrs['units'] = 'Seconds since 01-01-1993'
    mls = xr.decode_cf(mls)
    mls = mls.swap_dims({'nTimes': 'Time'}, inplace=True)
    mls = mls.dropna(dim="Time")
    mls = mls.where((mls.Latitude > min_lat) & (mls.Latitude < max_lat))
    mls = mls.where(((mls.Status % 2) == 0) & (mls.Quality > 1.0) & (mls.Convergence < 1.03) & (mls.O3Precision > 0))
    mls = mls.resample('MS', dim='Time', how='mean')

    mls.to_netcdf(path='/home/kimberlee/Masters/NO2/MLS_O3_monthlymeans/quarters/MLS-O3-%s-%s.nc' %
                       (year, day), mode='w')
    return
Example #20
def test_maybe_apply_time_shift_ts(gfdl_data_loader, ds_with_time_bounds,
                                   var_name, generate_file_set_args):
    ds = xr.decode_cf(ds_with_time_bounds)
    da = ds[var_name]
    result = gfdl_data_loader._maybe_apply_time_shift(
        da.copy(), **generate_file_set_args)[TIME_STR]
    assert result.identical(da[TIME_STR])
Example #21
def read_mls_n2o(year='2005', min_lat=-10, max_lat=10):
    """
    Load a year's worth of MLS N2O data. Filter to input latitude band, remove low quality data (re. MLS documentation)
     and calculate monthly mean.
    :param year: string. Default is '2005'.
    :param min_lat: minimum latitude to include in means. Default is -10.
    :param max_lat: maximum latitude to include in means. Default is 10.
    :return: None. Saves a new netCDF file that is smaller and quicker to load.
    """
    n2o = xr.open_mfdataset(
        r'/home/kimberlee/ValhallaData/MLS/L2N2Ov-04-23/MLS-Aura_L2GP-N2O_v04-20-c01_%s*.he5' % year,
        group='HDFEOS/SWATHS/N2O/Data Fields/', concat_dim='nTimes')
    geo = xr.open_mfdataset(
        r'/home/kimberlee/ValhallaData/MLS/L2N2Ov-04-23/MLS-Aura_L2GP-N2O_v04-20-c01_%s*.he5' % year,
        group='HDFEOS/SWATHS/N2O/Geolocation Fields/', concat_dim='nTimes')

    mls = xr.merge([n2o, geo])
    mls = mls.drop(['AscDescMode', 'L2gpPrecision', 'L2gpValue', 'ChunkNumber', 'LineOfSightAngle',
                    'LocalSolarTime', 'OrbitGeodeticAngle', 'SolarZenithAngle'])
    mls.Time.attrs['units'] = 'Seconds since 01-01-1993'
    mls = xr.decode_cf(mls)
    mls = mls.swap_dims({'nTimes': 'Time'}, inplace=True)
    mls = mls.dropna(dim="Time")
    mls = mls.where((mls.Latitude > min_lat) & (mls.Latitude < max_lat))
    mls = mls.where((mls.Status % 2) == 0)
    mls = mls.where(mls.Quality > 1.4)
    mls = mls.where(mls.Convergence < 1.01)
    mls = mls.resample('MS', dim='Time', how='mean')

    mls.to_netcdf(path='/home/kimberlee/Masters/NO2/MLS_N2O_monthlymeans/MLS-N2O-%s.nc' % year, mode='w')
    return
Example #22
    def test_maybe_apply_time_offset_ts(self):
        ds = xr.decode_cf(self.ds)
        da = ds[self.var_name]

        result = self.DataLoader._maybe_apply_time_shift(
            da.copy(), **self.generate_file_set_args)[TIME_STR]
        assert result.identical(da[TIME_STR])
Example #23
def test_multiplication():
    HEIGHT = 100
    POINTSPEC = 3
    BTIME = 5
    scale_factor = (1 / HEIGHT) * 1000
    fds = create_flexdust_test_data(seed=None)
    fpds = create_test_data(seed=None)

    ds_orr, pre_ds, out_data = process_per_pointspec(fpds,
                                                     fds,
                                                     x0=None,
                                                     x1=None,
                                                     y0=None,
                                                     y1=None,
                                                     height=HEIGHT)
    produced_ds = xr.decode_cf(pre_ds.to_dataset(name='spec001_mr'))
    fpds = fpds.rename({'latitude': 'lat', 'longitude': 'lon'})

    produced_ds = produced_ds.isel(time=POINTSPEC, btime=BTIME)
    test_time = produced_ds.time + produced_ds.btime

    fpds = fpds.sel(time=test_time.values,
                    pointspec=POINTSPEC,
                    height=HEIGHT,
                    nageclass=0)['spec001_mr']
    fds = fds.sel(time=test_time.values)['Emission']
    fpds_times_fds = (fpds * fds).values * scale_factor
    assert pytest.approx(
        produced_ds['spec001_mr'].sum(dim=['lon', 'lat']).values,
        0.01) == fpds_times_fds.sum()
Example #24
def preprocess_time_monthly(x):
    ''' Preprocesses time variables from GFDL format to SIPN2 format.

    Convert time to initialization and forecast lead time (to fit into orthogonal matrices)
    Input Dims: lat x lon x Time
    Output Dims: lat x lon x init_time x fore_time

    Where we represent fore_time as monthly increments

    '''

    Nmons = x.average_T1.size
    m_i = np.arange(0, Nmons)
    m_dt = ['month' for x in m_i]  # list of 'month'

    # Set record dimension of 'time' to the beginning of averaging period 'average_T1'
    x['time'] = x.average_T1

    # Grab forecast times
    xtimes = xr.decode_cf(x).time.values

    # Get initialization time
    x.coords['init_time'] = xtimes[0]  # get first one
    x.coords['init_time'].attrs['comments'] = 'Initialization time of forecast'

    # Get forecast time (as timedeltas from init_time)
    x.rename({'time': 'fore_time'}, inplace=True)
    x.coords['fore_time'] = xr.DataArray(m_i, dims='fore_time')

    # Set time offset for index in fore_time
    x.coords['fore_offset'] = xr.DataArray(m_dt,
                                           dims='fore_time',
                                           coords={'fore_time': x.fore_time})

    return x
Example #25
    def perform_cmip6_query(self, config, query_string: str) -> xr.Dataset:
        df_sub = config.df.query(query_string)
        if df_sub.zstore.values.size == 0:
            return df_sub

        mapper = config.fs.get_mapper(df_sub.zstore.values[-1])
        logging.debug("[CMIP6_IO] df_sub: {}".format(df_sub))

        ds = xr.open_zarr(mapper, consolidated=True, mask_and_scale=True)

        # print("Time encoding: {} - {}".format(ds.indexes['time'], ds.indexes['time'].dtype))
        if not ds.indexes["time"].dtype in ["datetime64[ns]", "object"]:

            time_object = ds.indexes['time'].to_datetimeindex(
            )  # pd.DatetimeIndex([ds["time"].values[0]])

            # Convert if necessary
            if time_object[0].year == 1:

                times = ds.indexes['time'].to_datetimeindex(
                )  # pd.DatetimeIndex([ds["time"].values])
                times_plus_2000 = []
                for t in times:
                    times_plus_2000.append(
                        cftime.DatetimeNoLeap(t.year + 2000, t.month, t.day,
                                              t.hour))
                ds["time"].values = times_plus_2000
                ds = xr.decode_cf(ds)

        return ds
Example #26
def test_assert_has_data_for_time():
    time_bounds = np.array([[0, 31], [31, 59], [59, 90]])
    nv = np.array([0, 1])
    time = np.array([15, 46, 74])
    data = np.zeros((3))
    var_name = 'a'
    ds = xr.DataArray(data, coords=[time], dims=[TIME_STR],
                      name=var_name).to_dataset()
    ds[TIME_BOUNDS_STR] = xr.DataArray(time_bounds,
                                       coords=[time, nv],
                                       dims=[TIME_STR, BOUNDS_STR],
                                       name=TIME_BOUNDS_STR)
    units_str = 'days since 2000-01-01 00:00:00'
    ds[TIME_STR].attrs['units'] = units_str
    ds = ensure_time_avg_has_cf_metadata(ds)
    ds = set_grid_attrs_as_coords(ds)
    ds = xr.decode_cf(ds)
    da = ds[var_name]

    start_date = np.datetime64('2000-01-01')
    end_date = np.datetime64('2000-03-31')
    _assert_has_data_for_time(da, start_date, end_date)

    start_date_bad = np.datetime64('1999-12-31')
    end_date_bad = np.datetime64('2000-04-01')

    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date_bad, end_date)

    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date, end_date_bad)

    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date_bad, end_date_bad)
Example #27
def test_assert_has_data_for_time_str_input():
    time_bounds = np.array([[0, 31], [31, 59], [59, 90]])
    nv = np.array([0, 1])
    time = np.array([15, 46, 74])
    data = np.zeros((3))
    var_name = 'a'
    ds = xr.DataArray(data, coords=[time], dims=[TIME_STR],
                      name=var_name).to_dataset()
    ds[TIME_BOUNDS_STR] = xr.DataArray(time_bounds,
                                       coords=[time, nv],
                                       dims=[TIME_STR, BOUNDS_STR],
                                       name=TIME_BOUNDS_STR)
    units_str = 'days since 2000-01-01 00:00:00'
    ds[TIME_STR].attrs['units'] = units_str
    ds = ensure_time_avg_has_cf_metadata(ds)
    ds = set_grid_attrs_as_coords(ds)
    ds = xr.decode_cf(ds)
    da = ds[var_name]

    start_date = '2000-01-01'
    end_date = '2000-03-31'
    _assert_has_data_for_time(da, start_date, end_date)

    start_date_bad = '1999-12-31'
    end_date_bad = '2000-04-01'

    # With strings these checks are disabled
    _assert_has_data_for_time(da, start_date_bad, end_date)
    _assert_has_data_for_time(da, start_date, end_date_bad)
    _assert_has_data_for_time(da, start_date_bad, end_date_bad)
Example #28
def perform_cmip6_query(conf, query_string):
    df_sub = conf.df.query(query_string)
    if (df_sub.zstore.values.size == 0):
        return df_sub

    mapper = conf.fs.get_mapper(df_sub.zstore.values[-1])
    ds = xr.open_zarr(mapper, consolidated=True)
    print("Time encoding: {} - {}".format(ds.indexes['time'],
                                          ds.indexes['time'].dtype))
    if not ds.indexes["time"].dtype in ["datetime64[ns]", "object"]:

        time_object = ds.indexes['time'].to_datetimeindex(
        )  #pd.DatetimeIndex([ds["time"].values[0]])
        print(time_object, time_object.year)
        # Convert if necessary
        if time_object[0].year == 1:

            times = ds.indexes['time'].to_datetimeindex(
            )  # pd.DatetimeIndex([ds["time"].values])
            times_plus_2000 = []
            for t in times:
                times_plus_2000.append(
                    cftime.DatetimeNoLeap(t.year + 2000, t.month, t.day,
                                          t.hour))
            ds["time"].values = times_plus_2000
            ds = xr.decode_cf(ds)
    return ds
Example #29
def getRadarVar(filePath, refTime, varName):
    """ This function reads the radar netCDF files and
        extracts the desired variable.

        Arguments
        ---------

        filePath : Path to the netCDF file

        refTime : String specifying the starting time
           for example 1970-01-01 00:00:00

        varName : Name of the desired variable

        Returns
        -------

        dataArray : xarray DataArray
            The extracted DataArray

    """

    timeAtt = 'seconds since {refTime} UTC'.format(refTime=refTime)
    tempDS = xr.open_dataset(filePath)
    tempDS.time.attrs['units'] = timeAtt
    tempDS = xr.decode_cf(tempDS)
    tempDSZe = tempDS[varName]

    return tempDSZe
Example #30
def load_dataset(path, ens_mems=40):
    logger.info('Extract data from {0:s} for {1:03d} ensemble members'.format(
        path, ens_mems))
    ds_ens = []
    pbar_mem = tqdm(range(ens_mems))
    for mem in pbar_mem:
        path_mem = path.format(mem + 1)
        pbar_mem.write('Extract {0:s}'.format(path_mem))
        found_paths = sorted(list(glob.glob(path_mem)))
        ds_mem = xr.open_mfdataset(found_paths,
                                   parallel=True,
                                   combine='nested',
                                   concat_dim='time',
                                   chunks={'time': 1},
                                   decode_cf=False,
                                   decode_times=False)
        ds_ens.append(ds_mem)
    logger.info('Starting to concat ensemble')
    ds_ens = xr.concat(ds_ens, dim='ensemble')
    ds_ens = ds_ens.chunk({'ensemble': 1, 'time': 1})
    logger.info('Starting to decode cf-conventions')
    ds_ens = xr.decode_cf(ds_ens)
    logger.info('Concatenated data from {0:s} for {1:03d} ensemble '
                'members'.format(path, ens_mems))
    return ds_ens
Example #31
def pop_decode_time(var):
    varname = var.name
    time = var.time
    time.values = time.values - 16
    #var = var.assign_coords(time=time)
    ds = xr.decode_cf(var.to_dataset(), decode_times=True)
    return ds[varname]
Example #32
def create_ray_time(i, decode=False, nrays=360):
    time_data = (create_startazT(i, nrays=nrays) +
                 create_stopazT(i, nrays=nrays)) / 2.
    da = xr.DataArray(time_data, dims=['azimuth'], attrs=io.xarray.time_attrs)
    if decode:
        da = xr.decode_cf(xr.Dataset({'arr': da})).arr
    return da
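decode_cf operates on Datasets, so a lone DataArray with CF time metadata is wrapped, decoded, and pulled back out. A generic sketch of that pattern:

import xarray as xr

da = xr.DataArray([0, 12, 24], dims=['time'], name='t',
                  attrs={'units': 'hours since 1850-01-01'})
decoded = xr.decode_cf(da.to_dataset())['t']
print(decoded.values)   # 1850-01-01 00:00, 1850-01-01 12:00, 1850-01-02 00:00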
Example #33
def import_nc_file(filepath, variables):

    # TODO: Specify whether a variable is constant wrt. a dimension via the
    #       command line.
    # It seems that the whole "cell_methods" and "units" heuristic doesn't
    # really work. So we're back to being simple again. If there's a
    # "cell_methods" attribute containing "time", the value is aggregated,
    # otherwise it's instantaneous. Being constant wrt. time has to be specified
    # manually.

    dataset = xr.open_dataset(filepath, decode_cf=False)
    if ("time" in dataset.variables
            and "units" not in dataset[dataset["time"].attrs["bounds"]].attrs):
        dataset["time_bnds"].attrs["units"] = dataset["time"].units
    dataset = xr.decode_cf(dataset)

    vs = [v for v in variables if v in dataset.variables.keys()]

    return [{
        "name": v,
        "dataset": dataset,
        "time": (dataset["time"].attrs["bounds"]
                 if "time:" in dataset[v].attrs.get("cell_methods", "")
                 else "time"),
    } for v in vs]
Example #34
def open_flatds(filename, writeable=False, with_dask=False):
    data = np.memmap(filename, dtype="uint8", mode="r+" if writeable else "r")
    if np.any(data[:len(MAGIC)] != MAGIC):
        raise ValueError("file \"{}\" is not a flatds file".format(filename))
    if data[len(MAGIC)] != 0:
        raise ValueError("unknown header location")
    # header is in the back
    header_location = data[-8:].view("uint64")[0]
    header = msgpack.unpackb(data[header_location:-8], raw=False)

    def get_var(props):
        if len(props["d"]) > 0:
            dims, shape = zip(*[header["dims"][d] for d in props["d"]])
            size = np.prod(shape) * props["is"]
        else:
            dims = ()
            shape = ()
            size = props["is"]
        ofs = props["ofs"]
        d = data[ofs:ofs+size]
        d = d.view(dtype=props["t"])
        d = np.lib.stride_tricks.as_strided(d, shape, props["st"], subok=True, writeable=writeable)
        attrs = props.get("attrs", {})
        if with_dask:
            import dask.array as da
            d = da.from_array(d)
        return xr.DataArray(d, dims=dims, attrs=attrs)

    variables = {name: get_var(p) for name, p in header["vars"].items()}
    attrs = header.get("attrs", {})
    return xr.decode_cf(xr.Dataset(variables, attrs=attrs))
Example #35
 def test_write_store(self):
     expected = create_test_data()
     with self.create_store() as store:
         expected.dump_to_store(store)
         # we need to cf decode the store because it has time and
         # non-dimension coordinates
         actual = xr.decode_cf(store)
         self.assertDatasetAllClose(expected, actual)
Example #36
def test_decode_cf(calendar):
    days = [1., 2., 3.]
    da = DataArray(days, coords=[days], dims=['time'], name='test')
    ds = da.to_dataset()

    for v in ['test', 'time']:
        ds[v].attrs['units'] = 'days since 2001-01-01'
        ds[v].attrs['calendar'] = calendar

    if not has_cftime_or_netCDF4 and calendar not in _STANDARD_CALENDARS:
        with pytest.raises(ValueError):
            ds = decode_cf(ds)
    else:
        ds = decode_cf(ds)

        if calendar not in _STANDARD_CALENDARS:
            assert ds.test.dtype == np.dtype('O')
        else:
            assert ds.test.dtype == np.dtype('M8[ns]')
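A stand-alone version of the dtype behaviour asserted here (the non-standard case needs the optional cftime package): the default 'standard' calendar decodes to datetime64[ns], while a calendar such as 'noleap' decodes to cftime objects with dtype object.

import xarray as xr

da = xr.DataArray([1., 2., 3.], coords=[[1., 2., 3.]], dims=['time'],
                  name='test')
ds = da.to_dataset()
for v in ['test', 'time']:
    ds[v].attrs['units'] = 'days since 2001-01-01'

print(xr.decode_cf(ds)['test'].dtype)            # datetime64[ns] (standard)

ds_noleap = ds.copy(deep=True)
for v in ['test', 'time']:
    ds_noleap[v].attrs['calendar'] = 'noleap'
print(xr.decode_cf(ds_noleap)['test'].dtype)     # object (cftime dates)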
Example #37
def test_decode_cf_enable_cftimeindex(calendar, enable_cftimeindex):
    days = [1., 2., 3.]
    da = DataArray(days, coords=[days], dims=['time'], name='test')
    ds = da.to_dataset()

    for v in ['test', 'time']:
        ds[v].attrs['units'] = 'days since 2001-01-01'
        ds[v].attrs['calendar'] = calendar

    if (not has_cftime and enable_cftimeindex and
       calendar not in coding.times._STANDARD_CALENDARS):
        with pytest.raises(ValueError):
            with set_options(enable_cftimeindex=enable_cftimeindex):
                ds = decode_cf(ds)
    else:
        with set_options(enable_cftimeindex=enable_cftimeindex):
            ds = decode_cf(ds)

        if (enable_cftimeindex and
           calendar not in coding.times._STANDARD_CALENDARS):
            assert ds.test.dtype == np.dtype('O')
        else:
            assert ds.test.dtype == np.dtype('M8[ns]')
Example #38
def test_maybe_apply_time_shift(data_loader, ds, inst_ds, var_name,
                                generate_file_set_args):
    ds = xr.decode_cf(ds)
    da = ds[var_name]

    result = data_loader._maybe_apply_time_shift(
        da.copy(), **generate_file_set_args)[TIME_STR]
    assert result.identical(da[TIME_STR])

    offset = data_loader._maybe_apply_time_shift(
        da.copy(), {'days': 1}, **generate_file_set_args)
    result = offset[TIME_STR]

    expected = da[TIME_STR] + np.timedelta64(1, 'D')
    expected[TIME_STR] = expected

    assert result.identical(expected)
Example #39
def test_assert_has_data_for_time_cftime_datetimes(calendar, date_type):
    time_bounds = np.array([[0, 2], [2, 4], [4, 6]])
    nv = np.array([0, 1])
    time = np.array([1, 3, 5])
    data = np.zeros((3))
    var_name = 'a'
    ds = xr.DataArray(data,
                      coords=[time],
                      dims=[TIME_STR],
                      name=var_name).to_dataset()
    ds[TIME_BOUNDS_STR] = xr.DataArray(time_bounds,
                                       coords=[time, nv],
                                       dims=[TIME_STR, BOUNDS_STR],
                                       name=TIME_BOUNDS_STR)
    units_str = 'days since 0002-01-02 00:00:00'
    ds[TIME_STR].attrs['units'] = units_str
    ds[TIME_STR].attrs['calendar'] = calendar
    ds = ensure_time_avg_has_cf_metadata(ds)
    ds = set_grid_attrs_as_coords(ds)

    with warnings.catch_warnings(record=True):
        with xr.set_options(enable_cftimeindex=True):
            ds = xr.decode_cf(ds)
    da = ds[var_name]

    start_date = date_type(2, 1, 2)
    end_date = date_type(2, 1, 8)

    _assert_has_data_for_time(da, start_date, end_date)

    start_date_bad = date_type(2, 1, 1)
    end_date_bad = date_type(2, 1, 9)

    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date_bad, end_date)

    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date, end_date_bad)

    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date_bad, end_date_bad)
Example #40
def test_maybe_apply_time_shift_inst(gfdl_data_loader, inst_ds, var_name,
                                     generate_file_set_args):
    inst_ds = xr.decode_cf(inst_ds)
    generate_file_set_args['dtype_in_time'] = 'inst'
    generate_file_set_args['intvl_in'] = '3hr'
    da = inst_ds[var_name]
    result = gfdl_data_loader._maybe_apply_time_shift(
        da.copy(), **generate_file_set_args)[TIME_STR]

    expected = da[TIME_STR] + np.timedelta64(-3, 'h')
    expected[TIME_STR] = expected
    assert result.identical(expected)

    generate_file_set_args['intvl_in'] = 'daily'
    da = inst_ds[var_name]
    result = gfdl_data_loader._maybe_apply_time_shift(
        da.copy(), **generate_file_set_args)[TIME_STR]

    expected = da[TIME_STR]
    expected[TIME_STR] = expected
    assert result.identical(expected)
Example #41
def _prep_time_data(ds):
    """Prepare time coordinate information in Dataset for use in aospy.

    1. If the Dataset contains a time bounds coordinate, add attributes
       representing the true beginning and end dates of the time interval used
       to construct the Dataset
    2. If the Dataset contains a time bounds coordinate, overwrite the time
       coordinate values with the averages of the time bounds at each timestep
    3. Decode the times into np.datetime64 objects for time indexing

    Parameters
    ----------
    ds : Dataset
        Pre-processed Dataset with time coordinate renamed to
        internal_names.TIME_STR

    Returns
    -------
    Dataset
        The processed Dataset
    """
    ds = times.ensure_time_as_index(ds)
    if TIME_BOUNDS_STR in ds:
        ds = times.ensure_time_avg_has_cf_metadata(ds)
        ds[TIME_STR] = times.average_time_bounds(ds)
    else:
        logging.warning("dt array not found.  Assuming equally spaced "
                        "values in time, even though this may not be "
                        "the case")
        ds = times.add_uniform_time_weights(ds)
    # Suppress the 'enable_cftimeindex is a no-op' warning; we'll keep setting it for
    # now to maintain backwards compatibility for older xarray versions.
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore')
        with xr.set_options(enable_cftimeindex=True):
            ds = xr.decode_cf(ds, decode_times=True, decode_coords=False,
                              mask_and_scale=True)
    return ds
Example #42
def test_assert_has_data_for_time():
    time_bounds = np.array([[0, 31], [31, 59], [59, 90]])
    nv = np.array([0, 1])
    time = np.array([15, 46, 74])
    data = np.zeros((3))
    var_name = 'a'
    ds = xr.DataArray(data,
                      coords=[time],
                      dims=[TIME_STR],
                      name=var_name).to_dataset()
    ds[TIME_BOUNDS_STR] = xr.DataArray(time_bounds,
                                       coords=[time, nv],
                                       dims=[TIME_STR, BOUNDS_STR],
                                       name=TIME_BOUNDS_STR)
    units_str = 'days since 2000-01-01 00:00:00'
    ds[TIME_STR].attrs['units'] = units_str
    ds = ensure_time_avg_has_cf_metadata(ds)
    ds = set_grid_attrs_as_coords(ds)
    ds = xr.decode_cf(ds)
    da = ds[var_name]

    start_date = np.datetime64('2000-01-01')
    end_date = np.datetime64('2000-03-31')
    _assert_has_data_for_time(da, start_date, end_date)

    start_date_bad = np.datetime64('1999-12-31')
    end_date_bad = np.datetime64('2000-04-01')

    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date_bad, end_date)

    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date, end_date_bad)

    with pytest.raises(AssertionError):
        _assert_has_data_for_time(da, start_date_bad, end_date_bad)
Example #43
File: field.py Project: NCPP/ocgis
    def to_xarray(self, **kwargs):
        """
        Convert the field to a :class:`xarray.Dataset` with CF metadata interpretation.

        Limitations:
        * Bounds are treated as data arrays inside the ``xarray`` dataset.
        * Integer masked arrays are upcast to float data types in ``xarray``.
        * Group hierarchies are not supported in ``xarray``.

        :keyword bool decode_cf: (``=True``) If ``True``, run the ``xarray`` function ``decode_cf`` on the returned
         dataset.
        :param dict kwargs: Optional keyword arguments to dataset creation. See :meth:`ocgis.VariableCollection.to_xarray`
         for additional information.
        :rtype: :class:`xarray.Dataset`
        """
        from xarray import decode_cf

        kwargs = kwargs.copy()
        should_decode_cf = kwargs.pop('decode_cf', True)

        ret = super(Field, self).to_xarray(**kwargs)
        if should_decode_cf:
            ret = decode_cf(ret)
        return ret
Example #44
def load_variable(var_name, path_to_file,
                  method='xarray', fix_times=True, **extr_kwargs):
    """ Interface for loading an extracted variable into memory, using
    either iris or xarray. If `path_to_file` is instead a raw dataset,
    then the entire contents of the file will be loaded!

    Parameters
    ----------
    var_name : string
        The name of the variable to load
    path_to_file : string
        Location of file containing variable
    method : string
        Choose between 'iris' or 'xarray'
    fix_times : bool
        Correct the timestamps to the middle of the bounds
        in the variable metadata (CESM puts them at the right
        boundary which sucks!)
    extr_kwargs : dict
        Additional keyword arguments to pass to the extractor

    """

    logger.info("Loading %s from %s" % (var_name, path_to_file))

    if method == "iris":

        raise NotImplementedError("`iris` deprecated with Python 3")

        # cf = lambda c : c.var_name == var_name
        # cubes = iris.load(path_to_file, iris.Constraint(cube_func=cf),
        #                   **extr_kwargs)
        #
        # if not cubes:
        #     raise RuntimeError("Could not find '%s' in cube" % var_name)
        #
        # assert len(cubes) == 1
        #
        # c = cubes[0]
        #
        # if fix_times:
        #     times = c.coord('time')
        #     assert hasattr(times, 'bounds')
        #
        #     bnds = times.bounds
        #     mean_times = np.mean(bnds, axis=1)
        #
        #     times.points = mean_times
        #
        # return c

    elif method == "xarray":

        ds = xarray.open_dataset(path_to_file, decode_cf=False, **extr_kwargs)

        # Fix time unit, if necessary
        interval, timestamp = ds.time.units.split(" since ")
        timestamp = timestamp.split(" ")
        yr, mm, dy = timestamp[0].split("-")

        if int(yr) < 1650:
            yr = 2001
        yr = str(yr)
            
        # Re-construct the timestamp with the year set to 2001 and re-set the units
        timestamp[0] = "-".join([yr, mm, dy])
        new_units = " ".join([interval, "since"] + timestamp)
        ds.time.attrs['units'] = new_units

        if fix_times:
            assert hasattr(ds, 'time_bnds')
            bnds = ds.time_bnds.values
            mean_times = np.mean(bnds, axis=1)

            ds.time.values = mean_times

        # Be pedantic and check that we don't have a "missing_value" attr
        for field in ds:
            if hasattr(ds[field], 'missing_value'):
                del ds[field].attrs['missing_value']
            
            
        # Lazy decode CF
        ds = xarray.decode_cf(ds)

        return ds
Example #45
def open_mdsdataset(dirname, iters='all', prefix=None, read_grid=True,
                    delta_t=1, ref_date=None, calendar='gregorian',
                    geometry='sphericalpolar',
                    grid_vars_to_coords=True, swap_dims=False,
                    endian=">", chunks=None,
                    ignore_unknown_vars=False,):
    """Open MITgcm-style mds (.data / .meta) file output as an xarray Dataset.

    Parameters
    ----------
    dirname : string
        Path to the directory where the mds .data and .meta files are stored
    iters : list, optional
        The iteration numbers of the files to be read. If `None`, no data
        files will be read.
    prefix : list, optional
        List of different filename prefixes to read. Default is to read all
        available files.
    read_grid : bool, optional
        Whether to read the grid data
    delta_t : number, optional
        The timestep used in the model. (Can't be inferred.)
    ref_date : string, optional
        A date string corresponding to the zero timestep. E.g. "1990-1-1 0:0:0".
        See CF conventions [1]_
    calendar : string, optional
        A calendar allowed by CF conventions [1]_
    geometry : {'sphericalpolar', 'cartesian', 'llc'}
        MITgcm grid geometry specifier.
    swap_dims : boolean, optional
        Whether to swap the logical dimensions for physical ones.
    endian : {'=', '>', '<'}, optional
        Endianness of variables. Default for MITgcm is ">" (big endian)
    chunks : int or dict, optional
        If chunks is provided, it is used to load the new dataset into dask arrays.
    ignore_unknown_vars : boolean, optional
        Don't raise an error if unknown variables are encountered while reading
        the dataset.

    Returns
    -------
    dset : xarray.Dataset
        Dataset object containing all coordinates and variables.

    References
    ----------
    .. [1] http://cfconventions.org/Data/cf-conventions/cf-conventions-1.7/build/ch04s04.html
    """

    # get frame info for history
    frame = inspect.currentframe()
    _, _, _, arg_values = inspect.getargvalues(frame)
    del arg_values['frame']
    function_name = inspect.getframeinfo(frame)[2]

    # some checks for argument consistency
    if swap_dims and not read_grid:
        raise ValueError("If swap_dims==True, read_grid must be True.")

    # We either have a single iter, in which case we create a fresh store,
    # or a list of iters, in which case we combine.
    if iters == 'all':
        iters = _get_all_iternums(dirname, file_prefixes=prefix)
    if iters is None:
        iternum = None
    else:
        try:
            iternum = int(iters)
        # if not we probably have some kind of list
        except TypeError:
            if len(iters) == 1:
                iternum = int(iters[0])
            else:
                # We have to check to make sure we have the same prefixes at
                # each timestep...otherwise we can't combine the datasets.
                first_prefixes = prefix or _get_all_matching_prefixes(
                                                        dirname, iters[0])
                for iternum in iters:
                    these_prefixes = _get_all_matching_prefixes(
                        dirname, iternum, prefix
                    )
                    # don't care about order
                    if set(these_prefixes) != set(first_prefixes):
                        raise IOError("Could not find the expected file "
                                      "prefixes %s at iternum %g. (Instead "
                                      "found %s)" % (repr(first_prefixes),
                                                     iternum,
                                                     repr(these_prefixes)))

                # chunk at least by time
                chunks = chunks or {}

                # recursively open each dataset at a time
                datasets = [open_mdsdataset(
                        dirname, iters=iternum, delta_t=delta_t,
                        read_grid=False, swap_dims=False,
                        prefix=prefix, ref_date=ref_date, calendar=calendar,
                        geometry=geometry,
                        grid_vars_to_coords=grid_vars_to_coords,
                        endian=endian, chunks=chunks,
                        ignore_unknown_vars=ignore_unknown_vars)
                    for iternum in iters]
                # now add the grid
                if read_grid:
                    datasets.insert(0, open_mdsdataset(
                        dirname, iters=None, delta_t=delta_t,
                        read_grid=True, swap_dims=False,
                        prefix=prefix, ref_date=ref_date, calendar=calendar,
                        geometry=geometry,
                        grid_vars_to_coords=grid_vars_to_coords,
                        endian=endian, chunks=chunks,
                        ignore_unknown_vars=ignore_unknown_vars))
                # apply chunking
                ds = xr.auto_combine(datasets)
                if swap_dims:
                    ds = _swap_dimensions(ds, geometry)
                return ds

    store = _MDSDataStore(dirname, iternum, delta_t, read_grid,
                          prefix, ref_date, calendar,
                          geometry, endian,
                          ignore_unknown_vars=ignore_unknown_vars)
    ds = xr.Dataset.load_store(store)

    if swap_dims:
        ds = _swap_dimensions(ds, geometry)

    if grid_vars_to_coords:
        ds = _set_coords(ds)

    # turn all the auxiliary grid variables into coordinates
    # if grid_vars_to_coords:
    #     for k in _grid_variables:
    #         ds.set_coords(k, inplace=True)
    #     ds.set_coords('iter', inplace=True)

    if ref_date:
        ds = xr.decode_cf(ds)

    # do we need more fancy logic (like open_dataset), or is this enough
    if chunks is not None:
        ds = ds.chunk(chunks)

    # set attributes for CF conventions
    ds.attrs['Conventions'] = "CF-1.6"
    ds.attrs['title'] = "netCDF wrapper of MITgcm MDS binary data"
    ds.attrs['source'] = "MITgcm"
    arg_string = ', '.join(['%s=%s' % (str(k), repr(v))
                            for (k, v) in arg_values.items()])
    ds.attrs['history'] = ('Created by calling '
                           '`%s(%s)`'% (function_name, arg_string))

    return ds
Example #46

# Grid info
loc = 'http://barataria.tamu.edu:8080/thredds/dodsC/NcML/txla_nesting6.nc'
# grid_filename = '/atch/raid1/zhangxq/Projects/txla_nesting6/txla_grd_v4_new.nc'
# grid = tracpy.inout.readgrid(grid_filename, usebasemap=True, llcrnrlat=22.85, llcrnrlon=-97.9, urcrnrlat=30.5)
# # actually using psi grid here despite the name
# xpsi = np.asanyarray(grid['xpsi'].T, order='C')
# ypsi = np.asanyarray(grid['ypsi'].T, order='C')
# xr = np.asanyarray(grid['xr'].T, order='C')
# yr = np.asanyarray(grid['yr'].T, order='C')
ds = xr.open_dataset(loc, decode_cf=False)
ds['temp'].attrs['missing_value'] = ds['temp'].attrs['_FillValue']
key = 'salt'
ds[key].attrs['missing_value'] = ds[key].attrs['_FillValue']
ds = xr.decode_cf(ds)

# current arrows
cdx = 7; cdy = 11 # in indices
wdx = 25; wdy = 40 # in indices, wind arrows

# Colormap for model output
if var == 'salt':
    levels = (37-np.exp(np.linspace(0,np.log(37.), 10)))[::-1] # log for salinity, 0 to 36
    levels[0] = 0
    # levels = (37-np.exp(np.linspace(0,np.log(36.), 10)))[::-1]-1 # log for salinity, 0 to 35
    cmap = calc_cmap(cmo.haline, levels)
    # cmap = cmPong.salinity(cmo.haline, levels)
    # cmap = cmPong.salinity('YlGnBu_r', levels)
    ilevels = [0,1,2,3,4,5,8] # which levels to label
    ticks = [int(tick) for tick in levels[ilevels]] # plot ticks