Example #1
0
def eddy_decomp(var, nt, lon1, lon2, taxis=0):
    """Split a variable into time/zonal mean and eddy components.

    Parameters
    ----------
    var : xray.DataArray
        Field to decompose.
    nt : int
        Window length passed to atm.rolling_mean for the time mean.
    lon1, lon2 : number
        Longitude bounds used for the zonal mean.
    taxis : int, optional
        Index of the time axis of `var`.

    Returns
    -------
    comp : xray.Dataset
        Three variables named after `var` with these suffixes:
        '_AVG' : rolling time mean averaged over lon1-lon2,
        '_ST'  : rolling time mean minus '_AVG' (stationary eddy),
        '_TR'  : `var` minus its rolling time mean (transient eddy).
        Each carries the attrs of `var` plus a 'component' label.
    """

    lon_dim = atm.get_coord(var, 'lon', 'name')

    # Human-readable labels describing the averaging that was applied
    time_label = 'Time mean (%d-%s rolling)' % (nt, var.dims[taxis])
    lon_parts = atm.latlon_labels([lon1, lon2], 'lon', deg_symbol=False)
    lon_label = 'zonal mean (' + '-'.join(lon_parts) + ')'

    name, attrs, _, _ = atm.meta(var)
    key_avg = name + '_AVG'
    key_st = name + '_ST'
    key_tr = name + '_TR'

    # Centered rolling time mean, then its zonal mean over the sector
    time_mean = atm.rolling_mean(var, nt, axis=taxis, center=True)
    sector = atm.subset(time_mean, {lon_dim: (lon1, lon2)})
    zonal_mean = sector.mean(dim=lon_dim)
    zonal_mean.attrs = attrs

    comp = xray.Dataset()

    comp[key_avg] = zonal_mean
    comp[key_avg].attrs['component'] = time_label + ', ' + lon_label

    comp[key_st] = time_mean - zonal_mean
    comp[key_st].attrs = attrs
    comp[key_st].attrs['component'] = 'Stationary eddy'

    comp[key_tr] = var - time_mean
    comp[key_tr].attrs = attrs
    comp[key_tr].attrs['component'] = 'Transient eddy'

    return comp
Example #2
0
def composite(data, compdays, return_avg=True, daynm='Dayrel'):
    """Build composites of daily data for selected sets of days.

    Parameters
    ----------
    data : xray.DataArray
        Daily data to composite.
    compdays : dict of arrays or lists
        Days to include in each composite, keyed by composite name.
    return_avg : bool, optional
        If True, each composite is the mean over its selected days.
        If False, the extracted individual days are returned as-is.
    daynm : str, optional
        Name of the day dimension in `data`.

    Returns
    -------
    comp : OrderedDict of xray.DataArrays
        One entry per key of `compdays`, in iteration order.  When
        `return_avg` is True each entry carries the attrs of `data`
        plus an attribute named `daynm` listing the days averaged.
    """

    _, attrs, _, _ = atm.meta(data)
    comp = collections.OrderedDict()

    for key in compdays:
        days = compdays[key]
        selected = atm.subset(data, {daynm: (days, None)})
        if return_avg:
            # Averaging drops the day dimension, so record the days used
            selected = selected.mean(dim=daynm)
            selected.attrs = attrs
            selected.attrs[daynm] = days
        comp[key] = selected

    return comp
Example #3
0
 def rolling(data, nroll):
     """Return the centered `nroll`-point rolling mean of `data`.

     When `data` has more than one dimension, the rolling mean is
     computed separately for each slice along the first dimension;
     otherwise it is computed over the whole 1-D series.  Points that
     do not have a full window are NaN (the pandas default).

     Parameters
     ----------
     data : xray.DataArray
         Input data.
     nroll : int
         Window length for the rolling mean.

     Returns
     -------
     data_out : xray.DataArray
         Smoothed values with the same shape and coords as `data`.
     """

     def _roll(arr):
         """Centered rolling mean of a 1-D or 2-D array along axis 0."""
         # pd.rolling_mean was deprecated in pandas 0.18 and removed in
         # 0.23, so prefer the .rolling() API whenever it is available
         # and only fall back to the legacy function on old pandas.
         if hasattr(pd.Series, 'rolling'):
             arr = np.asarray(arr)
             frame = pd.Series(arr) if arr.ndim == 1 else pd.DataFrame(arr)
             return frame.rolling(nroll, center=True).mean().values
         return pd.rolling_mean(arr, nroll, center=True)

     _, _, coords, _ = atm.meta(data)
     shape = data.shape
     if len(shape) > 1:
         vals = np.zeros(shape)
         for y in range(shape[0]):
             vals[y] = _roll(data.values[y])
     else:
         vals = _roll(data.values)
     return xray.DataArray(vals, coords=coords)
Example #4
0
def daily_rel2onset(data, d_onset, npre, npost):
    """Extract daily data in a window around each year's onset day.

    Parameters
    ----------
    data : xray.DataArray
        Daily data with year and day coordinates.
    d_onset : ndarray, list, scalar or xray.DataArray
        Onset date (day of year) for each year, in the same order as
        the years of `data`.
    npre, npost : int
        Number of days before and after onset to extract.

    Returns
    -------
    data_out : xray.DataArray
        For each year, the days from d_onset - npre to d_onset + npost,
        concatenated along the year dimension.  The day dimension is
        renamed to <day name> + 'rel' and holds day - d_onset.  The
        onset days are stored in attrs['d_onset'].
    """

    atm.meta(data)
    yearnm = atm.get_coord(data, 'year', 'name')
    daynm = atm.get_coord(data, 'day', 'name')
    years = atm.makelist(atm.get_coord(data, 'year'))

    # Work with something indexable by position regardless of input type
    if isinstance(d_onset, xray.DataArray):
        onset_days = d_onset.values
    else:
        onset_days = atm.makelist(d_onset)

    relnm = daynm + 'rel'

    for y, year in enumerate(years):
        d0 = onset_days[y]
        window = {yearnm: (year, None), daynm: (d0 - npre, d0 + npost)}
        aligned = atm.subset(data, window).rename({daynm: relnm})

        # Re-express the day coordinate relative to this year's onset
        aligned[relnm] = aligned[relnm] - d0
        aligned[relnm].attrs['long_name'] = 'Day of year relative to onset day'

        data_out = (aligned if y == 0
                    else xray.concat([data_out, aligned], dim=yearnm))

    data_out.attrs['d_onset'] = onset_days

    return data_out
Example #5
0
 def applymask(data, mask):
     """Return `data` with masked points replaced by NaN.

     `mask` is expanded with atm.biggify(..., tile=True) before use;
     points where the expanded mask is True become NaN.  The result is
     a new xray.DataArray that reuses the coords of `data`.
     """
     coords = atm.meta(data)[2]
     full_mask = atm.biggify(mask, data, tile=True)
     masked = np.ma.masked_array(data, full_mask)
     return xray.DataArray(masked.filled(np.nan), coords=coords)
Example #6
0
def onset_HOWI(uq_int, vq_int, npts=50, nroll=7, days_pre=range(138, 145),
               days_post=range(159, 166), yearnm='year', daynm='day',
               maxbreak=7):
    """Return monsoon Hydrologic Onset/Withdrawal Index.

    Parameters
    ----------
    uq_int, vq_int : xray.DataArrays
        Vertically integrated moisture fluxes.
    npts : int, optional
        Number of points to use to define HOWI index.
    nroll : int, optional
        Number of days for rolling mean.
    days_pre, days_post : list of ints, optional
        Days used for the pre- and post-onset climatological composites.
        Default values correspond to May 18-24 and June 8-14 (numbered
        as non-leap year).
    yearnm, daynm : str, optional
        Name of year and day dimensions in DataArray
    maxbreak : int, optional
        Maximum number of days with index <=0 to consider a break in
        monsoon season rather than end of monsoon season.

    Returns
    -------
    howi : xray.Dataset
        HOWI daily timeseries for each year ('tseries'), its
        climatology ('tseries_clim'), and monsoon 'onset' and 'retreat'
        days for each year.  The parameters used are stored in attrs.
    ds : xray.Dataset
        Intermediate fields used to build the index (fluxes,
        climatologies, composites, mask, masked fields and the raw,
        normalized and smoothed index timeseries).

    Reference
    ---------
    J. Fasullo and P. J. Webster, 2003: A hydrological definition of
        Indian monsoon onset and withdrawal. J. Climate, 16, 3200-3211.

    Notes
    -----
    In some years the HOWI index can give a bogus onset or bogus retreat
    when the index briefly goes above or below 0 for a few days.  To deal
    with these cases, I'm defining the monsoon season as the longest set
    of consecutive days with HOWI that is positive or has been negative
    for no more than `maxbreak` number of days (monsoon break).
    """

    _, _, coords, _ = atm.meta(uq_int)
    latnm = atm.get_coord(uq_int, 'lat', 'name')
    lonnm = atm.get_coord(uq_int, 'lon', 'name')

    # Collect the input fluxes and their vector magnitude
    ds = xray.Dataset()
    ds['uq'] = uq_int
    ds['vq'] = vq_int
    ds['vimt'] = np.sqrt(ds['uq']**2 + ds['vq']**2)

    # Climatological moisture fluxes
    dsbar = ds.mean(dim=yearnm)
    ds['uq_bar'], ds['vq_bar'] = dsbar['uq'], dsbar['vq']
    ds['vimt_bar'] = np.sqrt(ds['uq_bar']**2 + ds['vq_bar']**2)

    # Pre- and post- monsoon climatology composites
    dspre = atm.subset(dsbar, {daynm : (days_pre, None)}).mean(dim=daynm)
    dspost = atm.subset(dsbar, {daynm : (days_post, None)}).mean(dim=daynm)
    dsdiff = dspost - dspre
    ds['uq_bar_pre'], ds['vq_bar_pre'] = dspre['uq'], dspre['vq']
    ds['uq_bar_post'], ds['vq_bar_post'] = dspost['uq'], dspost['vq']
    ds['uq_bar_diff'], ds['vq_bar_diff'] = dsdiff['uq'], dsdiff['vq']

    # Magnitude of vector difference
    vimt_bar_diff = np.sqrt(dsdiff['uq']**2 + dsdiff['vq']**2)
    ds['vimt_bar_diff'] = vimt_bar_diff

    # Top N difference vectors
    def top_n(data, n):
        """Return a mask with the highest n values in 2D array.

        The mask is False at the n largest (non-NaN) points and True
        everywhere else, i.e. True marks points to be masked out.
        """
        # Work on a copy so the caller's array is not overwritten
        vals = data.copy()
        mask = np.ones(vals.shape, dtype=bool)
        for k in range(n):
            # Locate the current maximum, unmask it, then blank it out
            # so the next iteration finds the next-largest value
            i, j = np.unravel_index(np.nanargmax(vals), vals.shape)
            mask[i, j] = False
            vals[i, j] = np.nan
        return mask

    # Mask to extract top N points
    mask = top_n(vimt_bar_diff, npts)
    ds['mask'] = xray.DataArray(mask, coords={latnm: coords[latnm],
                                              lonnm: coords[lonnm]})

    # Apply mask to DataArrays
    def applymask(data, mask):
        """Return data with points where the mask is True set to NaN."""
        _, _, coords, _ = atm.meta(data)
        # Expand the mask to match data (via atm.biggify with tile=True)
        maskbig = atm.biggify(mask, data, tile=True)
        vals = np.ma.masked_array(data, maskbig).filled(np.nan)
        data_out = xray.DataArray(vals, coords=coords)
        return data_out

    ds['vimt_bar_masked'] = applymask(ds['vimt_bar'], mask)
    ds['vimt_bar_diff_masked'] = applymask(vimt_bar_diff, mask)
    ds['uq_masked'] = applymask(ds['uq'], mask)
    ds['vq_masked'] = applymask(ds['vq'], mask)
    ds['vimt_masked'] = np.sqrt(ds['uq_masked']**2 + ds['vq_masked']**2)

    # Timeseries data averaged over selected N points
    ds['howi_clim_raw'] = ds['vimt_bar_masked'].mean(dim=latnm).mean(dim=lonnm)
    ds['howi_raw'] = ds['vimt_masked'].mean(dim=latnm).mean(dim=lonnm)

    # Normalize
    # The climatological min and max define the scaling for both the
    # climatology and the individual years.
    howi_min = ds['howi_clim_raw'].min().values
    howi_max = ds['howi_clim_raw'].max().values
    def applynorm(data):
        """Scale linearly so howi_min maps to -1 and howi_max to +1."""
        return 2 * (data - howi_min) / (howi_max - howi_min) - 1
    ds['howi_norm'] = applynorm(ds['howi_raw'])
    ds['howi_clim_norm'] = applynorm(ds['howi_clim_raw'])

    # Apply n-day rolling mean
    def rolling(data, nroll):
        """Return centered nroll-point rolling mean of data.

        For data with more than one dimension, the rolling mean is
        applied separately to each slice along the first dimension.
        """
        # NOTE(review): pd.rolling_mean was removed in pandas 0.23;
        # on newer pandas this needs the .rolling(...).mean() API.
        center = True
        _, _, coords, _ = atm.meta(data)
        dims = data.shape
        vals = np.zeros(dims)
        if len(dims) > 1:
            nyears = dims[0]
            for y in range(nyears):
                vals[y] = pd.rolling_mean(data.values[y], nroll, center=center)
        else:
            vals = pd.rolling_mean(data.values, nroll, center=center)
        data_out = xray.DataArray(vals, coords=coords)
        return data_out

    ds['howi_norm_roll'] = rolling(ds['howi_norm'], nroll)
    ds['howi_clim_norm_roll'] = rolling(ds['howi_clim_norm'], nroll)

    # Index timeseries dataset
    howi = xray.Dataset()
    howi['tseries'] = ds['howi_norm_roll']
    howi['tseries_clim'] = ds['howi_clim_norm_roll']

    # Find zero crossings for onset and withdrawal indices
    nyears = len(howi[yearnm])
    onset = np.zeros(nyears, dtype=int)
    retreat = np.zeros(nyears, dtype=int)
    for y in range(nyears):
        # List of days with positive HOWI index
        pos = howi[daynm].values[howi['tseries'][y].values > 0]

        # In case of extra zero crossings, find the longest set of days
        # with positive index
        # (presumably atm.splitdays groups `pos` into runs of
        # consecutive days -- confirm against its definition)
        splitpos = atm.splitdays(pos)
        lengths = np.array([len(v) for v in splitpos])
        imonsoon = lengths.argmax()
        monsoon = splitpos[imonsoon]

        # In case there is a break in the monsoon season, check the
        # sets of days before and after and add to monsoon season
        # if applicable
        if imonsoon > 0:
            predays = splitpos[imonsoon - 1]
            if monsoon.min() - predays.max() <= maxbreak:
                # Fill the gap too: every day from the start of the
                # preceding set up to the day before the main set
                predays = np.arange(predays.min(), monsoon.min())
                monsoon = np.concatenate([predays, monsoon])
        if imonsoon < len(splitpos) - 1:
            postdays = splitpos[imonsoon + 1]
            if postdays.min() - monsoon.max() <= maxbreak:
                # Fill the gap too: every day after the main set through
                # the end of the following set
                postdays = np.arange(monsoon.max() + 1, postdays.max() + 1)
                monsoon = np.concatenate([monsoon, postdays])

        # Onset and retreat days
        # Onset is the first day of the season; retreat is the day after
        # the last day of the season.
        onset[y] = monsoon[0]
        retreat[y] = monsoon[-1] + 1

    howi['onset'] = xray.DataArray(onset, coords={yearnm : howi[yearnm]})
    howi['retreat'] = xray.DataArray(retreat, coords={yearnm : howi[yearnm]})
    howi.attrs = {'npts' : npts, 'nroll' : nroll, 'maxbreak' : maxbreak,
                  'days_pre' : days_pre, 'days_post' : days_post}

    return howi, ds
Example #7
0
# Store the onset index's daily timeseries under the onset index name
tseries[onset_nm] = index['tseries']

# ENSO
# Select the requested ENSO index and rename its 'Year' dimension to
# 'year' so it matches the naming used by the other datasets here.
enso = utils.get_enso_indices(years)
enso = xray.DataArray(enso[enso_nm]).rename({'Year' : 'year'})

# ----------------------------------------------------------------------
# Climatology

# Means over all years
index_clim = index.mean(dim='year')
tseries_clim = tseries.mean(dim='year')
enso_clim = enso.mean(dim='year').values

# Tile the climatology to each year for plot_tseries_together
# The tiled values reuse the coords and dims of the yearly timeseries.
vals = atm.biggify(tseries_clim[onset_nm], index['tseries'].values, tile=True)
_, _, coords, dims = atm.meta(index['tseries'])
ts_clim = xray.DataArray(vals, name=tseries_clim[onset_nm].name, coords=coords,
                         dims=dims)
tseries[onset_nm + '_clim'] = ts_clim

# ----------------------------------------------------------------------
# Timeseries relative to onset, shifted to 0 at onset day

# With npre = 0 the extracted window presumably starts on the onset day
# itself (depends on daily_rel2onset -- confirm).
npre, npost = 0, 200
tseries_rel = xray.Dataset()
for key in tseries.data_vars:
    tseries_rel[key] = daily_rel2onset(tseries[key], onset, npre, npost,
                                       yearnm='year', daynm='day')
    # For 'CHP*' and '*ACC' variables, subtract each year's value on the
    # first extracted day so those series start at 0.
    if key.startswith('CHP') or key.endswith('ACC'):
        tseries_rel[key] = tseries_rel[key] - tseries_rel[key][:, 0]
Example #8
0
# See testing/testing-indices-onset_TT.py for details.

# Select vertical pressure level to use, or None to use 200-600mb
# vertical mean
plev = None

# Read daily data from each year
if plev is None:
    # Pre-computed vertical-mean temperature
    T = atm.combine_daily_years('Tbar', ttfiles, years, yearname='year')
else:
    # Temperature on the single requested pressure level
    T = atm.combine_daily_years('T', ttfiles, years, yearname='year',
                                subset_dict={'plev' : (plev, plev)})
    # Remove extra dimension (vertical)
    # Drop the pressure entry from both dims and coords, then rebuild
    # the DataArray from the squeezed values.
    pdim = atm.get_coord(T, 'plev', 'dim')
    pname = atm.get_coord(T, 'plev', 'name')
    name, attrs, coords, dims = atm.meta(T)
    dims = list(dims)
    dims.pop(pdim)
    coords = atm.odict_delete(coords, pname)
    T = xray.DataArray(np.squeeze(T.values), dims=dims, coords=coords,
                       name=name, attrs=attrs)

# Calculate index
# NOTE(review): each tuple looks like (lat1, lat2, lon1, lon2) for the
# northern and southern boxes -- confirm against indices.onset_TT.
north=(5, 30, 40, 100)
south=(-15, 5, 40, 100)
index['TT'] = indices.onset_TT(T, north=north, south=south)

# Some weirdness going on in 1991, for now just set to NaN
# Values with magnitude above 1e30 are replaced by NaN.
# NOTE(review): in the code visible here the cleaned `vals` is never
# written back to index['TT'][nm]; confirm whether an assignment follows.
for nm in ['ttn', 'tts', 'tseries']:
    vals = index['TT'][nm].values
    vals = np.ma.masked_array(vals, abs(vals) > 1e30).filled(np.nan)
Example #9
0
def calc_fluxes(year, month,
                var_ids=['u', 'q', 'T', 'theta', 'theta_e', 'hgt'],
                concat_dim='TIME', scratchdir=None, keepscratch=False,
                verbose=True):
    """Return the monthly mean of MERRA daily fluxes.

    Reads MERRA daily data from OpenDAP urls, computes fluxes, and
    returns the monthly mean of the daily variable and its zonal and
    meridional fluxes.

    Data are processed one pressure level at a time.  For each level,
    every day of the month is read, fluxes are computed and written to
    a scratch file; the daily scratch files are then concatenated and
    averaged, and the levels are concatenated along the pressure
    dimension.

    Parameters
    ----------
    year, month : int
        Numeric year and month (1-12).
    var_ids : str or list of str, optional
        IDs of variables to include.
    concat_dim : str, optional
        Name of dimension for concatenation.
    scratchdir : str, optional
        Directory path to store temporary files while processing data.
        If omitted, the current working directory is used.
    keepscratch : bool, optional
        If True, scratch files are kept in scratchdir. Otherwise they
        are deleted, including when processing fails part-way through
        a level.
    verbose : bool, optional
        If True, print updates while processing files.

    Returns
    -------
    data : xray.Dataset
        Mean of daily data and the mean of the daily zonal fluxes
        (u * var) and meridional fluxes (v * var), for each variable
        in var_ids.
    """

    # Normalize once so that the flux loop below iterates over variable
    # IDs rather than over the characters of a single string ID.
    var_ids = atm.makelist(var_ids)

    # Names of all variables to read: the requested ones, the winds, and
    # the inputs needed to derive theta / theta_e
    nms = [get_varname(nm) for nm in var_ids]
    u_nm, v_nm = get_varname('u'), get_varname('v')
    nms.extend([u_nm, v_nm])
    if 'theta' in nms:
        nms.append(get_varname('T'))
    if 'theta_e' in nms:
        nms.extend([get_varname('T'), get_varname('q')])
    nms = set(nms)

    days = range(1, atm.days_this_month(year, month) + 1)

    def scratchfile(nm, k, year, month, day):
        """Return the scratch file path for one level and day."""
        filestr = '%s_level%d_%d%02d%02d.nc' % (nm, k, year, month, day)
        if scratchdir is not None:
            filestr = os.path.join(scratchdir, filestr)
        return filestr

    # Read metadata from one file to get pressure-level array
    dataset = 'p_daily'
    url = url_list(dataset, return_dict=False)[0]
    with xray.open_dataset(url) as ds:
        pname = atm.get_coord(ds, 'plev', 'name')
        plev = atm.get_coord(ds, 'plev')
        # Pressure levels in Pa for theta/theta_e calcs
        p_units = atm.pres_units(ds[pname].units)
        pres = atm.pres_convert(plev, p_units, 'Pa')

    # Get daily data (raw and calculate extended variables)
    def get_data(nms, pres, year, month, day, concat_dim, subset_dict, verbose):
        """Read one day of raw data and add theta / theta_e if requested."""
        # Lists of raw and extended variables
        ids = list(nms)
        ext = []
        for var in ['theta', 'theta_e']:
            if var in ids:
                ext.append(var)
                ids.remove(var)

        # Read raw data and calculate extended variables
        data = read_daily(ids, year, month, day, concat_dim=concat_dim,
                          subset_dict=subset_dict, verbose=verbose)
        if 'theta' in ext:
            print_if('Computing potential temperature', verbose)
            T = data[get_varname('T')]
            data['theta'] = atm.potential_temp(T, pres)
        if 'theta_e' in ext:
            print_if('Computing equivalent potential temperature', verbose)
            T = data[get_varname('T')]
            q = data[get_varname('q')]
            data['theta_e'] = atm.equiv_potential_temp(T, pres, q)

        return data

    # Iterate over vertical levels
    for k, p in enumerate(plev):
        subset_dict = {pname : (p, p)}
        print_if('Pressure-level %.1f' % p, verbose)

        files = []

        try:
            for day in days:
                # Read data for this level and day
                ds = get_data(nms, pres[k], year, month, day, concat_dim,
                              subset_dict, verbose)

                # Compute fluxes
                print_if('Computing fluxes', verbose)
                u = ds[get_varname('u')]
                v = ds[get_varname('v')]
                for nm in var_ids:
                    var = ds[get_varname(nm)]
                    u_var = u * var
                    v_var = v * var

                    u_var.name = get_varname(u_nm) + '*' + var.name
                    units = var.attrs['units'] + ' * ' + u.attrs['units']
                    u_var.attrs['units'] = units
                    v_var.name = get_varname(v_nm) + '*' + var.name
                    v_var.attrs['units'] = units
                    ds[u_var.name] = u_var
                    ds[v_var.name] = v_var

                # Save to temporary scratch file
                filenm = scratchfile('fluxes', k, year, month, day)
                files.append(filenm)
                print_if('Saving to scratch file ' + filenm, verbose)
                ds.to_netcdf(filenm)

            # Concatenate daily scratch files
            ds = atm.load_concat(files)
        finally:
            # Clean up even if a read/compute/write step failed, so that
            # partial runs do not leave scratch files behind.  A file
            # may be listed but missing if its write failed.
            if not keepscratch:
                for f in files:
                    if os.path.exists(f):
                        os.remove(f)

        # Compute monthly means
        print_if('Computing monthly means', verbose)
        if k == 0:
            data = ds.mean(dim=concat_dim)
        else:
            data = xray.concat([data, ds.mean(dim=concat_dim)], dim=pname)

    # Averaging drops variable attributes, so copy them back from the
    # last level's daily dataset
    for var in data.data_vars:
        data[var].attrs = ds[var].attrs

    return data
# ----------------------------------------------------------------------
# Data and calcs

# Onset index
# Load the index into memory inside the `with` so it can still be used
# after the file is closed, then keep only the requested years.
with xray.open_dataset(indfile) as index:
    index.load()
index = index.sel(year=years)
d0 = index[ind_nm].values

# Precip data
if pcp_nm == 'cmap':
    pcp = precipdat.read_cmap(pcpfiles, yearmin=min(years), yearmax=max(years))

    # Interpolate to daily resolution
    # Linear interpolation (interp1d default) along axis 1, which is
    # assumed to be the 'day' axis of pcp -- TODO confirm.
    # NOTE(review): days 3-363 presumably match the span of the CMAP
    # day coordinate so no extrapolation is needed -- confirm.
    name, attrs, coords, dimnames = atm.meta(pcp)
    days = np.arange(3, 364)
    interp_func = scipy.interpolate.interp1d(pcp['day'], pcp, axis=1)
    vals = interp_func(days)
    coords['day'] = xray.DataArray(days, coords={'day' : days})
    pcp = xray.DataArray(vals, dims=dimnames, coords=coords, name=name,
                         attrs=attrs)
else:
    # Daily precip read directly from the yearly files
    pcp = atm.combine_daily_years(None, pcpfiles, years, yearname='year',
                                  subset_dict=subset_dict)


# Wrap from following year to get extended daily range
# The range runs from npre days before the earliest onset to npost days
# after the latest onset.
daymin = min(d0) - npre
daymax = max(d0) + npost
pcp = utils.wrapyear_all(pcp, daymin=daymin, daymax=daymax)