Beispiel #1
0
def wrapyear_all(data, daymin, daymax):
    """Wrap daily data to extended ranges over each year in yearly data.

    For every year found in `data`, that year's data and the data for the
    previous and next years (None where a neighbouring year is not present
    in `data`) are handed to `wrapyear`.  Each result gets a leading year
    dimension, is reindexed onto the days daymin..daymax, and the per-year
    results are concatenated along the year dimension.

    Parameters
    ----------
    data : xray object
        Daily data with year and day coordinates that `atm.get_coord`
        can resolve.
    daymin, daymax : int
        First and last day (inclusive) of the output day range.

    Returns
    -------
    data_out : xray object
        Wrapped data for all years, concatenated along the year dimension.

    Raises
    ------
    ValueError
        If `data` contains no years.
    """

    def extract_year(year):
        """Return the subset of data for `year`, or None if it is absent."""
        if year in years:
            return atm.subset(data, {'year' : (year, year)})
        return None

    daynm = atm.get_coord(data, 'day', 'name')
    days = np.arange(daymin, daymax + 1)
    # Name the dimension explicitly rather than relying on it being
    # inferred from the dict of coords.
    days = xray.DataArray(days, name=daynm, dims=[daynm],
                          coords={daynm : days})
    years = atm.get_coord(data, 'year')
    yearnm = atm.get_coord(data, 'year', 'name')

    wrapped = []
    for year in years:
        var_out = wrapyear(extract_year(year), extract_year(year - 1),
                           extract_year(year + 1), daymin, daymax, year)
        var_out = atm.expand_dims(var_out, 'year', year, axis=0)
        wrapped.append(var_out.reindex_like(days))

    if not wrapped:
        raise ValueError('No years found in data.')
    if len(wrapped) == 1:
        return wrapped[0]

    # Concatenate once at the end instead of re-concatenating every iteration
    return xray.concat(wrapped, dim=yearnm)
Beispiel #2
0
def concat_plevs(datadir, year, varnm, plevs, pdim, version):
    """Read one variable at several pressure levels and stack the levels.

    For each entry of `plevs`, the file named by
    `datafile(datadir, varnm, plev, year, version)` is opened, `varnm` is
    loaded, and a 'Height' dimension holding that level is inserted at
    axis 1.  The per-level arrays are concatenated along 'Height' in the
    order given by `plevs`.

    NOTE(review): `pdim` is accepted but never used here.
    """
    level_dim = 'Height'

    def read_level(level):
        # Load the variable for a single level and tag it with that level
        path = datafile(datadir, varnm, level, year, version)
        print('Reading ' + path)
        with xray.open_dataset(path) as dataset:
            loaded = dataset[varnm].load()
            return atm.expand_dims(loaded, level_dim, level, axis=1)

    for count, level in enumerate(plevs):
        level_var = read_level(level)
        if count > 0:
            var = xray.concat([var, level_var], dim=level_dim)
        else:
            var = level_var
    return var
Beispiel #3
0
def get_data_rel(varid, plev, years, datafiles, data, onset, npre, npost):
    """Fetch daily data and align it relative to the onset/withdrawal day.

    The day range read is min(onset) - npre through max(onset) + npost.
    When a single year of a 'basic' variable is requested and that range
    extends outside the year, the files for the neighbouring years (found
    by substituting the year in the first data file's name) are read as
    well if they exist, and the extra years are dropped again afterwards.
    Only 'basic' variables are passed through `daily_rel2onset`.
    """
    years = atm.makelist(years)
    onset = atm.makelist(onset)
    datafiles = atm.makelist(datafiles)

    # Day range needed to cover npre/npost days around every onset day
    daymin, daymax = min(onset) - npre, max(onset) + npost

    # A single year may need the previous/next year's file to fill the range
    load_years = years
    needs_trim = False
    if len(years) == 1 and var_type(varid) == 'basic':
        base_file, yr = datafiles[0], years[0]

        def sibling_file(offset):
            # File name for the neighbouring year, or None if no such file
            candidate = base_file.replace(str(yr), str(yr + offset))
            return candidate if os.path.isfile(candidate) else None

        if daymin < 1:
            needs_trim = True
            prev_file = sibling_file(-1)
            if prev_file is not None:
                load_years = [yr - 1] + load_years
                datafiles = [prev_file] + datafiles
        if daymax > len(atm.season_days('ANN', yr)):
            needs_trim = True
            next_file = sibling_file(1)
            if next_file is not None:
                load_years = load_years + [yr + 1]
                datafiles = datafiles + [next_file]

    var = get_daily_data(varid, plev, load_years, datafiles, data,
                         daymin=daymin, daymax=daymax)

    # Drop the neighbouring years that were only read for wrapping
    if needs_trim:
        only_year = years[0]
        var = atm.subset(var, {'year' : (only_year, only_year)})

    # A single year must still carry an explicit year dimension
    if len(years) == 1 and 'year' not in var.dims:
        var = atm.expand_dims(var, 'year', years[0], axis=0)

    # Calculated variables are returned without further alignment
    if var_type(varid) != 'basic':
        return var

    print('Aligning data relative to onset day')
    return daily_rel2onset(var, onset, npre, npost)
    with xray.open_dataset(datafile) as ds:
        data = utils.wrapyear(ds, ds_pre, ds_post, daymin, daymax, year=year)
        data.attrs = ds.attrs
    return data

# For every pressure level and year: read the data around that year's index
# day, align each variable relative to the onset day, and save one file per
# (plev, year).
for plev in plevs:
    for y, year in enumerate(years):
        datafile = datafiles[plev][y]
        d_onset, d_retreat = onset[y], retreat[y]
        # Day taken from the index named ind_nm; used for the window read
        # by get_data, while d_onset is used for the alignment below.
        d0 = int(index[ind_nm][y].values)

        ds_rel = xray.Dataset()
        ds = get_data(datafile, year, d0, npre, npost)
        ds_rel.attrs = ds.attrs
        for nm in ds.data_vars:
            # Add a 'year' dimension before aligning relative to onset
            var = atm.expand_dims(ds[nm], 'year', year)
            ds_rel[nm] = utils.daily_rel2onset(var, d_onset, npre, npost)
        # Record the onset and retreat days used for this year in the output
        ds_rel.attrs['d_onset'] = d_onset
        ds_rel.attrs['d_retreat'] = d_retreat
        savefile = savefiles[plev][y]
        print('Saving to ' + savefile)
        ds_rel.to_netcdf(savefile)


# ----------------------------------------------------------------------
# Compute climatologies and save

# Label covering the full span of years, e.g. used in the output file name
yearstr = '%d-%d' % (years.min(), years.max())
for plev in plevs:
    # NOTE(review): only the input file list and output name are built here;
    # the rest of this loop body appears to be cut off in this excerpt.
    relfiles = savefiles[plev]
    savefile = savestr % plev + '_' + yearstr + '.nc'
Beispiel #5
0
def onset_changepoint_merged(precip_acc, order=1, yearnm='year',
                             daynm='day'):
    """Return monsoon onset/retreat based on changepoint in precip.

    Uses piecewise least-squares fit of data to detect changepoints.
    I've modified the original method by doing a 3-piecewise fit
    of the entire year, rather than a 2-piecewise fit of an onset
    range and retreat range.

    Every admissible pair of breakpoints is tried for each year and the
    pair with the smallest residual sum of squares is selected, so the
    cost grows quadratically with the number of days.

    Parameters
    ----------
    precip_acc : xray.DataArray
        Accumulated precipitation.  If it has no `yearnm` dimension, one
        is added with the placeholder value -1.
    order : int, optional
        Order of polynomial to fit.
    yearnm, daynm : str, optional
        Name of year and day dimensions in precip_acc.

    Returns
    -------
    chp : xray.Dataset
        Variables 'tseries' (the input), 'onset' and 'retreat' (days of
        the two changepoints for each year) and 'tseries_fit' (piecewise
        polynomial fit), with `order` stored in the attributes.

    Raises
    ------
    ValueError
        If any of the fitted polynomial coefficients is NaN.

    Reference
    ---------
    Cook, B. I., & Buckley, B. M. (2009). Objective determination of
    monsoon season onset, withdrawal, and length. Journal of Geophysical
    Research, 114(D23), D23109. doi:10.1029/2009JD012795
    """

    def split(x, n1, n2):
        """Cut x into three consecutive pieces at indices n1 and n2."""
        return x[:n1], x[n1:n2], x[n2:]

    def piecewise_polyfit(x, y, n1, n2, order):
        """Fit three polynomials split at n1, n2; return (fit, rss)."""
        # Mask NaNs so they are ignored by the fits and by the rss
        y = np.ma.masked_array(y, np.isnan(y))
        x1, x2, x3 = split(x, n1, n2)
        y1, y2, y3 = split(y, n1, n2)
        p1 = np.ma.polyfit(x1, y1, order)
        p2 = np.ma.polyfit(x2, y2, order)
        p3 = np.ma.polyfit(x3, y3, order)
        if np.isnan(p1).any() or np.isnan(p2).any() or np.isnan(p3).any():
            raise ValueError('NaN for polyfit coeffs. Check data.')
        ypred1 = np.polyval(p1, x1)
        ypred2 = np.polyval(p2, x2)
        ypred3 = np.polyval(p3, x3)
        ypred = np.concatenate([ypred1, ypred2, ypred3])
        rss = np.sum((y - ypred)**2)
        return ypred, rss

    def find_changepoint(x, y, order):
        """Search all breakpoint pairs and return the best-fitting one."""
        rss = {}
        for n1 in range(2, len(x) - 4):
            print(n1)
            for n2 in range(n1 + 2, len(x) - 2):
                _, rssval = piecewise_polyfit(x, y, n1, n2, order)
                rss[(n1, n2)] = rssval
        # Materialise the keys as a list: dict views cannot be indexed
        # on Python 3, and the index from nanargmin is needed below.
        keys = list(rss.keys())
        rssvec = np.nan * np.ones(len(keys))
        for i, key in enumerate(keys):
            rssvec[i] = rss[key]
        i0 = np.nanargmin(rssvec)
        n1, n2 = keys[i0]
        x1, x2 = x[n1], x[n2]
        ypred, _ = piecewise_polyfit(x, y, n1, n2, order)
        return x1, x2, ypred, rss

    if yearnm not in precip_acc.dims:
        precip_acc = atm.expand_dims(precip_acc, yearnm, -1, axis=0)
    years = precip_acc[yearnm].values
    days = precip_acc[daynm].values
    chp = xray.Dataset()
    chp['tseries'] = precip_acc

    onset = np.nan * np.ones(years.shape)
    retreat = np.nan * np.ones(years.shape)
    pred = np.nan * np.ones(precip_acc.shape)
    for y, year in enumerate(years):
        print (year)
        results = find_changepoint(days, precip_acc[y], order)
        onset[y], retreat[y], pred[y,:], _ = results
    chp['onset'] = xray.DataArray(onset, dims=[yearnm], coords={yearnm : years})
    chp['retreat'] = xray.DataArray(retreat, dims=[yearnm], coords={yearnm : years})
    chp['tseries_fit'] = xray.DataArray(pred, dims=[yearnm, daynm],
                                        coords={yearnm : years, daynm : days})
    chp.attrs['order'] = order

    return chp
Beispiel #6
0
def onset_changepoint(precip_acc, onset_range=(1, 250),
                      retreat_range=(201, 366), order=1, yearnm='year',
                      daynm='day'):
    """Return monsoon onset/retreat based on changepoint in precip.

    Uses piecewise least-squares fit of data to detect changepoints.
    Onset and retreat are computed independently: for each, the data is
    restricted to the corresponding day range and a two-piece polynomial
    fit is evaluated at every admissible breakpoint; the breakpoint with
    the smallest residual sum of squares is the changepoint.

    Parameters
    ----------
    precip_acc : xray.DataArray
        Accumulated precipitation.  If it has no `yearnm` dimension, one
        is added with the placeholder value -1.
    onset_range, retreat_range : 2-tuple of ints, optional
        Range of days to use when calculating onset / retreat.
    order : int, optional
        Order of polynomial to fit.
    yearnm, daynm : str, optional
        Name of year and day dimensions in precip_acc.

    Returns
    -------
    chp : xray.Dataset
        Onset/retreat days ('onset', 'retreat'), daily timeseries
        ('tseries'), piecewise polynomial fits ('tseries_fit_onset',
        'tseries_fit_retreat'), and rss values ('rss_onset',
        'rss_retreat').  `order`, `onset_range` and `retreat_range` are
        stored in the attributes.

    Raises
    ------
    ValueError
        If any of the fitted polynomial coefficients is NaN.

    Reference
    ---------
    Cook, B. I., & Buckley, B. M. (2009). Objective determination of
    monsoon season onset, withdrawal, and length. Journal of Geophysical
    Research, 114(D23), D23109. doi:10.1029/2009JD012795
    """

    def split(x, n):
        """Cut x into two consecutive pieces at index n."""
        return x[:n], x[n:]

    def piecewise_polyfit(x, y, n, order=1):
        """Fit two polynomials split at index n; return (fit, rss)."""
        # Mask NaNs so they are ignored by the fits and by the rss
        y = np.ma.masked_array(y, np.isnan(y))
        x1, x2 = split(x, n)
        y1, y2 = split(y, n)
        p1 = np.ma.polyfit(x1, y1, order)
        p2 = np.ma.polyfit(x2, y2, order)
        if np.isnan(p1).any() or np.isnan(p2).any():
            raise ValueError('NaN for polyfit coeffs. Check data.')
        ypred1 = np.polyval(p1, x1)
        ypred2 = np.polyval(p2, x2)
        ypred = np.concatenate([ypred1, ypred2])
        rss = np.sum((y - ypred)**2)
        return ypred, rss

    def find_changepoint(x, y, order=1):
        """Return (changepoint x value, fit at changepoint, rss per index)."""
        rss = np.nan * x
        for n in range(2, len(x) - 2):
            _, rssval = piecewise_polyfit(x, y, n, order)
            rss[n] = rssval
        n0 = np.nanargmin(rss)
        x0 = x[n0]
        # Refit with the same polynomial order that was used to select n0,
        # so the returned fit matches the rss that chose the changepoint.
        ypred, _ = piecewise_polyfit(x, y, n0, order)
        return x0, ypred, rss

    if yearnm not in precip_acc.dims:
        precip_acc = atm.expand_dims(precip_acc, yearnm, -1, axis=0)
    years = precip_acc[yearnm].values
    chp = xray.Dataset()
    chp['tseries'] = precip_acc

    for key, drange in zip(['onset', 'retreat'], [onset_range, retreat_range]):
        print('Calculating ' + key)
        print(drange)
        dmin, dmax = drange
        precip_sub = atm.subset(precip_acc, {daynm : (dmin, dmax)})
        dsub = precip_sub[daynm].values

        d_cp = np.nan * np.ones(years.shape)
        pred = np.nan * np.ones(precip_sub.shape)
        rss = np.nan * np.ones(precip_sub.shape)
        for y, year in enumerate(years):
            # Cut out any NaNs from day range
            pcp_yr = precip_sub[y]
            ind = np.where(np.isfinite(pcp_yr))[0]
            islice = slice(ind.min(), ind.max() + 1)
            pcp_yr = pcp_yr[islice]
            days_yr = pcp_yr[daynm].values
            print('%d (%d-%d)' % (year, min(days_yr), max(days_yr)))
            results = find_changepoint(days_yr, pcp_yr, order)
            # Days outside islice keep their NaN fill values
            d_cp[y], pred[y, islice], rss[y, islice] = results
        chp[key] = xray.DataArray(d_cp, dims=[yearnm], coords={yearnm : years})
        chp['tseries_fit_' + key] = xray.DataArray(
            pred, dims=[yearnm, daynm], coords={yearnm : years, daynm : dsub})
        chp['rss_' + key] = xray.DataArray(
            rss, dims=[yearnm, daynm], coords={yearnm : years, daynm : dsub})

    chp.attrs['order'] = order
    chp.attrs['onset_range'] = onset_range
    chp.attrs['retreat_range'] = retreat_range

    return chp
Beispiel #7
0
def get_daily_data(varid, plev, years, datafiles, data, daymin=1,
                   daymax=366, yearnm='year'):
    """Return daily data (basic variable or calculated variable).

    Data is read from datafiles if varid is a basic variable.
    If varid is a calculated variable (e.g. potential temperature),
    the base variables for calculation are provided in the dict data.

    Parameters
    ----------
    varid : str
        Variable identifier; `var_type(varid)` decides whether it is
        computed from `data` ('calc') or read from `datafiles`.
    plev : int, float, str or None
        Pressure level.  A number is converted from hPa to Pa for the
        thermodynamic calculations; 'LML' uses data['PS'] as the pressure
        when available.
    years : int or list of int
        Years corresponding to `datafiles`.
    datafiles : str or list of str
        Files to read for a basic variable.
    data : dict-like
        Base variables used to compute calculated variables.
    daymin, daymax : int, optional
        Day range to extract; only applied to basic variables.  Ranges
        below day 1 or above day 366 are filled by wrapping from the
        neighbouring years via `wrapyear_all`.
    yearnm : str, optional
        Name of the year dimension.

    Returns
    -------
    var : xray.DataArray
        The requested variable; 'precip', 'PRECTOT' and 'EVAP' are
        converted to mm/day.

    Raises
    ------
    ValueError
        If varid is classified as a calculated variable but no
        calculation is defined for it here.
    """

    years = atm.makelist(years)
    datafiles = atm.makelist(datafiles)

    if isinstance(plev, (int, float)):
        pres = atm.pres_convert(plev, 'hPa', 'Pa')
    elif plev == 'LML' and 'PS' in data:
        pres = data['PS']
    else:
        pres = None

    def get_var(data, varnm, plev=None):
        """Look up data[varnm + plev], mapping 'QV' to 'Q' at level 'LML'."""
        if plev is None:
            plev = ''
        elif plev == 'LML' and varnm == 'QV':
            varnm = 'Q'
        return data[varnm + str(plev)]

    if var_type(varid) == 'calc':
        print('Computing ' + varid)
        if varid == 'THETA':
            var = atm.potential_temp(get_var(data, 'T', plev), pres)
        elif varid == 'THETA_E':
            var = atm.equiv_potential_temp(get_var(data, 'T', plev), pres,
                                           get_var(data, 'QV', plev))
        elif varid == 'DSE':
            var = atm.dry_static_energy(get_var(data, 'T', plev),
                                        get_var(data, 'H', plev))
        elif varid == 'MSE':
            var = atm.moist_static_energy(get_var(data, 'T', plev),
                                          get_var(data, 'H', plev),
                                          get_var(data, 'QV', plev))
        elif varid == 'VFLXMSE':
            Lv = atm.constants.Lv.values
            var = data['VFLXCPT'] + data['VFLXPHI'] + data['VFLXQV'] * Lv
            var.attrs['units'] = data['VFLXCPT'].attrs['units']
            var.attrs['long_name'] = 'Vertically integrated MSE meridional flux'
        else:
            # Fail with a clear message instead of an unbound `var` below
            raise ValueError('Unknown calculated variable ' + varid)
    else:
        # Some files store the variable under a name with the level appended
        with xray.open_dataset(datafiles[0]) as ds:
            if varid not in ds.data_vars:
                varid = varid + str(plev)
        var = atm.combine_daily_years(varid, datafiles, years, yearname=yearnm,
                                      subset_dict={'day' : (daymin, daymax)})
        var = atm.squeeze(var)

        # Make sure year dimension is included for single year
        # (checked under the same name it is created with)
        if len(years) == 1 and yearnm not in var.dims:
            var = atm.expand_dims(var, yearnm, years[0], axis=0)

        # Wrap years for extended day ranges
        if daymin < 1 or daymax > 366:
            var = wrapyear_all(var, daymin, daymax)

    # Convert precip and evap to mm/day
    if varid in ['precip', 'PRECTOT', 'EVAP']:
        var = atm.precip_convert(var, var.attrs['units'], 'mm/day')

    return var