Example #1
def fill_data(da):
    valid = np.isfinite(da.values)
    valid_times = da.time[valid]
    dt = np.median(np.diff(da.time))
    dt_gap = np.diff(da.time[valid]).max()
    if dt_gap > dt:
        log.warning("%s: gaps up to %.1f minutes" %
                    (da.name, dt_gap / np.timedelta64(60, 's')))
    da.values = utils.fill_invalid(da.values)
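A hypothetical usage sketch (not from the original source), assuming fill_data above is defined in the same script together with stompy's utils and an ordinary logging logger named `log`:

import logging
import numpy as np
import xarray as xr

log = logging.getLogger('fill_data_demo')

# 15-minute record with a 2.5 hour hole knocked out
t = np.arange(np.datetime64('2016-01-01'), np.datetime64('2016-01-02'),
              np.timedelta64(15, 'm'))
vals = np.sin(np.linspace(0, 6.0, len(t)))
vals[20:30] = np.nan
da = xr.DataArray(vals, dims=['time'], coords={'time': t}, name='stage')

fill_data(da)   # warns about the gap, then fills da.values in place
assert np.isfinite(da.values).all()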
Example #2
 def lp(x):
     x = utils.fill_invalid(x)
     dn = utils.to_dnum(t)  # `t` (timestamps) comes from the enclosing scope
     # low-pass cutoff in days; points within 2*cutoff of the start or end are masked below
     cutoff = 36 / 24.
     x_lp = filters.lowpass(x, dn, cutoff=cutoff)
     mask = (dn < dn[0] + 2 * cutoff) | (dn > dn[-1] - 2 * cutoff)
     x_lp[mask] = np.nan
     return x_lp
def samples_from_usgs_erddap(run_start, field='salinity'):
    """
    DEPRECATED.  Better to use the dynamically cached data from
    samples_from_usgs().

    --
    Pull some USGS transect data and return a set of 2D salinity
    samples appropriate for the given date.
    Caveats: This is currently using ERDDAP behind the scenes, which
    does not have the most recent data.  It will use a prior year
    if the date cannot be matched within 30 days.

    field: name of the field to pull from usgs_cruises,
       one of 'salinity' or 'temperature'
    """
    # This copy of the USGS data ends early:
    usgs_data_end = np.datetime64('2016-04-28')
    usgs_pad = np.timedelta64(30, 'D')

    usgs_target = run_start

    # so we may have to grab a previous year's cruise and pretend
    while usgs_target + usgs_pad > usgs_data_end:
        usgs_target -= np.timedelta64(365, 'D')

    usgs_cruises = usgs_sfbay.cruise_dataset(usgs_target - usgs_pad,
                                             usgs_target + usgs_pad)

    # lame filling
    scal3d = usgs_cruises[field]
    scal2d = scal3d.mean(dim='prof_sample')
    assert scal2d.dims[0] == 'date'
    scal2d_fill = utils.fill_invalid(scal2d.values, axis=0)

    scal_f = interp1d(utils.to_dnum(scal2d.date.values),
                      scal2d_fill,
                      axis=0,
                      bounds_error=False)(utils.to_dnum(usgs_target))

    usgs_init_scal = np.c_[scal2d.x.values, scal2d.y.values, scal_f]
    return usgs_init_scal
Example #4
def load_cimis(start_date,end_date):
    union_city=cimis.cimis_fetch_to_xr(stations=171,
                                       start_date=start_date,
                                       end_date=end_date,
                                       cache_dir=common.cache_dir)
                                       
    # union_city=xr.open_dataset('/opt/data/cimis/union_city-hourly-2001-2016.nc')

    # https://cals.arizona.edu/azmet/et1.htm
    # says to divide ETo by 0.7 in the cool period to get pan evaporation,
    # and by 0.6 in the warm period.

    temps=utils.fill_invalid(union_city.HlyAirTmp.values)
    temp_zscore=((temps-temps.mean()) / temps.std()).clip(-1,1)
    # score of 1 means warm temperature
    factors=np.interp(temp_zscore,
                      [-1,1],[0.7,0.6])
    union_city['HlyEvap']=union_city.HlyEto/factors

    union_city.time.values += np.timedelta64(8,'h')
    return union_city
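Not part of the original: a quick illustration of the pan-evaporation factor interpolation above, where a temperature z-score of -1 (cool) maps to the 0.7 divisor, +1 (warm) maps to 0.6, and intermediate temperatures are interpolated linearly.

import numpy as np

zs = np.array([-1.0, 0.0, 1.0])
factors = np.interp(zs, [-1, 1], [0.7, 0.6])
print(factors)  # -> [0.7, 0.65, 0.6]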
Example #5
def load_cimis(start_date, end_date):

    # Undo the change Rusty made, since we don't want to deal with putting a CIMIS key
    # in the local environment; also rename the file so it isn't limited to 2001-2016,
    # and check that the requested dates are covered. ([email protected])
    #union_city=cimis.cimis_fetch_to_xr(stations=171,
    #                                   start_date=start_date,
    #                                   end_date=end_date,
    #                                   cache_dir=common.cache_dir)

    # union_city=xr.open_dataset('/opt/data/cimis/union_city-hourly-2001-2016.nc')
    union_city = xr.open_dataset('/opt/data/cimis/union_city-hourly.nc')

    # add a check that the date range is ok
    uc_start = (np.datetime64(union_city.Date.values[0][0:10]) +
                np.timedelta64(int(union_city.Date.values[0][11:13]), 'h') +
                np.timedelta64(int(union_city.Date.values[0][13:]), 'm'))
    uc_end = (np.datetime64(union_city.Date.values[-1][0:10]) +
              np.timedelta64(int(union_city.Date.values[-1][11:13]), 'h') +
              np.timedelta64(int(union_city.Date.values[-1][13:]), 'm'))
    if uc_start > start_date:
        raise Exception(
            '/opt/data/cimis/union_city-hourly.nc start date is after simulation start date'
        )
    if uc_end < end_date:
        raise Exception(
            '/opt/data/cimis/union_city-hourly.nc end date is before simulation end date'
        )
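An illustrative check (my addition) of the Date parsing above. It assumes the CIMIS Date strings look like 'YYYY-MM-DD HHMM'; that format is an assumption, not confirmed by this excerpt.

import numpy as np

s = '2016-04-28 0130'  # hypothetical CIMIS Date string
stamp = (np.datetime64(s[0:10]) +
         np.timedelta64(int(s[11:13]), 'h') +
         np.timedelta64(int(s[13:]), 'm'))
assert stamp == np.datetime64('2016-04-28T01:30')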

    # https://cals.arizona.edu/azmet/et1.htm
    # says to divide ETo by 0.7 in the cool period to get pan evaporation,
    # and by 0.6 in the warm period.

    temps = utils.fill_invalid(union_city.HlyAirTmp.values)
    temp_zscore = ((temps - temps.mean()) / temps.std()).clip(-1, 1)
    # score of 1 means warm temperature
    factors = np.interp(temp_zscore, [-1, 1], [0.7, 0.6])
    union_city['HlyEvap'] = union_city.HlyEto / factors

    union_city.time.values += np.timedelta64(8, 'h')
    return union_city
def build_data(ping_xyt_template, ds_strong, xyt0, rx_xy):
    matrix = ds_strong.matrix.values
    temps = ds_strong.temp.values

    tx_beacon = np.zeros(matrix.shape[0], np.int32)
    for p, tag in enumerate(ds_strong.tag.values):
        tx_beacon[p] = 1 + np.nonzero(ds_strong.rx_beacon.values == tag)[0][0]

    rx_c = seawater.svel(0, ds_strong['temp'], 0) / time_scale
    # occasional missing data...
    rx_c = utils.fill_invalid(rx_c, axis=1)

    data = dict(
        Np=matrix.shape[0],
        Nb=matrix.shape[1],
        rx_t=(matrix - xyt0[2]) * time_scale,
        rx_c=rx_c,
        rx_x=rx_xy[:, 0] - xyt0[0],
        rx_y=rx_xy[:, 1] - xyt0[1],
        tx_x=ping_xyt_template[:, 0] - xyt0[0],
        tx_y=ping_xyt_template[:, 1] - xyt0[1],
        # tx_t=ping_xyt_template[:,2]-xyt0[2],
        tx_beacon=tx_beacon,
        sigma_t=0.0005 * time_scale,  # 0.5ms timing precision
    )

    # explicitly pass in the indexing for matrix to linear
    Nb = data['Nb']
    dist_k = np.zeros((Nb, Nb), np.int32)
    for ai in range(Nb):
        for bi in range(ai + 1, Nb):
            a = ai + 1
            b = bi + 1
            k = (Nb * (Nb - 1)) // 2 - ((Nb - a + 1) * (Nb - a)) // 2 + b - a
            dist_k[ai, bi] = dist_k[bi, ai] = k
    data['dist_k'] = dist_k
    data['Ndist'] = (Nb * (Nb - 1)) // 2
    return data
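A small self-check, not in the original, confirming that the closed-form index used for dist_k matches the usual 1-based condensed upper-triangle ordering (the order produced by itertools.combinations over beacon labels 1..Nb):

from itertools import combinations

Nb = 5
# expected condensed index for each ordered pair (a, b), a < b, starting at 1
expected = {pair: k for k, pair in
            enumerate(combinations(range(1, Nb + 1), 2), start=1)}
for (a, b), k_expected in expected.items():
    k = (Nb * (Nb - 1)) // 2 - ((Nb - a + 1) * (Nb - a)) // 2 + b - a
    assert k == k_expected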
Example #7
missing=np.nan*np.ones(ntimes)

for traj_i in utils.progress(range(len(trimmed_with_hits))):
    traj,hits,T = trimmed_with_hits[traj_i]
    rec=dict(traj=traj,
             hits=hits,
             T=T)
    # Additional data:

    # Some trajectories have no hits; others have only a single
    # hit or a cluster of hits.
    measured=np.nonzero(hits>=0)[0]

    # This will remain all nan if there is no valid data in T
    rec['Tfill']=utils.fill_invalid(T)
    if len(measured)==0:
        # Save on memory when there is no data.
        rec['fill_dist']=missing[:len(T)]
        rec['dTdt']=missing[:len(T)]
    else:
        # trajectory time in seconds
        t_s=(traj['t']-traj['t'][0])/np.timedelta64(1,'s')
        # just the times that were observed
        t_measured=t_s[measured]
        # seconds between each point and the nearest observation
        fill_dist=t_s - utils.nearest_val(t_measured,t_s)
        # i.e. the signed offset in seconds from the nearest-in-time measurement
        rec['fill_dist']=fill_dist

        if len(measured)>1:
Example #8
# Final formulae:
# u_lagoon=0.307*u_haf - 0.517*v_haf
# v_lagoon=0.097*u_haf + 0.673*v_haf

##

# Generate a single csv with the predicted values:
ds_pred=xr.Dataset()
ds_pred['time']=('time',), ds_haf.time.values

u_coeff_u= 0.307
u_coeff_v=-0.517
v_coeff_u= 0.097
v_coeff_v= 0.673

ds_pred['u_wind']=('time',),utils.fill_invalid( (u_coeff_u*ds_haf['u10'] + u_coeff_v*ds_haf['v10']).values )
ds_pred['u_wind'].attrs['units']='m s-1'
ds_pred['u_wind'].attrs['desc']=f'extrapolated wind from ASOS HAF station, {u_coeff_u}*u10 + {u_coeff_v}*v10'

ds_pred['v_wind']=('time',),utils.fill_invalid( (v_coeff_u*ds_haf['u10'] + v_coeff_v*ds_haf['v10']).values )
ds_pred['v_wind'].attrs['units']='m s-1'
ds_pred['v_wind'].attrs['desc']=f'extrapolated wind from ASOS HAF station, {v_coeff_u}*u10 + {v_coeff_v}*v10'

ds_pred['T']=('time',), utils.fill_invalid(ds_haf['Ta'].values)
ds_pred['T'].attrs.update(ds_haf['Ta'].attrs)
ds_pred['rh']=('time',), utils.fill_invalid(ds_haf['rh'].values)
ds_pred['rh'].attrs.update(ds_haf['rh'].attrs)

##
ds_pred.to_netcdf('lagoon-met-updated.nc',mode='w')
Example #9
        # get the data into a monthly time series before trying to fit seasonal cycle
        valid = np.isfinite(fld_in.values)
        absmonth_mean=bin_mean(absmonth[valid],fld_in.values[valid])
        month_mean=bin_mean(month[valid],fld_in.values[valid])
        
        if np.sum(np.isfinite(month_mean)) < 12:
            print("Insufficient data for seasonal trends - will fill with sample mean")
            trend_and_season=np.nanmean(month_mean) * np.ones(len(dns))
            t_and_s_flag=FLAG_MEAN
        else:
            # fit long-term trend and a stationary seasonal cycle
            # this removes both the seasonal cycle and the long-term mean,
            # leaving just the trend
            trend_hf=fld_in.values - month_mean[month]
            lp = filters.lowpass_fir(trend_hf,lowpass_days,nan_weight_threshold=0.01)
            trend = utils.fill_invalid(lp)
            # recombine with the long-term mean and monthly trend 
            # to get the fill values.
            trend_and_season = trend + month_mean[month]
            t_and_s_flag=FLAG_SEASONAL_TREND

        # long gaps are mostly filled by trend and season
        gaps=mark_gaps(dns,valid,shortgap_days,include_ends=True) 
        fld_in.values[gaps] = trend_and_season[gaps]
        fld_flag.values[gaps] = t_and_s_flag

        still_missing=np.isnan(fld_in.values)
        fld_in.values[still_missing] = utils.fill_invalid(fld_in.values)[still_missing]
        fld_flag.values[still_missing] = FLAG_INTERP

        # Make sure all flows are nonnegative
def effective_clock_offset(rxs, ds_tot):
    ds = ds_tot.sel(rx=rxs)
    # weird. sometimes adds an extra array(..,dtype=object) layer
    ds['rx_beacon'] = ('rx', ), [
        ds_tot.rx_beacon.sel(rx=rx).item() for rx in rxs
    ]

    beacon_to_xy = dict([(ds.rx_beacon.values[i], (ds.rx_x.values[i],
                                                   ds.rx_y.values[i]))
                         for i in range(ds.dims['rx'])])

    # both beacons heard it
    is_multirx = (np.isfinite(ds.matrix).sum(axis=1) > 1).values
    # it came from one of them.
    is_selfaware = np.zeros_like(is_multirx)
    for i, tag in enumerate(ds.tag.values):
        if tag not in beacon_to_xy:
            continue
        else:
            # And did the rx see itself?
            local_rx = np.nonzero(ds.rx_beacon.values == tag)[0]
            if local_rx.size and np.isfinite(ds.matrix.values[i, local_rx[0]]):
                is_selfaware[i] = True

    sel_pings = is_multirx & is_selfaware

    ds_strong = ds.isel(index=sel_pings)

    matrix = ds_strong.matrix.values
    rx_xy = np.c_[ds_strong.rx_x.values, ds_strong.rx_y.values]

    temps = ds_strong.temp.values
    tx_beacon = np.zeros(matrix.shape[0], np.int32)
    for p, tag in enumerate(ds_strong.tag.values):
        tx_beacon[p] = np.nonzero(ds_strong.rx_beacon.values == tag)[0][0]

    rx_c = seawater.svel(0, ds_strong['temp'], 0) / time_scale
    # occasional missing data...
    rx_c = utils.fill_invalid(rx_c, axis=1)

    # do the calculation manually for two rxs:

    ab_mask = (tx_beacon == 0)
    ba_mask = (tx_beacon == 1)

    assert np.all(ab_mask | ba_mask)  # somebody has to hear it

    # how much 'later' b saw it than a
    t = matrix[:, 0]  # take rx 0 as the reference

    # without rx_c, transit time varies from 0.051970 to 0.051945
    # for a variation of 0.48ppt
    # with rx_c, transit time distance varies 0.07721 to 0.07710
    # for a variation of 1.4ppt, and it introduces some step changes.
    # so for this application it's better to have the smaller variation
    # that also changes smoothly, rather than correct back to a precise
    # distance
    deltas = (matrix[:, 1] - matrix[:, 0])  # * rx_c.mean(axis=1)

    # partial time series
    dt_ab = deltas[ab_mask]
    dt_ba = deltas[ba_mask]

    dt_ab_dense = np.interp(t, t[ab_mask], deltas[ab_mask])
    dt_ba_dense = np.interp(t, t[ba_mask], deltas[ba_mask])
    # proxy for the uncertainty.  Not scaled!
    dt_ab_dense_std = np.abs(t - utils.nearest_val(t[ab_mask], t))
    dt_ba_dense_std = np.abs(t - utils.nearest_val(t[ba_mask], t))

    dt_offset = 0.5 * (dt_ab_dense + dt_ba_dense)  # sum of clock offset and travel asymmetry
    dt_transit = 0.5 * (dt_ab_dense - dt_ba_dense)  # transit time
    dt_std = dt_ab_dense_std + dt_ba_dense_std

    ds = xr.Dataset()
    ds['time'] = ('time', ), t
    ds['offset'] = ('time', ), dt_offset
    ds['transit'] = ('time', ), dt_transit
    ds['error'] = ('time', ), dt_std
    ds['c'] = ('time', ), rx_c.mean(axis=1)
    ds['rx'] = ('rx', ), rxs

    return ds
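A worked numeric example, added here for clarity, of the offset/transit decomposition at the end of the function. If rx 1's clock runs `offset` seconds ahead of rx 0 and the one-way acoustic travel time is `transit`, the two ping directions give deltas of transit+offset and offset-transit, so the half-sum and half-difference recover the two terms:

offset = 0.013   # rx1 clock ahead of rx0 by 13 ms (illustrative numbers)
transit = 0.052  # one-way travel time, seconds

dt_ab = transit + offset   # ping from beacon 0: (rx1 time) - (rx0 time)
dt_ba = offset - transit   # ping from beacon 1: (rx1 time) - (rx0 time)

assert abs(0.5 * (dt_ab + dt_ba) - offset) < 1e-12    # recovers the clock offset
assert abs(0.5 * (dt_ab - dt_ba) - transit) < 1e-12   # recovers the transit time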
Example #11
def fill_and_flag(ds,fld,site,
                  lowpass_days=3*365,
                  shortgap_days=45 # okay to interpolate a little over a month?
              ):
    """
    Update a single field for a single site in ds, by
    extracting long-term trends, seasonal cycle, and
    interpolating between these and measured data
    """
    # first, create mapping from time index to absolute month
    dts=utils.to_datetime(dns)
    absmonth = [12*dt.year + (dt.month-1) for dt in dts]
    absmonth = np.array(absmonth) - dts[0].year*12
    month=absmonth%12

    fld_in=ds[fld].sel(site=site)
    orig_values=fld_in.values
    fld_flag=ds[fld+'_flag'].sel(site=site)

    # boolean mask of points that were filled on a previous pass
    prefilled=(fld_flag.values & (FLAG_SEASONAL_TREND | FLAG_INTERP | FLAG_MEAN))!=0
    fld_in.values[prefilled]=np.nan # reset this function's earlier work in case it's run multiple times
    n_valid=np.sum(~fld_in.isnull())        

    if n_valid==0:
        msg=" --SKIPPING--"
    else:
        msg=""
    print("   field: %s  %d/%d valid input points %s"%(fld,n_valid,len(fld_in),msg))

    if n_valid==0:
        return

    # get the data into a monthly time series before trying to fit seasonal cycle
    valid = np.isfinite(fld_in.values)
    absmonth_mean=bin_mean(absmonth[valid],fld_in.values[valid])
    month_mean=bin_mean(month[valid],fld_in.values[valid])

    if np.sum(np.isfinite(month_mean)) < 12:
        print("Insufficient data for seasonal trends - will fill with sample mean")
        trend_and_season=np.nanmean(month_mean) * np.ones(len(dns))
        t_and_s_flag=FLAG_MEAN
    else:
        # fit long-term trend and a stationary seasonal cycle
        # this removes both the seasonal cycle and the long-term mean,
        # leaving just the trend
        trend_hf=fld_in.values - month_mean[month]
        lp = filters.lowpass_fir(trend_hf,lowpass_days,nan_weight_threshold=0.01)
        trend = utils.fill_invalid(lp)
        # recombine with the long-term mean and monthly trend 
        # to get the fill values.
        trend_and_season = trend + month_mean[month]
        t_and_s_flag=FLAG_SEASONAL_TREND

    # long gaps are mostly filled by trend and season
    gaps=mark_gaps(dns,valid,shortgap_days,include_ends=True) 
    fld_in.values[gaps] = trend_and_season[gaps]
    fld_flag.values[gaps] = t_and_s_flag

    still_missing=np.isnan(fld_in.values)
    fld_in.values[still_missing] = utils.fill_invalid(fld_in.values)[still_missing]
    fld_flag.values[still_missing] = FLAG_INTERP

    # Make sure all flows are nonnegative
    negative=fld_in.values<0.0
    fld_in.values[negative]=0.0
    fld_flag.values[negative] |= FLAG_CLIPPED

    if 0: # illustrative(?) plots
        fig,ax=plt.subplots()
        ax.plot(dns,orig_values,'m-o',label='Measured %s'%fld)
        ax.plot(dns,fld_in,'k-',label='Final %s'%fld,zorder=5)
        # ax.plot(dns,month_mean[month],'r-',label='Monthly Clim.')
        # ax.plot(dns,trend_hf,'b-',label='Trend w/HF')
        ax.plot(dns,trend,'g-',lw=3,label='Trend')
        ax.plot(dns,trend_and_season,color='orange',label='Trend and season')
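bin_mean() is called above but not shown in this excerpt. A minimal sketch of what it presumably does (group values by integer bin index and average, leaving NaN for empty bins); this is my assumption, not the original implementation:

import numpy as np

def bin_mean(bins, values, n_bins=None):
    """Mean of `values` within each integer bin; NaN where a bin is empty."""
    bins = np.asarray(bins)
    values = np.asarray(values)
    if n_bins is None:
        n_bins = bins.max() + 1
    sums = np.bincount(bins, weights=values, minlength=n_bins)
    counts = np.bincount(bins, minlength=n_bins)
    with np.errstate(invalid='ignore', divide='ignore'):
        return np.where(counts > 0, sums / counts, np.nan)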
Example #12
##

lat = np.asarray(ds["latitude"])
lon = np.asarray(ds["longitude"])
salt = np.asarray(ds["salinity"])
temp = np.asarray(ds["temperature"])
depth = np.asarray(ds["depth"])
dist = np.asarray(ds["Distance_from_station_36"])
time = np.asarray(ds["time"])
# These times come in as UTC; the date math below works in PST,
# so shift back by the 8 hour offset.
times = -8 * 3600 + (
    time - np.datetime64('1970-01-01T00:00:00')) / np.timedelta64(1, 's')

### fix nans in time record and fill in with valid times
tvalid = utils.fill_invalid(times, axis=-1)
tvalid = utils.fill_invalid(tvalid, axis=1)
tvalid = utils.fill_invalid(tvalid, axis=0)
times = tvalid

### Load in his files, define variables, adjust times to same reference time as USGS data

temp = True  # NB: this flag reuses the name of the temperature array loaded above

his = nc.MFDataset(path + hisfile)
xcoor = his.variables["station_x_coordinate"][:]
ycoor = his.variables["station_y_coordinate"][:]

# get reference time from the mdu:
t_ref, t_start, t_stop = mdu.time_range()
dref = utils.to_unix(t_ref)  # dt.datetime.strptime(emdu['time','refdate']
Example #13
def fill_tidal_data(da,fill_time=True):
    """
    Extract tidal harmonics from an incomplete xarray DataArray, use
    those to fill in the gaps and return a complete DataArray.

    Uses all 37 of the standard NOAA harmonics, which may not be stable
    with short time series.
    
    A 5-day lowpass is removed from the harmonic decomposition, and added
    back in afterwards.

    Assumes that the DataArray has a 'time' coordinate with datetime64 values.

    The time dimension must be dense enough to extract an exact time step
    
    If fill_time is True, holes in the time coordinate will be filled, too.
    """
    diffs=np.diff(da.time)
    dt=np.median(diffs)

    if fill_time:
        gaps=np.nonzero(diffs>1.5*dt)[0]
        pieces=[]
        last=0
        for gap_i in gaps:
            # gap_i=10 means that the 10th diff was too big
            # that means the jump from 10 to 11 was too big
            # the preceding piece should go through 9, so
            # exclusive of gap_i
            pieces.append(da.time.values[last:gap_i])
            pieces.append(np.arange( da.time.values[gap_i],
                                     da.time.values[gap_i+1],
                                     dt))
            last=gap_i+1
        pieces.append(da.time.values[last:])
        dense_times=np.concatenate(pieces)
        dense_values=np.nan*np.zeros(len(dense_times),np.float64)
        dense_values[ np.searchsorted(dense_times,da.time.values) ] = da.values
        da=xr.DataArray(dense_values,
                        dims=['time'],coords=[dense_times])
    else:
        pass 

    dnums=utils.to_dnum(da.time)
    data=da.values

    # lowpass at about 5 days, splitting out low/high components
    winsize=int( np.timedelta64(5,'D') / dt )
    data_lp=filters.lowpass_fir(data,winsize)
    data_hp=data - data_lp

    valid=np.isfinite(data_hp)
    omegas=harm_decomp.noaa_37_omegas() # as rad/sec

    harmonics=harm_decomp.decompose(dnums[valid]*86400,data_hp[valid],omegas)

    dense=harm_decomp.recompose(dnums*86400,harmonics,omegas)

    data_recon=utils.fill_invalid(data_lp) + dense

    data_filled=data.copy()
    missing=np.isnan(data_filled)
    data_filled[missing] = data_recon[missing]

    fda=xr.DataArray(data_filled,coords=[da.time],dims=['time'])
    return fda
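A hypothetical usage example (not from the source): fill a gap in a synthetic semidiurnal water-level record. It assumes the stompy utils/filters/harm_decomp modules used by fill_tidal_data are importable.

import numpy as np
import xarray as xr

# month of 6-minute data with a roughly M2-period signal
t = np.arange(np.datetime64('2016-01-01'), np.datetime64('2016-02-01'),
              np.timedelta64(6, 'm'))
hours = (t - t[0]) / np.timedelta64(1, 'h')
eta = np.cos(2 * np.pi * hours / 12.4206)
eta[1000:1500] = np.nan   # ~2 day hole in the record

da = xr.DataArray(eta, dims=['time'], coords=[t])
filled = fill_tidal_data(da)
assert np.isfinite(filled.values).all()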
Example #14
        tx_beacon[i]=np.nonzero(ds.rx_beacon.values==tag)[0][0]

    # And did the rx see itself?
    is_selfaware[i]=np.isfinite(ds.matrix.values[i,tx_beacon[i]])
ds['tx_beacon']=('index',),tx_beacon
is_beacon=tx_beacon>=0

temps=ds.temp.values

# WHOA -- comparisons to the beacon-beacon transits suggest
# that this has some significant error.  The inferred and
# calculated speeds of sound are close-ish if temperature is
# offset 4.5 degC.
rx_c=seawater.svel(0,ds['temp']-4.5,0) 
# occasional missing data...
rx_c=utils.fill_invalid(rx_c,axis=1)

ds['c']=('index','rx'),rx_c

## 
sel_pings=is_multirx&is_selfaware

ds_strong=ds.isel(index=sel_pings)


## 
# Calculate time deltas for each ordered pair of receivers
Nping=ds_strong.dims['index']
Nrx=ds_strong.dims['rx']
tx_beacon=ds_strong.tx_beacon.values
Example #15
"""
A first cut at direct precipitation and evaporation.
"""
import os

import numpy as np
import xarray as xr
from stompy import utils

# Last SUNTANS run had used NARR
# it's way way coarse.  Seems better to use an in-Bay climatology
# than to use NARR.

##

union_city = xr.open_dataset('/opt/data/cimis/union_city-hourly-2001-2016.nc')

##

temps = utils.fill_invalid(union_city.HlyAirTmp.values)
temp_zscore = ((temps - temps.mean()) / temps.std()).clip(-1, 1)
# score of 1 means warm temperature
factors = np.interp(temp_zscore, [-1, 1], [0.7, 0.6])
union_city['HlyEvap'] = union_city.HlyEto / factors

##

narr_data_dir = "/opt/data/suntans/spinupdated/suntans-spinup/narr-data/data/"

precip = xr.open_dataset(os.path.join(narr_data_dir, "apcp.mon.mean.nc"),
                         decode_cf=False)
# bug in the nc file
if precip.apcp._FillValue != precip.apcp.missing_value:
    precip.apcp.attrs['_FillValue'] = precip.apcp.missing_value
precip = xr.decode_cf(precip)
Example #16
# dt_steps and expanded_idx come from the preceding (omitted) portion of the script
track['step']=np.cumsum(np.r_[0,dt_steps]).astype(np.int32)
track_exp=pd.DataFrame()
track_exp['step']=np.arange(expanded_idx.max()+1)
track2=track_exp.merge(right=track,on='step', how='left')

##


data=track2.loc[:,['x','y']]

data.loc[:,'x'] -= data.x.mean()
data.loc[:,'y'] -= data.y.mean()

# Make life a little simpler and just fill
data.loc[:,'x'] = utils.fill_invalid(data.loc[:,'x'].values)
data.loc[:,'y'] = utils.fill_invalid(data.loc[:,'y'].values)

##

# Sort of following https://www.statsmodels.org/stable/examples/notebooks/generated/tsa_arma_0.html

import statsmodels.api as sm
from statsmodels.graphics.api import qqplot
from statsmodels import tsa

data.plot(figsize=(12,8))

## 
dta=data['x']
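A possible continuation (my sketch, loosely following the statsmodels notebook linked above; not part of the original script): fit a low-order AR model to the filled x series and inspect the residuals.

from statsmodels.tsa.arima.model import ARIMA

model = ARIMA(dta.values, order=(2, 0, 0))   # AR(2), no differencing, no MA terms
res = model.fit()
print(res.summary())

qqplot(res.resid, line='q', fit=True)   # qqplot imported above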