def fill_data(da):
    valid = np.isfinite(da.values)
    valid_times = da.time[valid]
    dt = np.median(np.diff(da.time))
    dt_gap = np.diff(da.time[valid]).max()
    if dt_gap > dt:
        log.warning("%s: gaps up to %.1f minutes" % (da.name, dt_gap / np.timedelta64(60, 's')))
    da.values = utils.fill_invalid(da.values)
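##
# A minimal usage sketch for fill_data() above (not from the original code).
# It assumes stompy's utils.fill_invalid and a module-level logger 'log' are
# in scope; the synthetic DataArray is purely illustrative.
import numpy as np
import xarray as xr

t = np.arange(np.datetime64('2016-01-01'), np.datetime64('2016-01-02'),
              np.timedelta64(15, 'm'))
vals = np.sin(np.linspace(0, 6, len(t)))
vals[20:30] = np.nan  # a ~2.5 hour hole, larger than the 15-minute median step
da = xr.DataArray(vals, dims=['time'], coords=[t], name='demo')
fill_data(da)  # logs the gap size, then fills da.values in place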
def lp(x):
    x = utils.fill_invalid(x)
    dn = utils.to_dnum(t)  # note: relies on a time vector t from the enclosing scope
    # cutoff for low-pass filtering; results are only kept at least 2*cutoff days
    # after the start and before the end of the datenums.
    cutoff = 36 / 24.
    x_lp = filters.lowpass(x, dn, cutoff=cutoff)
    mask = (dn < dn[0] + 2 * cutoff) | (dn > dn[-1] - 2 * cutoff)
    x_lp[mask] = np.nan
    return x_lp
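##
# Hedged usage sketch for lp() above (not part of the original): note that lp()
# reads the time vector t from its enclosing scope, so t must be defined in the
# same module.  The series 'stage' is hypothetical.
t = np.arange(np.datetime64('2016-03-01'), np.datetime64('2016-04-01'),
              np.timedelta64(1, 'h'))
stage = np.random.randn(len(t)).cumsum()
stage[100:130] = np.nan   # lp() fills gaps before filtering
stage_lp = lp(stage)      # 36-hour lowpass; NaN within 2*cutoff (3 days) of each end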
def samples_from_usgs_erddap(run_start, field='salinity'):
    """
    DEPRECATED.  Better to use the dynamically cached data from samples_from_usgs().

    Pull some USGS transect data and return a set of 2D salinity samples
    appropriate for the given date.

    Caveats: this currently uses ERDDAP behind the scenes, which does not have
    the most recent data.  If the date cannot be matched within 30 days, a prior
    year is used instead.

    field: name of the field to pull from usgs_cruises, e.g. 'salinity' or
    'temperature'.
    """
    # This copy of the USGS data ends early:
    usgs_data_end = np.datetime64('2016-04-28')
    usgs_pad = np.timedelta64(30, 'D')

    usgs_target = run_start
    # so we may have to grab a previous year's cruise and pretend
    while usgs_target + usgs_pad > usgs_data_end:
        usgs_target -= np.timedelta64(365, 'D')

    usgs_cruises = usgs_sfbay.cruise_dataset(usgs_target - usgs_pad,
                                             usgs_target + usgs_pad)

    # lame filling
    scal3d = usgs_cruises[field]
    scal2d = scal3d.mean(dim='prof_sample')
    assert scal2d.dims[0] == 'date'
    scal2d_fill = utils.fill_invalid(scal2d.values, axis=0)

    scal_f = interp1d(utils.to_dnum(scal2d.date.values),
                      scal2d_fill,
                      axis=0, bounds_error=False)(utils.to_dnum(usgs_target))

    usgs_init_scal = np.c_[scal2d.x.values, scal2d.y.values, scal_f]
    return usgs_init_scal
def load_cimis(start_date, end_date):
    union_city = cimis.cimis_fetch_to_xr(stations=171,
                                         start_date=start_date,
                                         end_date=end_date,
                                         cache_dir=common.cache_dir)
    # union_city = xr.open_dataset('/opt/data/cimis/union_city-hourly-2001-2016.nc')

    # https://cals.arizona.edu/azmet/et1.htm
    # says: during the cool period divide ETo by 0.7 to get pan evaporation,
    # during the warm period divide by 0.6.
    temps = utils.fill_invalid(union_city.HlyAirTmp.values)
    temp_zscore = ((temps - temps.mean()) / temps.std()).clip(-1, 1)
    # z-score of 1 means warm temperature
    factors = np.interp(temp_zscore, [-1, 1], [0.7, 0.6])
    union_city['HlyEvap'] = union_city.HlyEto / factors

    # shift times 8 hours, presumably PST -> UTC
    union_city.time.values += np.timedelta64(8, 'h')
    return union_city
def load_cimis(start_date, end_date):
    # Undo the change Rusty made, since we don't want to deal with putting a
    # CIMIS key in the local environment.  Also change the filename so it is
    # not limited to 2001-2016, and check that the requested dates are covered.
    # ([email protected])
    # union_city = cimis.cimis_fetch_to_xr(stations=171,
    #                                      start_date=start_date,
    #                                      end_date=end_date,
    #                                      cache_dir=common.cache_dir)
    # union_city = xr.open_dataset('/opt/data/cimis/union_city-hourly-2001-2016.nc')
    union_city = xr.open_dataset('/opt/data/cimis/union_city-hourly.nc')

    # check that the date range is ok
    uc_start = (np.datetime64(union_city.Date.values[0][0:10])
                + np.timedelta64(int(union_city.Date.values[0][11:13]), 'h')
                + np.timedelta64(int(union_city.Date.values[0][13:]), 'm'))
    uc_end = (np.datetime64(union_city.Date.values[-1][0:10])
              + np.timedelta64(int(union_city.Date.values[-1][11:13]), 'h')
              + np.timedelta64(int(union_city.Date.values[-1][13:]), 'm'))
    if uc_start > start_date:
        raise Exception(
            '/opt/data/cimis/union_city-hourly.nc start date is after simulation start date')
    if uc_end < end_date:
        raise Exception(
            '/opt/data/cimis/union_city-hourly.nc end date is before simulation end date')

    # https://cals.arizona.edu/azmet/et1.htm
    # says: during the cool period divide ETo by 0.7 to get pan evaporation,
    # during the warm period divide by 0.6.
    temps = utils.fill_invalid(union_city.HlyAirTmp.values)
    temp_zscore = ((temps - temps.mean()) / temps.std()).clip(-1, 1)
    # z-score of 1 means warm temperature
    factors = np.interp(temp_zscore, [-1, 1], [0.7, 0.6])
    union_city['HlyEvap'] = union_city.HlyEto / factors

    # shift times 8 hours, presumably PST -> UTC
    union_city.time.values += np.timedelta64(8, 'h')
    return union_city
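##
# A small worked example of the ETo -> pan evaporation conversion used in
# load_cimis() above (not part of the original script): the divisor is
# interpolated between 0.7 (cool hours, z-score <= -1) and 0.6 (warm hours,
# z-score >= +1), so cool-period ETo is boosted slightly more than warm-period ETo.
import numpy as np

temps = np.array([5.0, 15.0, 25.0])              # degC, hypothetical hourly temperatures
z = ((temps - temps.mean()) / temps.std()).clip(-1, 1)
divisors = np.interp(z, [-1, 1], [0.7, 0.6])     # -> [0.7, 0.65, 0.6]
eto = np.array([0.1, 0.2, 0.3])                  # mm/hr, hypothetical ETo
evap = eto / divisors                            # estimated pan evaporation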
def build_data(ping_xyt_template, ds_strong, xyt0, rx_xy):
    matrix = ds_strong.matrix.values
    temps = ds_strong.temp.values

    tx_beacon = np.zeros(matrix.shape[0], np.int32)
    for p, tag in enumerate(ds_strong.tag.values):
        tx_beacon[p] = 1 + np.nonzero(ds_strong.rx_beacon.values == tag)[0][0]

    rx_c = seawater.svel(0, ds_strong['temp'], 0) / time_scale
    # occasional missing data...
    rx_c = utils.fill_invalid(rx_c, axis=1)

    data = dict(Np=matrix.shape[0],
                Nb=matrix.shape[1],
                rx_t=(matrix - xyt0[2]) * time_scale,
                rx_c=rx_c,
                rx_x=rx_xy[:, 0] - xyt0[0],
                rx_y=rx_xy[:, 1] - xyt0[1],
                tx_x=ping_xyt_template[:, 0] - xyt0[0],
                tx_y=ping_xyt_template[:, 1] - xyt0[1],
                # tx_t=ping_xyt_template[:,2]-xyt0[2],
                tx_beacon=tx_beacon,
                sigma_t=0.0005 * time_scale,  # 0.5 ms timing precision
                )

    # explicitly pass in the matrix-to-linear indexing
    Nb = data['Nb']
    dist_k = np.zeros((Nb, Nb), np.int32)
    for ai in range(Nb):
        for bi in range(ai + 1, Nb):
            a = ai + 1
            b = bi + 1
            # 1-based index into the packed upper triangle of the distance matrix
            k = (Nb * (Nb - 1)) // 2 - ((Nb - a + 1) * (Nb - a)) // 2 + b - a
            dist_k[ai, bi] = dist_k[bi, ai] = k
    data['dist_k'] = dist_k
    data['Ndist'] = (Nb * (Nb - 1)) // 2
    return data
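##
# Sanity check (not from the original) for the 1-based packed-triangle index
# used in build_data(): for 1 <= a < b <= Nb, k should enumerate
# 1..Nb*(Nb-1)/2 exactly once, matching data['Ndist'].
Nb = 4
ks = []
for a in range(1, Nb + 1):
    for b in range(a + 1, Nb + 1):
        k = (Nb * (Nb - 1)) // 2 - ((Nb - a + 1) * (Nb - a)) // 2 + b - a
        ks.append(k)
assert sorted(ks) == list(range(1, (Nb * (Nb - 1)) // 2 + 1))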
missing = np.nan * np.ones(ntimes)

for traj_i in utils.progress(range(len(trimmed_with_hits))):
    traj, hits, T = trimmed_with_hits[traj_i]
    rec = dict(traj=traj, hits=hits, T=T)

    # Additional data:
    # some trajectories have no hits,
    # some have only a single hit or cluster of hits.
    measured = np.nonzero(hits >= 0)[0]

    # This will remain all nan if there is no valid data in T
    rec['Tfill'] = utils.fill_invalid(T)

    if len(measured) == 0:
        # Save on memory when there is no data.
        rec['fill_dist'] = missing[:len(T)]
        rec['dTdt'] = missing[:len(T)]
    else:
        # trajectory time in seconds
        t_s = (traj['t'] - traj['t'][0]) / np.timedelta64(1, 's')
        # just the times that were observed
        t_measured = t_s[measured]
        # seconds between each point and the nearest observation
        fill_dist = t_s - utils.nearest_val(t_measured, t_s)
        # that's the signed offset in seconds from the nearest-in-time measurement
        rec['fill_dist'] = fill_dist

        if len(measured) > 1:
# Final formulae:
#   u_lagoon = 0.307*u_haf - 0.517*v_haf
#   v_lagoon = 0.097*u_haf + 0.673*v_haf

##

# Generate a single csv with the predicted values:
ds_pred = xr.Dataset()
ds_pred['time'] = ('time',), ds_haf.time.values

u_coeff_u = 0.307
u_coeff_v = -0.517
v_coeff_u = 0.097
v_coeff_v = 0.673

ds_pred['u_wind'] = ('time',), utils.fill_invalid(
    (u_coeff_u*ds_haf['u10'] + u_coeff_v*ds_haf['v10']).values)
ds_pred['u_wind'].attrs['units'] = 'm s-1'
ds_pred['u_wind'].attrs['desc'] = f'extrapolated wind from ASOS HAF station, {u_coeff_u}*u10 + {u_coeff_v}*v10'

ds_pred['v_wind'] = ('time',), utils.fill_invalid(
    (v_coeff_u*ds_haf['u10'] + v_coeff_v*ds_haf['v10']).values)
ds_pred['v_wind'].attrs['units'] = 'm s-1'
ds_pred['v_wind'].attrs['desc'] = f'extrapolated wind from ASOS HAF station, {v_coeff_u}*u10 + {v_coeff_v}*v10'

ds_pred['T'] = ('time',), utils.fill_invalid(ds_haf['Ta'].values)
ds_pred['T'].attrs.update(ds_haf['Ta'].attrs)

ds_pred['rh'] = ('time',), utils.fill_invalid(ds_haf['rh'].values)
ds_pred['rh'].attrs.update(ds_haf['rh'].attrs)

##

ds_pred.to_netcdf('lagoon-met-updated.nc', mode='w')
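##
# The 2x2 coefficients above presumably come from regressing observed lagoon
# winds onto the HAF station winds; that fit is not shown in this excerpt.
# A hedged sketch of how such coefficients could be estimated with ordinary
# least squares -- u_obs and v_obs are hypothetical observed lagoon wind
# components aligned in time with ds_haf:
import numpy as np

G = np.c_[ds_haf['u10'].values, ds_haf['v10'].values]   # predictors
valid = np.isfinite(G).all(axis=1) & np.isfinite(u_obs) & np.isfinite(v_obs)
u_coef = np.linalg.lstsq(G[valid], u_obs[valid], rcond=None)[0]  # analogous to (u_coeff_u, u_coeff_v)
v_coef = np.linalg.lstsq(G[valid], v_obs[valid], rcond=None)[0]  # analogous to (v_coeff_u, v_coeff_v)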
# get the data into a monthly time series before trying to fit a seasonal cycle
valid = np.isfinite(fld_in.values)
absmonth_mean = bin_mean(absmonth[valid], fld_in.values[valid])
month_mean = bin_mean(month[valid], fld_in.values[valid])

if np.sum(np.isfinite(month_mean)) < 12:
    print("Insufficient data for seasonal trends - will fill with sample mean")
    trend_and_season = np.nanmean(month_mean) * np.ones(len(dns))
    t_and_s_flag = FLAG_MEAN
else:
    # fit a long-term trend and a stationary seasonal cycle
    # this removes both the seasonal cycle and the long-term mean,
    # leaving just the trend
    trend_hf = fld_in.values - month_mean[month]
    lp = filters.lowpass_fir(trend_hf, lowpass_days, nan_weight_threshold=0.01)
    trend = utils.fill_invalid(lp)
    # recombine with the long-term mean and monthly climatology
    # to get the fill values.
    trend_and_season = trend + month_mean[month]
    t_and_s_flag = FLAG_SEASONAL_TREND

# long gaps are mostly filled by trend and season
gaps = mark_gaps(dns, valid, shortgap_days, include_ends=True)
fld_in.values[gaps] = trend_and_season[gaps]
fld_flag.values[gaps] = t_and_s_flag

still_missing = np.isnan(fld_in.values)
fld_in.values[still_missing] = utils.fill_invalid(fld_in.values)[still_missing]
fld_flag.values[still_missing] = FLAG_INTERP

# Make sure all flows are nonnegative
def effective_clock_offset(rxs, ds_tot):
    ds = ds_tot.sel(rx=rxs)
    # weird. sometimes adds an extra array(..,dtype=object) layer
    ds['rx_beacon'] = ('rx',), [ds_tot.rx_beacon.sel(rx=rx).item() for rx in rxs]

    beacon_to_xy = dict([(ds.rx_beacon.values[i],
                          (ds.rx_x.values[i], ds.rx_y.values[i]))
                         for i in range(ds.dims['rx'])])

    # both beacons heard it
    is_multirx = (np.isfinite(ds.matrix).sum(axis=1) > 1).values
    # it came from one of them.
    is_selfaware = np.zeros_like(is_multirx)
    for i, tag in enumerate(ds.tag.values):
        if tag not in beacon_to_xy:
            continue
        else:
            # And did the rx see itself?
            local_rx = np.nonzero(ds.rx_beacon.values == tag)[0]
            if local_rx.size and np.isfinite(ds.matrix.values[i, local_rx[0]]):
                is_selfaware[i] = True

    sel_pings = is_multirx & is_selfaware
    ds_strong = ds.isel(index=sel_pings)

    matrix = ds_strong.matrix.values
    rx_xy = np.c_[ds_strong.rx_x.values, ds_strong.rx_y.values]
    temps = ds_strong.temp.values

    tx_beacon = np.zeros(matrix.shape[0], np.int32)
    for p, tag in enumerate(ds_strong.tag.values):
        tx_beacon[p] = np.nonzero(ds_strong.rx_beacon.values == tag)[0][0]

    rx_c = seawater.svel(0, ds_strong['temp'], 0) / time_scale
    # occasional missing data...
    rx_c = utils.fill_invalid(rx_c, axis=1)

    # do the calculation manually for two rxs:
    ab_mask = (tx_beacon == 0)
    ba_mask = (tx_beacon == 1)
    assert np.all(ab_mask | ba_mask)  # somebody has to hear it

    # how much 'later' b saw it than a
    t = matrix[:, 0]  # take rx 0 as the reference

    # without rx_c, transit time varies from 0.051970 to 0.051945,
    # a variation of 0.48 ppt.
    # with rx_c, transit-time distance varies from 0.07721 to 0.07710,
    # a variation of 1.4 ppt, and it introduces some step changes.
    # so for this application it's better to have the smaller variation
    # that also changes smoothly, rather than correct back to a precise
    # distance.
    deltas = (matrix[:, 1] - matrix[:, 0])  # * rx_c.mean(axis=1)

    # partial time series
    dt_ab = deltas[ab_mask]
    dt_ba = deltas[ba_mask]
    dt_ab_dense = np.interp(t, t[ab_mask], deltas[ab_mask])
    dt_ba_dense = np.interp(t, t[ba_mask], deltas[ba_mask])
    # proxy for the uncertainty.  Not scaled!
    dt_ab_dense_std = np.abs(t - utils.nearest_val(t[ab_mask], t))
    dt_ba_dense_std = np.abs(t - utils.nearest_val(t[ba_mask], t))

    dt_offset = 0.5 * (dt_ab_dense + dt_ba_dense)   # sum of clock offset and travel asymmetry
    dt_transit = 0.5 * (dt_ab_dense - dt_ba_dense)  # transit time
    dt_std = dt_ab_dense_std + dt_ba_dense_std

    ds = xr.Dataset()
    ds['time'] = ('time',), t
    ds['offset'] = ('time',), dt_offset
    ds['transit'] = ('time',), dt_transit
    ds['error'] = ('time',), dt_std
    ds['c'] = ('time',), rx_c.mean(axis=1)
    ds['rx'] = ('rx',), rxs
    return ds
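##
# Reasoning behind the half-sum / half-difference step in
# effective_clock_offset(), written out (not part of the original): let delta
# be rx 1's clock offset relative to rx 0 and tau the one-way transit time
# between the two beacons.  A ping from beacon 0 gives
#   dt_ab = (t_tx + tau + delta) - t_tx    = tau + delta
# while a ping from beacon 1 gives
#   dt_ba = (t_tx + delta) - (t_tx + tau)  = delta - tau
# so
#   0.5*(dt_ab + dt_ba) = delta   (clock offset, plus any travel asymmetry)
#   0.5*(dt_ab - dt_ba) = tau     (transit time)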
def fill_and_flag(ds, fld, site,
                  lowpass_days=3*365,
                  shortgap_days=45  # okay to interpolate a little over a month?
                  ):
    """
    Update a single field for a single site in ds, by extracting a long-term
    trend and a seasonal cycle, and interpolating between these and the
    measured data.
    """
    # first, create mapping from time index to absolute month
    # (dns is the module-level array of date numbers matching ds's time coordinate)
    dts = utils.to_datetime(dns)
    absmonth = [12*dt.year + (dt.month - 1) for dt in dts]
    absmonth = np.array(absmonth) - dts[0].year*12
    month = absmonth % 12

    fld_in = ds[fld].sel(site=site)
    orig_values = fld_in.values
    fld_flag = ds[fld + '_flag'].sel(site=site)

    # reset the work of this function in case it's run multiple times
    prefilled = (fld_flag.values & (FLAG_SEASONAL_TREND | FLAG_INTERP | FLAG_MEAN)) > 0
    fld_in.values[prefilled] = np.nan

    n_valid = np.sum(~fld_in.isnull())
    if n_valid == 0:
        msg = " --SKIPPING--"
    else:
        msg = ""
    print("  field: %s  %d/%d valid input points %s" % (fld, n_valid, len(fld_in), msg))
    if n_valid == 0:
        return

    # get the data into a monthly time series before trying to fit a seasonal cycle
    valid = np.isfinite(fld_in.values)
    absmonth_mean = bin_mean(absmonth[valid], fld_in.values[valid])
    month_mean = bin_mean(month[valid], fld_in.values[valid])

    if np.sum(np.isfinite(month_mean)) < 12:
        print("Insufficient data for seasonal trends - will fill with sample mean")
        trend_and_season = np.nanmean(month_mean) * np.ones(len(dns))
        t_and_s_flag = FLAG_MEAN
    else:
        # fit a long-term trend and a stationary seasonal cycle
        # this removes both the seasonal cycle and the long-term mean,
        # leaving just the trend
        trend_hf = fld_in.values - month_mean[month]
        lp = filters.lowpass_fir(trend_hf, lowpass_days, nan_weight_threshold=0.01)
        trend = utils.fill_invalid(lp)
        # recombine with the long-term mean and monthly climatology
        # to get the fill values.
        trend_and_season = trend + month_mean[month]
        t_and_s_flag = FLAG_SEASONAL_TREND

    # long gaps are mostly filled by trend and season
    gaps = mark_gaps(dns, valid, shortgap_days, include_ends=True)
    fld_in.values[gaps] = trend_and_season[gaps]
    fld_flag.values[gaps] = t_and_s_flag

    still_missing = np.isnan(fld_in.values)
    fld_in.values[still_missing] = utils.fill_invalid(fld_in.values)[still_missing]
    fld_flag.values[still_missing] = FLAG_INTERP

    # Make sure all flows are nonnegative
    negative = fld_in.values < 0.0
    fld_in.values[negative] = 0.0
    fld_flag.values[negative] |= FLAG_CLIPPED

    if 0:  # illustrative(?) plots
        fig, ax = plt.subplots()
        ax.plot(dns, orig_values, 'm-o', label='Measured %s' % fld)
        ax.plot(dns, fld_in, 'k-', label='Final %s' % fld, zorder=5)
        # ax.plot(dns, month_mean[month], 'r-', label='Monthly Clim.')
        # ax.plot(dns, trend_hf, 'b-', label='Trend w/HF')
        ax.plot(dns, trend, 'g-', lw=3, label='Trend')
        ax.plot(dns, trend_and_season, color='orange', label='Trend and season')
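##
# bin_mean() is referenced above but not shown in this excerpt.  A minimal
# sketch consistent with how it is called (mean of values grouped by integer
# bin index, NaN for empty bins) might look like the following -- an
# assumption, not the original implementation:
import numpy as np

def bin_mean(bins, values):
    out = np.nan * np.ones(bins.max() + 1)
    for b in np.unique(bins):
        out[b] = values[bins == b].mean()
    return out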
##
lat = np.asarray(ds["latitude"])
lon = np.asarray(ds["longitude"])
salt = np.asarray(ds["salinity"])
temp = np.asarray(ds["temperature"])
depth = np.asarray(ds["depth"])
dist = np.asarray(ds["Distance_from_station_36"])
time = np.asarray(ds["time"])

# These times already come in as UTC.  The date math below does everything in
# PST, so shift back by 8 hours.
times = -8 * 3600 + (time - np.datetime64('1970-01-01T00:00:00')) / np.timedelta64(1, 's')

### fix nans in time record and fill in with valid times
tvalid = utils.fill_invalid(times, axis=-1)
tvalid = utils.fill_invalid(tvalid, axis=1)
tvalid = utils.fill_invalid(tvalid, axis=0)
times = tvalid

### Load in his files, define variables, adjust times to same reference time as USGS data
temp = True  # note: reuses 'temp' as a flag, clobbering the temperature array loaded above
his = nc.MFDataset(path + hisfile)
xcoor = his.variables["station_x_coordinate"][:]
ycoor = his.variables["station_y_coordinate"][:]

# get reference time from the mdu:
t_ref, t_start, t_stop = mdu.time_range()
dref = utils.to_unix(t_ref)
# dt.datetime.strptime(emdu['time','refdate']
def fill_tidal_data(da, fill_time=True):
    """
    Extract tidal harmonics from an incomplete xarray DataArray, use those to
    fill in the gaps, and return a complete DataArray.

    Uses all 37 of the standard NOAA harmonics; may not be stable with short
    time series.

    A 5-day lowpass is removed before the harmonic decomposition, and added
    back in afterwards.

    Assumes that the DataArray has a 'time' coordinate with datetime64 values.
    The time dimension must be dense enough to extract an exact time step.

    If fill_time is True, holes in the time coordinate will be filled, too.
    """
    diffs = np.diff(da.time)
    dt = np.median(diffs)

    if fill_time:
        gaps = np.nonzero(diffs > 1.5*dt)[0]
        pieces = []
        last = 0
        for gap_i in gaps:
            # gap_i=10 means that the 10th diff was too big,
            # i.e. the jump from 10 to 11 was too big.
            # the preceding piece should go through 9, so
            # exclusive of gap_i
            pieces.append(da.time.values[last:gap_i])
            pieces.append(np.arange(da.time.values[gap_i],
                                    da.time.values[gap_i+1],
                                    dt))
            last = gap_i + 1
        pieces.append(da.time.values[last:])
        dense_times = np.concatenate(pieces)
        dense_values = np.nan * np.zeros(len(dense_times), np.float64)
        dense_values[np.searchsorted(dense_times, da.time.values)] = da.values
        da = xr.DataArray(dense_values, dims=['time'], coords=[dense_times])
    else:
        pass

    dnums = utils.to_dnum(da.time)
    data = da.values

    # lowpass at about 5 days, splitting out low/high components
    winsize = int(np.timedelta64(5, 'D') / dt)
    data_lp = filters.lowpass_fir(data, winsize)
    data_hp = data - data_lp

    valid = np.isfinite(data_hp)
    omegas = harm_decomp.noaa_37_omegas()  # as rad/sec

    harmonics = harm_decomp.decompose(dnums[valid]*86400, data_hp[valid], omegas)

    dense = harm_decomp.recompose(dnums*86400, harmonics, omegas)

    data_recon = utils.fill_invalid(data_lp) + dense

    data_filled = data.copy()
    missing = np.isnan(data_filled)
    data_filled[missing] = data_recon[missing]

    fda = xr.DataArray(data_filled, coords=[da.time], dims=['time'])
    return fda
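##
# Hedged usage sketch for fill_tidal_data() (synthetic series, not from the
# original): build an hourly "tide" with a hole, then fill it from the
# harmonic fit plus the 5-day lowpass.
import numpy as np
import xarray as xr

t = np.arange(np.datetime64('2016-01-01'), np.datetime64('2016-03-01'),
              np.timedelta64(1, 'h'))
eta = 1.0 * np.cos(2*np.pi*np.arange(len(t)) / 12.42)  # crude M2-only signal
eta[500:650] = np.nan                                  # a ~6 day data gap
da = xr.DataArray(eta, dims=['time'], coords=[t])
da_filled = fill_tidal_data(da)
# the NaN span in da should now be filled with the reconstructed tide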
    tx_beacon[i] = np.nonzero(ds.rx_beacon.values == tag)[0][0]
    # And did the rx see itself?
    is_selfaware[i] = np.isfinite(ds.matrix.values[i, tx_beacon[i]])

ds['tx_beacon'] = ('index',), tx_beacon

is_beacon = tx_beacon >= 0

temps = ds.temp.values
# WHOA -- comparisons to the beacon-beacon transits suggest
# that this has some significant error.  The inferred and
# calculated speeds of sound are close-ish if temperature is
# offset 4.5 degC.
rx_c = seawater.svel(0, ds['temp'] - 4.5, 0)
# occasional missing data...
rx_c = utils.fill_invalid(rx_c, axis=1)
ds['c'] = ('index', 'rx'), rx_c

##

sel_pings = is_multirx & is_selfaware
ds_strong = ds.isel(index=sel_pings)

##

# Calculate time deltas for each ordered pair of receivers
Nping = ds_strong.dims['index']
Nrx = ds_strong.dims['rx']

tx_beacon = ds_strong.tx_beacon.values
A first cut at direct precipitation and evaporation.
"""
import os

import numpy as np
import xarray as xr

from stompy import utils

# The last SUNTANS run used NARR, which is way too coarse.  It seems better to
# use an in-Bay climatology than NARR.

##

union_city = xr.open_dataset('/opt/data/cimis/union_city-hourly-2001-2016.nc')

##

temps = utils.fill_invalid(union_city.HlyAirTmp.values)
temp_zscore = ((temps - temps.mean()) / temps.std()).clip(-1, 1)
# z-score of 1 means warm temperature
factors = np.interp(temp_zscore, [-1, 1], [0.7, 0.6])
union_city['HlyEvap'] = union_city.HlyEto / factors

##

narr_data_dir = "/opt/data/suntans/spinupdated/suntans-spinup/narr-data/data/"
precip = xr.open_dataset(os.path.join(narr_data_dir, "apcp.mon.mean.nc"),
                         decode_cf=False)
# work around a bug in the nc file
if precip.apcp._FillValue != precip.apcp.missing_value:
    precip.apcp.attrs['_FillValue'] = precip.apcp.missing_value
precip = xr.decode_cf(precip)
track['step'] = np.cumsum(np.r_[0, dt_steps]).astype(np.int32)

track_exp = pd.DataFrame()
track_exp['step'] = np.arange(expanded_idx.max() + 1)

track2 = track_exp.merge(right=track, on='step', how='left')

##

data = track2.loc[:, ['x', 'y']]
data.loc[:, 'x'] -= data.x.mean()
data.loc[:, 'y'] -= data.y.mean()

# Make life a little simpler and just fill
data.loc[:, 'x'] = utils.fill_invalid(data.loc[:, 'x'].values)
data.loc[:, 'y'] = utils.fill_invalid(data.loc[:, 'y'].values)

##

# Sort of following https://www.statsmodels.org/stable/examples/notebooks/generated/tsa_arma_0.html
import statsmodels.api as sm
from statsmodels.graphics.api import qqplot
from statsmodels import tsa

data.plot(figsize=(12, 8))

##

dta = data['x']
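##
# A hedged sketch of the next step implied by the statsmodels link above:
# fit a low-order ARMA-type model to the detrended x series.  The order
# (2, 0, 2) is a placeholder, not the value used in the original analysis.
from statsmodels.tsa.arima.model import ARIMA

model = ARIMA(dta.values, order=(2, 0, 2))
res = model.fit()
print(res.summary())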