def ll_to_lonilati(ll, coastal_ds):
    """
    Match lon/lat coordinates to a cell in the coastal_ds dataset.
    ll: [lon,lat]
    coastal_ds: xarray dataset, assumed to be lon/lat grid, rectilinear.
    """
    # assumes coastal_ds carries 1-D 'lon' and 'lat' coordinates.
    lon_vals = coastal_ds.lon.values
    lat_vals = coastal_ds.lat.values
    # wrap longitudes to [0,360) so the match is insensitive to convention
    loni = utils.nearest(lon_vals % 360, ll[0] % 360)
    lati = utils.nearest(lat_vals, ll[1])
    # distance between the requested point and the matched cell center
    err_km = utils.haversine([lon_vals[loni], lat_vals[lati]], ll)
    return loni, lati, err_km
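
# Hypothetical usage sketch (the point below is an example, not from the
# original workflow); assumes coastal_ds exposes 1-D 'lon'/'lat' coordinates
# as in the function above:
# loni, lati, err_km = ll_to_lonilati([-122.4, 37.8], coastal_ds)
# nearest_cell = coastal_ds.isel(lon=loni, lat=lati)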
def add_src_time(a_to_b, a_to_a, shift_us=0, drift_us_per_day=0,
                 t0=np.datetime64("1970-01-01")):
    """
    For each a_to_b ping, find the nearest-in-time a_to_a ping, and add a
    column for its time, named time_src.
    shift_us: constant time offset.
    drift_us_per_day: linear drift, starting at 0 at t0, in microseconds per day.
    t0: reference time for the drift term.
    """
    days = (a_to_a.time.values - t0) / np.timedelta64(86400, 's')
    shift = (shift_us + drift_us_per_day * days).astype(np.int64)
    a_to_a_times = a_to_a.time.values
    a_to_a_times_shifted = a_to_a_times + shift * np.timedelta64(1, 'us')

    near = utils.nearest(a_to_a_times_shifted, a_to_b.time.values)

    # choice here whether to use the shifted or unshifted time.
    # currently using the unshifted time:
    a_to_b['time_src'] = ('index', ), a_to_a_times[near]
    # but save the shifted version, too
    a_to_b['time_src_shifted'] = ('index', ), a_to_a_times_shifted[near]

    # calculate trav_seconds with the shifted data, because it
    # is used to weed out bad matches
    trav_secs = (a_to_b.time - a_to_b.time_src_shifted) / np.timedelta64(1, 'us') * 1e-6
    # some crazy numbers are throwing off the averages
    trav_secs[np.abs(trav_secs) > 1000] = np.nan
    # note that this can be negative
    a_to_b['trav_secs'] = ('index', ), trav_secs
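
# Usage sketch (the numbers are hypothetical, not from the original
# deployment): correct for a fixed 15 us offset plus 2 us/day of drift
# accumulating from the start of the experiment.
# add_src_time(a_to_b, a_to_a, shift_us=15, drift_us_per_day=2,
#              t0=np.datetime64("2019-03-01"))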
def depth_avg(run_i, t):
    ug = ug_for_run(run_i)
    time_idx = utils.nearest(ug.nc.time.values, t)
    # have to transpose as the velocities have Nk first, but
    # this defaults to Nk second
    return ug.vertical_averaging_weights(time_slice=time_idx, ztop=0, zbottom=0).T
def quantize_time(t):
    run_i = np.searchsorted(run_starts, t) - 1
    ug = ug_for_run(run_i)
    if use_ptm_output:
        time_idx = np.searchsorted(ug.nc.time.values, t)
    else:
        time_idx = utils.nearest(ug.nc.time.values, t)
    return run_i, time_idx
def plot_circulation(rxs, ds_tot, num=1):
    ds_ab = effective_clock_offset([rxs[0], rxs[1]], ds_tot)
    ds_bc = effective_clock_offset([rxs[1], rxs[2]], ds_tot)
    ds_ca = effective_clock_offset([rxs[2], rxs[0]], ds_tot)

    # Combine
    t_common = np.unique(np.concatenate((ds_ab.time.values,
                                         ds_bc.time.values,
                                         ds_ca.time.values)))
    ds_abc = xr.Dataset()
    ds_abc['time'] = ('time', ), t_common
    ds_abc['offset'] = ('time', ), (
        np.interp(t_common, ds_ab.time.values, ds_ab.offset.values) +
        np.interp(t_common, ds_bc.time.values, ds_bc.offset.values) +
        np.interp(t_common, ds_ca.time.values, ds_ca.offset.values))

    error = 0
    for ds in [ds_ab, ds_bc, ds_ca]:
        near_idx = utils.nearest(ds.time, t_common)
        # The accumulated "error" is the error of the nearest value in each source
        error = error + ds['error'].values[near_idx]
        # *and* the time offset from that value.
        error += np.abs(t_common - utils.nearest_val(ds.time.values, t_common))
    ds_abc['error'] = ('time', ), error

    plt.figure(num).clf()
    fig, axs = plt.subplots(4, 1, num=num, sharex=True)

    for ds in [ds_ab, ds_bc, ds_ca]:
        rx_from, rx_to = ds.rx.values
        label = "%s-%s" % (rx_from, rx_to)
        axs[0].plot(ds.time, ds.offset, label=label)
        axs[1].plot(ds.time, ds.transit, label=label)
    axs[0].set_ylabel('Offset')
    axs[1].set_ylabel('Transit')

    axs[2].plot(ds_abc.time, 1e6 * ds_abc.offset, label="Circ")
    axs[2].set_ylabel('Circulation (us)')

    axs[3].plot(ds_abc.time, ds_abc.error, label="Error")

    [ax.legend(loc='upper right') for ax in axs]
    return fig, ds_abc, [ds_ab, ds_bc, ds_ca]
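
# Why summing the three offsets is informative (a sketch, not taken from the
# original notes): with true clock errors e_a, e_b, e_c, each pairwise estimate
# is offset_xy ~= e_y - e_x, so around the closed loop
#   offset_ab + offset_bc + offset_ca ~= (e_b-e_a) + (e_c-e_b) + (e_a-e_c) = 0
# and any residual "circulation" reflects estimation error rather than real
# clock offsets. A tiny check with hypothetical clock errors:
def _demo_circulation_closure():
    e_a, e_b, e_c = 0.004, -0.011, 0.020   # hypothetical clock errors, seconds
    circ = (e_b - e_a) + (e_c - e_b) + (e_a - e_c)
    return circ  # ~0 (up to roundoff) in this idealized case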
def add_src_time(a_to_b, a_to_a, shift_us=0, shift_sec_per_day=0):
    """
    For each a_to_b ping, find the nearest-in-time a_to_a ping, and add a
    column for its time, named time_src.
    shift_us: constant time offset.
    shift_sec_per_day: linear drift, starting at 0 at t0 (a module-level
      reference time, not shown in this excerpt).
    """
    a_to_a_times = a_to_a.time.values + shift_us * np.timedelta64(1, 'us')
    days = (a_to_a.time.values - t0) / np.timedelta64(86400, 's')
    a_to_a_times += np.timedelta64(1, 'us') * (1e6 * shift_sec_per_day * days).astype(np.int32)

    near = utils.nearest(a_to_a_times, a_to_b.time.values)
    a_to_b['time_src'] = ('index', ), a_to_a_times[near]

    trav_secs = (a_to_b.time - a_to_b.time_src) / np.timedelta64(1, 'us') * 1e-6
    # some crazy numbers are throwing off the averages
    trav_secs[np.abs(trav_secs) > 1000] = np.nan
    a_to_b['trav_secs'] = ('index', ), trav_secs
def snap_for_time(run_i, t):
    map_ds = map_for_run(run_i)
    time_idx = utils.nearest(map_ds.time.values, t)
    return map_ds.isel(time=time_idx)
def process_period(start_time_string, end_time_string, outfileprefix, force=False):
    nc_fn = outfileprefix + ".nc"
    print("Processing %s to %s output to %s" % (start_time_string, end_time_string, nc_fn))

    if (not force) and os.path.exists(nc_fn):
        print("File %s exists - skipping" % nc_fn)
        return

    # pick interpolation method (natural neighbor or linear is recommended):
    #   'nearest' = nearest neighbor
    #   'linear'  = linear
    #   'cubic'   = cubic spline
    #   'natural' = natural neighbor
    interp_method = 'natural'

    # specify comment string (one line only, i.e., no '\n') for *.amu/*.amv files
    commentstring = 'Prepared by Allie King, SFEI, times are in PST (ignore the +00:00), adapted by Rusty Holleman'

    # specify properties of the wind grid -- this one was used for CASCaDE and sfb_dfm
    bounds = [340000, 610000, 3980000, 4294000]
    dx = 1500.
    dy = 1500.

    #--------------------------------------------------------------------------#
    # Main Program
    #--------------------------------------------------------------------------#

    n_cols = int(round(1 + (bounds[1] - bounds[0]) / dx))
    n_rows = int(round(1 + (bounds[3] - bounds[2]) / dy))
    x_llcorner = bounds[0]
    y_llcorner = bounds[2]

    start_date = np.datetime64(start_time_string)
    end_date = np.datetime64(end_time_string)

    # specify directory containing the compiled wind observation data and station
    # coordinates (SFB_hourly_U10_2011.csv, SFB_hourly_V10_2011.csv, etc...)
    windobspath = os.path.join(basedir, 'Compiled_Hourly_10m_Winds/data')

    # convert start and end times to datetime objects
    start_dt = utils.to_datetime(start_date)
    end_dt = utils.to_datetime(end_date)

    # create a meshgrid corresponding to the CASCaDE wind grid
    x_urcorner = x_llcorner + dx * (n_cols - 1)
    y_urcorner = y_llcorner + dy * (n_rows - 1)
    x = np.linspace(x_llcorner, x_urcorner, n_cols)
    # RH: orient y the usual way, not the arcinfo/dfm wind way (i.e. remove flipud)
    y = np.linspace(y_llcorner, y_urcorner, n_rows)
    xg, yg = np.meshgrid(x, y)

    # read the observed wind data
    tz_offset = dt.timedelta(hours=8)
    try:
        # start_time,end_time are in UTC, so remove the offset when requesting data
        # from wlib which expects PST
        time_days, station_names, U10_obs = wlib.read_10m_wind_data_from_csv(
            os.path.join(windobspath, 'SFB_hourly_U10_'),
            start_dt - tz_offset, end_dt - tz_offset)
        time_days, station_names, V10_obs = wlib.read_10m_wind_data_from_csv(
            os.path.join(windobspath, 'SFB_hourly_V10_'),
            start_dt - tz_offset, end_dt - tz_offset)
    except FileNotFoundError:
        print("Okay - probably beyond the SFEI data")
        U10_obs = V10_obs = None

    if U10_obs is not None:
        # note that time_days is just decimal days after start, so it doesn't
        # need to be adjusted for timezone.
        # read the coordinates of the wind observation stations
        df = pd.read_csv(os.path.join(windobspath, 'station_coordinates.txt'))
        station_names_check = df['Station Organization-Name'].values
        x_obs = df['x (m - UTM Zone 10N)'].values
        y_obs = df['y (m - UTM Zone 10N)'].values
        Nstations = len(df)

        for snum in range(Nstations):
            if not station_names[snum] == station_names_check[snum]:
                raise ValueError(
                    'station names in station_coordinates.txt must match headers in '
                    'SFB_hourly_U10_YEAR.csv and SFB_hourly_V10_YEAR.csv files')
    else:
        x_obs = np.zeros(0, np.float64)
        y_obs = np.zeros(0, np.float64)
        Nstations = 0

        # Fabricate time_days
        all_times = []
        t = start_dt
        interval = dt.timedelta(hours=1)
        while t <= end_dt:
            all_times.append(t)
            t = t + interval
        all_dt64 = np.array([utils.to_dt64(t) for t in all_times])
        time_days = (all_dt64 - all_dt64[0]) / np.timedelta64(1, 's') / 86400.
    # zip the x, y coordinates for use in the griddata interpolation
    points = np.column_stack((x_obs, y_obs))

    # loop through all times, at each time step find all the non-nan data, and
    # interpolate it onto the model grid, then compile the data from all times
    # into a dimension-3 matrix. keep track of which stations were non-nan ('good')
    # at each time step in the matrix igood
    coamps_ds = None  # handled on demand below
    coamps_xy = None  # ditto
    # drops COAMPS data points within buffer dist of a good observation
    buffer_dist = 30e3

    for it in range(len(time_days)):
        if it % 10 == 0:
            print("%d/%d steps" % (it, len(time_days)))

        #-- augment with COAMPS output
        target_time = start_date + np.timedelta64(int(time_days[it] * 86400), 's')
        if (coamps_ds is None) or (target_time > coamps_ds.time.values[-1]):
            coamps_ds = coamps.coamps_dataset(bounds, target_time,
                                              target_time + np.timedelta64(1, 'D'),
                                              cache_dir=cache_dir,
                                              fields=['wnd_utru', 'wnd_vtru'])
            # reduce dataset size -- out in the ocean we really don't need too many points
            coamps_ds = coamps_ds.isel(x=slice(None, None, 2), y=slice(None, None, 2))

            coamps_X, coamps_Y = np.meshgrid(coamps_ds.x.values, coamps_ds.y.values)
            coamps_xy = np.c_[coamps_X.ravel(), coamps_Y.ravel()]
            print("COAMPS shape: ", coamps_X.shape)

            # seems that the coamps dataset is not entirely consistent in its shape?
            # not sure what's going on, but best to redefine this each time to be
            # sure.
            @memoize.memoize()
            def mask_near_point(xy):
                dists = utils.dist(xy, coamps_xy)
                return (dists > buffer_dist)

        coamps_time_idx = utils.nearest(coamps_ds.time, target_time)
        coamps_sub = coamps_ds.isel(time=coamps_time_idx)

        # Which coamps points are far enough from good observations. there are
        # also some times where coamps data is missing.
        # mask=np.ones(len(coamps_xy),np.bool8)
        mask = np.isfinite(coamps_sub.wind_u.values.ravel())

        # find all non-nan data at this time step
        if U10_obs is not None:
            igood = np.logical_and(~np.isnan(U10_obs[it, :]),
                                   ~np.isnan(V10_obs[it, :]))
            obs_xy = np.c_[x_obs[igood], y_obs[igood]]
            for xy in obs_xy:
                mask = mask & mask_near_point(xy)

            input_xy = np.concatenate([obs_xy, coamps_xy[mask]])
            input_U = np.concatenate([U10_obs[it, igood],
                                      coamps_sub.wind_u.values.ravel()[mask]])
            input_V = np.concatenate([V10_obs[it, igood],
                                      coamps_sub.wind_v.values.ravel()[mask]])
        else:
            # No SFEI data --
            input_xy = coamps_xy[mask]
            input_U = coamps_sub.wind_u.values.ravel()[mask]
            input_V = coamps_sub.wind_v.values.ravel()[mask]

        if np.any(np.isnan(input_U)) or np.any(np.isnan(input_V)):
            import pdb
            pdb.set_trace()

        Ngood = len(input_xy)

        # set the interpolation method to be used in this time step: interp_method_1.
        # ideally, this would just be the user-defined interpolation method, interp_method;
        # however, if we do not have enough non-nan data to use the user-defined
        # method this time step, temporarily revert to the nearest neighbor method
        if interp_method == 'natural' or interp_method == 'linear' or interp_method == 'cubic':
            if Ngood >= 4:
                interp_method_1 = interp_method
            else:
                interp_method_1 = 'nearest'
        elif interp_method == 'nearest':
            interp_method_1 = 'nearest'

        # if natural neighbor method, interpolate using the pyngl package
        if interp_method_1 == 'natural':
            U10g = np.transpose(ngl.natgrid(input_xy[:, 0], input_xy[:, 1], input_U,
                                            xg[0, :], yg[:, 0]))
            V10g = np.transpose(ngl.natgrid(input_xy[:, 0], input_xy[:, 1], input_V,
                                            xg[0, :], yg[:, 0]))
        # for other interpolation methods use the scipy package
        else:
            U10g = griddata(input_xy, input_U, (xg, yg), method=interp_method_1)
            V10g = griddata(input_xy, input_V, (xg, yg), method=interp_method_1)

            # since griddata interpolation fills all data outside range with nan, use
            # the nearest neighbor method to extrapolate
            U10g_nn = griddata(input_xy, input_U, (xg, yg), method='nearest')
            V10g_nn = griddata(input_xy, input_V, (xg, yg), method='nearest')
            ind = np.isnan(U10g)
            U10g[ind] = U10g_nn[ind]
            ind = np.isnan(V10g)
            V10g[ind] = V10g_nn[ind]

        # compile results together over time
        # igood_all not updated for COAMPS, omit here.
        if it == 0:
            U10g_all = np.expand_dims(U10g, axis=0)
            V10g_all = np.expand_dims(V10g, axis=0)
            # igood_all = np.expand_dims(igood,axis=0)
        else:
            U10g_all = np.append(U10g_all, np.expand_dims(U10g, axis=0), axis=0)
            V10g_all = np.append(V10g_all, np.expand_dims(V10g, axis=0), axis=0)
            # igood_all = np.append(igood_all, np.expand_dims(igood,axis=0), axis=0)

    ##

    # Write netcdf:
    ds = xr.Dataset()
    ds['time'] = ('time', ), start_date + (time_days * 86400).astype(np.int32) * np.timedelta64(1, 's')
    ds['x'] = ('x', ), x
    ds['y'] = ('y', ), y
    ds['wind_u'] = ('time', 'y', 'x'), U10g_all
    ds['wind_v'] = ('time', 'y', 'x'), V10g_all

    os.path.exists(nc_fn) and os.unlink(nc_fn)
    ds.to_netcdf(nc_fn)
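
# Self-contained illustration of the nan-fill pattern used above (synthetic
# points only, not part of the original script): scipy's griddata leaves cells
# outside the convex hull as nan for 'linear'/'cubic', so a second pass with
# method='nearest' backfills those cells.
def _demo_griddata_nan_fill():
    from scipy.interpolate import griddata
    pts = np.array([[0., 0.], [1., 0.], [0., 1.]])
    vals = np.array([1.0, 2.0, 3.0])
    gx, gy = np.meshgrid(np.linspace(-0.5, 1.5, 5), np.linspace(-0.5, 1.5, 5))
    lin = griddata(pts, vals, (gx, gy), method='linear')   # nan outside the hull
    nn = griddata(pts, vals, (gx, gy), method='nearest')   # defined everywhere
    missing = np.isnan(lin)
    lin[missing] = nn[missing]
    return lin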
# contaminate the lags as much when the times are already
# shifted.
# this choice does not affect the choice of ping pairs,
# as that choice is made solely on destination times.
a2b_time_srcs = a2b_good.time_src_shifted.values
b2a_time_srcs = b2a_good.time_src_shifted.values

for a2b_idx in range(len(a2b_good.index)):
    match = {}
    match['a2b_idx'] = a2b_idx
    match['a2b_time_b'] = a2b_times[a2b_idx]
    match['a2b_time_a'] = a2b_time_srcs[a2b_idx]
    # Find a good b2a ping:
    b2a_idx = utils.nearest(b2a_times, match['a2b_time_b'])
    match['b2a_idx'] = b2a_idx
    match['b2a_time_b'] = b2a_time_srcs[b2a_idx]
    match['b2a_time_a'] = b2a_times[b2a_idx]
    match['match_diff'] = match['a2b_time_b'] - match['b2a_time_a']
    a_matches.append(match)

##

a_df = pd.DataFrame(a_matches)

# almost the key calculation
a_df['mean_travel'] = 0.5 * (a_df.a2b_time_b - a_df.a2b_time_a +
                             a_df.b2a_time_a - a_df.b2a_time_b)
# This is what I want:
# a_df['clock_skew']=0.5*(a_df.a2b_time_b+a_df.b2a_time_b) - 0.5*(a_df.a2b_time_a + a_df.b2a_time_a)
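
# Why the two-way combination recovers travel time (a sketch with hypothetical
# numbers, not from the original notes): let tau be the one-way travel time and
# e the offset of B's clock relative to A's. In each clock's own time,
#   a2b:  a2b_time_b - a2b_time_a = tau + e
#   b2a:  b2a_time_a - b2a_time_b = tau - e
# so the half-sum above recovers tau (mean_travel), and the commented-out
# clock_skew expression recovers e, independent of tau.
def _demo_two_way_travel():
    tau, e = 0.35, 0.02                 # hypothetical: 0.35 s travel, 20 ms skew
    a2b = tau + e                       # a2b_time_b - a2b_time_a
    b2a = tau - e                       # b2a_time_a - b2a_time_b
    mean_travel = 0.5 * (a2b + b2a)     # -> 0.35
    clock_skew = 0.5 * (a2b - b2a)      # -> 0.02
    return mean_travel, clock_skew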
    if len(idxs) == 0:
        continue

    print("%d samples to pull for %s -- %s" % (len(idxs), model_day.run_start, model_day.run_stop))

    sample_xy = np.c_[segments.xm.values[idxs], segments.ym.values[idxs]]

    # These will have the whole day
    stations = model_day.extract_station(xy=sample_xy)

    # Slim to just the specific timestamps
    sample_times = seg_dt64_utc[idxs]

    # this gives time errors [0,1800s]
    # time_idxs=np.searchsorted( stations.time.values, sample_times )
    # this centers the time offsets [-900s,900s]
    time_idxs = utils.nearest(stations.time.values, sample_times)

    station_dss = [stations.isel(station=station_i, time=time_idx)
                   for station_i, time_idx in enumerate(time_idxs)]
    stations_ds = xr.concat(station_dss, dim='station')
    # And record the original index so they can be put back together
    stations_ds['input_idx'] = ('station', ), idxs
    all_stations_ds.append(stations_ds)

##

joined = xr.concat(all_stations_ds, dim='station')

# Make sure we hit every station
assert np.all(np.unique(all_idxs) == np.arange(len(all_idxs)))
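
# Small, self-contained check of the comment above (synthetic times, assuming
# half-hourly model output): np.searchsorted picks the next output at or after
# each sample, so the model-minus-sample offsets fall in [0,1800s), while a
# nearest match centers them in [-900s,900s].
def _demo_time_matching():
    out_t = np.arange(0., 86400., 1800.)              # model output times, seconds
    samp = np.array([100., 1000., 1700.])             # sample times, seconds
    idx_next = np.searchsorted(out_t, samp)
    idx_near = np.argmin(np.abs(out_t[None, :] - samp[:, None]), axis=1)
    off_next = out_t[idx_next] - samp                 # [1700., 800., 100.]
    off_near = out_t[idx_near] - samp                 # [-100., 800., 100.]
    return off_next, off_near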
def set_ic_from_hycom(model, hycom_ll_box, cache_dir, default_s=None, default_T=None):
    """
    Update model.ic_ds with salinity and temperature from HYCOM.

    hycom_ll_box is like [-124.9, -121.7, 35.9, 39.0], and is specified here to
    make best use of cached data (so slight mis-alignment between models doesn't
    require refetching all of the hycom files).

    cache_dir: where to save/find cached hycom files.

    default_s, default_T: when a grid point does not intersect valid hycom data,
    what value to use. Leave as None in order to leave the value in ic_ds alone.
    In the past used default_s=33.4, default_T=10.0.
    """
    fns = hycom.fetch_range(hycom_ll_box[:2], hycom_ll_box[2:],
                            [model.run_start, model.run_start + np.timedelta64(1, 'D')],
                            cache_dir=cache_dir)
    hycom_ic_fn = fns[0]

    hycom_ds = xr.open_dataset(hycom_ic_fn)
    if 'time' in hycom_ds.dims:
        hycom_ds = hycom_ds.isel(time=0)

    cc = model.grid.cells_center()
    cc_ll = model.native_to_ll(cc)

    # Careful - some experiments (such as 92.8) have lon in [0,360],
    # while later ones have lon in [-180,180].
    # this forces all to be [-180,180]
    hycom_ds.lon.values[:] = (hycom_ds.lon.values + 180) % 360.0 - 180.0

    dlat = np.median(np.diff(hycom_ds.lat.values))
    dlon = np.median(np.diff(hycom_ds.lon.values))
    lat_i = utils.nearest(hycom_ds.lat.values, cc_ll[:, 1], max_dx=1.2 * dlat)
    lon_i = utils.nearest(hycom_ds.lon.values, cc_ll[:, 0], max_dx=1.2 * dlon)

    # make this positive:down to match hycom and make the interpolation
    sun_z = -model.ic_ds.z_r.values

    assert ('time', 'Nk', 'Nc') == model.ic_ds.salt.dims, "Workaround is fragile"

    for scal, hy_var, sun_var, default in [('s', 'salinity', 'salt', default_s),
                                           ('T', 'water_temp', 'temp', default_T)]:
        if scal == 's' and float(model.config['beta']) == 0.0:
            continue
        if scal == 'T' and float(model.config['gamma']) == 0.0:
            continue

        for c in utils.progress(range(model.grid.Ncells()),
                                msg="HYCOM initial condition %s %%s" % scal):
            sun_val = default

            if lat_i[c] < 0 or lon_i[c] < 0:
                print("Cell %d does not overlap HYCOM grid" % c)
            else:
                # top to bottom, depth positive:down
                val_profile = hycom_ds[hy_var].isel(lon=lon_i[c], lat=lat_i[c]).values
                valid = np.isfinite(val_profile)
                if not np.any(valid):
                    # print("Cell %d is dry in HYCOM grid"%c)
                    pass
                else:
                    # could add bottom salinity if we really cared.
                    sun_val = np.interp(sun_z,
                                        hycom_ds.depth.values[valid],
                                        val_profile[valid])

            if sun_val is not None:
                model.ic_ds[sun_var].values[0, :, c] = sun_val
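
# Hypothetical usage sketch (the cache path and model object are examples, not
# from the original driver script; the lon/lat box and fallback values are the
# ones mentioned in the docstring):
# set_ic_from_hycom(model,
#                   hycom_ll_box=[-124.9, -121.7, 35.9, 39.0],
#                   cache_dir='cache',
#                   default_s=33.4, default_T=10.0)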
if xcoor.ndim == 2:
    # some DFM runs add time to coordinates
    xcoor = xcoor[0, :]
    ycoor = ycoor[0, :]
mll = utm_to_ll(np.c_[xcoor, ycoor]).T

llind = np.zeros(len(ds["latitude"]), np.int32)
for i in range(len(llind)):
    dist_ = np.sqrt((mll[0, :] - lon[i])**2 + (mll[1, :] - lat[i])**2)
    llind[i] = np.argmin(dist_)  # np.where(dist_ == np.min(dist_))[0]

tind = np.zeros(times[:, :, 0].shape, np.int32)
for i in range(tind.shape[0]):      # loop over cruises
    for j in range(tind.shape[1]):  # loop over stations within cruise
        ind = utils.nearest(mtimes, times[i, j, 0])  # match on time at top of cast
        # if the match is more than a day off, assume the model
        # doesn't cover this cruise.
        if np.abs(mtimes[ind] - times[i, j, 0]) > 86400:
            ind = -1
        tind[i, j] = ind

mdates = np.zeros((len(tind[:, 0]), len(tind[0, :]), len(mdepth[0, 0, :])))
msalt_ = np.zeros((len(tind[:, 0]), len(tind[0, :]), len(mdepth[0, 0, :])))
if temp:
    mtemp_ = np.zeros((len(tind[:, 0]), len(tind[0, :]), len(mdepth[0, 0, :])))
mdepth_ = np.zeros((len(tind[:, 0]), len(tind[0, :]), len(mdepth[0, 0, :])))

for i in range(len(msalt_[:, 0, 0])):
    for j in range(len(msalt_[0, :, 0])):
        if tind[i, j] >= 0:
def ping_to_solver_data(ping):
    ping_tnum = ping.tnum.item()   # fish_tnums[i]
    ping_rx = ping.matrix.values   # matrix[i,:]

    offset_idx = utils.nearest(mat_tnums, ping_tnum)
    fish_to_offset = np.abs(ping_tnum - mat_tnums[offset_idx])

    offsets = offset_mat[offset_idx, :, :]
    # total staleness -- include offset to fish time to be conservative.
    stales = stale2_mat[offset_idx, :, :] + fish_to_offset

    # The shortest path traversing offsets based on staleness.
    # this will choose a->b->c over a->c if the collection of
    # pings for the former case are "fresher" in time than
    # for a->c directly.
    dists, preds = shortest_path(stales, return_predecessors=True)
    # dists: [N,N] matrix of total staleness
    # preds: [N,N] of predecessors

    # the specific indices involved in this ping
    rxs = np.nonzero(np.isfinite(ping_rx))[0]

    # and calculate offsets to the other receivers.
    # use stales as a distance matrix, and find the shortest path.
    # declare rxs[0] as the reference...
    idx0 = rxs[0]
    best_offsets = offsets[idx0, :].copy()
    best_offsets[idx0] = 0.0  # declare rxs[0] to be the reference time

    for idxn in rxs[1:]:
        stale_sum = 0.0
        offset_sum = 0.0
        trav = idxn
        while trav != idx0:
            new_trav = preds[idx0, trav]
            # new_trav-trav is an edge on the shortest path
            stale_sum += stales[new_trav, trav]
            offset_sum += offsets[new_trav, trav]
            trav = new_trav
        # print(f"{idx0:2} => {idxn:2}: best stale={stale_sum:.2f} offset={offset_sum:.4f}")
        # print(f"   orig stale={stales[idx0,idxn]:.2f} offset={offsets[idx0,idxn]:.4f}")
        best_offsets[idxn] = offset_sum

    ping_rx_adj = (ping_rx - best_offsets) - ping_rx[idx0]

    times = ping_rx_adj[rxs]
    c = ping.c.isel(rx=rxs).values  # ds_fish.c.isel(index=i,rx=rxs).values
    x = ping.rx_x.isel(rx=rxs).values
    y = ping.rx_y.isel(rx=rxs).values
    z = ping.rx_z.isel(rx=rxs).values

    time_scale = 1000.

    rx_xy = np.c_[x, y]
    xy0 = rx_xy.mean(axis=0)
    rx_xy -= xy0

    data = dict(Nb=len(x),
                xy0=xy0,
                time_scale=time_scale,
                rx_t=time_scale * ping_rx_adj[rxs],
                rx_x=rx_xy[:, 0],
                rx_y=rx_xy[:, 1],
                sigma_t=0.1,
                sigma_x=1000.0,
                rx_c=c / time_scale)
    return data
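
# Standalone sketch of the predecessor walk above, with a tiny synthetic
# "staleness" matrix (values are illustrative, not from any deployment):
# chaining a->b->c (staleness 1+1) beats the direct a->c edge (staleness 5),
# so the reconstructed offset would follow the two fresher hops.
def _demo_predecessor_walk():
    from scipy.sparse.csgraph import shortest_path
    stales = np.array([[0., 1., 5.],
                       [1., 0., 1.],
                       [5., 1., 0.]])
    dists, preds = shortest_path(stales, return_predecessors=True)
    # walk predecessors from node 2 back to node 0, as in the while-loop above
    path = [2]
    while path[-1] != 0:
        path.append(preds[0, path[-1]])
    return path[::-1], dists[0, 2]   # ([0, 1, 2], 2.0) vs 5.0 for the direct edge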