# Imports used throughout these scripts. 'utils' is the project-local helper
# module (provides unix_to_dt64, to_dt64, download_url, progress, ...); its
# import is not shown here since it depends on the repo layout.
import os
import re
import datetime

import numpy as np
import pandas as pd
import xarray as xr


def parse_tek(det_fn, cf2_fn=None):
    if cf2_fn is None:
        fn = det_fn.replace('.DET', '.CF2')
        if os.path.exists(fn):
            cf2_fn = fn

    df = pd.read_csv(det_fn,
                     names=['id', 'year', 'month', 'day', 'hour', 'minute', 'second',
                            'epoch', 'usec', 'tag', 'nbwQ', 'corrQ', 'num1', 'num2',
                            'one', 'pressure', 'temp'])

    if 0:
        # this way is slow, and I *think* yields PST times, where epoch is UTC.
        dates = [datetime.datetime(year=rec['year'], month=rec['month'], day=rec['day'],
                                   hour=rec['hour'], minute=rec['minute'], second=rec['second'])
                 for idx, rec in df.iterrows()]
        df['time'] = utils.to_dt64(np.array(dates)) + df['usec'] * np.timedelta64(1, 'us')
    else:
        # this is quite fast and should yield UTC.
        # the conversion in utils happens to be good down to microseconds, so we can
        # do this in one go
        df['time'] = utils.unix_to_dt64(df['epoch'] + df['usec'] * 1e-6)

    # clean up time:
    bad_time = (df.time < np.datetime64('2018-01-01')) | (df.time > np.datetime64('2022-01-01'))
    df2 = df[~bad_time].copy()

    # clean up temperature:
    df2.loc[df.temp < -5, 'temp'] = np.nan
    df2.loc[df.temp > 35, 'temp'] = np.nan

    # clean up pressure
    df2.loc[df2.pressure > 160e3, 'pressure'] = np.nan
    df2.loc[df2.pressure < 110e3, 'pressure'] = np.nan

    # trim to first/last valid pressure
    valid_idx = np.nonzero(np.isfinite(df2.pressure.values))[0]
    df3 = df2.iloc[valid_idx[0]:valid_idx[-1] + 1, :].copy()

    df3['tag'] = [s.strip() for s in df3.tag.values]

    ds = xr.Dataset.from_dataframe(df3)

    if cf2_fn is not None:
        cf = pd.read_csv(cf2_fn, header=None)
        local_tag = cf.iloc[0, 1].strip()
        ds['beacon_id'] = local_tag

    return ds
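##

# A minimal usage sketch for parse_tek, assuming a hypothetical receiver
# download at 'rx_data/AM1.DET' (guarded so it only runs if that file exists).
# The returned Dataset has per-detection time/tag/pressure/temp, plus a scalar
# beacon_id when a CF2 file is found alongside the DET file.
demo_det_fn = 'rx_data/AM1.DET'  # hypothetical path
if os.path.exists(demo_det_fn):
    demo_ds = parse_tek(demo_det_fn)
    print(demo_ds.time.values[[0, -1]])  # first/last detection times (UTC)
    if 'beacon_id' in demo_ds:
        print(demo_ds.beacon_id.item())  # tag programmed into this receiver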
def dbg_to_clock_changes(dbg_filename):
    """
    Scan the .DBG file for indications that the FPGA clock was changed.
    Returns a DataFrame with fpga_pre and fpga_post fields giving the UTC time
    before and just after the clock was changed.

    For unanticipated resets, the DBG file just has hourly updates, so fpga_pre
    could be as much as an hour before the clock was actually reset.
    """
    dbg = pd.read_csv(dbg_filename, names=['time', 'message'])

    idx = 0
    clock_resets = []  # (time_before, msg_before, time_after, msg_after)

    # For reboots, track the last time we got a DBG message about the FPGA clock
    last_fpga_status = dict(time_pre=None, msg_pre=None, fpga_pre=None)

    def parse_d(s):
        return datetime.datetime.strptime(s, '%m/%d/%Y %H:%M:%S')

    while idx < len(dbg):
        # Seem to get these status messages hourly
        # 03/24/2019 15:25:31, RTC: 1553469930.648437 FPGA: 1553469931.000051 dt=-0.351614
        m = re.match(r'\s*RTC: [0-9\.]+ FPGA: ([0-9\.]+) dt=.*', dbg.message[idx])
        if m:
            last_fpga_status = dict(time_pre=parse_d(dbg.time.values[idx]),
                                    msg_pre=dbg.message[idx],
                                    fpga_pre=utils.unix_to_dt64(float(m.group(1))))
            idx += 1
            continue

        # 03/25/2019 11:04:45, FPGA started!! Init FPGA clock using RTC=1553540685.000000
        m = re.match(r'\s*FPGA started!! Init FPGA clock using RTC=([0-9\.]+)', dbg.message[idx])
        if m:
            # this message also sets the new, running status going forward
            new_status = {}
            new_status['time_pre'] = last_fpga_status['time_post'] = parse_d(dbg.time.values[idx])
            new_status['msg_pre'] = last_fpga_status['msg_post'] = dbg.message[idx]
            new_status['fpga_pre'] = last_fpga_status['fpga_post'] = utils.unix_to_dt64(float(m.group(1)))
            clock_resets.append(last_fpga_status)
            last_fpga_status = new_status
            idx += 1
            continue

        # And pick up sync events
        if '-Before SYNC' not in dbg.message[idx]:
            idx += 1
            continue

        before_msg = dbg.message[idx]
        after_msg = dbg.message[idx + 1]

        # Things like
        # 03/13/2019 16:25:31, -Before SYNC: FPGA=1552523147.485166 RTC=1552523147.406250 dt=-0.078916
        # 03/13/2019 16:25:31, -After SYNC: FPGA=1552523131.024603 RTC=1552523131.023437 dt=-0.001166
        assert '-After SYNC' in after_msg

        m1 = re.search(r'FPGA=([0-9\.]+)', before_msg)
        m2 = re.search(r'FPGA=([0-9\.]+)', after_msg)
        if m1:
            fpga_pre = utils.unix_to_dt64(float(m1.group(1)))
        else:
            fpga_pre = None
        if m2:
            fpga_post = utils.unix_to_dt64(float(m2.group(1)))
        else:
            fpga_post = None

        # dbg.time gives the timestamp of the log entry, but FPGA time is probably
        # what we actually want.
        clock_resets.append(dict(time_pre=parse_d(dbg.time.values[idx]),
                                 time_post=parse_d(dbg.time.values[idx + 1]),
                                 msg_pre=dbg.message[idx],
                                 msg_post=dbg.message[idx + 1],
                                 fpga_pre=fpga_pre,
                                 fpga_post=fpga_post))
        idx += 2

    clock_resets = pd.DataFrame(clock_resets)
    return clock_resets
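##

# Quick sketch of using dbg_to_clock_changes, with a hypothetical log at
# 'rx_data/AM1.DBG' (guarded so it only runs when that file is present).
# Each row of the result brackets one clock change with fpga_pre/fpga_post.
demo_dbg_fn = 'rx_data/AM1.DBG'  # hypothetical path
if os.path.exists(demo_dbg_fn):
    resets = dbg_to_clock_changes(demo_dbg_fn)
    print("%d clock changes found" % len(resets))
    if len(resets):
        print(resets[['fpga_pre', 'fpga_post']].head())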
def parse_tek(det_fn, cf2_fn=None, name=None,
              pressure_range=[110e3, 225e3],
              auto_beacon=True,
              split_on_clock_change=True,
              time_range=[np.datetime64('2018-01-01'),
                          np.datetime64('2022-01-01')]):
    """
    det_fn: path to DET file with detection information
    cf2_fn: optional, read beacon id from CF2 file.  Will attempt to guess
      this if not specified.
    name: string identifier added to the dataset
    pressure_range: valid range of pressures, used to filter the time series
      to when the receiver was in the water.  Pass None to skip any filtering.
    auto_beacon: if the beacon tag ID cannot be read from the CF2, this enables
      choosing the most commonly received tag as the beacon id.
    split_on_clock_change: if True, return a list of datasets, split up based
      on when the logs indicate that the clock was updated.
    time_range: if specified, a start/stop datetime64 used to exclude known bad
      data.  Defaults to a very broad range to reject mis-parsed or mangled dates.
    """
    if cf2_fn is None:
        fn = det_fn.replace('.DET', '.CF2')
        if os.path.exists(fn):
            cf2_fn = fn

    df = pd.read_csv(det_fn,
                     names=['id', 'year', 'month', 'day', 'hour', 'minute', 'second',
                            'epoch', 'usec', 'tag', 'nbwQ', 'corrQ', 'num1', 'num2',
                            'one', 'pressure', 'temp'])

    # this is quite fast and should yield UTC.
    # the conversion in utils happens to be good down to microseconds, so we can
    # do this in one go
    df['time'] = utils.unix_to_dt64(df['epoch'] + df['usec'] * 1e-6)

    # clean up time:
    bad_time = (df.time < time_range[0]) | (df.time > time_range[1])
    df2 = df[~bad_time].copy()

    # clean up temperature:
    df2.loc[df.temp < -5, 'temp'] = np.nan
    df2.loc[df.temp > 35, 'temp'] = np.nan

    # clean up pressure
    # this had been limited to 160e3, but AM9 has a bad calibration
    # (or maybe it's just really deep)
    if pressure_range is not None:
        df2.loc[df2.pressure < pressure_range[0], 'pressure'] = np.nan
        df2.loc[df2.pressure > pressure_range[1], 'pressure'] = np.nan

    # trim to first/last valid pressure
    valid_idx = np.nonzero(np.isfinite(df2.pressure.values))[0]
    df3 = df2.iloc[valid_idx[0]:valid_idx[-1] + 1, :].copy()

    df3['tag'] = [s.strip() for s in df3.tag.values]

    ds = xr.Dataset.from_dataframe(df3)
    ds['det_filename'] = (), det_fn

    if name is not None:
        ds['name'] = (), name

    if cf2_fn is not None:
        # SM2 isn't getting the right value here.
        # looks like it would be FF13, but it never hears FF13.
        cf = pd.read_csv(cf2_fn, header=None)
        local_tag = cf.iloc[0, 1].strip()
        ds['beacon_id'] = local_tag
        ds['cf2_filename'] = (), cf2_fn
    elif auto_beacon:
        # fall back to the most commonly received tag as the beacon
        beacon_id = df3.groupby('tag').size().sort_values().index[-1]
        ds['beacon_id'] = beacon_id
        ds['cf2_filename'] = None
        ds['beacon_id'].attrs['source'] = 'received tags'

    ds.attrs['pressure_range'] = pressure_range

    if split_on_clock_change:
        dbg_filename = ds.det_filename.item().replace('.DET', '.DBG')
        if not os.path.exists(dbg_filename):
            print("Split on clock: couldn't find %s" % dbg_filename)
            return [ds]
        else:
            all_clock_resets = dbg_to_clock_changes(dbg_filename)
            diced = dice_by_clock_resets(ds, all_clock_resets)
            return diced
    else:
        return ds
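##

# Sketch of the split_on_clock_change behavior, again on a hypothetical DET
# path. With splitting enabled the result is a list of datasets, one per
# interval between logged clock changes (or a single-element list if no DBG
# file is found).
demo_det_fn = 'rx_data/AM1.DET'  # hypothetical path
if os.path.exists(demo_det_fn):
    pieces = parse_tek(demo_det_fn, name='AM1', split_on_clock_change=True)
    for piece in pieces:
        print(piece.name.item(), piece.time.values[0], piece.time.values[-1])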
            route='San_Joaquin'
            exit_t=rec['sj_upper_first']
        else:
            route='Head_of_Old_River'
            exit_t=rec['hor_upper_first']
    else:
        route='no_exit'
        exit_t=np.nan
    df_ptm.loc[idx,'route']=route
    df_ptm.loc[idx,'exit_time']=exit_t

##

# 20201230: Rename the epoch timestamps, and write entry_time and
# exit_time as string datetimes in PST.
df_ptm2=df_ptm.copy()

utc_to_pst=np.timedelta64(-8,'h')
for t_col in ['entry_time','exit_time','first_detection_time']:
    epo_col=t_col.replace('_time','_utc_epoch')
    pst_col=t_col+"_pst"
    df_ptm2[epo_col]=df_ptm2[t_col]
    df_ptm2[pst_col]=utc_to_pst + utils.unix_to_dt64(df_ptm2[epo_col].round())
    del df_ptm2[t_col]

##

df_ptm2.to_csv("screen_final-ptm_inputs-20201230.csv",index=False)
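##

# Quick check of the UTC-epoch -> PST arithmetic used above on one known
# value: 1521115200 is 2018-03-15 12:00:00 UTC, so with the fixed -8 hour
# offset (standard time year-round) this should print 2018-03-15T04:00:00.
print(np.timedelta64(-8, 'h') + utils.unix_to_dt64(1521115200.0))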
    #output_fn=os.path.join(output_path,os.path.basename(track_fn))
    print(idx)
    track=row[col_in]
    if len(track)<2:
        results.append(None)
        continue
    track=track.copy()

    # segment midpoints: average consecutive fixes, with a trailing NaN so the
    # '_m' columns keep the same length as the original columns.
    for fld in ['x','y','tnum']:
        fld_m=0.5*( track[fld].values[:-1] + track[fld].values[1:] )
        track[fld+'_m']=np.r_[ fld_m, np.nan ]
    track['time_m']=utils.unix_to_dt64(track['tnum_m'].values)

    new_records=[]
    for i,seg in utils.progress( track.iloc[:-1,:].iterrows() ):
        rec={}  # new fields to be added to segments.

        t=seg['time_m'].to_datetime64()
        run_i,t_i=quantize_time(t)
        rec['index']=i
        for slice_name,slice_def in z_slices:
            Uint=interpolator(run_i,t_i,**slice_def)

            # vorticity at centers
            x_samp=np.array( [ [seg['x_m'] , seg['y_m'] ],
    if not os.path.exists(local_file):
        if not os.path.exists(os.path.dirname(local_file)):
            os.makedirs(os.path.dirname(local_file))
        utils.download_url(url, local_file, on_abort='remove')
    return pd.read_csv(local_file, **kwargs)

##

# longer time scale trends
plt.figure(37).clf()  # reuse figure 37, clearing any previous contents
fig, (ax, ax_lat, ax2, ax_dens) = plt.subplots(4, 1, sharex=True, num=37)
fig.set_size_inches((9.5, 9.0), forward=True)

times = utils.unix_to_dt64(df_start.t_mid.values)
ax.plot(times, df_start.mean_swim_urel, 'g.', label='Mean downstream swimming')
ax_lat.plot(times, df_start.mean_swim_lateral, 'b.', label='Mean lateral swimming')
ax.legend(loc='upper right')
ax_lat.legend(loc='upper right')
fig.autofmt_xdate()

ax.axhline(0.0, color='0.6', lw=1.0)

# MSD flows
msd_flow = fetch_and_parse(
    local_file="env_data/msd/flow-2018.csv",
    url="http://wdl.water.ca.gov/waterdatalibrary/docs/Hydstra/docs/B95820Q/2018/FLOW_15-MINUTE_DATA_DATA.CSV",
# Add release column to df_start, then
# split df_start into df_upper and df_lower
df_with_release = df_start.merge(tagged_fish[['TagID_Hex', 'release']],
                                 left_index=True, right_on='TagID_Hex')

# 103 of the tracks were from the lower release.
# 30 tracks from the upper release.
print(df_with_release.groupby('release').size())

df_upper = df_with_release[df_with_release['release'] == 'upper']
df_lower = df_with_release[df_with_release['release'] == 'lower']

##

times = utils.unix_to_dt64(df_start.t_mid.values)
pad = np.timedelta64(1, 'D')
t_min = min(times.min(), tag_releases.min()) - pad
t_max = max(times.max(), tag_releases.max()) + pad

# MSD flows
msd_flow = common.msd_flow(np.datetime64("2018-03-01"),
                           np.datetime64("2018-04-20"))
# fetch_and_parse(local_file="env_data/msd/flow-2018.csv",
#                 url="http://wdl.water.ca.gov/waterdatalibrary/docs/Hydstra/docs/B95820Q/2018/FLOW_15-MINUTE_DATA_DATA.CSV",
#                 skiprows=3,parse_dates=['time'],names=['time','flow_cfs','quality','notes'])
msd_turb = cdec.cdec_dataset('MSD', t_min, t_max, sensor=27,