Example #1
def parse_tek(det_fn, cf2_fn=None):
    if cf2_fn is None:
        fn = det_fn.replace('.DET', '.CF2')
        if os.path.exists(fn):
            cf2_fn = fn

    df = pd.read_csv(det_fn,
                     names=[
                         'id', 'year', 'month', 'day', 'hour', 'minute',
                         'second', 'epoch', 'usec', 'tag', 'nbwQ', 'corrQ',
                         'num1', 'num2', 'one', 'pressure', 'temp'
                     ])

    if 0:
        # This approach is slow, and I *think* it yields PST times, whereas epoch is UTC.
        dates = [
            datetime.datetime(year=rec['year'],
                              month=rec['month'],
                              day=rec['day'],
                              hour=rec['hour'],
                              minute=rec['minute'],
                              second=rec['second'])
            for idx, rec in df.iterrows()
        ]
        df['time'] = utils.to_dt64(
            np.array(dates)) + df['usec'] * np.timedelta64(1, 'us')
    else:
        # this is quite fast and should yield UTC.
        # the conversion in utils happens to be good down to microseconds, so we can
        # do this in one go
        df['time'] = utils.unix_to_dt64(df['epoch'] + df['usec'] * 1e-6)

    # clean up time:
    bad_time = (df.time < np.datetime64('2018-01-01')) | (
        df.time > np.datetime64('2022-01-01'))
    df2 = df[~bad_time].copy()

    # clean up temperature:
    df2.loc[df2.temp < -5, 'temp'] = np.nan
    df2.loc[df2.temp > 35, 'temp'] = np.nan

    # clean up pressure
    df2.loc[df2.pressure > 160e3, 'pressure'] = np.nan
    df2.loc[df2.pressure < 110e3, 'pressure'] = np.nan

    # trim to first/last valid pressure
    valid_idx = np.nonzero(np.isfinite(df2.pressure.values))[0]
    df3 = df2.iloc[valid_idx[0]:valid_idx[-1] + 1, :].copy()

    df3['tag'] = [s.strip() for s in df3.tag.values]

    ds = xr.Dataset.from_dataframe(df3)

    if cf2_fn is not None:
        cf = pd.read_csv(cf2_fn, header=None)
        local_tag = cf.iloc[0, 1].strip()
        ds['beacon_id'] = local_tag

    return ds
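The helper utils.unix_to_dt64 is not shown in any of these examples; judging from how it is called here, it converts float seconds since the Unix epoch into numpy datetime64 values. A minimal stand-in, assuming exactly that behavior (a sketch, not the project's actual implementation):

import numpy as np

def unix_to_dt64(t):
    # Assumed behavior of utils.unix_to_dt64: float seconds since
    # 1970-01-01 UTC -> datetime64[us], keeping sub-second precision.
    t = np.asarray(t, dtype=np.float64)
    us = np.round(t * 1e6).astype(np.int64)
    return np.datetime64('1970-01-01', 'us') + us * np.timedelta64(1, 'us')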
Example #2
def dbg_to_clock_changes(dbg_filename):
    """
    Scan the .DBG file for indications that the FPGA clock was changed.
    Returns a DataFrame with fpga_pre and fpga_post fields giving the UTC time
    just before and just after the clock was changed. For unanticipated resets,
    the DBG file only has hourly updates, so fpga_pre may be as much as an hour
    before the clock was actually reset.
    """
    dbg = pd.read_csv(dbg_filename, names=['time', 'message'])
    idx = 0
    clock_resets = []  # (time_before,msg_before,time_after, msg_after)

    # For reboots, track the last time we got a DBG message about the FPGA
    # clock
    last_fpga_status = dict(time_pre=None, msg_pre=None, fpga_pre=None)

    def parse_d(s):
        return datetime.datetime.strptime(s, '%m/%d/%Y %H:%M:%S')

    while idx < len(dbg):
        # Seem to get these status messages hourly
        # 03/24/2019 15:25:31, RTC: 1553469930.648437 FPGA: 1553469931.000051 dt=-0.351614
        m = re.match(r'\s*RTC: [0-9\.]+ FPGA: ([0-9\.]+) dt=.*',
                     dbg.message[idx])
        if m:
            last_fpga_status = dict(time_pre=parse_d(dbg.time.values[idx]),
                                    msg_pre=dbg.message[idx],
                                    fpga_pre=utils.unix_to_dt64(
                                        float(m.group(1))))
            idx += 1
            continue

        # 03/25/2019 11:04:45, FPGA started!! Init FPGA clock using RTC=1553540685.000000
        m = re.match(r'\s*FPGA started!! Init FPGA clock using RTC=([0-9\.]+)',
                     dbg.message[idx])
        if m:
            # this message also sets the new, running status going forward
            new_status = {}
            new_status['time_pre'] = last_fpga_status['time_post'] = parse_d(
                dbg.time.values[idx])
            new_status['msg_pre'] = last_fpga_status['msg_post'] = dbg.message[
                idx]
            new_status['fpga_pre'] = last_fpga_status[
                'fpga_post'] = utils.unix_to_dt64(float(m.group(1)))
            clock_resets.append(last_fpga_status)
            last_fpga_status = new_status
            idx += 1
            continue

        # And pick up sync events
        if '-Before SYNC' not in dbg.message[idx]:
            idx += 1
            continue
        before_msg = dbg.message[idx]
        after_msg = dbg.message[idx + 1]
        #  Things like
        #  03/13/2019 16:25:31, -Before SYNC: FPGA=1552523147.485166  RTC=1552523147.406250 dt=-0.078916
        #  03/13/2019 16:25:31, -After SYNC:  FPGA=1552523131.024603  RTC=1552523131.023437 dt=-0.001166
        assert '-After SYNC' in after_msg

        m1 = re.search(r'FPGA=([0-9\.]+)', before_msg)
        m2 = re.search(r'FPGA=([0-9\.]+)', after_msg)
        if m1:
            fpga_pre = utils.unix_to_dt64(float(m1.group(1)))
        else:
            fpga_pre = None
        if m2:
            fpga_post = utils.unix_to_dt64(float(m2.group(1)))
        else:
            fpga_post = None

        # dbg.time gives the timestamp of the log entry, but FPGA time is probably
        # what we actually want.
        clock_resets.append(
            dict(time_pre=parse_d(dbg.time.values[idx]),
                 time_post=parse_d(dbg.time.values[idx + 1]),
                 msg_pre=dbg.message[idx],
                 msg_post=dbg.message[idx + 1],
                 fpga_pre=fpga_pre,
                 fpga_post=fpga_post))
        idx += 2

    clock_resets = pd.DataFrame(clock_resets)
    return clock_resets
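As a quick sanity check of the output, the size of each clock adjustment can be summarized from the returned frame; the filename below is hypothetical and the usual pandas import is assumed:

resets = dbg_to_clock_changes('AM1.DBG')  # hypothetical path to a unit's DBG log
# None entries become NaT so incomplete pre/post pairs drop out of the arithmetic.
fpga_pre = pd.to_datetime(resets['fpga_pre'])
fpga_post = pd.to_datetime(resets['fpga_post'])
resets['shift_s'] = (fpga_post - fpga_pre).dt.total_seconds()
print(resets[['time_pre', 'time_post', 'shift_s']])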
Example #3
def parse_tek(
        det_fn,
        cf2_fn=None,
        name=None,
        pressure_range=[110e3, 225e3],
        auto_beacon=True,
        split_on_clock_change=True,
        time_range=[np.datetime64('2018-01-01'),
                    np.datetime64('2022-01-01')]):
    """
    det_fn: path to DET file with detection information
    cf2_fn: optional, read beacon id from CF2 file.  Will attempt to
      guess this if not specified. 
    name: string identifier added to dataset
    pressure_range: valid range of pressures, used to filter the time series
     to when the receiver was in the water.  pass None to skip any filtering.
    auto_beacon: if beacon tag ID cannot be read from CF2, this enables choosing
     the most common received tag as the beacon id.
    split_on_clock_change: if true, return a list of datasets, split up based on
      when logs indicated that the clock was updated.
    time_range: if specified, a start/stop datetime64 used to exclude known bad
      data. Defaults to very broad range to reject mis-parsed or mangled dates.
    """
    if cf2_fn is None:
        fn = det_fn.replace('.DET', '.CF2')
        if os.path.exists(fn):
            cf2_fn = fn

    df = pd.read_csv(det_fn,
                     names=[
                         'id', 'year', 'month', 'day', 'hour', 'minute',
                         'second', 'epoch', 'usec', 'tag', 'nbwQ', 'corrQ',
                         'num1', 'num2', 'one', 'pressure', 'temp'
                     ])

    # this is quite fast and should yield UTC.
    # the conversion in utils happens to be good down to microseconds, so we can
    # do this in one go
    df['time'] = utils.unix_to_dt64(df['epoch'] + df['usec'] * 1e-6)

    # clean up time:
    bad_time = (df.time < time_range[0]) | (df.time > time_range[1])
    df2 = df[~bad_time].copy()

    # clean up temperature:
    df2.loc[df2.temp < -5, 'temp'] = np.nan
    df2.loc[df2.temp > 35, 'temp'] = np.nan

    # clean up pressure
    # this had been limited to 160e3, but
    # AM9 has a bad calibration (or maybe it's just really deep)
    if pressure_range is not None:
        df2.loc[df2.pressure < pressure_range[0], 'pressure'] = np.nan
        df2.loc[df2.pressure > pressure_range[1], 'pressure'] = np.nan

    # trim to first/last valid pressure
    valid_idx = np.nonzero(np.isfinite(df2.pressure.values))[0]
    df3 = df2.iloc[valid_idx[0]:valid_idx[-1] + 1, :].copy()

    df3['tag'] = [s.strip() for s in df3.tag.values]

    ds = xr.Dataset.from_dataframe(df3)
    ds['det_filename'] = (), det_fn

    if name is not None:
        ds['name'] = (), name

    if cf2_fn is not None:
        # SM2 isn't getting the right value here.
        # looks like it would be FF13, but it never
        # hears FF13.
        cf = pd.read_csv(cf2_fn, header=None)
        local_tag = cf.iloc[0, 1].strip()
        ds['beacon_id'] = local_tag
        ds['cf2_filename'] = (), cf2_fn
    elif auto_beacon:
        beacon_id = df3.groupby('tag').size().sort_values().index[-1]
        ds['beacon_id'] = beacon_id
        ds['cf2_filename'] = None
        ds['beacon_id'].attrs['source'] = 'received tags'

    ds.attrs['pressure_range'] = pressure_range
    if split_on_clock_change:
        dbg_filename = ds.det_filename.item().replace('.DET', '.DBG')
        if not os.path.exists(dbg_filename):
            print("Split on clock: couldn't find %s" % dbg_filename)
            return [ds]
        else:
            all_clock_resets = dbg_to_clock_changes(dbg_filename)
            diced = dice_by_clock_resets(ds, all_clock_resets)
            return diced
    else:
        return ds
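For reference, a call might look like the following; the filename and name are illustrative only, and with split_on_clock_change=True the return value is a list of Datasets:

segments = parse_tek('SM1.DET', name='SM1')  # hypothetical filename
for seg in segments:
    print(seg['beacon_id'].item(),
          seg['time'].values.min(),
          seg['time'].values.max())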
Example #4
            route='San_Joaquin'
            exit_t=rec['sj_upper_first']
        else:
            route='Head_of_Old_River'
            exit_t=rec['hor_upper_first']
    else:
        route='no_exit'
        exit_t=np.nan
    df_ptm.loc[idx,'route']=route
    df_ptm.loc[idx,'exit_time']=exit_t

##

# 20201230: Rename the epoch timestamps, and make the
# entry_time and exit_time as string datetimes in PST.

df_ptm2=df_ptm.copy()
utc_to_pst=np.timedelta64(-8,'h')

for t_col in ['entry_time','exit_time','first_detection_time']:
    epo_col=t_col.replace('_time','_utc_epoch')
    pst_col=t_col+"_pst"
    df_ptm2[epo_col]=df_ptm2[t_col]
    df_ptm2[pst_col] = utc_to_pst + utils.unix_to_dt64(df_ptm2[epo_col].round())
    del df_ptm2[t_col]

##

df_ptm2.to_csv("screen_final-ptm_inputs-20201230.csv",index=False)
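Note that the fixed -8 h offset gives standard time (PST) year-round and ignores daylight saving. If DST-aware local times were ever needed, a tz-aware conversion along these lines would work (a sketch, not part of the original script):

# Sketch only: convert one epoch column to America/Los_Angeles local time,
# letting pandas handle the PST/PDT switchover. NaN epochs become NaT.
exit_local = (pd.to_datetime(df_ptm2['exit_utc_epoch'], unit='s', utc=True)
              .dt.tz_convert('America/Los_Angeles'))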

Example #5
    #output_fn=os.path.join(output_path,os.path.basename(track_fn))
    print(idx)
    
    track=row[col_in]
    
    if len(track)<2:
        results.append(None)
        continue 

    track=track.copy()
    for fld in ['x','y','tnum']:
        fld_m=0.5*( track[fld].values[:-1] +
                   track[fld].values[1:] )
        track[fld+'_m']=np.r_[ fld_m, np.nan ]
        
    track['time_m']=utils.unix_to_dt64(track['tnum_m'].values)
    
    new_records=[]
    
    for i,seg in utils.progress( track.iloc[:-1,:].iterrows() ):
        rec={} # new fields to be added to segments.
        t=seg['time_m'].to_datetime64()
        run_i,t_i=quantize_time(t)

        rec['index']=i

        for slice_name,slice_def in z_slices:
            Uint=interpolator(run_i,t_i,**slice_def)
        
            # vorticity at centers
            x_samp=np.array( [ [seg['x_m']    , seg['y_m']      ],
Example #6
def fetch_and_parse(local_file, url, **kwargs):
    # Signature inferred from the call below: download url to local_file if it
    # is not already cached, then parse the cached CSV with pandas.
    if not os.path.exists(local_file):
        if not os.path.exists(os.path.dirname(local_file)):
            os.makedirs(os.path.dirname(local_file))
        utils.download_url(url, local_file, on_abort='remove')
    return pd.read_csv(local_file, **kwargs)


##
# longer time scale trends

plt.figure(37).clf()

fig, (ax, ax_lat, ax2, ax_dens) = plt.subplots(4, 1, sharex=True, num=37)
fig.set_size_inches((9.5, 9.0), forward=True)

times = utils.unix_to_dt64(df_start.t_mid.values)
ax.plot(times, df_start.mean_swim_urel, 'g.', label='Mean downstream swimming')
ax_lat.plot(times,
            df_start.mean_swim_lateral,
            'b.',
            label='Mean lateral swimming')
ax.legend(loc='upper right')
ax_lat.legend(loc='upper right')
fig.autofmt_xdate()
ax.axhline(0.0, color='0.6', lw=1.0)

# MSD flows
msd_flow = fetch_and_parse(
    local_file="env_data/msd/flow-2018.csv",
    url=
    "http://wdl.water.ca.gov/waterdatalibrary/docs/Hydstra/docs/B95820Q/2018/FLOW_15-MINUTE_DATA_DATA.CSV",
Example #7
# Add release column to df_start, then
# split df_start into df_upper and df_lower
df_with_release = df_start.merge(tagged_fish[['TagID_Hex', 'release']],
                                 left_index=True,
                                 right_on='TagID_Hex')

# 103 of the tracks were from the lower release.
# 30 tracks from the upper release.
print(df_with_release.groupby('release').size())

df_upper = df_with_release[df_with_release['release'] == 'upper']
df_lower = df_with_release[df_with_release['release'] == 'lower']

##

times = utils.unix_to_dt64(df_start.t_mid.values)
pad = np.timedelta64(1, 'D')
t_min = min(times.min(), tag_releases.min()) - pad
t_max = max(times.max(), tag_releases.max()) + pad

# MSD flows
msd_flow = common.msd_flow(np.datetime64("2018-03-01"),
                           np.datetime64("2018-04-20"))
# fetch_and_parse(local_file="env_data/msd/flow-2018.csv",
#                          url="http://wdl.water.ca.gov/waterdatalibrary/docs/Hydstra/docs/B95820Q/2018/FLOW_15-MINUTE_DATA_DATA.CSV",
#                          skiprows=3,parse_dates=['time'],names=['time','flow_cfs','quality','notes'])

msd_turb = cdec.cdec_dataset('MSD',
                             t_min,
                             t_max,
                             sensor=27,