Esempio n. 1
0
def show_lowe_wfs():
    """
    Plot a handful of very low-energy waveforms that pass the data cleaning
    (baseline) cut, and save the figure to ./plots/lowe_wfs.png.

    Events are selected from the hit file: the calibrated energy estimator
    must lie in (elo, ehi) and the baseline `bl` in (blo, bhi).  The first
    `nwfs` selected rows are then looked up in the raw file by row index.
    If no event passes the selection, a message is printed and nothing is
    plotted.
    """
    f_raw = '/Users/wisecg/Data/OPPI/raw/oppi_run0_cyc2027_raw.lh5'
    f_hit = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'

    tb_name = 'ORSIS3302DecoderForEnergy/raw'
    hit_store = lh5.Store()
    data = hit_store.read_object(tb_name, f_hit)
    df_hit = data.get_dataframe()

    # correct energy_first to allow negative values.  The result is written
    # back explicitly: mutating the array returned by `.values` only works
    # when pandas hands out a writable view of the column.
    efirst = df_hit['energy_first'].astype(np.int64).values
    df_hit['energy_first'] = np.where(efirst > 4e9, efirst - 4294967295,
                                      efirst)

    nwfs = 40
    elo, ehi = 1, 10
    blo, bhi = 57700, 58500  # cut values
    etype = 'trapE_cal'  # noise stops @ 35 keV

    ene = df_hit[etype]
    passes = ((ene > elo) & (ene < ehi)
              & (df_hit.bl > blo) & (df_hit.bl < bhi))
    idx_lowe = ene.loc[passes].index[:nwfs]
    if idx_lowe.empty:
        print('No events pass the low-energy selection, nothing to plot.')
        return

    # only read the raw file up to the last row we actually need
    i_max = idx_lowe.max()

    raw_store = lh5.Store()
    data_raw = raw_store.read_object(tb_name,
                                     f_raw,
                                     start_row=0,
                                     n_rows=i_max + 1)

    wfs = data_raw['waveform']['values'].nda
    wfs_lowe = wfs[idx_lowe.values, :]
    ts = np.arange(0, wfs_lowe.shape[1], 1)

    # plot wfs
    for wf in wfs_lowe:
        plt.plot(ts, wf, lw=1, alpha=0.5)

    plt.xlabel('time (clock ticks)', ha='right', x=1)
    plt.ylabel('ADC', ha='right', y=1)
    # plt.show()
    plt.savefig('./plots/lowe_wfs.png', dpi=300)
    plt.cla()
Esempio n. 2
0
def show_raw_spectrum():
    """
    Plot an uncalibrated energy spectrum from the DSP file.

    The estimator and histogram range are set below; switch `e_name` and the
    range to 'energy' (onboard energy) to look at that estimator instead.
    - TODO: fit each expected peak and get resolution vs energy
    """
    dsp_path = '/Users/wisecg/Data/OPPI/dsp/oppi_run0_cyc2027_dsp_test.lh5'

    # we will probably make this part simpler in the near future
    store = lh5.Store()
    store.ls(dsp_path)
    table = store.read_object('ORSIS3302DecoderForEnergy/raw', dsp_path)
    frame = table.get_dataframe()

    # from here, we can use standard pandas to work with data
    print(frame)

    # alternative settings for the onboard energy:
    #   whole spectrum: 0, 8e6, 1000, 'energy'
    #   < 250 keV:      0, 800000, 1000, 'energy'
    e_name = 'trapE'
    e_min, e_max, bin_width = 0, 10000, 10

    counts, edges, _ = pgh.get_hist(frame[e_name],
                                    range=(e_min, e_max),
                                    dx=bin_width)
    upper_edges = edges[1:]  # trim zero bin, not needed with ds='steps'

    plt.plot(upper_edges, counts, ds='steps', c='b', lw=2, label=e_name)
    plt.xlabel(e_name, ha='right', x=1)
    plt.ylabel('Counts', ha='right', y=1)
    plt.legend()
    plt.tight_layout()
    plt.show()
Esempio n. 3
0
def show_cal_spectrum():
    """
    Plot a calibrated energy spectrum from the hit file.

    Reads the hit table into a DataFrame, histograms the chosen calibrated
    estimator between 0 and 3000 (keV) with 0.5 keV bins, and shows the plot.
    """
    hit_path = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'
    table_name = 'ORSIS3302DecoderForEnergy/raw'

    store = lh5.Store()
    store.ls(hit_path)
    frame = store.read_object(table_name, hit_path).get_dataframe()

    print(frame)

    # choose energy estimator ('energy_cal' or 'trapE_cal')
    e_name = 'energy_cal'

    # energy in keV
    e_min, e_max, bin_width = 0, 3000, 0.5

    counts, edges, _ = pgh.get_hist(frame[e_name],
                                    range=(e_min, e_max),
                                    dx=bin_width)
    upper_edges = edges[1:]  # trim zero bin, not needed with ds='steps'

    plt.plot(upper_edges, counts, ds='steps', c='b', lw=2, label=e_name)
    plt.xlabel(e_name, ha='right', x=1)
    plt.ylabel('Counts', ha='right', y=1)
    plt.legend()
    plt.tight_layout()
    plt.show()
Esempio n. 4
0
def main():
    """
    Example: load an LH5 DSP file, convert it to a pandas DataFrame, and
    draw the uncalibrated 'trapE' spectrum on a log-y scale.
    """
    # we will probably make this part simpler in the near future
    dsp_path = '/Users/wisecg/Data/lh5/hades_I02160A_r1_191021T162944_th_HS2_top_psa_dsp.lh5'
    store = lh5.Store()
    store.ls(dsp_path)  # the example file only has one group, 'raw'
    table = store.read_object('raw', dsp_path)
    frame = table.get_dataframe()

    # from here, we can use standard pandas to work with data
    print(frame)

    # one example: create uncalibrated energy spectrum,
    # using a pygama helper function to get the histogram
    e_min, e_max, bin_width = 0, 100000, 10
    counts, edges, _ = pgh.get_hist(frame['trapE'],
                                    range=(e_min, e_max),
                                    dx=bin_width)
    upper_edges = edges[1:]  # trim zero bin, not needed with ds='steps'

    plt.semilogy(upper_edges, counts, ds='steps', c='b', label='trapE')
    plt.xlabel('trapE', ha='right', x=1)
    plt.ylabel('Counts', ha='right', y=1)
    plt.legend()
    plt.tight_layout()
    plt.show()
Esempio n. 5
0
def get_runtimes(dg):
    """
    $ ./setup.py --runtime

    Get the Ge runtime of each cycle file and add it to the fileDB.

    Two columns are (re)created on `dg.fileDB`:
    - 'runtime': the largest final timestamp among the first three Ge
      groups of the raw file, divided by 60
    - 'rt_std': the standard deviation of those final timestamps (NOT
      divided by 60, so it is in the raw timestamp units)

    Rows whose raw file is missing, or whose raw file yields no timestamps,
    get 0 for both columns.  Requires the raw LH5 files.  After the scan the
    user is asked interactively whether to save the updated fileDB.
    """
    dg.load_df()
    # dg.fileDB = dg.fileDB[50:55] # debug only

    # reset columns of interest
    new_cols = ['runtime', 'rt_std']
    for col in new_cols:
        if col in dg.fileDB.columns:
            dg.fileDB.drop(col, axis=1, inplace=True)

    sto = lh5.Store()

    t_start = time.time()

    def runtime_cycle(df_row):
        """Return a Series with 'runtime' and 'rt_std' for one fileDB row."""
        # load raw file path (with {these} in it); always look for Ge
        f_raw = f'{dg.lh5_dir}/{df_row.raw_path}/{df_row.raw_file}'
        f_raw = f_raw.format_map({'sysn': 'geds'})

        if not os.path.exists(f_raw):
            return pd.Series({'runtime': 0, 'rt_std': 0})

        # for PGT, compare the first three channels (for redundancy)
        rts = []
        for ge in sto.ls(f_raw)[:3]:
            ts = lh5.load_nda([f_raw], ['timestamp'],
                              ge + '/raw/')['timestamp']
            # an empty channel has no last timestamp to contribute
            if len(ts) > 0:
                rts.append(ts[-1])

        # no groups or only empty channels: treat like a missing file
        if not rts:
            return pd.Series({'runtime': 0, 'rt_std': 0})

        # take largest value & compute uncertainty
        runtime = max(rts) / 60
        rt_std = np.std(rts)

        return pd.Series({'runtime': runtime, 'rt_std': rt_std})

    # df_tmp = dg.fileDB.apply(runtime_cycle, axis=1)
    dg.fileDB[new_cols] = dg.fileDB.progress_apply(runtime_cycle, axis=1)

    print(f'Done. Time elapsed: {(time.time()-t_start)/60:.2f} mins.')

    # save to fileDB if everything looks OK
    print(dg.fileDB)
    print(dg.fileDB.columns)
    print('FileDB location:', dg.config['fileDB'])
    ans = input('Save new fileDB? (y/n)')
    if ans.lower() == 'y':
        dg.save_df(dg.config['fileDB'])
Esempio n. 6
0
def dsp_to_hit():
    """
    Add calibrated energy columns to the DSP table and write it to the hit
    file.  This is a good example of adding a column, and of reading and
    writing an LH5 file.

    For each estimator, the linear calibration from `linear_cal` is applied
    as `raw * pfit[0] + pfit[1]` and stored as '<estimator>_cal' in keV.
    """
    dsp_path = '/Users/wisecg/Data/OPPI/dsp/oppi_run0_cyc2027_dsp_test.lh5'
    hit_path = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'
    table_name = 'ORSIS3302DecoderForEnergy/raw'

    store = lh5.Store()
    store.ls(dsp_path)
    table = store.read_object(table_name, dsp_path)
    frame = table.get_dataframe()

    # add a new column for each energy estimator of interest
    for estimator in ('energy', 'trapE'):
        cal_col = f'{estimator}_cal'
        pfit = linear_cal(estimator)
        frame[cal_col] = frame[estimator] * pfit[0] + pfit[1]

        calibrated = lh5.Array(frame[cal_col].values, attrs={'units': 'keV'})
        table.add_field(cal_col, calibrated)

    # write to hit file.  delete if exists, LH5 overwrite is broken rn
    if os.path.exists(hit_path):
        os.remove(hit_path)
    store.write_object(table, table_name, hit_path)
0
def show_wfs(dg):
    """
    Show waveforms in different energy regions.

    The hit file is used to select events whose `etype` value lies in
    [elo, ehi]; the first `nwfs` of them are then read from the raw file by
    row index and plotted.  If nothing is selected, a message is printed and
    no plot is drawn.

    dg: object providing `config['lh5_dir']` and a `file_keys` table with
        'hit_path', 'hit_file', 'raw_path' and 'raw_file' columns.
    """
    # settings
    etype = 'trapEmax_cal'
    nwfs = 20
    elo, ehi = 0, 20  # noise region
    # other regions of interest:
    #   0, 100        low-e region
    #   1458, 1468    good physics events
    #   6175, 6250    overflow peak
    #   5000, 5200    lower overflow peak

    # get file list and load hit data.  Load the column we actually cut on;
    # requesting a different one makes the selection below fail with KeyError.
    lh5_dir = os.path.expandvars(dg.config['lh5_dir'])
    hit_list = lh5_dir + dg.file_keys['hit_path'] + '/' + dg.file_keys[
        'hit_file']
    df_hit = lh5.load_dfs(hit_list, [etype],
                          'ORSIS3302DecoderForEnergy/hit')
    print(df_hit)
    print(df_hit.columns)

    # select waveforms
    ene = df_hit[etype]
    idx = ene.loc[(ene >= elo) & (ene <= ehi)].index[:nwfs]
    if idx.empty:
        print(f'No events with {elo} <= {etype} <= {ehi}, nothing to plot.')
        return

    raw_store = lh5.Store()
    tb_name = 'ORSIS3302DecoderForEnergy/raw'
    raw_list = lh5_dir + dg.file_keys['raw_path'] + '/' + dg.file_keys[
        'raw_file']
    f_raw = raw_list.values[0]  # fixme, only works for one file rn

    # only read the raw file up to the last row we need
    data_raw = raw_store.read_object(tb_name,
                                     f_raw,
                                     start_row=0,
                                     n_rows=idx.max() + 1)

    wfs_all = data_raw['waveform']['values'].nda
    wfs = wfs_all[idx.values, :]
    ts = np.arange(0, wfs.shape[1], 1)

    # plot wfs
    for wf in wfs:
        plt.plot(ts, wf, lw=1)

    plt.xlabel('time (clock ticks)', ha='right', x=1)
    plt.ylabel('ADC', ha='right', y=1)
    plt.show()
Esempio n. 8
0
def get_data(files, groupname, e_param='trapE'):
    """
    Read one column from one or more LH5 files and return it as one array.

    files: a single path, or any iterable of paths (list, tuple, ...).
           Environment variables in each path are expanded.
    groupname: name of the LH5 group/table to read in every file.
    e_param: name of the column whose `.nda` array is collected.

    Returns a numpy array with the per-file arrays joined in order along the
    first axis.  An empty iterable of files yields an empty array.
    """
    dsp = lh5.Store()

    # a lone path is treated as a one-element file list
    if isinstance(files, (str, os.PathLike)):
        files = [files]

    chunks = []
    for file in files:
        filename = os.path.expandvars(file)
        data = dsp.read_object(groupname, filename)
        chunks.append(np.asarray(data[e_param].nda))

    if not chunks:
        return np.asarray([])

    # join whole arrays at once, not element by element
    return np.concatenate(chunks)
Esempio n. 9
0
def show_groups():
    """
    Example of inspecting the HDF5 groups in our LH5 files: list the groups
    of the DSP file, print the shape of every column in its table, and
    check the conversion to a pandas DataFrame.
    """
    raw_path = '/Users/wisecg/Data/OPPI/raw/oppi_run0_cyc2027_raw.lh5'
    dsp_path = '/Users/wisecg/Data/OPPI/dsp/oppi_run0_cyc2027_dsp_test.lh5'
    hit_path = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'

    # h5py alternative for navigating the groups by hand:
    #   hf = h5py.File(raw_path)
    #   print(hf.keys())
    #   print(hf['ORSIS3302DecoderForEnergy/raw'].keys())
    #   print(hf['ORSIS3302DecoderForEnergy/raw/waveform'].keys())

    # lh5 method
    store = lh5.Store()
    store.ls(dsp_path)
    table = store.read_object('ORSIS3302DecoderForEnergy/raw', dsp_path)

    # testing -- make sure data columns all have same shape
    for column in table.keys():
        column_shape = table[column].nda.shape
        print(column, column_shape)

    # to directly access timestamps in a raw file w/o loading all the wfs:
    #   store.ls(raw_path, 'ORSIS3302DecoderForEnergy/raw/')
    #   ts = store.read_object('ORSIS3302DecoderForEnergy/raw/timestamp',
    #                          raw_path).nda

    # check pandas conversion
    frame = table.get_dataframe()
    print(frame.columns)
    print(frame)
Esempio n. 10
0
def check_wfs(dg):
    """
    Compare alpha-candidate waveforms to gamma-candidate waveforms.

    Event indexes are picked from the temp results file written for the
    first cycle in `dg.fileDB` (./temp_{cycle}.h5, key 'opt_dcr') using an
    energy window on `trapEmax` plus an A/E window per population.  The
    matching waveforms are taken from the first raw file in `dg.fileDB`.
    LH5 doesn't let us load only particular indexes (yet), so all the
    waveforms in the file are loaded every time.

    Up to 10 waveforms of each population are drawn, each normalized by its
    own maximum: gammas in blue, alphas in red.
    """
    # load dsp results
    cycle = dg.fileDB['cycle'].values[0]
    df_dsp = pd.read_hdf(f'./temp_{cycle}.h5', 'opt_dcr')

    # load waveforms
    sto = lh5.Store()
    lh5_dir = os.path.expandvars(dg.config['lh5_dir'])
    raw_list = lh5_dir + dg.fileDB['raw_path'] + '/' + dg.fileDB['raw_file']
    f_raw = raw_list.values[0]
    tb_wfs = sto.read_object('ORSIS3302DecoderForEnergy/raw/waveform', f_raw)

    # energy cut (other windows tried: 8000-16000, 8000-10000)
    et = 'trapEmax'
    elo, ehi = 12000, 13000

    # aoe cut (a dcr cut was tried before: alpha -0.5..0.5, gamma 0.8..1.2)
    alp_lo, alp_hi = 0.064, 0.068
    gam_lo, gam_hi = 0.05, 0.06

    # selection: shared energy window, then one A/E window per population
    in_ewin = (df_dsp[et] > elo) & (df_dsp[et] < ehi)
    idx_alp = df_dsp[et].loc[in_ewin & (df_dsp.aoe > alp_lo) &
                             (df_dsp.aoe < alp_hi)].index
    idx_gam = df_dsp[et].loc[in_ewin & (df_dsp.aoe > gam_lo) &
                             (df_dsp.aoe < gam_hi)].index

    wfs_alp = tb_wfs['values'].nda[idx_alp]
    wfs_gam = tb_wfs['values'].nda[idx_gam]

    print(f'found {wfs_alp.shape[0]} alpha candidates')
    print(f'found {wfs_gam.shape[0]} gamma candidates')

    ts = np.arange(0, wfs_gam.shape[1], 1)

    def plot_normalized(wfs, fmt, n_show=10):
        """Draw the first `n_show` waveforms, each scaled by its maximum."""
        for wf in wfs[:n_show]:
            # the last sample is dropped from the plot, as before
            plt.plot(ts[:-1], wf[:-1] / np.amax(wf), fmt, lw=1, alpha=0.5)

    plot_normalized(wfs_gam, '-b')
    plot_normalized(wfs_alp, '-r')

    plt.xlabel('time (clock ticks)', ha='right', x=1)
    plt.ylabel('ADC', ha='right', y=1)
    plt.show()
Esempio n. 11
0
def optimize_dcr(dg):
    """
    Run DSP on one raw file with hand-picked DCR, trap-energy and pole-zero
    settings, and store the resulting table in ./temp_{cycle}.h5 under the
    key 'opt_dcr'.

    There is no a priori figure of merit for the DCR parameter until we can
    verify that we're seeing alphas, so this just runs the processing on a
    CAGE run with known alpha events so the 2d DCR vs. energy distribution
    can be inspected.  Once the alpha distribution can be measured reliably,
    a grid search like the one in optimize_trap could be tried.
    """
    # files to consider.  fixme: right now only works with one file
    store = lh5.Store()
    base_dir = os.path.expandvars(dg.config['lh5_dir'])
    raw_paths = base_dir + dg.fileDB['raw_path'] + '/' + dg.fileDB['raw_file']
    raw_file = raw_paths.values[0]

    raw_table = store.read_object('ORSIS3302DecoderForEnergy/raw/', raw_file)

    cycle = dg.fileDB['cycle'].values[0]
    results_file = f'./temp_{cycle}.h5'

    write_output = True

    # adjust dsp config
    with open('opt_dcr.json') as cfg_file:
        dsp_config = json.load(cfg_file, object_pairs_hook=OrderedDict)
    processors = dsp_config['processors']

    # set dcr parameters: (rise, flat, tstart) go into args 1..3
    #   default:       200, 1000, 'tp_0+1.5*us'
    #   best so far?:  100, 3000, 'tp_0+3*us'
    dcr_settings = (100, 2500, 'tp_0+1*us')
    dcr_args = processors['dcr_raw']['args']
    for pos, value in enumerate(dcr_settings, start=1):
        dcr_args[pos] = value

    # set trap energy parameters: (rise, flat) go into args 1..2
    #   best? from optimize_trap: "2*us", "1*us"
    trap_settings = ("10*us", "5*us")
    trap_args = processors['wf_trap']['args']
    for pos, value in enumerate(trap_settings, start=1):
        trap_args[pos] = value

    # adjust pole-zero constant (also tried: '50*us', '100*us')
    processors['wf_pz']['defaults']['db.pz.tau'] = '64.4*us'

    # run dsp
    print('Running DSP ...')
    t_start = time.time()
    chain, out_table = build_processing_chain(raw_table,
                                              dsp_config,
                                              verbosity=1)
    chain.execute()
    t_elap = (time.time() - t_start) / 60
    print(f'Done.  Elapsed: {t_elap:.2f} min')

    out_frame = out_table.get_dataframe()

    if write_output:
        out_frame.to_hdf(results_file, key='opt_dcr')
        print('Wrote output file:', results_file)
Esempio n. 12
0
def raw_to_dsp(f_raw,
               f_dsp,
               dsp_config,
               lh5_tables=None,
               verbose=1,
               outputs=None,
               n_max=np.inf,
               overwrite=True,
               buffer_len=3200,
               block_width=8):
    """
    Run the DSP ProcessingChain over the raw tables of `f_raw` and write
    the results to `f_dsp`.

    f_raw: path of the input raw LH5 file.
    f_dsp: path of the output DSP LH5 file.  Any existing file at this path
        is deleted first.
    dsp_config: dict describing the processors, or the path of a JSON file
        containing it.
    lh5_tables: list of table names to process.  If None, every table in
        the file is considered (looking one level down for a nested 'raw',
        e.g. g024/raw).  Names without 'raw' in them are ignored.  The
        caller's list is not modified.
    verbose: verbosity; > 0 enables the progress display.
    outputs: passed through to build_processing_chain.
    n_max: maximum number of rows to process per table.
    overwrite: currently unused, the old file is always deleted (see TODO).
    buffer_len: number of rows read and processed per chunk.
    block_width: passed through to build_processing_chain.

    Each table '<x>/raw' is written as '<x>/dsp', and processing metadata
    is written to the 'dsp_info' group.

    Raises Exception if dsp_config is neither a dict nor a path to one.
    """
    t_start = time.time()

    if isinstance(dsp_config, str):
        with open(dsp_config, 'r') as config_file:
            dsp_config = json.load(config_file, object_pairs_hook=OrderedDict)

    if not isinstance(dsp_config, dict):
        raise Exception('Error, dsp_config must be an dict')

    raw_store = lh5.Store()
    lh5_file = raw_store.gimme_file(f_raw, 'r')

    # if no group is specified, assume we want to decode every table in the file
    if lh5_tables is None:
        lh5_tables = []

        # sometimes 'raw' is nested, e.g g024/raw
        for tb in raw_store.ls(f_raw):
            if "raw" not in tb:
                nested = raw_store.ls(lh5_file[tb])
                if nested and "raw" in nested[0]:
                    tb = tb + '/' + nested[0]  # g024 + /raw
            lh5_tables.append(tb)

    # make sure every group points to waveforms, if not, drop the group.
    # Build a new list: removing items from a list while iterating over it
    # skips the element that follows each removal.
    lh5_tables = [tb for tb in lh5_tables if 'raw' in tb]

    # delete the old file. TODO: ONCE BUGS ARE FIXED IN LH5 MODULE, DO THIS ONLY IF OVERWRITE IS TRUE!
    try:
        os.remove(f_dsp)
        print("Deleted", f_dsp)
    except FileNotFoundError:
        pass  # nothing to delete
    except OSError as err:
        # still best-effort, but don't hide why the old file is still there
        print(f"Warning: could not delete {f_dsp}: {err}")

    for tb in lh5_tables:
        # load primary table and build processing chain and output table
        tot_n_rows = raw_store.read_n_rows(tb, f_raw)
        if n_max and n_max < tot_n_rows:
            tot_n_rows = int(n_max)  # range() below needs an integer

        lh5_in, n_rows_read = raw_store.read_object(tb, f_raw, 0, buffer_len)
        pc, tb_out = build_processing_chain(lh5_in, dsp_config, outputs,
                                            verbose, block_width)

        print(f'Processing table: {tb} ...')
        for start_row in range(0, tot_n_rows, buffer_len):
            if verbose > 0:
                update_progress(start_row / tot_n_rows)
            # reuse the input buffer for every chunk
            lh5_in, n_rows = raw_store.read_object(tb,
                                                   f_raw,
                                                   start_row=start_row,
                                                   obj_buf=lh5_in)
            # don't run past the row limit in the last chunk
            n_rows = min(tot_n_rows - start_row, n_rows)
            pc.execute(0, n_rows)
            raw_store.write_object(tb_out,
                                   tb.replace('/raw', '/dsp'),
                                   f_dsp,
                                   n_rows=n_rows)

        if verbose > 0:
            update_progress(1)
        print('Done.  Writing to file ...')

    # write processing metadata
    dsp_info = lh5.Struct()
    dsp_info.add_field('timestamp', lh5.Scalar(np.uint64(time.time())))
    dsp_info.add_field('python_version', lh5.Scalar(sys.version))
    dsp_info.add_field('numpy_version', lh5.Scalar(np.version.version))
    dsp_info.add_field('h5py_version', lh5.Scalar(h5py.version.version))
    dsp_info.add_field('hdf5_version', lh5.Scalar(h5py.version.hdf5_version))
    dsp_info.add_field('pygama_version', lh5.Scalar(pygama_version))
    dsp_info.add_field('pygama_branch', lh5.Scalar(git.branch))
    dsp_info.add_field('pygama_revision', lh5.Scalar(git.revision))
    dsp_info.add_field('pygama_date', lh5.Scalar(git.commit_date))
    dsp_info.add_field('dsp_config',
                       lh5.Scalar(json.dumps(dsp_config, indent=2)))
    raw_store.write_object(dsp_info, 'dsp_info', f_dsp)

    t_elap = (time.time() - t_start) / 60
    print(f'Done processing.  Time elapsed: {t_elap:.2f} min.')
Esempio n. 13
0
# Command-line check of buffered LH5 reads: the table is read once in a single
# call and then again chunk by chunk into a reusable buffer.
# Expects exactly four arguments; otherwise print usage and exit.
if len(sys.argv) != 5:
    print('Usage: python', sys.argv[0],
          '[filename] [table_path] [buffer_size] [arr_col]')
    print(
        '  where arr_col is the name of an Array-like object in one of the table columns.'
    )
    sys.exit()

filename = sys.argv[1]
name = sys.argv[2]
buffer_size = int(sys.argv[3])
arr_col = sys.argv[4]
# number of buffer-sized chunks to read in the loop below
n_iter = 4

test_rows = n_iter * buffer_size
store = lh5.Store()

# reference read: all test_rows rows in a single call
comp_table, n_rows_read = store.read_object(name, filename, n_rows=test_rows)

# reusable buffer holding buffer_size rows
table_buf = store.get_buffer(name, filename, size=buffer_size)

# NOTE(review): success_its, comp_table and arr_col are not used in the
# visible part of the script -- presumably the comparison follows further on.
success_its = 0
for i_it in range(n_iter):
    print('iteration', i_it)
    # read the next chunk into the same buffer
    start_row = i_it * buffer_size
    table_buf, n_rows_read = store.read_object(name,
                                               filename,
                                               start_row=start_row,
                                               obj_buf=table_buf)
    if n_rows_read == 0:
        print('n_rows_read = 0')
Esempio n. 14
0
    def __init__(self,
                 files_in,
                 lh5_group,
                 dsp_config=None,
                 n_drawn=1,
                 x_unit='ns',
                 x_lim=None,
                 waveforms='waveform',
                 lines=None,
                 legend=None,
                 norm=None,
                 align=None,
                 selection=None,
                 buffer_len=128,
                 block_width=8,
                 verbosity=1):
        """Constructor for WaveformBrowser:
        - files_in: name of file or list of names to browse. Can use wildcards
        - lh5_group: name of LH5 group in file to browse
        - dsp_config (optional): name of DSP config json file containing transforms available to draw
        - n_drawn (default 1): number of events to draw simultaneously when calling DrawNext
        - x_unit (default ns): unit for x-axis
        - x_lim (default auto): range of x-values passed as tuple
        - waveforms (default 'waveform'): name of wf or list of wf names to draw
        - lines (default None): name of parameter or list of parameters to draw hlines and vlines for
        - legend (default None): name of parameters to include in legend
        - norm (default None): name of parameter (probably energy) to use to normalize WFs; useful when drawing multiple
        - align (default None): name of time parameter to set as 0 time; useful for aligning multiple waveforms
        - selection (optional): selection of events to draw. Can be either a list of event indices or a numpy array mask (ala pandas).
        - buffer_len (default 128): number of waveforms to keep in memory at a time
        - block_width (default 8): block width for processing chain
        - verbosity (default 1): verbosity passed on to the processing chain
        """

        def _as_list(spec):
            # a lone name becomes a one-element list, None an empty list,
            # and any other iterable is copied into a new list
            if isinstance(spec, str):
                return [spec]
            if spec is None:
                return []
            return list(spec)

        self.verbosity = verbosity

        # data i/o initialization
        self.lh5_st = lh5.Store(keep_open=True)
        if isinstance(files_in, str):
            files_in = [files_in]

        # Expand wildcards and map out the files
        self.lh5_files = []
        for pattern in files_in:
            matches = glob.glob(os.path.expandvars(pattern))
            self.lh5_files.extend(sorted(matches))
        self.lh5_group = lh5_group

        # file map is cumulative length of files up to file n. By doing
        # searchsorted left, we can get the file for a given wf index
        rows_per_file = [
            self.lh5_st.read_n_rows(lh5_group, path)
            for path in self.lh5_files
        ]
        self.file_map = np.cumsum(np.array(rows_per_file, 'int64'))

        # Get the input buffer and read the first chunk
        first_file = self.lh5_files[0]
        self.lh5_in = self.lh5_st.get_buffer(self.lh5_group, first_file,
                                             buffer_len)
        self.lh5_st.read_object(self.lh5_group, first_file, 0, buffer_len,
                                self.lh5_in)
        self.buffer_len = buffer_len
        self.current_file = None
        self.current_chunk = None

        # initialize stuff for iteration
        self.selection = selection
        self.index_it = None
        self.reset()
        self.n_drawn = n_drawn

        # initialize list of objects to draw
        self.waveforms = _as_list(waveforms)
        self.lines = _as_list(lines)
        self.legend = _as_list(legend)
        self.labels = []

        self.norm_par = norm
        self.align_par = align

        self.x_unit = units.unit_parser.parse_unit(x_unit)
        self.x_lim = x_lim

        # make processing chain and output buffer: everything that is drawn,
        # plus the normalization and alignment parameters when they are set
        outputs = self.waveforms + self.lines + self.legend
        if self.norm_par is not None:
            outputs.append(self.norm_par)
        if self.align_par is not None:
            outputs.append(self.align_par)
        self.proc_chain, self.lh5_out = build_processing_chain(
            self.lh5_in,
            dsp_config,
            outputs,
            verbosity=self.verbosity,
            block_width=block_width)

        self.fig = None
        self.ax = None
Esempio n. 15
0
def raw_to_dsp(ds, overwrite=False, nevt=None, test=False, verbose=2, block=8, group=''):
    """
    Run raw_to_dsp on a set of runs.
    [raw file] ---> [dsp_run{}.lh5] (digital signal processing results)

    ds: object whose `runs` attribute is iterated over.
    overwrite: processing only happens when this is not False, because the
        output path below is always set.
    nevt: unused.
    test: if True, only print which raw file would be processed (dry run).
    verbose: verbosity passed to the ProcessingChain.
    block: block width passed to the ProcessingChain.
    group: unused; every top-level group of the raw file is processed.

    NOTE(review): the raw and dsp paths are hard-coded below, so every run in
    `ds.runs` processes the same file; the per-run lookup is commented out.

    For every top-level group in the raw file, the '<group>/raw' waveforms
    are run through a fixed processing chain and the results are written to
    '<group>/data' in the dsp file.
    """
    for run in ds.runs:
        raw_file = "/lfs/l1/legend/users/dandrea/pygama/pgt/tier1/pgt_longtrace_run0117-20200110-105115-calib_raw.lh5"
        dsp_file = "/lfs/l1/legend/users/dandrea/pygama/pgt/tier2/pgt_longtrace_run0117-20200110-105115-calib_dsp.lh5"
        #raw_file = ds.paths[run]["raw_path"]
        #dsp_file = ds.paths[run]["dsp_path"]
        print("raw_file: ",raw_file)
        print("dsp_file: ",dsp_file)
        if dsp_file is not None and overwrite is False:
            continue

        if dsp_file is None:
            # declare new file name
            dsp_file = raw_file.replace('raw_', 'dsp_')

        if test:
            print("test mode (dry run), processing raw file:", raw_file)
            continue

        print("Definition of new LH5 version")

        lh5_in = lh5.Store()

        # the context manager closes the HDF5 handle even if a group fails
        with h5py.File(raw_file, 'r') as f:
            print("File info: ",f.keys())
            # `grp` (not `group`) so the function parameter isn't clobbered
            for grp in f.keys():
                print("Processing: " + raw_file + '/' + grp)
                data = f[grp]['raw']

                # read the waveforms and the sampling period into memory
                wf_in = data['waveform']['values'][()]
                dt = data['waveform']['dt'][0] * unit_parser.parse_unit(data['waveform']['dt'].attrs['units'])

                # Parameters for DCR calculation
                dcr_trap_int = 200
                dcr_trap_flat = 1000

                # Set up processing chain
                proc = ProcessingChain(block_width=block, clock_unit=dt, verbosity=verbose)
                proc.add_input_buffer("wf", wf_in, dtype='float32')

                # Basic Filters
                proc.add_processor(mean_stdev, "wf[0:1000]", "bl", "bl_sig")
                proc.add_processor(np.subtract, "wf", "bl", "wf_blsub")
                proc.add_processor(pole_zero, "wf_blsub", 145*us, "wf_pz")
                proc.add_processor(trap_norm, "wf_pz", 10*us, 5*us, "wf_trap")
                proc.add_processor(asymTrapFilter, "wf_pz", 0.05*us, 2*us, 4*us, "wf_atrap")

                # Timepoint calculation
                proc.add_processor(np.argmax, "wf_blsub", 1, "t_max", signature='(n),()->()', types=['fi->i'])
                proc.add_processor(time_point_frac, "wf_blsub", 0.95, "t_max", "tp_95")
                proc.add_processor(time_point_frac, "wf_blsub", 0.8, "t_max", "tp_80")
                proc.add_processor(time_point_frac, "wf_blsub", 0.5, "t_max", "tp_50")
                proc.add_processor(time_point_frac, "wf_blsub", 0.2, "t_max", "tp_20")
                proc.add_processor(time_point_frac, "wf_blsub", 0.05, "t_max", "tp_05")
                proc.add_processor(time_point_thresh, "wf_atrap[0:2000]", 0, "tp_0")

                # Energy calculation
                proc.add_processor(np.amax, "wf_trap", 1, "trapEmax", signature='(n),()->()', types=['fi->f'])
                proc.add_processor(fixed_time_pickoff, "wf_trap", "tp_0+(5*us+9*us)", "trapEftp")
                proc.add_processor(trap_pickoff, "wf_pz", 1.5*us, 0, "tp_0", "ct_corr")

                # Current calculation
                proc.add_processor(avg_current, "wf_pz", 10, "curr(len(wf_pz)-10, f)")
                proc.add_processor(np.amax, "curr", 1, "curr_amp", signature='(n),()->()', types=['fi->f'])
                proc.add_processor(np.divide, "curr_amp", "trapEftp", "aoe")

                # DCR calculation: use slope using 1000 samples apart and averaging 200
                # samples, with the start 1.5 us offset from t0
                proc.add_processor(trap_pickoff, "wf_pz", dcr_trap_int, dcr_trap_flat, "tp_0+1.5*us", "dcr_unnorm")
                proc.add_processor(np.divide, "dcr_unnorm", "trapEftp", "dcr")

                # Tail slope. Basically the same as DCR, except with no PZ correction
                proc.add_processor(linear_fit, "wf_blsub[3000:]", "wf_b", "wf_m")
                proc.add_processor(np.divide, "-wf_b", "wf_m", "tail_rc")

                # add zac filter energy calculation
                sigma = 10*us
                flat = 1*us
                decay = 160*us
                proc.add_processor(zac_filter, "wf", sigma, flat, decay, "wf_zac(101, f)")
                proc.add_processor(np.amax, "wf_zac", 1, "zacE", signature='(n),()->()', types=['fi->f'])

                # Set up the LH5 output
                lh5_out = lh5.Table(size=proc._buffer_len)
                lh5_out.add_field("zacE", lh5.Array(proc.get_output_buffer("zacE"), attrs={"units":"ADC"}))
                lh5_out.add_field("trapEmax", lh5.Array(proc.get_output_buffer("trapEmax"), attrs={"units":"ADC"}))
                lh5_out.add_field("trapEftp", lh5.Array(proc.get_output_buffer("trapEftp"), attrs={"units":"ADC"}))
                lh5_out.add_field("ct_corr", lh5.Array(proc.get_output_buffer("ct_corr"), attrs={"units":"ADC*ns"}))
                lh5_out.add_field("bl", lh5.Array(proc.get_output_buffer("bl"), attrs={"units":"ADC"}))
                lh5_out.add_field("bl_sig", lh5.Array(proc.get_output_buffer("bl_sig"), attrs={"units":"ADC"}))
                lh5_out.add_field("A", lh5.Array(proc.get_output_buffer("curr_amp"), attrs={"units":"ADC"}))
                lh5_out.add_field("AoE", lh5.Array(proc.get_output_buffer("aoe"), attrs={"units":"ADC"}))
                lh5_out.add_field("dcr", lh5.Array(proc.get_output_buffer("dcr"), attrs={"units":"ADC"}))

                # NOTE(review): "tp_max" is filled from the "tp_95" buffer, so it
                # duplicates the tp_95 column -- looks like a copy-paste slip
                # (presumably "t_max" was intended); confirm before changing.
                lh5_out.add_field("tp_max", lh5.Array(proc.get_output_buffer("tp_95", unit=us), attrs={"units":"us"}))
                lh5_out.add_field("tp_95", lh5.Array(proc.get_output_buffer("tp_95", unit=us), attrs={"units":"us"}))
                lh5_out.add_field("tp_80", lh5.Array(proc.get_output_buffer("tp_80", unit=us), attrs={"units":"us"}))
                lh5_out.add_field("tp_50", lh5.Array(proc.get_output_buffer("tp_50", unit=us), attrs={"units":"us"}))
                lh5_out.add_field("tp_20", lh5.Array(proc.get_output_buffer("tp_20", unit=us), attrs={"units":"us"}))
                lh5_out.add_field("tp_05", lh5.Array(proc.get_output_buffer("tp_05", unit=us), attrs={"units":"us"}))
                lh5_out.add_field("tp_0", lh5.Array(proc.get_output_buffer("tp_0", unit=us), attrs={"units":"us"}))
                lh5_out.add_field("tail_rc", lh5.Array(proc.get_output_buffer("tail_rc", unit=us), attrs={"units":"us"}))

                print("Processing:\n",proc)
                proc.execute()

                groupname = grp+"/data"
                print("Writing to: " + dsp_file + "/" + groupname)
                lh5_in.write_object(lh5_out, groupname, dsp_file)
Esempio n. 16
0
def get_runtimes(dg, overwrite=False, batch_mode=False):
    """
    $ ./setup.py --rt

    Compute runtime (# minutes in run) and stopTime (unix timestamp) using
    the timestamps in the DSP file, store them in the 'runtime' and
    'stopTime' columns of ``dg.file_keys`` and save the fileDB.

    Args:
        dg: DataGroup-like object.  This function uses ``dg.load_df()``,
            ``dg.file_keys`` (DataFrame with 'dsp_path', 'dsp_file', 'skip'
            and 'startTime' columns), ``dg.lh5_dir``, ``dg.config['fileDB']``
            and ``dg.save_df()``.
        overwrite: if True, recompute the values for every row, even when a
            'runtime' column already exists.
        batch_mode: if True, save the fileDB without asking for confirmation.

    Raises:
        SystemExit: when there is nothing to update, or when a DSP file that
            is not flagged 'skip' does not exist on disk.

    NOTE: Could change this to use the raw file timestamps instead of dsp file,
          but that still makes this function dependent on a processing step.
    NOTE: CAGE uses struck channel 2 (0-indexed)
    """
    print('Scanning DSP files for runtimes ...')

    # load existing fileDB
    dg.load_df()

    if 'runtime' not in dg.file_keys.columns or overwrite:
        # first-time setup (or forced re-scan): process every row
        df_keys = dg.file_keys.copy()
        update_existing = False
        print('Re-scanning entire fileDB')
    else:
        # only process rows whose runtime has not been filled in yet
        idx_new = dg.file_keys.index[pd.isna(dg.file_keys['runtime'])]
        df_keys = dg.file_keys.loc[idx_new].copy()
        update_existing = True
        if len(df_keys) > 0:
            print(f'Found {len(df_keys)} new files without runtime:')
            print(df_keys)
        else:
            print('No empty runtime values found.')

    if len(df_keys) == 0:
        print('No files to update.  Exiting...')
        raise SystemExit

    # clear new colums if they exist
    new_cols = ['stopTime', 'runtime']
    for col in new_cols:
        if col in df_keys.columns:
            df_keys.drop(col, axis=1, inplace=True)

    sto = lh5.Store()

    # the timestamp is a 32-bit counter of a 100 MHz clock
    clock = 100e6 # 100 MHz
    UINT_MAX = 4294967295 # (0xffffffff)
    t_max = UINT_MAX / clock # seconds added by one counter rollover

    def get_runtime(df_row):
        """Return a Series with 'stopTime' and 'runtime' for one fileDB row."""
        f_dsp = dg.lh5_dir + df_row['dsp_path'] + '/' + df_row['dsp_file']

        if df_row.skip:
            print(f'Skipping cycle file:\n  {f_dsp}')
            return pd.Series({'stopTime':0, 'runtime':0})

        if not os.path.exists(f_dsp):
            print(f"Error, file doesn't exist:\n  {f_dsp}")
            raise SystemExit(1)

        # load timestamps from dsp file
        data, n_rows = sto.read_object('ORSIS3302DecoderForEnergy/dsp', f_dsp)
        ts = data['timestamp'].nda / clock # converts to float seconds

        if len(ts) == 0:
            # no events in this file: nothing to measure a runtime from
            st = int(np.ceil(df_row['startTime']))
            return pd.Series({'stopTime':st, 'runtime':0})

        # correct for timestamp rollover: each time the timestamp decreases
        # the counter has wrapped, so every later event is shifted by one
        # more full counter period
        n_wraps = np.cumsum(np.insert(np.diff(ts) < 0, 0, False))
        ts_corr = ts + n_wraps * t_max

        # calculate runtime and unix stopTime
        rt = ts_corr[-1] / 60 # minutes
        st = int(np.ceil(df_row['startTime'] + rt * 60))

        return pd.Series({'stopTime':st, 'runtime':rt})

    df_tmp = df_keys.progress_apply(get_runtime, axis=1)
    df_keys[new_cols] = df_tmp

    if update_existing:
        # merge the new values into the full fileDB; rows that already had a
        # runtime are left untouched
        for col in new_cols:
            dg.file_keys.loc[df_keys.index, col] = df_keys[col]
    else:
        dg.file_keys = df_keys

    dbg_cols = ['run', 'cycle', 'unique_key', 'startTime', 'runtime']
    print(dg.file_keys[dbg_cols])

    print('Ready to save.  This will overwrite any existing fileDB.')
    if not batch_mode:
        ans = input('Save updated fileDB? (y/n):')
        if ans.lower() != 'y':
            return

    # dg.file_keys already holds the complete, updated table at this point.
    # It must NOT be replaced by df_keys here: in the update case df_keys
    # only contains the newly processed rows.
    dg.save_df(dg.config['fileDB'])
    print('fileDB updated.')
Esempio n. 17
0
def plot_dsp(dg):
    """
    create a DataFrame from the dsp files and make some 1d and 2d diagnostic plots.

    Three plots are produced:
      1. 1d 'trapEmax' spectrum         -> ./plots/risingedge_1dspec.pdf
      2. 2d rise time vs. time          -> ./plots/risingedge_2dRisetime.png
      3. first waveforms of the selected energy window from the first file
         in dg.fileDB (shown interactively, requires the raw file)

    Args:
        dg: DataGroup-like object.  Uses ``dg.lh5_dir`` and ``dg.fileDB``
            (columns 'cycle', 'run', 'startTime', 'dsp_path', 'dsp_file',
            'raw_path', 'raw_file').

    for reference, current 12/30/20 dsp parameters:
      ['channel', 'timestamp', 'energy', 'bl', 'bl_sig', 'trapEftp',
       'trapEmax', 'triE', 'tp_max', 'tp_0', 'tp_10', 'tp_50', 'tp_80',
       'tp_90', 'A_10', 'AoE', 'dcr_raw', 'dcr_max', 'dcr_ftp', 'hf_max']
    columns added by this code:
      ['run', 'cycle', 'ts_sec', 'ts_glo']
    """
    sto = lh5.Store()

    dsp_name = 'ORSIS3302DecoderForEnergy/dsp'
    wfs_name = 'ORSIS3302DecoderForEnergy/raw/waveform'

    def get_dsp_dfs(df_row):
        """
        grab the dsp df, add some columns, and return it
        (df_row is the fileDB group of a single cycle)
        """
        f_dsp = dg.lh5_dir + '/' + df_row.dsp_path + '/' + df_row.dsp_file
        if len(f_dsp) > 1:
            print('Error, this part is supposed to only load individual files')
            raise SystemExit(1)
        f_dsp = f_dsp.iloc[0]
        run, cyc = df_row.run.iloc[0], df_row.cycle.iloc[0]

        # grab the dataframe and add some columns
        tb, _ = sto.read_object(dsp_name, f_dsp)
        df = tb.get_dataframe()
        df['run'] = run
        df['cycle'] = cyc

        # need global timestamp.  just calculate here instead of making hit files
        clock = 100e6  # 100 MHz
        UINT_MAX = 4294967295  # (0xffffffff)
        t_max = UINT_MAX / clock  # seconds added by one counter rollover
        ts = df['timestamp'].values / clock

        # every decrease of the timestamp marks a rollover of the 32-bit
        # counter; shift all later events by one more counter period
        n_wraps = np.cumsum(np.insert(np.diff(ts) < 0, 0, False))
        df['ts_sec'] = ts + n_wraps * t_max
        df['ts_glo'] = df['ts_sec'] + df_row.startTime.iloc[0]
        return df

    # create the multi-cycle DataFrame
    df_dsp = dg.fileDB.groupby(['cycle']).apply(get_dsp_dfs)
    df_dsp.reset_index(inplace=True, drop=True)  # << VERY IMPORTANT!

    print(df_dsp)
    print(df_dsp.columns)

    # 1. 1d energy histogram -- use this to select energy range of interest
    et = 'trapEmax'
    elo, ehi, epb = 0, 10000, 10
    hist, bins, _ = pgh.get_hist(df_dsp[et].values, range=(elo, ehi), dx=epb)
    plt.semilogy(bins[1:], hist, ds='steps', c='b', lw=1)
    plt.xlabel(et, ha='right', x=1)
    plt.ylabel('Counts', ha='right', y=1)
    # plt.show()
    plt.savefig('./plots/risingedge_1dspec.pdf')
    plt.cla()

    # 2. 2d histo: show risetime vs. time for wfs in an energy range

    # choose risetime range (usec)
    # rlo, rhi, rpb = 0, 5, 0.1 # run 110 (good)
    rlo, rhi, rpb = 0, 50, 1  # run 111 (bad)

    # select energy range
    elo, ehi = 1500, 1600
    df = df_dsp.query(f'{et} > {elo} and {et} < {ehi}').copy()

    if df.empty:
        # max() of an empty selection is NaN and cannot define a time axis
        print(f'No events with {elo} < {et} < {ehi}, skipping risetime plot')
    else:
        # calculate timestamp range
        t0 = df_dsp.iloc[0]['ts_glo']
        df['ts_adj'] = (df.ts_glo - t0) / 60  # minutes after t0
        tlo, thi, tpb = 0, df.ts_adj.max(), 1

        # compute t50-100 risetime
        df['rt_us'] = (df.tp_max - df.tp_50) / 1e3  # convert ns to us

        # hist2d needs at least one bin per axis, also when the selected
        # events span less than one time bin
        nbx = max(1, int((thi - tlo) / tpb))
        nby = max(1, int((rhi - rlo) / rpb))
        plt.hist2d(df['ts_adj'],
                   df['rt_us'],
                   bins=[nbx, nby],
                   range=[[tlo, thi], [rlo, rhi]],
                   cmap='jet')

        plt.xlabel('Time (min)', ha='right', x=1)
        plt.ylabel('Rise Time (t50-100), usec', ha='right', y=1)
        # plt.show()
        plt.savefig('./plots/risingedge_2dRisetime.png', dpi=150)
        plt.cla()

    # 3. 1st 10 wfs from energy region selection (requires raw file)
    db = dg.fileDB.iloc[0]
    f_raw = dg.lh5_dir + '/' + db.raw_path + '/' + db.raw_file
    f_dsp = dg.lh5_dir + '/' + db.dsp_path + '/' + db.dsp_file

    edata = lh5.load_nda([f_dsp], [et], dsp_name)[et]
    idx = np.where((edata >= elo) & (edata <= ehi))

    nwfs = 10
    idx_sel = idx[0][:nwfs]
    if len(idx_sel) == 0:
        print(f'No events with {elo} <= {et} <= {ehi} in:\n  {f_dsp}')
        return

    n_rows = idx_sel[-1] + 1  # read up to this event and stop
    tb_wfs, _ = sto.read_object(wfs_name, f_raw, n_rows=n_rows)

    # grab the 2d numpy array of waveforms
    wfs = tb_wfs['values'].nda[idx_sel, :]

    # the last two samples of every waveform are left out of the plot
    ts = np.arange(0, len(wfs[0, :-2])) / 1e2  # usec

    for iwf in range(wfs.shape[0]):
        plt.plot(ts, wfs[iwf, :-2], lw=2, alpha=0.5)

    plt.xlabel('Time (us)', ha='right', x=1)
    plt.ylabel('ADC', ha='right', y=1)

    plt.show()
Esempio n. 18
0
def optimize_trap(dg):
    """
    Generate a file with grid points to search, and events from the target peak.
    Then run DSP a bunch of times on the small table, and fit the peak w/ the
    peakshape function.
    NOTE: run table-to-table DSP (no file I/O)

    Args:
        dg: DataGroup-like object.  Uses ``dg.config['lh5_dir']`` and the
            'raw_path' / 'raw_file' columns of ``dg.fileDB`` (only the first
            file is used).

    Side effects:
        reads 'opt_trap.json' from the working directory, writes the grid and
        the filtered events to './temp_peak.lh5' and, if enabled, the fit
        results for every grid point to './temp_results.h5'.
    """
    f_peak = './temp_peak.lh5'  # lh5
    f_results = './temp_results.h5'  # pandas
    grp_data, grp_grid = '/optimize_data', '/optimize_grid'

    # epar, elo, ehi, epb = 'energy', 0, 1e7, 10000 # full range
    epar, elo, ehi, epb = 'energy', 3.88e6, 3.92e6, 500  # K40 peak

    show_movie = True
    write_output = True
    recreate_peak_file = True  # set False to reuse an existing f_peak
    n_rows = None  # default None

    with open('opt_trap.json') as f:
        dsp_config = json.load(f, object_pairs_hook=OrderedDict)

    # files to consider.  fixme: right now only works with one file
    sto = lh5.Store()
    lh5_dir = os.path.expandvars(dg.config['lh5_dir'])
    raw_list = lh5_dir + dg.fileDB['raw_path'] + '/' + dg.fileDB['raw_file']
    f_raw = raw_list.values[0]
    tb_raw = 'ORSIS3302DecoderForEnergy/raw/'

    # set grid parameters
    # TODO: jason's suggestions, knowing the expected shape of the noise curve
    # e_rises = np.linspace(-1, 0, sqrt(sqrt(3))
    # e_rises # make another list which is 10^pwr of this list
    # np.linspace(log_tau_min, log_tau_max) # try this too
    e_rises = np.arange(1, 12, 1)
    e_flats = np.arange(1, 6, 1)
    # rc_consts = np.arange(54, 154, 10) # changing this here messes up DCR

    # -- create the grid search file the first time --
    # NOTE: this makes a linear grid, and is editable by the arrays above.
    # jason also proposed a more active gradient-descent style search
    # like with Brent's method. (https://en.wikipedia.org/wiki/Brent%27s_method)

    if recreate_peak_file:
        # if not os.path.exists(f_peak):
        print('Recreating grid search file')

        # create the grid file
        # NOTE: save it as an lh5 Table just as an example of writing/reading one
        lists = [e_rises, e_flats]  #, rc_consts]
        prod = list(itertools.product(*lists))
        df_grid = pd.DataFrame(prod, columns=['rise', 'flat'])  #,'rc'])
        lh5_grid = {}
        # DataFrame.iteritems() was removed in pandas 2.0; items() is the
        # equivalent available in every pandas version
        for name, dfcol in df_grid.items():
            lh5_grid[name] = lh5.Array(dfcol.values)
        tb_grid = lh5.Table(col_dict=lh5_grid)
        sto.write_object(tb_grid, grp_grid, f_peak)

        # filter events by onboard energy
        ene_raw = sto.read_object(tb_raw + '/' + epar, f_raw).nda
        if n_rows is not None:
            ene_raw = ene_raw[:n_rows]
        idx = np.where((ene_raw > elo) & (ene_raw < ehi))

        # create a filtered table with correct waveform and attrs
        # TODO: move this into a function in lh5.py which takes idx as an input
        tb_data, wf_tb_data = lh5.Table(), lh5.Table()

        # read non-wf cols (lh5 Arrays)
        data_raw = sto.read_object(tb_raw, f_raw, n_rows=n_rows)
        for col in data_raw.keys():
            if col == 'waveform':
                continue
            newcol = lh5.Array(data_raw[col].nda[idx],
                               attrs=data_raw[col].attrs)
            tb_data.add_field(col, newcol)

        # handle waveform column (lh5 Table)
        data_wfs = sto.read_object(tb_raw + '/waveform', f_raw, n_rows=n_rows)
        for col in data_wfs.keys():
            attrs = data_wfs[col].attrs
            if isinstance(data_wfs[col], lh5.ArrayOfEqualSizedArrays):
                # the filtered array is attached after construction instead
                # of being passed to the constructor
                aoesa = lh5.ArrayOfEqualSizedArrays(attrs=attrs, dims=[1, 1])
                aoesa.nda = data_wfs[col].nda[idx]
                newcol = aoesa
            else:
                newcol = lh5.Array(data_wfs[col].nda[idx], attrs=attrs)
            wf_tb_data.add_field(col, newcol)
        tb_data.add_field('waveform', wf_tb_data)
        tb_data.attrs = data_raw.attrs
        sto.write_object(tb_data, grp_data, f_peak)

    else:
        print('Loading peak file. groups:', sto.ls(f_peak))
        tb_grid = sto.read_object(grp_grid, f_peak)
        tb_data = sto.read_object(grp_data, f_peak)  # filtered file
        # tb_data = sto.read_object(tb_raw, f_raw) # orig file
        df_grid = tb_grid.get_dataframe()

    # check shape of input table
    print('input table attributes:')
    for key in tb_data.keys():
        obj = tb_data[key]
        if isinstance(obj, lh5.Table):
            for key2 in obj.keys():
                obj2 = obj[key2]
                print('  ', key, key2, obj2.nda.shape, obj2.attrs)
        else:
            print('  ', key, obj.nda.shape, obj.attrs)

    # clear new colums if they exist
    # NOTE: the column name 'xF_err' is kept so existing readers of the
    # results file keep working; it holds the uncertainty of 'fwhm_fit'
    new_cols = ['e_fit', 'fwhm_fit', 'rchisq', 'xF_err', 'fwhm_ovr_mean']
    for col in new_cols:
        if col in df_grid.columns:
            df_grid.drop(col, axis=1, inplace=True)

    t_start = time.time()

    def run_dsp(dfrow):
        """
        run dsp on the test file, editing the processor list
        alternate idea: generate a long list of processors with different names

        dfrow holds one grid point (rise, flat).  Returns a Series with one
        entry per name in new_cols.
        """
        # adjust dsp config dictionary
        rise, flat = dfrow
        # dsp_config['processors']['wf_pz']['defaults']['db.pz.tau'] = f'{tau}*us'
        dsp_config['processors']['wf_trap']['args'][1] = f'{rise}*us'
        dsp_config['processors']['wf_trap']['args'][2] = f'{flat}*us'

        # run dsp
        pc, tb_out = build_processing_chain(tb_data, dsp_config, verbosity=0)
        pc.execute()

        # analyze peak
        e_peak = 1460.
        etype = 'trapEmax'
        elo, ehi, epb = 4000, 4500, 3  # the peak moves around a bunch
        energy = tb_out[etype].nda

        # get histogram
        hE, bins, vE = pgh.get_hist(energy, range=(elo, ehi), dx=epb)
        xE = bins[1:]

        # simple numerical width
        i_max = np.argmax(hE)
        h_max = hE[i_max]
        upr_half = xE[(xE > xE[i_max]) & (hE <= h_max / 2)][0]
        bot_half = xE[(xE < xE[i_max]) & (hE >= h_max / 2)][0]
        fwhm = upr_half - bot_half
        sig = fwhm / 2.355

        # fit to gaussian: amp, mu, sig, bkg
        fit_func = pgf.gauss_bkg
        amp = h_max * fwhm
        bg0 = np.mean(hE[:20])
        x0 = [amp, xE[i_max], sig, bg0]
        xF, xF_cov = pgf.fit_hist(fit_func, hE, bins, var=vE, guess=x0)

        # collect results.  The fit parameters come back in the order of the
        # guess x0 = [amp, mu, sig, bkg]: centroid is index 1, sigma index 2
        xF_err = np.sqrt(np.diag(xF_cov))
        e_fit = xF[1]
        fwhm_fit = xF[2] * 2.355 * e_peak / e_fit
        fwhm_err = xF_err[2] * 2.355 * e_peak / e_fit

        # mean of |model - data|^2 / model over all bins
        model = fit_func(xE, *xF)
        rchisq = np.sum(np.abs((model - hE)**2 / model)) / len(hE)
        fwhm_ovr_mean = fwhm_fit / e_fit

        if show_movie:

            plt.plot(xE,
                     hE,
                     ds='steps',
                     c='b',
                     lw=2,
                     label=f'{etype} {rise}--{flat}')

            # peak shape
            plt.plot(xE,
                     fit_func(xE, *x0),
                     '-',
                     c='orange',
                     alpha=0.5,
                     label='init. guess')
            plt.plot(xE,
                     model,
                     '-r',
                     alpha=0.8,
                     label='peakshape fit')
            plt.plot(np.nan,
                     np.nan,
                     '-w',
                     label=f'mu={e_fit:.1f}, fwhm={fwhm_fit:.2f}')

            plt.xlabel(etype, ha='right', x=1)
            plt.ylabel('Counts', ha='right', y=1)
            plt.legend(loc=2)

            # show a little movie
            plt.show(block=False)
            plt.pause(0.01)
            plt.cla()

        # return results -- the keys must match new_cols
        return pd.Series({
            'e_fit': e_fit,
            'fwhm_fit': fwhm_fit,
            'rchisq': rchisq,
            'xF_err': fwhm_err,
            'fwhm_ovr_mean': fwhm_ovr_mean
        })

    # df_grid=df_grid[:10]
    df_tmp = df_grid.progress_apply(run_dsp, axis=1)
    df_grid[new_cols] = df_tmp[new_cols]

    if show_movie:
        plt.close()

    print('elapsed:', time.time() - t_start)
    if write_output:
        df_grid.to_hdf(f_results, key=grp_grid)
        print(f"Wrote output file: {f_results}")
Esempio n. 19
0
    const=1,
    dest='writemode',
    help=
    "Update existing file with new values. Useful with the --outpar option. Mutually exclusive with --recreate and --append THIS IS NOT IMPLEMENTED YET!"
    )
# NOTE(review): this option stores the same const (1) in 'writemode' as the
# option declared just above it, so the two modes cannot be told apart from
# args.writemode -- confirm once the modes are actually implemented.
arg('-a',
    '--append',
    action='store_const',
    const=1,
    dest='writemode',
    help=
    "Append values to existing file. Mutually exclusive with --recreate and --update THIS IS NOT IMPLEMENTED YET!"
    )
args = parser.parse_args()

# list the groups of the input file that match the requested group
lh5_in = lh5.Store()
groups = lh5_in.ls(args.file, args.group)

# default output name: input file name (directory stripped) with the 't1_'
# tag removed and a 't2_' prefix added
out = args.output
if out is None:
    out = 't2_' + args.file[args.file.rfind('/') + 1:].replace('t1_', '')

for group in groups:
    print("Processing: " + args.file + '/' + group)
    # the whole group is read at once (chunked reading is disabled):
    #data = lh5_in.read_object(args.group, args.file, 0, args.chunk)
    data = lh5_in.read_object(group, args.file)

    # waveform samples, channel numbers and the sampling period; the period
    # is taken from the first row and scaled by its 'units' attribute
    wf_in = data['waveform']['values'].nda
    chan_in = data['channel'].nda
    dt = data['waveform']['dt'].nda[0] * unit_parser.parse_unit(
        data['waveform']['dt'].attrs['units'])
Esempio n. 20
0
def get_resolution():
    """
    Fit the 1460.8 keV peak in the calibrated 'trapE_cal' spectrum of one
    hard-coded hit file with the radford peak shape, print the numerical and
    the fitted FWHM, and save a plot of the data, the initial guess and the
    fit to ./plots/.
    """
    # load hit file
    hit_path = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'
    table = 'ORSIS3302DecoderForEnergy/raw'
    store = lh5.Store()
    groups = store.ls(hit_path)
    df_hit = store.read_object(table, hit_path).get_dataframe()

    # peak and histogram settings
    e_peak = 1460.8
    etype = 'trapE_cal'
    # etype = 'energy_cal'
    elo, ehi, epb = 1445, 1475, 0.2

    # histogram of the peak region; xE holds the upper bin edges
    hE, bins, vE = pgh.get_hist(df_hit[etype], range=(elo, ehi), dx=epb)
    xE = bins[1:]

    # numerical FWHM: first bin above the maximum that is at or below half
    # height, minus first bin below the maximum at or above half height
    i_max = np.argmax(hE)
    h_max = hE[i_max]
    above_peak = xE > xE[i_max]
    below_peak = xE < xE[i_max]
    upr_half = xE[above_peak & (hE <= h_max / 2)][0]
    bot_half = xE[below_peak & (hE >= h_max / 2)][0]
    fwhm = upr_half - bot_half
    sig = fwhm / 2.355

    # initial guess for the radford peak:
    # mu, sigma, hstep, htail, tau, bg0, amp
    fit_func = pgf.radford_peak
    hstep = 0.001  # fraction that the step contributes
    htail = 0.1
    tau = 10
    bg0 = np.mean(hE[:20])
    amp = h_max * fwhm
    x0 = [xE[i_max], sig, hstep, htail, tau, bg0, amp]
    xF, xF_cov = pgf.fit_hist(pgf.radford_peak, hE, bins, var=vE, guess=x0)

    xF_err = np.sqrt(np.diag(xF_cov))

    def chisq_term(x, h):
        # |model - data|^2 / model for a single bin
        model = fit_func(x, *xF)
        return abs((model - h)**2 / model)

    chisq = [chisq_term(x, h) for x, h in zip(xE, hE)]

    # collect results (for output, should use a dict or DataFrame)
    e_fit = xF[0]
    fwhm_fit = xF[1] * 2.355  #  * e_peak / e_fit
    print(fwhm, fwhm_fit)
    fwhmerr = xF_err[1] * 2.355 * e_peak / e_fit
    rchisq = sum(np.array(chisq) / len(hE))

    # data
    plt.plot(xE, hE, ds='steps', c='b', lw=2, label=etype)

    # initial guess and fitted peak shape
    guess_curve = fit_func(xE, *x0)
    plt.plot(xE, guess_curve, '-', c='orange', alpha=0.5, label='init. guess')
    fit_curve = fit_func(xE, *xF)
    plt.plot(xE, fit_curve, '-r', alpha=0.8, label='peakshape fit')

    # invisible entry that only adds the fit results to the legend
    plt.plot(np.nan,
             np.nan,
             '-w',
             label=f'mu={e_fit:.1f}, fwhm={fwhm_fit:.2f}')

    plt.xlabel(etype, ha='right', x=1)
    plt.ylabel('Counts', ha='right', y=1)
    plt.legend(loc=2)
    plt.tight_layout()
    # plt.show()
    plt.savefig(f'./plots/resolution_1460_{etype}.pdf')
    plt.cla()
Esempio n. 21
0
def pole_zero(dg):
    """
    Estimate the RC decay constant of the waveform tails with a two-point
    slope of log(waveform), for a few events in the 1460 keV peak, and print
    the average and standard deviation.

    Args:
        dg: DataGroup-like object.  Uses ``dg.config['lh5_dir']`` and the
            'hit_path', 'hit_file', 'raw_path', 'raw_file' columns of
            ``dg.file_keys`` (waveforms are read from the first raw file).

    NOTE(review): the slope of log(wf) is negative for a decaying tail, so
    the printed constant (1 / slope) is negative -- confirm the intended
    sign convention before turning this into a dsp processor.
    """
    # event selection settings
    etype = 'trapEmax_cal'
    nwfs = 20
    elo, ehi = 1455, 1465

    # load hit data -- the column used for the selection has to be loaded
    lh5_dir = os.path.expandvars(dg.config['lh5_dir'])
    hit_list = lh5_dir + dg.file_keys['hit_path'] + '/' + dg.file_keys[
        'hit_file']
    df_hit = lh5.load_dfs(hit_list, [etype],
                          'ORSIS3302DecoderForEnergy/hit')
    df_hit.reset_index(inplace=True)

    # select waveforms
    idx = df_hit[etype].loc[(df_hit[etype] >= elo)
                            & (df_hit[etype] <= ehi)].index[:nwfs]
    if len(idx) == 0:
        print(f'No events found with {elo} <= {etype} <= {ehi}')
        return

    raw_store = lh5.Store()
    tb_name = 'ORSIS3302DecoderForEnergy/raw'
    raw_list = lh5_dir + dg.file_keys['raw_path'] + '/' + dg.file_keys[
        'raw_file']
    f_raw = raw_list.values[0]  # fixme, only works for one file rn
    data_raw = raw_store.read_object(tb_name,
                                     f_raw,
                                     start_row=0,
                                     n_rows=idx[-1] + 1)

    wfs_all = data_raw['waveform']['values'].nda
    wfs = wfs_all[idx.values, :]
    df_wfs = pd.DataFrame(wfs)

    # simple test function to compute pole-zero constant for a few wfs.
    # the final one should become a dsp processor
    clock = 1e8  # 100 MHz
    istart = 5000  # first sample of the tail region
    iwinlo, iwinhi, iwid = 500, 2500, 20  # two-point slope
    # time of every tail sample relative to istart, in usec
    ts = np.arange(0, df_wfs.shape[1] - 1 - istart, 1) * (1e6 / clock)

    def get_rc(row):
        """two-point method: slope of log(wf) between two averaged windows"""
        win1 = np.mean(np.log(row[istart + iwinlo:istart + iwinlo + iwid]))
        win2 = np.mean(np.log(row[istart + iwinhi:istart + iwinhi + iwid]))
        slope = (win2 - win1) / (ts[iwinhi] - ts[iwinlo])
        return 1 / slope

    res = df_wfs.apply(get_rc, axis=1)

    tau_avg, tau_std = res.mean(), res.std()
    print(f'average RC decay constant: {tau_avg:.2f} pm {tau_std:.2f}')
Esempio n. 22
0
    )
arg('-B',
    '--block',
    default=16,
    type=int,
    help="Number of waveforms to process simultaneously. Default is 16")

arg('-C',
    '--chunk',
    default=3200,
    type=int,
    help="Number of waveforms to read from disk at a time. Default is 3200.")

args = parser.parse_args()

lh5_st = lh5.Store()
chans = lh5_st.ls(args.file, args.channel)

# the range is given as "<low>-<high>"; both values are rounded to 0.1
rc_range = tuple([round(float(tc), 1) for tc in args.range.split('-')])
if len(rc_range) != 2:
    # abort instead of continuing with an unusable range
    parser.error("Range must have exactly two values")
# number of 0.1-wide steps in the range; round() guards against float
# error such as 0.3 / 0.1 == 2.9999999999999996
n_bins = int(round((rc_range[1] - rc_range[0]) / 0.1))

rc_const_lib = {}

# silence numpy floating point warnings for the rest of the script
np.seterr(all='ignore')

for chan_name in chans:
    group = chan_name + '/raw'
    print("Processing: " + args.file + '/' + group)
Esempio n. 23
0
def raw_to_dsp(ds,
               overwrite=False,
               nevt=None,
               test=False,
               verbose=2,
               block=8,
               group='daqdata'):
    """
    Run raw_to_dsp on a set of runs.
    [raw file] ---> [dsp_run{}.lh5] (digital signal processing results)

    Args:
        ds: DataSet-like object with ``ds.runs`` (iterable of runs) and
            ``ds.paths[run]`` holding "raw_path" and "dsp_path".
        overwrite: if False, runs that already have a dsp file are skipped.
        nevt: not used by this implementation.
        test: dry run -- only print which raw files would be processed.
        verbose: verbosity passed to the ProcessingChain.
        block: number of waveforms the ProcessingChain handles at once.
        group: not used by this implementation.
    """
    for run in ds.runs:
        raw_file = ds.paths[run]["raw_path"]
        dsp_file = ds.paths[run]["dsp_path"]

        if dsp_file is not None and overwrite is False:
            continue

        if dsp_file is None:
            # declare new file name
            dsp_file = raw_file.replace('raw', 'dsp')

        if test:
            print("test mode (dry run), processing raw file:", raw_file)
            continue

        # new LH5 version

        lh5_in = lh5.Store()
        data = lh5_in.read_object("/ORSIS3302DecoderForEnergy", raw_file)

        wf_in = data['waveform']['values'].nda
        dt = data['waveform']['dt'].nda[0] * unit_parser.parse_unit(
            data['waveform']['dt'].attrs['units'])

        # Parameters for DCR calculation
        dcr_trap_int = 200
        dcr_trap_flat = 1000
        dcr_trap_startSample = 1200

        # Set up processing chain
        proc = ProcessingChain(block_width=block,
                               clock_unit=dt,
                               verbosity=verbose)
        proc.add_input_buffer("wf", wf_in, dtype='float32')

        # baseline from the first 1000 samples, then baseline subtraction
        # and pole-zero correction
        proc.add_processor(mean_stdev, "wf[0:1000]", "bl", "bl_sig")
        proc.add_processor(np.subtract, "wf", "bl", "wf_blsub")
        proc.add_processor(pole_zero, "wf_blsub", 70 * us, "wf_pz")

        # energy estimators: maximum of the (asymmetric) trapezoid output
        proc.add_processor(asymTrapFilter, "wf_pz", 10 * us, 5 * us, 10 * us,
                           "wf_atrap")
        proc.add_processor(np.amax,
                           "wf_atrap",
                           1,
                           "atrapE",
                           signature='(n),()->()',
                           types=['fi->f'])

        # proc.add_processor(np.divide, "atrapmax", 10*us, "atrapE")

        proc.add_processor(trap_norm, "wf_pz", 10 * us, 5 * us, "wf_trap")
        proc.add_processor(np.amax,
                           "wf_trap",
                           1,
                           "trapE",
                           signature='(n),()->()',
                           types=['fi->f'])

        # current amplitude and A/E
        proc.add_processor(avg_current, "wf_pz", 10, "curr")
        proc.add_processor(np.amax,
                           "curr",
                           1,
                           "A_10",
                           signature='(n),()->()',
                           types=['fi->f'])
        proc.add_processor(np.divide, "A_10", "trapE", "AoE")
        proc.add_processor(trap_pickoff, "wf_pz", dcr_trap_int, dcr_trap_flat,
                           dcr_trap_startSample, "dcr")

        # Set up the LH5 output: (output field, processing chain buffer)
        lh5_out = lh5.Table(size=proc._buffer_len)
        out_fields = [("trapE", "trapE"), ("bl", "bl"), ("bl_sig", "bl_sig"),
                      ("A", "A_10"), ("AoE", "AoE"), ("dcr", "dcr")]
        for field, buffer_name in out_fields:
            lh5_out.add_field(
                field,
                lh5.Array(proc.get_output_buffer(buffer_name),
                          attrs={"units": "ADC"}))

        print("Processing:\n", proc)
        proc.execute()

        print("Writing to: ", dsp_file)
        # write with the store opened above
        lh5_in.write_object(lh5_out, "data", dsp_file)
Esempio n. 24
0
def dsp_to_hit_cage(f_dsp, f_hit, dg, n_max=None, verbose=False, t_start=None):
    """
    non-general placeholder for creating a pygama 'hit' file.  uses pandas.

    The dsp table of ``f_dsp`` is read into a DataFrame, extended by
    - '<etype>_cal': calibrated energy (peakfit results from the ecal DB)
    - 'ts_sec': timestamp in seconds, corrected for counter rollover
    - 'ts_glo': 'ts_sec' + t_start (only when t_start is given)
    and written as a new table to ``f_hit`` (an existing file is replaced).

    ``n_max`` and ``verbose`` are accepted but not used.

    TODO: create entry config['rawe'] with list of energy pars to calibrate, as
    in energy_cal.py
    """
    rawe = ['trapEmax']
    input_table = dg.config['input_table']

    # create initial 'hit' DataFrame from dsp data
    hit_store = lh5.Store()
    df_hit = hit_store.read_object(input_table, f_dsp).get_dataframe()

    # 1. get energy calibration for this run from peakfit
    cal_db = db.TinyDB(storage=MemoryStorage)
    with open(dg.config['ecaldb']) as f:
        cal_db.storage.write(json.load(f))

    runs = dg.file_keys.run.unique()
    if len(runs) > 1:
        print("sorry, I can't do combined runs yet")
        exit()
    run = runs[0]

    for etype in rawe:
        df_cal = pd.DataFrame(cal_db.table(f'peakfit_{etype}').all())
        df_cal['run'] = df_cal['run'].astype(int)
        # first calibration entry stored for this run
        run_entry = df_cal.loc[df_cal.run == run].iloc[0]
        pol = np.poly1d(run_entry[['cal0', 'cal1', 'cal2']])
        df_hit[f'{etype}_cal'] = pol(df_hit[f'{etype}'])

    # 2. compute timestamp rollover correction (specific to struck 3302)
    clock = 100e6  # 100 MHz
    UINT_MAX = 4294967295  # (0xffffffff)
    t_max = UINT_MAX / clock
    ts = df_hit['timestamp'].values / clock

    # a negative step between consecutive timestamps marks the first event
    # after a rollover; these indices split the events into blocks
    steps = np.insert(np.diff(ts), 0, 0)
    wrap_idx = np.where(steps < 0)[0]
    block_starts = np.concatenate(([0], wrap_idx))
    block_stops = np.append(wrap_idx, len(ts))

    shifted_blocks = []
    t_roll = 0
    for start, stop in zip(block_starts, block_stops):
        t_last = ts[start - 1]
        t_diff = t_max - t_last
        shifted_blocks.append(ts[start:stop] + t_roll)
        t_roll += t_last + t_diff
    df_hit['ts_sec'] = np.concatenate(shifted_blocks)

    # 3. compute global timestamp
    if t_start is not None:
        df_hit['ts_glo'] = df_hit['ts_sec'] + t_start

    # write to LH5 file, replacing any previous version
    if os.path.exists(f_hit):
        os.remove(f_hit)
    sto = lh5.Store()
    tb_name = input_table.replace('dsp', 'hit')
    tb_lh5 = lh5.Table(size=len(df_hit))

    for col in df_hit.columns:
        col_array = lh5.Array(df_hit[col].values, attrs={'units': ''})
        tb_lh5.add_field(col, col_array)
        print(col)

    print(f'Writing table: {tb_name} in file:\n   {f_hit}')
    sto.write_object(tb_lh5, tb_name, f_hit)
Esempio n. 25
0
def show_wfs():
    """
    Overlay a few low-energy waveforms from the noise region of one cycle.

    Events are picked from the hit file by calibrated energy (one "noise"
    window and one "phys" window), the raw file is read up to the last
    selected row, and the noise waveforms are drawn and saved to
    ./plots/noise_wfs.png.  Nothing is returned.
    """
    f_raw = '/Users/wisecg/Data/OPPI/raw/oppi_run0_cyc2027_raw.lh5'
    f_hit = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'
    tb_name = 'ORSIS3302DecoderForEnergy/raw'

    # settings
    nwfs = 20  # max number of waveforms kept per energy window
    elo, ehi, epb = 0, 100, 0.2  # spectrum range/binning (not used below)

    # energy windows.  for 'trapE_cal' the noise stops @ 35 keV; for
    # 'energy_cal' (noise stops @ 18 keV) the windows 10, 15, 25, 30 were used.
    etype = 'trapE_cal'
    noise_lo, noise_hi, phys_lo, phys_hi = 25, 30, 40, 45

    # the hit file drives the event selection
    df_hit = lh5.Store().read_object(tb_name, f_hit).get_dataframe()
    energy = df_hit[etype]

    in_noise = (energy > noise_lo) & (energy < noise_hi)
    in_phys = (energy > phys_lo) & (energy < phys_hi)
    idx_noise = energy.loc[in_noise].index[:nwfs]
    idx_phys = energy.loc[in_phys].index[:nwfs]

    # only read the raw table up to the last row that was selected
    last_row = max(idx_noise[-1], idx_phys[-1])
    data_raw = lh5.Store().read_object(tb_name,
                                       f_raw,
                                       start_row=0,
                                       n_rows=last_row + 1)

    all_wfs = data_raw['waveform']['values'].nda
    wfs_noise = all_wfs[idx_noise.values, :]
    wfs_phys = all_wfs[idx_phys.values, :]  # selected, but not drawn below
    ts = np.arange(wfs_noise.shape[1])

    # draw every noise waveform on the same axes
    for wf in wfs_noise:
        plt.plot(ts, wf, lw=1)

    plt.xlabel('time (clock ticks)', ha='right', x=1)
    plt.ylabel('ADC', ha='right', y=1)
    plt.savefig('./plots/noise_wfs.png', dpi=300)
    plt.cla()
Esempio n. 26
0
def get_runtimes(dg):
    """
    Requires DSP files.
    compute runtime (# minutes in run) and stopTime (unix timestamp) using
    the timestamps in the dsp file, and store both as columns of the fileDB.

    Parameters
    ----------
    dg : object
        Must provide `dg.config['fileDB']` (path of the HDF5 file database,
        read and rewritten here) and `dg.lh5_dir` (prefix that is joined with
        each row's 'dsp_path' and 'dsp_file').

    Notes
    -----
    Each fileDB row must have 'dsp_path', 'dsp_file' and 'startTime'.  Any
    existing 'stopTime' / 'runtime' columns are dropped and recomputed.
    """
    write_output = True

    df_keys = pd.read_hdf(dg.config['fileDB'])

    # clear new columns if they exist, so they are always recomputed
    new_cols = ['stopTime', 'runtime']
    stale = [col for col in new_cols if col in df_keys.columns]
    df_keys = df_keys.drop(columns=stale)

    sto = lh5.Store()

    # timestamp rollover period, in seconds
    # NOTE(review): a 32-bit counter wraps after UINT_MAX + 1 ticks, so using
    # UINT_MAX may be off by one tick per wrap -- confirm against the decoder.
    clock = 100e6  # 100 MHz
    UINT_MAX = 4294967295  # (0xffffffff)
    t_max = UINT_MAX / clock

    def get_runtime(df_row):
        # load timestamps from dsp file
        f_dsp = dg.lh5_dir + df_row['dsp_path'] + '/' + df_row['dsp_file']
        data = sto.read_object('ORSIS3302DecoderForEnergy/dsp', f_dsp)

        ts = data['timestamp'].nda / clock  # converts to float (seconds)

        # correct for timestamp rollover: whenever the clock steps backwards
        # the counter has wrapped, and every later sample must be shifted up
        # by one more full period.  cumsum counts the wraps seen so far.
        tdiff = np.insert(np.diff(ts), 0, 0)
        n_wraps = np.cumsum(tdiff < 0)
        ts_corr = ts + n_wraps * t_max

        # calculate runtime and unix stopTime
        rt = ts_corr[-1] / 60  # minutes
        st = int(np.ceil(df_row['startTime'] + rt * 60))

        return pd.Series({'stopTime': st, 'runtime': rt})

    df_tmp = df_keys.progress_apply(get_runtime, axis=1)
    df_keys[new_cols] = df_tmp

    print(df_keys)

    if write_output:
        df_keys.to_hdf(dg.config['fileDB'], key='file_keys')
        print(f"Wrote output file: {dg.config['fileDB']}")
Esempio n. 27
0
def data_cleaning():
    """
    using parameters in the hit file, plot 1d and 2d spectra to find cut values.

    columns in file:
        ['trapE', 'bl', 'bl_sig', 'A_10', 'AoE', 'packet_id', 'ievt', 'energy',
        'energy_first', 'timestamp', 'crate', 'card', 'channel', 'energy_cal',
        'trapE_cal']

    note, 'energy_first' from first value of energy gate.

    Every plot is written to ./plots/ and nothing is returned.  Each plot is
    guarded by an `if i_plot <= N` check, so raising `i_plot` skips the
    lower-numbered plots.
    """
    i_plot = 3  # run all plots from this number onwards

    f_hit = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'

    tb_name = 'ORSIS3302DecoderForEnergy/raw'
    hit_store = lh5.Store()
    data = hit_store.read_object(tb_name, f_hit)
    df_hit = data.get_dataframe()

    # correct energy_first to allow negative values.  the corrected array is
    # assigned back to the column instead of being written through `.values`,
    # which is a read-only view when pandas copy-on-write is active.
    # NOTE(review): a wrapped 32-bit value would normally be shifted by 2**32
    # (4294967296); this shifts by 2**32 - 1 -- confirm which is intended.
    efirst = df_hit['energy_first'].astype(np.int64).values
    df_hit['energy_first'] = np.where(efirst > 4e9,
                                      efirst - 4294967295,
                                      efirst)

    # baseline cut, shared by the cut-line plot and the cut spectra below
    bl_cut_lo, bl_cut_hi = 57700, 58500
    bl_mask = (df_hit['bl'] > bl_cut_lo) & (df_hit['bl'] < bl_cut_hi)

    if i_plot <= 0:
        # bl vs energy

        elo, ehi, epb = 0, 250, 1
        blo, bhi, bpb = 54700, 61400, 100
        nbx = int((ehi - elo) / epb)
        nby = int((bhi - blo) / bpb)

        h = plt.hist2d(df_hit['trapE_cal'],
                       df_hit['bl'],
                       bins=[nbx, nby],
                       range=[[elo, ehi], [blo, bhi]],
                       cmap='jet')

        cb = plt.colorbar(h[3], ax=plt.gca())
        plt.xlabel('trapE_cal', ha='right', x=1)
        plt.ylabel('bl', ha='right', y=1)
        plt.tight_layout()
        plt.savefig('./plots/bl_vs_e.png', dpi=300)
        cb.remove()  # the colorbar has its own axes; cla() would not clear it
        plt.cla()

        # make a formal baseline cut from 1d histogram
        hE, bins, _ = pgh.get_hist(df_hit['bl'], range=(blo, bhi), dx=bpb)
        xE = bins[1:]
        plt.semilogy(xE, hE, c='b', ds='steps')

        plt.axvline(bl_cut_lo, c='r', lw=1)
        plt.axvline(bl_cut_hi, c='r', lw=1)

        plt.xlabel('bl', ha='right', x=1)
        plt.ylabel('counts', ha='right', y=1)
        plt.savefig('./plots/bl_cut.pdf')
        plt.cla()

    if i_plot <= 1:
        # energy_first vs. E

        flo, fhi, fpb = -565534, 70000, 1000
        elo, ehi, epb = 0, 250, 1

        nbx = int((ehi - elo) / epb)
        nby = int((fhi - flo) / fpb)

        h = plt.hist2d(df_hit['trapE_cal'],
                       df_hit['energy_first'],
                       bins=[nbx, nby],
                       range=[[elo, ehi], [flo, fhi]],
                       cmap='jet',
                       norm=LogNorm())

        cb = plt.colorbar(h[3], ax=plt.gca())
        plt.xlabel('trapE_cal', ha='right', x=1)
        plt.ylabel('energy_first', ha='right', y=1)
        plt.tight_layout()
        plt.savefig('./plots/efirst_vs_e.png', dpi=300)
        cb.remove()
        plt.cla()

        # make a formal energy_first cut from 1d histogram
        flo, fhi, fpb = -20000, 20000, 100
        hE, bins, _ = pgh.get_hist(df_hit['energy_first'],
                                   range=(flo, fhi),
                                   dx=fpb)
        xE = bins[1:]
        plt.semilogy(xE, hE, c='b', ds='steps')

        ef_cut_lo, ef_cut_hi = -5000, 4000
        plt.axvline(ef_cut_lo, c='r', lw=1)
        plt.axvline(ef_cut_hi, c='r', lw=1)

        plt.xlabel('energy_first', ha='right', x=1)
        plt.ylabel('counts', ha='right', y=1)
        plt.savefig('./plots/efirst_cut.pdf')
        plt.cla()

    if i_plot <= 3:
        # trapE_cal - energy_cal vs trapE_cal

        # use baseline cut (copy, because a column is added below)
        df_cut = df_hit.loc[bl_mask].copy()

        # add new diffE column
        df_cut['diffE'] = df_cut['trapE_cal'] - df_cut['energy_cal']

        elo, ehi, epb = 0, 3000, 1
        dlo, dhi, dpb = -10, 10, 0.1

        nbx = int((ehi - elo) / epb)
        nby = int((dhi - dlo) / dpb)

        plt.hist2d(df_cut['trapE_cal'],
                   df_cut['diffE'],
                   bins=[nbx, nby],
                   range=[[elo, ehi], [dlo, dhi]],
                   cmap='jet',
                   norm=LogNorm())

        plt.xlabel('trapE_cal', ha='right', x=1)
        plt.ylabel('diffE (trap-onbd)', ha='right', y=1)
        plt.tight_layout()
        plt.savefig('./plots/diffE.png', dpi=300)
        plt.cla()

    if i_plot <= 4:
        # A_10/trapE_cal vs trapE_cal (A/E vs E)

        # i doubt we want to introduce a pulse shape cut at this point,
        # since i'm tuning on bkg data and we don't know a priori what (if any)
        # features the Kr waveforms will have.  also, the efficiency as a
        # function of energy would have to be determined, which is hard.
        # so this is just for fun.

        # use baseline cut (copy, because a column is added below)
        df_cut = df_hit.loc[bl_mask].copy()

        # add new A/E column
        df_cut['aoe'] = df_cut['A_10'] / df_cut['trapE_cal']

        # low-energy zoom; a wider view used alo, ahi, apb = -1300, 350, 1
        # and elo, ehi, epb = 0, 250, 1
        alo, ahi, apb = -0.5, 5, 0.05
        elo, ehi, epb = 0, 50, 0.2

        nbx = int((ehi - elo) / epb)
        nby = int((ahi - alo) / apb)

        plt.hist2d(df_cut['trapE_cal'],
                   df_cut['aoe'],
                   bins=[nbx, nby],
                   range=[[elo, ehi], [alo, ahi]],
                   cmap='jet',
                   norm=LogNorm())

        plt.xlabel('trapE_cal', ha='right', x=1)
        plt.ylabel('A/E', ha='right', y=1)
        plt.tight_layout()
        plt.savefig('./plots/aoe_vs_e_lowe.png', dpi=300)
        plt.cla()

    if i_plot <= 5:
        # show effect of cuts on energy spectrum

        # baseline cut and efirst cut are very similar; the efirst version
        # would be 'energy_first > -5000 and energy_first < 4000'
        df_cut = df_hit.loc[bl_mask]

        etype = 'trapE_cal'
        elo, ehi, epb = 0, 250, 0.5

        # no cuts
        h1, x1, _ = pgh.get_hist(df_hit[etype], range=(elo, ehi), dx=epb)
        x1 = x1[1:]
        plt.plot(x1, h1, c='k', lw=1, ds='steps', label='raw')

        # baseline cut (same range and bin width, so x1 is reused)
        h2, _, _ = pgh.get_hist(df_cut[etype], range=(elo, ehi), dx=epb)
        plt.plot(x1, h2, c='b', lw=1, ds='steps', label='bl cut')

        plt.xlabel(etype, ha='right', x=1)
        plt.ylabel('counts', ha='right', y=1)
        plt.legend()
        plt.savefig('./plots/cut_spectrum.pdf')
        plt.cla()
Esempio n. 28
0
def process_ds(f_grid, f_opt, f_tier1, d_out, efilter):
    """
    process the windowed raw file 'f_tier1' and create the DSP file 'f_opt'

    Parameters
    ----------
    f_grid : str
        HDF5 file read with `pd.read_hdf`; each row is one parameter set.
    f_opt : str
        Output LH5 file.  Deleted first if it already exists.
    f_tier1 : str
        Input raw file, opened with h5py.  Every top-level key is treated as
        one detector with a 'raw' group beneath it.
    d_out : str
        Directory holding '{bfilter}_results.h5'; only read when `efilter`
        contains 'corr'.
    efilter : str
        Name of the energy filter.  The substrings 'trapE', 'zacE', 'cuspE'
        and 'corr' select which processors are added.

    Returns
    -------
    None.  Returns early (after printing a message) when a 'corr' filter is
    requested but the results file of its base filter cannot be read.
    """
    print("Grid file:",f_grid)
    df_grid = pd.read_hdf(f_grid)

    if os.path.exists(f_opt):
        os.remove(f_opt)

    use_corr = 'corr' in efilter
    if use_corr:
        bfilter = efilter.split('corr')[0]
        try:
            df_res = pd.read_hdf(f'{d_out}/{bfilter}_results.h5',key='results')
        except Exception as err:
            # best effort: without the base-filter results there is nothing
            # to correct, so report why and skip this filter.
            print(bfilter,"not optimized")
            print(f"  ({type(err).__name__}: {err})")
            return
        print("Extraction of best parameters for", bfilter)

    lh5_in = lh5.Store()
    t_start = time.time()

    # the context manager closes the raw file even if processing fails
    with h5py.File(f_tier1,'r') as f:
        detectors = list(f.keys())
        for idx, ged in enumerate(detectors):
            if idx == 4:
                # idx detectors are finished; extrapolate to the rest
                diff = time.time() - t_start
                tot = diff / idx * (len(detectors) - idx) / 60
                print(f"Estimated remaining time: {tot:.2f} mins")

            print("Detector:",ged)
            data = f[ged]['raw']

            wf_in = data['waveform']['values'][()]
            dt_ds = data['waveform']['dt']
            dt_raw = dt_ds[0]
            dt = dt_raw * unit_parser.parse_unit(dt_ds.attrs['units'])
            bl_in = data['baseline'][()] #flashcam baseline values

            # Set up DSP processing chain -- very minimal
            block = 8 #waveforms to process simultaneously
            proc = ProcessingChain(block_width=block, clock_unit=dt, verbosity=False)
            proc.add_input_buffer("wf", wf_in, dtype='float32')
            proc.add_input_buffer("bl", bl_in, dtype='float32')

            wsize = wf_in.shape[1]
            dt0 = dt_raw*0.001

            proc.add_processor(np.subtract, "wf", "bl", "wf_blsub")
            for i, row in df_grid.iterrows():
                if use_corr:
                    # for 'corr' filters the grid row is the correction constant
                    ct_const = row
                if 'trapE' in efilter:
                    if use_corr:
                        rise = float(df_res['rise'][idx])
                        flat = float(df_res['flat'][idx])
                        rc = float(df_res['rc'][idx])
                    else:
                        rise, flat, rc = row
                    proc.add_processor(pole_zero, "wf_blsub", rc*us, "wf_pz")
                    proc.add_processor(trap_norm, "wf_pz", rise*us, flat*us, f"wf_trap_{i}")
                    proc.add_processor(asymTrapFilter, "wf_pz", 0.05*us, 4*us, 4*us, "wf_atrap")
                    proc.add_processor(time_point_thresh, "wf_pz", 0, "tp_0")
                    proc.add_processor(np.amax, f"wf_trap_{i}", 1, f"trapE_{i}", signature='(n),()->()', types=['fi->f'])
                    proc.add_processor(fixed_time_pickoff, f"wf_trap_{i}", f"tp_0+({rise*us}+{flat*us})", f"trapEftp_{i}")
                if 'zacE' in efilter:
                    if use_corr:
                        sigma = float(df_res['sigma'][idx])
                        flat = float(df_res['flat'][idx])
                        decay = float(df_res['decay'][idx])
                    else:
                        sigma, flat, decay = row
                    proc.add_processor(zac_filter(wsize, sigma/dt0, flat/dt0, decay/dt0),"wf", f"wf_zac_{i}(101, f)")
                    proc.add_processor(np.amax, f"wf_zac_{i}", 1, f"zacE_{i}", signature='(n),()->()', types=['fi->f'])
                if 'cuspE' in efilter:
                    if use_corr:
                        sigma = float(df_res['sigma'][idx])
                        flat = float(df_res['flat'][idx])
                        decay = float(df_res['decay'][idx])
                    else:
                        sigma, flat, decay = row
                    proc.add_processor(cusp_filter(wsize, sigma/dt0, flat/dt0, decay/dt0),"wf_blsub", f"wf_cusp_{i}(101, f)")
                    proc.add_processor(np.amax, f"wf_cusp_{i}", 1, f"cuspE_{i}", signature='(n),()->()', types=['fi->f'])
                if use_corr:
                    proc.add_processor(trap_pickoff, "wf_pz", 1.5*us, 0, "tp_0", "ct_corr")
                    proc.add_processor(np.multiply, ct_const, "ct_corr", f"ct_corr_cal_{i}")
                    proc.add_processor(np.add, f"ct_corr_cal_{i}", f"{bfilter}_{i}", f"{efilter}_{i}")

            # Set up the LH5 output: one column per grid row
            lh5_out = lh5.Table(size=proc._buffer_len)
            for i, row in df_grid.iterrows():
                lh5_out.add_field(f"{efilter}_{i}", lh5.Array(proc.get_output_buffer(f"{efilter}_{i}"), attrs={"units":"ADC"}))

            print("Processing:\n",proc)
            proc.execute()

            groupname = ged+"/data"
            print("Writing to: " + f_opt + "/" + groupname)
            lh5_in.write_object(lh5_out, groupname, f_opt)
            print("")

    #list the datasets of the output file
    # NOTE(review): the results are not used and 'g024' is hard-coded; the
    # calls are kept in case `ls` is relied on for its side effects.
    lh5_in.ls(f_opt)
    lh5_in.ls(f_opt,'g024/data/*')
    diff = time.time() - t_start
    print(f"Time to process: {diff:.2f} s")
Esempio n. 29
0
def main():
    """
    Clone of pygama/apps/raw_to_dsp.py.  Intended for quick prototyping of dsp_to_hit
    processors.  Heavy lifting with many input/output files should be moved to a
    more specialized processing app, with raw_to_dsp and dsp_to_hit both moved to
    functions in pygama.io.

    Command line arguments: the input LH5 file (positional), `-o/--output`
    and `-g/--group`.

    NOTE: only the first matching group is handled, and the processing chain
    is assembled but neither executed nor written to the output file here.
    """
    parser = argparse.ArgumentParser(
        description=
        """Process a 'pygama DSP LH5' file and produce a 'pygama HIT LH5' file."""
    )
    parser.add_argument('file', help="Input (dsp) LH5 file.")
    parser.add_argument(
        '-o',
        '--output',
        help="Name of output file. By default, output to ./d2h_test.lh5.")

    parser.add_argument(
        '-g',
        '--group',
        default='',
        help=
        "Name of group in LH5 file. By default process all base groups. Supports wildcards."
    )
    args = parser.parse_args()

    lh5_in = lh5.Store()
    groups = lh5_in.ls(args.file, args.group)

    out = args.output if args.output is not None else './d2h_test.lh5'
    print('output file:', out)

    # prototype: restrict to the first group
    for group in groups[:1]:
        print(group)

        print("Processing: " + args.file + '/' + group)

        data = lh5_in.read_object(group + '/raw', args.file)

        # sampling period converted with the units stored alongside it
        dt = data['waveform']['dt'].nda[0] * unit_parser.parse_unit(
            data['waveform']['dt'].attrs['units'])

        ene_in = data['energy'].nda

        n_block = 8
        verbose = 1

        proc = ProcessingChain(block_width=n_block,
                               clock_unit=dt,
                               verbosity=verbose)

        proc.add_input_buffer("ene_in", ene_in, dtype='uint16')

        proc.add_processor(energy_cal, "ene_in")
Esempio n. 30
0
def get_superpulses(dfp, dg, f_super):
    """
    calculate average waveforms for each set of pulser data.
    save an output file with the superpulses for further analysis.

    Parameters
    ----------
    dfp : pandas.DataFrame
        One row per pulser run.  The columns 'E_keV', 'runtime', 'V_pulser',
        'cycle', 'dsp_path', 'dsp_file', 'raw_path' and 'raw_file' are read.
        A 'superpulse' column is added to it in place.
    dg : object
        Must provide `dg.lh5_dir`, the directory prefix of the dsp/raw files.
    f_super : str
        Output HDF5 file, written under the key 'superpulses'.

    Notes
    -----
    A row gets an empty list instead of a superpulse when it is the
    background run (E_keV == 0), when no event lies within 1 sigma of the
    fitted pulser peak, or when no waveform survives the cleaning steps.
    """
    # find this with the show_spectra function above
    ecal = 1460.8 / 2.005e6  # works for pulser dataset 2 (dec 2020)

    # more settings
    show_plots = True  # default True
    write_output = True
    nwfs = 1000  # limit number to go fast.  1000 is enough for a good measurement
    tp_align = 0.5  # pct timepoint to align wfs at
    e_window = 10  # plot (in keV) this window around each pulser peak
    n_pre, n_post = 50, 100  # num samples before/after tp_align
    bl_thresh = 10  # allowable baseline ADC deviation

    dsp_name = 'ORSIS3302DecoderForEnergy/dsp'
    raw_name = 'ORSIS3302DecoderForEnergy/raw/waveform'

    sto = lh5.Store()
    t_start = time.time()

    def analyze_pulser_run(df_row):
        """
        compute the superpulse for one row of dfp
        """
        epk, rt, vp, cyc = df_row[['E_keV', 'runtime', 'V_pulser', 'cycle']]
        rt *= 60  # sec
        if epk == 0: return []  # skip the bkg run

        # load pulser energies
        f_dsp = dg.lh5_dir + '/' + df_row.dsp_path + '/' + df_row.dsp_file
        pdata = lh5.load_nda([f_dsp], ['energy'], dsp_name)['energy'] * ecal

        # auto-narrow the window around the max pulser peak in two steps
        elo, ehi, epb = epk - 50, epk + 50, 0.5
        pdata_all = pdata[(pdata > elo) & (pdata < ehi)]
        hp, bp, _ = pgh.get_hist(pdata_all, range=(elo, ehi), dx=epb)
        pctr = bp[np.argmax(hp)]

        plo, phi, ppb = pctr - e_window, pctr + e_window, 0.1
        pdata_pk = pdata[(pdata > plo) & (pdata < phi)]
        hp, bp, _ = pgh.get_hist(pdata_pk, range=(plo, phi), dx=ppb)
        hp_rt = np.divide(hp, rt)
        hp_var = np.sqrt(hp / rt)

        # fit a gaussian to get 1 sigma e-values
        ibin_bkg = 50
        bkg0 = np.mean(hp_rt[:ibin_bkg])
        b, h = bp[1:], hp_rt
        imax = np.argmax(h)
        upr_half = b[np.where((b > b[imax]) & (h <= np.amax(h) / 2))][0]
        bot_half = b[np.where((b < b[imax]) & (h <= np.amax(h) / 2))][-1]
        fwhm = upr_half - bot_half
        sig0 = fwhm / 2.355
        amp0 = np.amax(hp_rt) * fwhm
        p_init = [amp0, bp[imax], sig0, bkg0]
        p_fit, _ = pgf.fit_hist(pgf.gauss_bkg,
                                hp_rt,
                                bp,
                                var=hp_var,
                                guess=p_init)
        amp, mu, sigma, bkg = p_fit

        # select events within 1 sigma of the maximum
        # and pull the waveforms from the raw file to make a superpulse.
        # idx is always a 1-d array of row numbers, whether or not it has to
        # be truncated to nwfs.
        idx = np.where((pdata >= mu - sigma) & (pdata <= mu + sigma))[0]
        print(
            f'Pulser at {epk} keV, {len(idx)} events.  Limiting to {nwfs}.')
        if len(idx) == 0:
            print('  no events within 1 sigma of the peak, skipping.')
            return []
        idx = idx[:nwfs]

        # grab the 2d numpy array of pulser wfs
        n_rows = int(idx[-1]) + 1  # read up to this event and stop
        f_raw = dg.lh5_dir + '/' + df_row.raw_path + '/' + df_row.raw_file
        tb_wfs, n_wfs = sto.read_object(raw_name, f_raw, n_rows=n_rows)
        pwfs = tb_wfs['values'].nda[idx, :]

        # data cleaning step: remove events with outlier baselines.
        # the mode is the most common integer baseline; np.unique sorts its
        # values, so ties resolve to the smallest one.
        bl_means = pwfs[:, :500].mean(axis=1)
        bl_vals, bl_counts = np.unique(bl_means.astype(int),
                                       return_counts=True)
        bl_mode = bl_vals[np.argmax(bl_counts)]
        bl_ok = np.abs(bl_means - bl_mode) < bl_thresh
        pwfs = pwfs[bl_ok, :]
        bl_means = bl_means[bl_ok]

        # baseline subtract (one baseline per waveform, broadcast along time)
        wfs = pwfs - bl_means[:, None]

        # time-align all wfs at their 50% timepoint (tricky!).
        # adapted from pygama/sandbox/old_dsp/[calculators,transforms].py
        # an alternate approach would be to use ProcessingChain here
        wf_maxes = np.amax(wfs, axis=1)
        timepoints = np.argmax(wfs >= wf_maxes[:, None] * tp_align, axis=1)

        # drop waveforms whose [tp - n_pre, tp + n_post) window would run off
        # either end of the trace; negative indices would otherwise wrap
        # around silently and too-large ones would raise.
        in_range = ((timepoints - n_pre >= 0)
                    & (timepoints + n_post <= wfs.shape[1]))
        wfs, timepoints = wfs[in_range], timepoints[in_range]
        if wfs.shape[0] == 0:
            print('  no waveforms left after cleaning, skipping.')
            return []

        wf_idxs = timepoints[:, None] + np.arange(-n_pre, n_post)
        row_idxs = np.arange(wfs.shape[0])[:, None]
        wfs = wfs[row_idxs, wf_idxs]

        # take the average to get the superpulse
        superpulse = np.mean(wfs, axis=0)

        # normalize all wfs to the superpulse maximum
        wfmax = np.amax(superpulse)
        superpulse = np.divide(superpulse, wfmax)
        wfs = np.divide(wfs, wfmax)

        # -- plot results --
        if show_plots:
            fig, (p0, p1) = plt.subplots(2, figsize=(7, 8))

            # plot fit result (top), and waveforms + superpulse (bottom)
            xfit = np.arange(plo, phi, ppb * 0.1)
            p0.plot(xfit,
                    pgf.gauss_bkg(xfit, *p_init),
                    '-',
                    c='orange',
                    label='init')
            p0.plot(xfit,
                    pgf.gauss_bkg(xfit, *p_fit),
                    '-',
                    c='red',
                    label='fit')

            # plot 1 sigma window
            p0.axvspan(mu - sigma,
                       mu + sigma,
                       color='m',
                       alpha=0.2,
                       label='1 sigma')

            # plot data
            p0.plot(bp[1:],
                    hp_rt,
                    ds='steps',
                    c='k',
                    lw=1,
                    label=f'{vp:.2f} V')
            p0.set_xlabel(f'onboard energy (keV, c={ecal:.2e})',
                          ha='right',
                          x=1)
            p0.set_ylabel('cts / s', ha='right', y=1)
            p0.legend(fontsize=10)

            # plot individ. wfs
            ts = np.arange(0, len(wfs[0, :]))
            for iwf in range(wfs.shape[0]):
                p1.plot(ts, wfs[iwf, :], '-k', lw=2, alpha=0.5)
            # empty line, only there to give the waveforms one legend entry
            p1.plot(np.nan, np.nan, '-k', label=f'wfs, {epk:.0f} keV')

            # plot superpulse
            p1.plot(ts,
                    superpulse,
                    '-r',
                    lw=2,
                    label=f'superpulse, {vp:.2f} V')

            p1.set_xlabel('time (10 ns)', ha='right', x=1)
            p1.set_ylabel('amplitude', ha='right', y=1)
            p1.legend(fontsize=10)
            fig.savefig(f'./plots/superpulse_cyc{cyc}.png', dpi=150)
            # a new figure is made for every row, so close it when done
            plt.close(fig)

        # save the superpulse to our output file
        return superpulse

    dfp['superpulse'] = dfp.apply(analyze_pulser_run, axis=1)

    # drop the duplicated 'run' row before saving
    dfp = dfp.loc[:, ~dfp.columns.duplicated()]
    print(dfp)

    if write_output:
        print('Saving output file: ', f_super)
        dfp.to_hdf(f_super, key='superpulses')

    t_elap = (time.time() - t_start) / 60
    print(f'Done.  Elapsed: {t_elap:.2f} min.')