def show_lowe_wfs():
    """
    Plot really low-energy waveforms that survive the data cleaning cut.

    Reads the hit table of one hard-coded OPPI cycle, selects up to ``nwfs``
    events whose ``trapE_cal`` energy and baseline ``bl`` fall inside the cut
    windows, reads the matching rows of the raw file, overlays the waveforms
    and saves the figure to ``./plots/lowe_wfs.png``.

    If no event passes the cuts a message is printed and nothing is plotted.
    """
    f_raw = '/Users/wisecg/Data/OPPI/raw/oppi_run0_cyc2027_raw.lh5'
    f_hit = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'
    tb_name = 'ORSIS3302DecoderForEnergy/raw'

    hit_store = lh5.Store()
    data = hit_store.read_object(tb_name, f_hit)
    df_hit = data.get_dataframe()

    # correct energy_first to allow negative values.  The corrected values are
    # assigned back to the column rather than written through `.values`,
    # which is not guaranteed to be a writable view of the DataFrame.
    # NOTE(review): the shift is 2**32 - 1; if this is meant to undo an
    # unsigned 32-bit wrap-around it should probably be 2**32 -- confirm.
    efirst = df_hit['energy_first'].astype(np.int64)
    df_hit['energy_first'] = efirst.where(efirst <= 4e9, efirst - 4294967295)

    nwfs = 40
    elo, ehi = 1, 10
    blo, bhi = 57700, 58500  # cut values
    etype = 'trapE_cal'  # noise stops @ 35 keV

    passes = ((df_hit[etype] > elo) & (df_hit[etype] < ehi)
              & (df_hit.bl > blo) & (df_hit.bl < bhi))
    idx_lowe = df_hit[etype].loc[passes].index[:nwfs]
    # print(df_hit.loc[idx_lowe])

    if len(idx_lowe) == 0:
        print('No events pass the low-energy cut, nothing to plot.')
        return

    # only read the raw file up to the last selected event
    i_max = idx_lowe[-1]
    raw_store = lh5.Store()
    data_raw = raw_store.read_object(tb_name, f_raw, start_row=0,
                                     n_rows=i_max + 1)
    wfs = data_raw['waveform']['values'].nda
    wfs_lowe = wfs[idx_lowe.values, :]
    ts = np.arange(0, wfs_lowe.shape[1], 1)

    # plot wfs
    for wf in wfs_lowe:
        plt.plot(ts, wf, lw=1, alpha=0.5)

    plt.xlabel('time (clock ticks)', ha='right', x=1)
    plt.ylabel('ADC', ha='right', y=1)
    # plt.show()
    plt.savefig('./plots/lowe_wfs.png', dpi=300)
    plt.cla()
def show_raw_spectrum():
    """
    Histogram an uncalibrated energy estimator from the test DSP file.

    - a starting point for getting calibration constants for the onboard
      energy and the 'trapE' energy
    - TODO: fit each expected peak and get resolution vs energy
    """
    dsp_path = '/Users/wisecg/Data/OPPI/dsp/oppi_run0_cyc2027_dsp_test.lh5'

    # we will probably make this part simpler in the near future
    store = lh5.Store()
    store.ls(dsp_path)
    table = store.read_object('ORSIS3302DecoderForEnergy/raw', dsp_path)

    # from here, we can use standard pandas to work with data
    df_dsp = table.get_dataframe()
    print(df_dsp)

    # histogram settings: column to plot, range, bin width.
    # settings used previously for the onboard 'energy' column:
    #   (0, 8e6), 1000      whole spectrum
    #   (0, 800000), 1000   < 250 keV
    etype = 'trapE'
    e_range = (0, 10000)
    bin_width = 10

    counts, edges, _ = pgh.get_hist(df_dsp[etype], range=e_range,
                                    dx=bin_width)

    # trim zero bin, not needed with ds='steps'
    plt.plot(edges[1:], counts, ds='steps', c='b', lw=2, label=etype)
    plt.xlabel(etype, ha='right', x=1)
    plt.ylabel('Counts', ha='right', y=1)
    plt.legend()
    plt.tight_layout()
    plt.show()
def show_cal_spectrum():
    """
    Plot the spectrum of a calibrated energy column from the hit file.

    The whole hit table is loaded into a DataFrame and printed, then the
    chosen estimator is histogrammed and shown.
    """
    hit_path = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'
    table_name = 'ORSIS3302DecoderForEnergy/raw'

    store = lh5.Store()
    store.ls(hit_path)
    df_hit = store.read_object(table_name, hit_path).get_dataframe()
    print(df_hit)

    # choose energy estimator ('trapE_cal' is the alternative)
    etype = 'energy_cal'

    # energy in keV: histogram range and bin width
    e_range = (0, 3000)
    bin_width = 0.5

    counts, edges, _ = pgh.get_hist(df_hit[etype], range=e_range,
                                    dx=bin_width)

    # trim zero bin, not needed with ds='steps'
    plt.plot(edges[1:], counts, ds='steps', c='b', lw=2, label=etype)
    plt.xlabel(etype, ha='right', x=1)
    plt.ylabel('Counts', ha='right', y=1)
    plt.legend()
    plt.tight_layout()
    plt.show()
def main():
    """
    Example: load an LH5 DSP file, convert it to a pandas DataFrame, and
    draw an uncalibrated 'trapE' spectrum on a log scale.
    """
    # we will probably make this part simpler in the near future
    dsp_path = '/Users/wisecg/Data/lh5/hades_I02160A_r1_191021T162944_th_HS2_top_psa_dsp.lh5'
    store = lh5.Store()
    store.ls(dsp_path)  # the example file only has one group, 'raw'
    table = store.read_object('raw', dsp_path)

    # from here, we can use standard pandas to work with data
    df_dsp = table.get_dataframe()
    print(df_dsp)

    # one example: create uncalibrated energy spectrum,
    # using a pygama helper function to get the histogram
    e_range = (0, 100000)
    bin_width = 10
    counts, edges, _ = pgh.get_hist(df_dsp['trapE'], range=e_range,
                                    dx=bin_width)

    # trim zero bin, not needed with ds='steps'
    plt.semilogy(edges[1:], counts, ds='steps', c='b', label='trapE')
    plt.xlabel('trapE', ha='right', x=1)
    plt.ylabel('Counts', ha='right', y=1)
    plt.legend()
    plt.tight_layout()
    plt.show()
def get_runtimes(dg):
    """
    $ ./setup.py --runtime

    Compute a Ge runtime for every cycle file and store it in the fileDB.

    For each row the raw Ge file is opened, the last 'timestamp' entry of up
    to three channels is read, and two columns are filled:
      - 'runtime': the largest of those final timestamps divided by 60
      - 'rt_std' : the standard deviation of the final timestamps
    Rows whose Ge raw file does not exist get 0 in both columns.
    The user is asked for confirmation before the fileDB is saved.
    Requires the raw LH5 files.
    """
    dg.load_df()
    # dg.fileDB = dg.fileDB[50:55] # debug only

    # reset columns of interest
    new_cols = ['runtime', 'rt_std']
    stale_cols = [col for col in new_cols if col in dg.fileDB.columns]
    for col in stale_cols:
        dg.fileDB.drop(col, axis=1, inplace=True)

    sto = lh5.Store()
    t_start = time.time()

    def runtime_cycle(df_row):
        # the stored path has {these} in it; always look for Ge
        ge_subs = {'sysn': 'geds'}
        f_raw = f'{dg.lh5_dir}/{df_row.raw_path}/{df_row.raw_file}'
        f_raw = f_raw.format_map(ge_subs)

        if not os.path.exists(f_raw):
            # no Ge data for this cycle
            return pd.Series({'runtime': 0, 'rt_std': 0})

        # for PGT, compare the first three channels (for redundancy)
        first_groups = sto.ls(f_raw)[:3]
        rts = [
            lh5.load_nda([f_raw], ['timestamp'], ge + '/raw/')['timestamp'][-1]
            for ge in first_groups
        ]

        # take largest value & compute uncertainty
        result = {
            'runtime': max(rts) / 60,
            'rt_std': np.std(np.array([rts])),
        }
        return pd.Series(result)

    dg.fileDB[new_cols] = dg.fileDB.progress_apply(runtime_cycle, axis=1)

    elapsed_min = (time.time() - t_start) / 60
    print(f'Done. Time elapsed: {elapsed_min:.2f} mins.')

    # save to fileDB if everything looks OK
    print(dg.fileDB)
    print(dg.fileDB.columns)
    print('FileDB location:', dg.config['fileDB'])
    if input('Save new fileDB? (y/n)').lower() == 'y':
        dg.save_df(dg.config['fileDB'])
def dsp_to_hit():
    """
    Add calibrated energy columns to the DSP table and write the result to
    the hit file.

    This is a good example of adding a column, reading & writing to an LH5
    file.  For each estimator, `linear_cal` supplies the two coefficients
    applied as ``E * pfit[0] + pfit[1]``.
    """
    dsp_path = '/Users/wisecg/Data/OPPI/dsp/oppi_run0_cyc2027_dsp_test.lh5'
    hit_path = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'
    table_name = 'ORSIS3302DecoderForEnergy/raw'

    store = lh5.Store()
    store.ls(dsp_path)
    table = store.read_object(table_name, dsp_path)
    df_dsp = table.get_dataframe()

    # add a new column for each energy estimator of interest
    for etype in ('energy', 'trapE'):
        cal_col = f'{etype}_cal'
        pfit = linear_cal(etype)
        df_dsp[cal_col] = df_dsp[etype] * pfit[0] + pfit[1]

        cal_array = lh5.Array(df_dsp[cal_col].values, attrs={'units': 'keV'})
        table.add_field(cal_col, cal_array)

    # write to hit file.  delete if exists, LH5 overwrite is broken rn
    if os.path.exists(hit_path):
        os.remove(hit_path)
    store.write_object(table, table_name, hit_path)
def show_wfs(dg):
    """
    Show waveforms in different energy regions, using the hit file to select
    events.

    Loads the energy column named by ``etype`` from the hit files listed in
    ``dg.file_keys``, picks the first ``nwfs`` events inside ``[elo, ehi]``,
    reads the corresponding rows of the first raw file and plots them.

    Parameters
    ----------
    dg : object providing ``config['lh5_dir']`` and a ``file_keys`` table
        with the columns 'hit_path', 'hit_file', 'raw_path', 'raw_file'.
    """
    # settings
    etype = 'trapEmax_cal'
    nwfs = 20
    # elo, ehi = 0, 100      # low-e region
    elo, ehi = 0, 20  # noise region
    # elo, ehi = 1458, 1468  # good physics events
    # elo, ehi = 6175, 6250  # overflow peak
    # elo, ehi = 5000, 5200  # lower overflow peak

    # get file list and load hit data.  Load the column that the selection
    # below actually uses: previously only 'trapEmax' was requested, so the
    # lookup of 'trapEmax_cal' could not succeed.
    lh5_dir = os.path.expandvars(dg.config['lh5_dir'])
    hit_list = (lh5_dir + dg.file_keys['hit_path'] + '/'
                + dg.file_keys['hit_file'])
    df_hit = lh5.load_dfs(hit_list, [etype], 'ORSIS3302DecoderForEnergy/hit')
    print(df_hit)
    print(df_hit.columns)

    # select waveforms
    in_region = (df_hit[etype] >= elo) & (df_hit[etype] <= ehi)
    idx = df_hit[etype].loc[in_region].index[:nwfs]
    if len(idx) == 0:
        print(f'No events with {elo} <= {etype} <= {ehi}, nothing to plot.')
        return

    raw_store = lh5.Store()
    tb_name = 'ORSIS3302DecoderForEnergy/raw'
    raw_list = (lh5_dir + dg.file_keys['raw_path'] + '/'
                + dg.file_keys['raw_file'])
    f_raw = raw_list.values[0]  # fixme, only works for one file rn

    # only read the raw file up to the last selected event
    data_raw = raw_store.read_object(tb_name, f_raw, start_row=0,
                                     n_rows=idx[-1] + 1)
    wfs_all = data_raw['waveform']['values'].nda
    wfs = wfs_all[idx.values, :]
    ts = np.arange(0, wfs.shape[1], 1)

    # plot wfs
    for wf in wfs:
        plt.plot(ts, wf, lw=1)

    plt.xlabel('time (clock ticks)', ha='right', x=1)
    plt.ylabel('ADC', ha='right', y=1)
    plt.show()
def get_data(files, groupname, e_param='trapE'):
    """
    Read one energy column from one or more LH5 files and return it as a
    single array.

    Parameters
    ----------
    files : str, path-like, or iterable of those
        File name(s); environment variables in the names are expanded.
    groupname : str
        LH5 group to read from each file.
    e_param : str
        Name of the column to extract (default 'trapE').

    Returns
    -------
    numpy.ndarray
        The per-file arrays concatenated in the order given, or an empty
        array when `files` is an empty sequence.
    """
    # a single path is treated as a one-element file list
    if isinstance(files, (str, os.PathLike)):
        files = [files]

    dsp = lh5.Store()
    arrays = [
        dsp.read_object(groupname, os.path.expandvars(file))[e_param].nda
        for file in files
    ]

    if not arrays:
        return np.asarray([])

    # concatenate whole arrays instead of extending a Python list
    # element by element
    return np.concatenate(arrays)
def show_groups():
    """
    Example of accessing the names of the HDF5 groups in our LH5 files,
    printing the shape of every column, and converting the table to pandas.
    """
    paths = {
        'raw': '/Users/wisecg/Data/OPPI/raw/oppi_run0_cyc2027_raw.lh5',
        'dsp': '/Users/wisecg/Data/OPPI/dsp/oppi_run0_cyc2027_dsp_test.lh5',
        'hit': '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5',
    }
    dsp_path = paths['dsp']

    # h5py method, for reference:
    #   hf = h5py.File(paths['raw'])
    #   print(hf.keys())
    #   print(hf['ORSIS3302DecoderForEnergy/raw'].keys())
    #   print(hf['ORSIS3302DecoderForEnergy/raw/waveform'].keys())

    # lh5 method
    store = lh5.Store()
    store.ls(dsp_path)
    table = store.read_object('ORSIS3302DecoderForEnergy/raw', dsp_path)

    # testing -- make sure data columns all have same shape
    for name in table.keys():
        column = table[name]
        print(name, column.nda.shape)

    # to directly access timestamps in a raw file w/o loading all the wfs:
    #   data = store.read_object('ORSIS3302DecoderForEnergy/raw/timestamp',
    #                            paths['raw'])
    #   ts = data.nda

    # check pandas conversion
    df_dsp = table.get_dataframe()
    print(df_dsp.columns)
    print(df_dsp)
def check_wfs(dg):
    """
    somebody inevitably asks you, 'have you looked at the waveforms?'
    in this function, compare alpha wfs to gamma wfs.

    The temp results file written for the first cycle is used to pick event
    indexes (an energy window plus an A/E window per population), and the
    corresponding waveforms are taken from the first raw file.  Up to ten
    waveforms of each population are drawn, each divided by its own maximum:
    gammas in blue, alphas in red.

    LH5 doesn't let us only load particular indexes (yet), so all the
    waveforms in the file are loaded every time.
    """
    # load dsp results
    cycle = dg.fileDB['cycle'].values[0]
    df_dsp = pd.read_hdf(f'./temp_{cycle}.h5', 'opt_dcr')

    # load waveforms
    sto = lh5.Store()
    lh5_dir = os.path.expandvars(dg.config['lh5_dir'])
    raw_list = lh5_dir + dg.fileDB['raw_path'] + '/' + dg.fileDB['raw_file']
    f_raw = raw_list.values[0]
    tb_wfs = sto.read_object('ORSIS3302DecoderForEnergy/raw/waveform', f_raw)

    # energy cut (other windows tried: 8000-16000, 8000-10000)
    et = 'trapEmax'
    elo, ehi = 12000, 13000

    # aoe cut (a dcr cut was tried before: alpha -0.5..0.5, gamma 0.8..1.2)
    alp_lo, alp_hi = 0.064, 0.068
    gam_lo, gam_hi = 0.05, 0.06

    # selection
    in_ewin = (df_dsp[et] > elo) & (df_dsp[et] < ehi)
    is_alp = in_ewin & (df_dsp.aoe > alp_lo) & (df_dsp.aoe < alp_hi)
    is_gam = in_ewin & (df_dsp.aoe > gam_lo) & (df_dsp.aoe < gam_hi)
    idx_alp = df_dsp[et].loc[is_alp].index
    idx_gam = df_dsp[et].loc[is_gam].index

    wfs_alp = tb_wfs['values'].nda[idx_alp]
    wfs_gam = tb_wfs['values'].nda[idx_gam]
    print(f'found {wfs_alp.shape[0]} alpha candidates')
    print(f'found {wfs_gam.shape[0]} gamma candidates')

    # plot: gammas first, then alphas, at most ten of each
    ts = np.arange(0, wfs_gam.shape[1], 1)
    n_shown = 10
    for wfs, fmt in ((wfs_gam, '-b'), (wfs_alp, '-r')):
        for wf in wfs[:n_shown]:
            peak = np.amax(wf)
            plt.plot(ts[:-1], wf[:-1] / peak, fmt, lw=1, alpha=0.5)

    plt.xlabel('time (clock ticks)', ha='right', x=1)
    plt.ylabel('ADC', ha='right', y=1)
    plt.show()
def optimize_dcr(dg):
    """
    Run DSP once with hand-picked DCR settings and save the results.

    I don't have an a priori figure of merit for the DCR parameter, until I
    can verify that we're seeing alphas.  So this function just runs
    processing on a CAGE run with known alpha events, so the 2d DCR vs.
    energy can be inspected afterwards.  Once we know we can reliably
    measure the alpha distribution somehow, then perhaps we can try a grid
    search optimization like the one done in optimize_trap.

    The DSP configuration is read from 'opt_dcr.json', a few entries are
    overridden below, and the output table is written to
    './temp_<cycle>.h5' under the key 'opt_dcr'.
    """
    write_output = True

    # files to consider.  fixme: right now only works with one file
    sto = lh5.Store()
    lh5_dir = os.path.expandvars(dg.config['lh5_dir'])
    raw_list = lh5_dir + dg.fileDB['raw_path'] + '/' + dg.fileDB['raw_file']
    f_raw = raw_list.values[0]
    tb_data = sto.read_object('ORSIS3302DecoderForEnergy/raw/', f_raw)

    cycle = dg.fileDB['cycle'].values[0]
    f_results = f'./temp_{cycle}.h5'

    # adjust dsp config
    with open('opt_dcr.json') as f:
        dsp_config = json.load(f, object_pairs_hook=OrderedDict)
    processors = dsp_config['processors']

    # set dcr parameters: rise, flat, tstart
    #   200, 1000, 'tp_0+1.5*us'  -- default
    #   100, 3000, 'tp_0+3*us'    -- best so far?
    dcr_settings = (100, 2500, 'tp_0+1*us')
    for i_arg, value in enumerate(dcr_settings, start=1):
        processors['dcr_raw']['args'][i_arg] = value

    # set trap energy parameters: rise, flat
    #   "2*us", "1*us"  -- best? from optimize_trap
    trap_settings = ("10*us", "5*us")
    for i_arg, value in enumerate(trap_settings, start=1):
        processors['wf_trap']['args'][i_arg] = value

    # adjust pole-zero constant (also tried: '50*us', '100*us')
    processors['wf_pz']['defaults']['db.pz.tau'] = '64.4*us'

    # run dsp
    print('Running DSP ...')
    t_start = time.time()
    pc, tb_out = build_processing_chain(tb_data, dsp_config, verbosity=1)
    pc.execute()
    t_elap = (time.time() - t_start) / 60
    print(f'Done. Elapsed: {t_elap:.2f} min')

    df_out = tb_out.get_dataframe()

    if write_output:
        df_out.to_hdf(f_results, key='opt_dcr')
        print('Wrote output file:', f_results)
def raw_to_dsp(f_raw, f_dsp, dsp_config, lh5_tables=None, verbose=1,
               outputs=None, n_max=np.inf, overwrite=True, buffer_len=3200,
               block_width=8):
    """
    Run the DSP processing chain on a raw LH5 file and write a DSP file.

    Uses the ProcessingChain class.  The list of processors is specified via
    a JSON file (or an already-loaded dict).

    Parameters
    ----------
    f_raw : str
        Input raw LH5 file.
    f_dsp : str
        Output DSP file.  An existing file of that name is deleted first.
    dsp_config : str or dict
        Path of a JSON config file, or the loaded configuration.
    lh5_tables : list of str, optional
        Tables to process.  When None, every table found in `f_raw` is
        considered.  Entries without 'raw' in their name are ignored.
    verbose : int
        Values above 0 enable the progress display; also passed to
        `build_processing_chain`.
    outputs : optional
        Passed through to `build_processing_chain`.
    n_max : number
        Upper limit on the number of rows processed per table.
    overwrite : bool
        Currently unused: the old output file is always removed (see TODO).
    buffer_len : int
        Number of rows read and processed per iteration.
    block_width : int
        Passed through to `build_processing_chain`.

    Raises
    ------
    Exception
        If `dsp_config` is neither a path string nor a dict.
    """
    t_start = time.time()

    if isinstance(dsp_config, str):
        with open(dsp_config, 'r') as config_file:
            dsp_config = json.load(config_file, object_pairs_hook=OrderedDict)

    if not isinstance(dsp_config, dict):
        raise Exception('Error, dsp_config must be an dict')

    raw_store = lh5.Store()
    lh5_file = raw_store.gimme_file(f_raw, 'r')

    # if no group is specified, assume we want to decode every table in the file
    if lh5_tables is None:
        lh5_tables = []
        # sometimes 'raw' is nested, e.g g024/raw
        for tb in raw_store.ls(f_raw):
            if "raw" not in tb:
                sub_tables = raw_store.ls(lh5_file[tb])
                # an empty group has nothing to descend into
                if sub_tables and "raw" in sub_tables[0]:
                    tb = tb + '/' + sub_tables[0]  # g024 + /raw
            lh5_tables.append(tb)

    # make sure every group points to waveforms, if not, drop the group.
    # Build a new list: removing entries from a list while iterating over it
    # skips the element that follows each removal.
    lh5_tables = [tb for tb in lh5_tables if 'raw' in tb]

    # delete the old file.
    # TODO: ONCE BUGS ARE FIXED IN LH5 MODULE, DO THIS ONLY IF OVERWRITE IS TRUE!
    try:
        os.remove(f_dsp)
        print("Deleted", f_dsp)
    except FileNotFoundError:
        pass
    except OSError as err:
        # still best-effort, but no longer silent
        print(f"Could not delete {f_dsp}: {err}")

    for tb in lh5_tables:
        # load primary table and build processing chain and output table
        tot_n_rows = raw_store.read_n_rows(tb, f_raw)
        if n_max and n_max < tot_n_rows:
            # range() below needs an integer row count
            tot_n_rows = int(n_max)

        lh5_in, n_rows_read = raw_store.read_object(tb, f_raw, 0, buffer_len)
        pc, tb_out = build_processing_chain(lh5_in, dsp_config, outputs,
                                            verbose, block_width)

        print(f'Processing table: {tb} ...')
        for start_row in range(0, tot_n_rows, buffer_len):
            if verbose > 0:
                update_progress(start_row / tot_n_rows)
            lh5_in, n_rows = raw_store.read_object(tb, f_raw,
                                                   start_row=start_row,
                                                   obj_buf=lh5_in)
            # do not process past the requested number of rows
            n_rows = min(tot_n_rows - start_row, n_rows)
            pc.execute(0, n_rows)
            raw_store.write_object(tb_out, tb.replace('/raw', '/dsp'), f_dsp,
                                   n_rows=n_rows)

        if verbose > 0:
            update_progress(1)
        print('Done.  Writing to file ...')

    # write processing metadata
    dsp_info = lh5.Struct()
    dsp_info.add_field('timestamp', lh5.Scalar(np.uint64(time.time())))
    dsp_info.add_field('python_version', lh5.Scalar(sys.version))
    dsp_info.add_field('numpy_version', lh5.Scalar(np.version.version))
    dsp_info.add_field('h5py_version', lh5.Scalar(h5py.version.version))
    dsp_info.add_field('hdf5_version', lh5.Scalar(h5py.version.hdf5_version))
    dsp_info.add_field('pygama_version', lh5.Scalar(pygama_version))
    dsp_info.add_field('pygama_branch', lh5.Scalar(git.branch))
    dsp_info.add_field('pygama_revision', lh5.Scalar(git.revision))
    dsp_info.add_field('pygama_date', lh5.Scalar(git.commit_date))
    dsp_info.add_field('dsp_config',
                       lh5.Scalar(json.dumps(dsp_config, indent=2)))
    raw_store.write_object(dsp_info, 'dsp_info', f_dsp)

    t_elap = (time.time() - t_start) / 60
    print(f'Done processing.  Time elapsed: {t_elap:.2f} min.')
# Command-line check of buffered LH5 reads.
# Expects exactly four arguments: the file name, the table path inside the
# file, the buffer size (rows per read), and the name of an array column.
if len(sys.argv) != 5:
    print('Usage: python', sys.argv[0], '[filename] [table_path] [buffer_size] [arr_col]')
    print(
        ' where arr_col is the name of an Array-like object in one of the table columns.'
    )
    sys.exit()

filename = sys.argv[1]
name = sys.argv[2]
buffer_size = int(sys.argv[3])
arr_col = sys.argv[4]

# read n_iter buffers' worth of rows in one go ...
n_iter = 4
test_rows = n_iter * buffer_size
store = lh5.Store()
comp_table, n_rows_read = store.read_object(name, filename, n_rows=test_rows)

# ... then read the same span again, one buffer at a time, reusing table_buf
table_buf = store.get_buffer(name, filename, size=buffer_size)
success_its = 0
for i_it in range(n_iter):
    print('iteration', i_it)
    start_row = i_it * buffer_size
    table_buf, n_rows_read = store.read_object(name, filename, start_row=start_row, obj_buf=table_buf)
    # report reads that returned no rows
    if n_rows_read == 0:
        print('n_rows_read = 0')
def __init__(self, files_in, lh5_group, dsp_config=None, n_drawn=1,
             x_unit='ns', x_lim=None, waveforms='waveform', lines=None,
             legend=None, norm=None, align=None, selection=None,
             buffer_len=128, block_width=8, verbosity=1):
    """Constructor for WaveformBrowser:
    - file_in: name of file or list of names to browse. Can use wildcards
    - lh5_group: name of LH5 group in file to browse
    - dsp_config (optional): name of DSP config json file containing
      transforms available to draw
    - n_drawn (default 1): number of events to draw simultaneously when
      calling DrawNext
    - x_unit (default ns): unit for x-axis
    - x_lim (default auto): range of x-values passed as tuple
    - waveforms (default 'waveform'): name of wf or list of wf names to draw
    - lines (default None): name of parameter or list of parameters to draw
      hlines and vlines for
    - legend (default None): name of parameters to include in legend
    - norm (default None): name of parameter (probably energy) to use to
      normalize WFs; useful when drawing multiple
    - align (default None): name of time parameter to set as 0 time; useful
      for aligning multiple waveforms
    - selection (optional): selection of events to draw. Can be either a
      list of event indices or a numpy array mask (ala pandas).
    - buffer_len (default 128): number of waveforms to keep in memory at a
      time
    - block_width (default 8): block width for processing chain
    """

    def as_name_list(names):
        # a single name becomes a one-element list, None an empty list,
        # anything else is copied into a new list
        if isinstance(names, str):
            return [names]
        if names is None:
            return []
        return list(names)

    self.verbosity = verbosity

    # data i/o initialization
    self.lh5_st = lh5.Store(keep_open=True)
    patterns = [files_in] if isinstance(files_in, str) else files_in

    # Expand wildcards and map out the files
    self.lh5_files = []
    for pattern in patterns:
        matches = glob.glob(os.path.expandvars(pattern))
        self.lh5_files.extend(sorted(matches))
    self.lh5_group = lh5_group

    # file map is cumulative length of files up to file n. By doing
    # searchsorted left, we can get the file for a given wf index
    rows_per_file = [
        self.lh5_st.read_n_rows(lh5_group, f) for f in self.lh5_files
    ]
    self.file_map = np.array(rows_per_file, 'int64')
    np.cumsum(self.file_map, out=self.file_map)

    # Get the input buffer and read the first chunk
    first_file = self.lh5_files[0]
    self.lh5_in = self.lh5_st.get_buffer(self.lh5_group, first_file,
                                         buffer_len)
    self.lh5_st.read_object(self.lh5_group, first_file, 0, buffer_len,
                            self.lh5_in)
    self.buffer_len = buffer_len
    self.current_file = None
    self.current_chunk = None

    # initialize stuff for iteration
    self.selection = selection
    self.index_it = None
    self.reset()
    self.n_drawn = n_drawn

    # initialize list of objects to draw
    self.waveforms = as_name_list(waveforms)
    self.lines = as_name_list(lines)
    self.legend = as_name_list(legend)
    self.labels = []

    self.norm_par = norm
    self.align_par = align

    self.x_unit = units.unit_parser.parse_unit(x_unit)
    self.x_lim = x_lim

    # make processing chain and output buffer
    outputs = self.waveforms + self.lines + self.legend
    if self.norm_par is not None:
        outputs.append(self.norm_par)
    if self.align_par is not None:
        outputs.append(self.align_par)

    self.proc_chain, self.lh5_out = build_processing_chain(
        self.lh5_in, dsp_config, outputs,
        verbosity=self.verbosity, block_width=block_width)

    self.fig = None
    self.ax = None
def raw_to_dsp(ds, overwrite=False, nevt=None, test=False, verbose=2, block=8, group=''):
    """
    Run raw_to_dsp on a set of runs.
    [raw file] ---> [dsp_run{}.lh5] (digital signal processing results)

    For every run in ``ds.runs`` and every top-level group of the raw file,
    a ProcessingChain is built on the group's 'raw' waveforms, executed, and
    the resulting parameters are written to ``<group>/data`` in the DSP file.

    Parameters
    ----------
    ds : object providing ``runs``
    overwrite : bool
        A run is skipped when a DSP file name is set and this is False.
    nevt : unused
    test : bool
        Dry run: only print which raw file would be processed.
    verbose : int
        Verbosity passed to the ProcessingChain.
    block : int
        Block width passed to the ProcessingChain.
    group : str
        Unused here; every top-level group of the raw file is processed.

    NOTE(review): the raw/dsp file names are currently hard-coded and the
    per-run lookup in ``ds.paths`` is commented out, so every run points at
    the same pair of files and nothing is processed unless overwrite=True.
    """
    for run in ds.runs:
        raw_file = "/lfs/l1/legend/users/dandrea/pygama/pgt/tier1/pgt_longtrace_run0117-20200110-105115-calib_raw.lh5"
        dsp_file = "/lfs/l1/legend/users/dandrea/pygama/pgt/tier2/pgt_longtrace_run0117-20200110-105115-calib_dsp.lh5"
        #raw_file = ds.paths[run]["raw_path"]
        #dsp_file = ds.paths[run]["dsp_path"]
        print("raw_file: ",raw_file)
        print("dsp_file: ",dsp_file)

        if dsp_file is not None and overwrite is False:
            continue

        if dsp_file is None:
            # declare new file name
            dsp_file = raw_file.replace('raw_', 'dsp_')

        if test:
            print("test mode (dry run), processing raw file:", raw_file)
            continue

        print("Definition of new LH5 version")
        lh5_in = lh5.Store()

        # the context manager closes the raw file even if processing fails
        with h5py.File(raw_file, 'r') as f:
            print("File info: ",f.keys())

            # `grp` rather than `group`, so the parameter is not shadowed
            for grp in f.keys():
                print("Processing: " + raw_file + '/' + grp)

                data = f[grp]['raw']
                wf_in = data['waveform']['values'][()]
                dt = data['waveform']['dt'][0] * unit_parser.parse_unit(data['waveform']['dt'].attrs['units'])

                # Set up processing chain
                proc = ProcessingChain(block_width=block, clock_unit=dt, verbosity=verbose)
                proc.add_input_buffer("wf", wf_in, dtype='float32')

                # Basic Filters
                proc.add_processor(mean_stdev, "wf[0:1000]", "bl", "bl_sig")
                proc.add_processor(np.subtract, "wf", "bl", "wf_blsub")
                proc.add_processor(pole_zero, "wf_blsub", 145*us, "wf_pz")
                proc.add_processor(trap_norm, "wf_pz", 10*us, 5*us, "wf_trap")
                proc.add_processor(asymTrapFilter, "wf_pz", 0.05*us, 2*us, 4*us, "wf_atrap")

                # Timepoint calculation
                proc.add_processor(np.argmax, "wf_blsub", 1, "t_max", signature='(n),()->()', types=['fi->i'])
                proc.add_processor(time_point_frac, "wf_blsub", 0.95, "t_max", "tp_95")
                proc.add_processor(time_point_frac, "wf_blsub", 0.8, "t_max", "tp_80")
                proc.add_processor(time_point_frac, "wf_blsub", 0.5, "t_max", "tp_50")
                proc.add_processor(time_point_frac, "wf_blsub", 0.2, "t_max", "tp_20")
                proc.add_processor(time_point_frac, "wf_blsub", 0.05, "t_max", "tp_05")
                proc.add_processor(time_point_thresh, "wf_atrap[0:2000]", 0, "tp_0")

                # Energy calculation
                proc.add_processor(np.amax, "wf_trap", 1, "trapEmax", signature='(n),()->()', types=['fi->f'])
                proc.add_processor(fixed_time_pickoff, "wf_trap", "tp_0+(5*us+9*us)", "trapEftp")
                proc.add_processor(trap_pickoff, "wf_pz", 1.5*us, 0, "tp_0", "ct_corr")

                # Current calculation
                proc.add_processor(avg_current, "wf_pz", 10, "curr(len(wf_pz)-10, f)")
                proc.add_processor(np.amax, "curr", 1, "curr_amp", signature='(n),()->()', types=['fi->f'])
                proc.add_processor(np.divide, "curr_amp", "trapEftp", "aoe")

                # DCR calculation: use slope using 1000 samples apart and averaging 200
                # samples, with the start 1.5 us offset from t0
                proc.add_processor(trap_pickoff, "wf_pz", 200, 1000, "tp_0+1.5*us", "dcr_unnorm")
                proc.add_processor(np.divide, "dcr_unnorm", "trapEftp", "dcr")

                # Tail slope. Basically the same as DCR, except with no PZ correction
                proc.add_processor(linear_fit, "wf_blsub[3000:]", "wf_b", "wf_m")
                proc.add_processor(np.divide, "-wf_b", "wf_m", "tail_rc")

                # add zac filter energy calculation
                sigma = 10*us
                flat = 1*us
                decay = 160*us
                proc.add_processor(zac_filter, "wf", sigma, flat, decay, "wf_zac(101, f)")
                proc.add_processor(np.amax, "wf_zac", 1, "zacE", signature='(n),()->()', types=['fi->f'])

                # Set up the LH5 output
                lh5_out = lh5.Table(size=proc._buffer_len)
                lh5_out.add_field("zacE", lh5.Array(proc.get_output_buffer("zacE"), attrs={"units":"ADC"}))
                lh5_out.add_field("trapEmax", lh5.Array(proc.get_output_buffer("trapEmax"), attrs={"units":"ADC"}))
                lh5_out.add_field("trapEftp", lh5.Array(proc.get_output_buffer("trapEftp"), attrs={"units":"ADC"}))
                lh5_out.add_field("ct_corr", lh5.Array(proc.get_output_buffer("ct_corr"), attrs={"units":"ADC*ns"}))
                lh5_out.add_field("bl", lh5.Array(proc.get_output_buffer("bl"), attrs={"units":"ADC"}))
                lh5_out.add_field("bl_sig", lh5.Array(proc.get_output_buffer("bl_sig"), attrs={"units":"ADC"}))
                lh5_out.add_field("A", lh5.Array(proc.get_output_buffer("curr_amp"), attrs={"units":"ADC"}))
                lh5_out.add_field("AoE", lh5.Array(proc.get_output_buffer("aoe"), attrs={"units":"ADC"}))
                lh5_out.add_field("dcr", lh5.Array(proc.get_output_buffer("dcr"), attrs={"units":"ADC"}))

                # NOTE(review): "tp_max" is filled from the "tp_95" buffer, so
                # it duplicates the "tp_95" field below -- looks like a
                # copy/paste slip ("t_max" intended?); confirm before changing.
                lh5_out.add_field("tp_max", lh5.Array(proc.get_output_buffer("tp_95", unit=us), attrs={"units":"us"}))
                lh5_out.add_field("tp_95", lh5.Array(proc.get_output_buffer("tp_95", unit=us), attrs={"units":"us"}))
                lh5_out.add_field("tp_80", lh5.Array(proc.get_output_buffer("tp_80", unit=us), attrs={"units":"us"}))
                lh5_out.add_field("tp_50", lh5.Array(proc.get_output_buffer("tp_50", unit=us), attrs={"units":"us"}))
                lh5_out.add_field("tp_20", lh5.Array(proc.get_output_buffer("tp_20", unit=us), attrs={"units":"us"}))
                lh5_out.add_field("tp_05", lh5.Array(proc.get_output_buffer("tp_05", unit=us), attrs={"units":"us"}))
                lh5_out.add_field("tp_0", lh5.Array(proc.get_output_buffer("tp_0", unit=us), attrs={"units":"us"}))
                lh5_out.add_field("tail_rc", lh5.Array(proc.get_output_buffer("tail_rc", unit=us), attrs={"units":"us"}))

                print("Processing:\n",proc)
                proc.execute()

                groupname = grp+"/data"
                print("Writing to: " + dsp_file + "/" + groupname)
                lh5_in.write_object(lh5_out, groupname, dsp_file)
def get_runtimes(dg, overwrite=False, batch_mode=False):
    """
    $ ./setup.py --rt

    Compute runtime (# minutes in run) and stopTime (unix timestamp) using
    the timestamps in the DSP file, and store them in the fileDB.

    Parameters
    ----------
    dg : object providing ``load_df``, ``save_df``, ``file_keys``,
        ``lh5_dir`` and ``config['fileDB']``.
    overwrite : bool
        Re-scan every row even if a 'runtime' column already exists.
        Otherwise only rows whose 'runtime' is NaN are scanned.
    batch_mode : bool
        Save without asking for confirmation.

    The process exits if there is nothing to update, or if a DSP file is
    missing for a row that is not marked `skip`.

    NOTE: Could change this to use the raw file timestamps instead of dsp
    file, but that still makes this function dependent on a processing step.
    NOTE: CAGE uses struck channel 2 (0-indexed)
    """
    print('Scanning DSP files for runtimes ...')

    # load existing fileDB
    dg.load_df()

    if 'runtime' not in dg.file_keys.columns or overwrite:
        # first-time setup
        df_keys = dg.file_keys.copy()
        update_existing = False
        print('Re-scanning entire fileDB')
    else:
        # look for any rows with nans to update.  df_keys is always bound
        # here (possibly empty) so the check below can exit cleanly.
        idx = dg.file_keys.loc[pd.isna(dg.file_keys['runtime']), :].index
        df_keys = dg.file_keys.loc[idx].copy()
        update_existing = True
        if len(idx) > 0:
            print(f'Found {len(df_keys)} new files without runtime:')
            print(df_keys)
        else:
            print('No empty runtime values found.')

    if len(df_keys) == 0:
        print('No files to update.  Exiting...')
        exit()

    # clear new columns if they exist
    new_cols = ['stopTime', 'runtime']
    for col in new_cols:
        if col in df_keys.columns:
            df_keys.drop(col, axis=1, inplace=True)

    sto = lh5.Store()

    def get_runtime(df_row):
        # load timestamps from dsp file
        f_dsp = dg.lh5_dir + df_row['dsp_path'] + '/' + df_row['dsp_file']

        if not os.path.exists(f_dsp) and not df_row.skip:
            print(f"Error, file doesn't exist:\n  {f_dsp}")
            exit()
        elif df_row.skip:
            print(f'Skipping cycle file:\n  {f_dsp}')
            return pd.Series({'stopTime':0, 'runtime':0})

        data, n_rows = sto.read_object('ORSIS3302DecoderForEnergy/dsp', f_dsp)

        # correct for timestamp rollover
        clock = 100e6  # 100 MHz
        UINT_MAX = 4294967295  # (0xffffffff)
        t_max = UINT_MAX / clock

        ts = data['timestamp'].nda / clock  # converts to float

        # a negative step marks the first sample after a rollover
        tdiff = np.diff(ts)
        tdiff = np.insert(tdiff, 0, 0)
        iwrap = np.where(tdiff < 0)
        iloop = np.append(iwrap[0], len(ts))

        # shift each block between rollovers by the accumulated offset
        ts_new, t_roll = [], 0
        for i, idx in enumerate(iloop):
            ilo = 0 if i == 0 else iwrap[0][i-1]
            ihi = idx
            ts_block = ts[ilo:ihi]
            t_last = ts[ilo-1]
            t_diff = t_max - t_last
            ts_new.append(ts_block + t_roll)
            t_roll += t_last + t_diff
        ts_corr = np.concatenate(ts_new)

        # calculate runtime and unix stopTime
        rt = ts_corr[-1] / 60  # minutes
        st = int(np.ceil(df_row['startTime'] + rt * 60))

        return pd.Series({'stopTime':st, 'runtime':rt})

    df_tmp = df_keys.progress_apply(get_runtime, axis=1)
    df_keys[new_cols] = df_tmp

    # from here on dg.file_keys holds the complete, updated table
    if update_existing:
        idx = dg.file_keys.loc[pd.isna(dg.file_keys['runtime']), :].index
        dg.file_keys.loc[idx] = df_keys
    else:
        dg.file_keys = df_keys

    dbg_cols = ['run', 'cycle', 'unique_key', 'startTime', 'runtime']
    print(dg.file_keys[dbg_cols])

    print('Ready to save.  This will overwrite any existing fileDB.')
    if batch_mode:
        do_save = True
    else:
        ans = input('Save updated fileDB?  (y/n):')
        do_save = ans.lower() == 'y'

    if do_save:
        # dg.file_keys is saved as merged above; it must not be replaced by
        # df_keys here, which in update mode holds only the re-scanned rows.
        dg.save_df(dg.config['fileDB'])
        print('fileDB updated.')
def plot_dsp(dg):
    """
    Create a DataFrame from the dsp files and make some 1d and 2d diagnostic
    plots:
      1. 1d 'trapEmax' spectrum        -> ./plots/risingedge_1dspec.pdf
      2. 2d rise time vs. time         -> ./plots/risingedge_2dRisetime.png
      3. first 10 waveforms of the energy selection (shown on screen)

    for reference, current 12/30/20 dsp parameters:
      ['channel', 'timestamp', 'energy', 'bl', 'bl_sig', 'trapEftp',
       'trapEmax', 'triE', 'tp_max', 'tp_0', 'tp_10', 'tp_50', 'tp_80',
       'tp_90', 'A_10', 'AoE', 'dcr_raw', 'dcr_max', 'dcr_ftp', 'hf_max']
    columns added by this code:
      ['run', 'cycle', 'ts_sec', 'ts_glo']
    """
    sto = lh5.Store()
    dsp_name = 'ORSIS3302DecoderForEnergy/dsp'
    wfs_name = 'ORSIS3302DecoderForEnergy/raw/waveform'

    def unroll_timestamps(ts, t_max):
        """ undo timestamp rollover: offset every block between wraps """
        steps = np.insert(np.diff(ts), 0, 0)
        wraps = np.where(steps < 0)[0]
        block_ends = np.append(wraps, len(ts))

        blocks, t_roll = [], 0
        for i, ihi in enumerate(block_ends):
            ilo = wraps[i - 1] if i > 0 else 0
            t_last = ts[ilo - 1]
            t_diff = t_max - t_last
            blocks.append(ts[ilo:ihi] + t_roll)
            t_roll += t_last + t_diff
        return np.concatenate(blocks)

    def get_dsp_dfs(df_row):
        """ grab the dsp df of one cycle, add some columns, and return it """
        dsp_files = dg.lh5_dir + '/' + df_row.dsp_path + '/' + df_row.dsp_file
        if len(dsp_files) > 1:
            print('Error, this part is supposed to only load individual files')
            exit()
        f_dsp = dsp_files.iloc[0]

        # grab the dataframe and add some columns
        tb, nr = sto.read_object(dsp_name, f_dsp)
        df = tb.get_dataframe()
        df['run'] = df_row.run.iloc[0]
        df['cycle'] = df_row.cycle.iloc[0]

        # need global timestamp. just calculate here instead of making hit files
        clock = 100e6  # 100 MHz
        UINT_MAX = 4294967295  # (0xffffffff)
        df['ts_sec'] = unroll_timestamps(df['timestamp'].values / clock,
                                         UINT_MAX / clock)
        df['ts_glo'] = df['ts_sec'] + df_row.startTime.iloc[0]
        return df

    # create the multi-cycle DataFrame
    df_dsp = dg.fileDB.groupby(['cycle']).apply(get_dsp_dfs)
    df_dsp.reset_index(inplace=True, drop=True)  # << VERY IMPORTANT!
    print(df_dsp)
    print(df_dsp.columns)

    # 1. 1d energy histogram -- use this to select energy range of interest
    et = 'trapEmax'
    elo, ehi, epb = 0, 10000, 10
    counts, edges, _ = pgh.get_hist(df_dsp.trapEmax.values,
                                    range=(elo, ehi), dx=epb)
    plt.semilogy(edges[1:], counts, ds='steps', c='b', lw=1)
    plt.xlabel(et, ha='right', x=1)
    plt.ylabel('Counts', ha='right', y=1)
    plt.savefig('./plots/risingedge_1dspec.pdf')
    plt.cla()

    # 2. 2d histo: show risetime vs. time for wfs in an energy range

    # choose risetime range (usec)
    # rlo, rhi, rpb = 0, 5, 0.1 # run 110 (good)
    rlo, rhi, rpb = 0, 50, 1  # run 111 (bad)

    # select energy range
    elo, ehi, epb = 1500, 1600, 0.5
    df = df_dsp.query(f'trapEmax > {elo} and trapEmax < {ehi}').copy()

    # calculate timestamp range, in minutes after the first event
    t0 = df_dsp.iloc[0]['ts_glo']
    df['ts_adj'] = (df.ts_glo - t0) / 60
    tlo, thi, tpb = 0, df.ts_adj.max(), 1

    # compute t50-100 risetime
    df['rt_us'] = (df.tp_max - df.tp_50) / 1e3  # convert ns to us

    n_bins = [int((thi - tlo) / tpb), int((rhi - rlo) / rpb)]
    plt.hist2d(df['ts_adj'], df['rt_us'], bins=n_bins,
               range=[[tlo, thi], [rlo, rhi]], cmap='jet')
    plt.xlabel('Time (min)', ha='right', x=1)
    plt.ylabel('Rise Time (t50-100), usec', ha='right', y=1)
    plt.savefig('./plots/risingedge_2dRisetime.png', dpi=150)
    plt.cla()

    # 3. 1st 10 wfs from energy region selection (requires raw file)
    # this assumes the first file has 10 events
    db = dg.fileDB.iloc[0]
    f_raw = dg.lh5_dir + '/' + db.raw_path + '/' + db.raw_file
    f_dsp = dg.lh5_dir + '/' + db.dsp_path + '/' + db.dsp_file

    edata = lh5.load_nda([f_dsp], ['trapEmax'], dsp_name)['trapEmax']
    nwfs = 10
    idx_sel = np.where((edata >= elo) & (edata <= ehi))[0][:nwfs]

    # read up to the last selected event and stop
    n_rows = idx_sel[-1] + 1
    tb_wfs, n_wfs = sto.read_object(wfs_name, f_raw, n_rows=n_rows)

    # grab the 2d numpy array of waveforms
    wfs = tb_wfs['values'].nda[idx_sel, :]
    ts = np.arange(0, len(wfs[0, :-2])) / 1e2  # usec

    for wf in wfs:
        plt.plot(ts, wf[:-2], lw=2, alpha=0.5)

    plt.xlabel('Time (us)', ha='right', x=1)
    plt.ylabel('ADC', ha='right', y=1)
    plt.show()
def optimize_trap(dg):
    """
    Grid-search trapezoid filter rise/flat times on events from one peak.

    Generate a file with grid points to search, and events from the target
    peak.  Then run DSP once per grid point on the small in-memory table
    (table-to-table DSP, no file I/O) and fit the peak with a Gaussian plus
    flat background.  The fit results are added to the grid as new columns
    and, if ``write_output`` is set, saved to ``./temp_results.h5``.

    Parameters
    ----------
    dg
        Object providing ``dg.config['lh5_dir']`` and a ``dg.fileDB`` table
        with 'raw_path' and 'raw_file' columns.  Only the first file is used.

    Notes
    -----
    Reads 'opt_trap.json' from the working directory and (re)writes
    './temp_peak.lh5'.  Relies on ``DataFrame.progress_apply`` being
    registered elsewhere (tqdm).
    """
    f_peak = './temp_peak.lh5'  # lh5
    f_results = './temp_results.h5'  # pandas
    grp_data, grp_grid = '/optimize_data', '/optimize_grid'

    # epar, elo, ehi, epb = 'energy', 0, 1e7, 10000 # full range
    epar, elo, ehi, epb = 'energy', 3.88e6, 3.92e6, 500  # K40 peak

    show_movie = True
    write_output = True
    n_rows = None  # default None
    # set False to reuse an existing f_peak instead of rebuilding it
    # (the original toggle was: if not os.path.exists(f_peak))
    recreate = True

    with open('opt_trap.json') as f:
        dsp_config = json.load(f, object_pairs_hook=OrderedDict)

    # files to consider.  fixme: right now only works with one file
    sto = lh5.Store()
    lh5_dir = os.path.expandvars(dg.config['lh5_dir'])
    raw_list = lh5_dir + dg.fileDB['raw_path'] + '/' + dg.fileDB['raw_file']
    f_raw = raw_list.values[0]
    tb_raw = 'ORSIS3302DecoderForEnergy/raw/'

    # set grid parameters
    # TODO: jason's suggestions, knowing the expected shape of the noise
    # curve: try log-spaced rise times (np.linspace in log10(tau), then
    # 10**pwr) instead of this linear grid.
    e_rises = np.arange(1, 12, 1)
    e_flats = np.arange(1, 6, 1)
    # rc_consts = np.arange(54, 154, 10) # changing this here messes up DCR

    # -- create the grid search file the first time --
    # NOTE: this makes a linear grid, and is editable by the arrays above.
    # jason also proposed a more active gradient-descent style search
    # like with Brent's method.
    # (https://en.wikipedia.org/wiki/Brent%27s_method)
    if recreate:
        print('Recreating grid search file')

        # create the grid file
        # NOTE: save it as an lh5 Table just as an example of writing/reading one
        lists = [e_rises, e_flats]  # , rc_consts]
        prod = list(itertools.product(*lists))
        df_grid = pd.DataFrame(prod, columns=['rise', 'flat'])  # ,'rc'])
        # DataFrame.iteritems() was removed in pandas 2.0; items() is the
        # long-standing equivalent.
        lh5_grid = {
            name: lh5.Array(dfcol.values) for name, dfcol in df_grid.items()
        }
        tb_grid = lh5.Table(col_dict=lh5_grid)
        sto.write_object(tb_grid, grp_grid, f_peak)

        # filter events by onboard energy
        ene_raw = sto.read_object(tb_raw + '/' + epar, f_raw).nda
        if n_rows is not None:
            ene_raw = ene_raw[:n_rows]
        idx = np.where((ene_raw > elo) & (ene_raw < ehi))

        # create a filtered table with correct waveform and attrs
        # TODO: move this into a function in lh5.py which takes idx as an input
        tb_data, wf_tb_data = lh5.Table(), lh5.Table()

        # read non-wf cols (lh5 Arrays)
        data_raw = sto.read_object(tb_raw, f_raw, n_rows=n_rows)
        for col in data_raw.keys():
            if col == 'waveform':
                continue
            newcol = lh5.Array(data_raw[col].nda[idx],
                               attrs=data_raw[col].attrs)
            tb_data.add_field(col, newcol)

        # handle waveform column (lh5 Table)
        data_wfs = sto.read_object(tb_raw + '/waveform', f_raw, n_rows=n_rows)
        for col in data_wfs.keys():
            attrs = data_wfs[col].attrs
            if isinstance(data_wfs[col], lh5.ArrayOfEqualSizedArrays):
                # idk why i can't put the filtered array into the constructor
                aoesa = lh5.ArrayOfEqualSizedArrays(attrs=attrs, dims=[1, 1])
                aoesa.nda = data_wfs[col].nda[idx]
                newcol = aoesa
            else:
                newcol = lh5.Array(data_wfs[col].nda[idx], attrs=attrs)
            wf_tb_data.add_field(col, newcol)

        tb_data.add_field('waveform', wf_tb_data)
        tb_data.attrs = data_raw.attrs
        sto.write_object(tb_data, grp_data, f_peak)
    else:
        print('Loading peak file. groups:', sto.ls(f_peak))
        tb_grid = sto.read_object(grp_grid, f_peak)
        tb_data = sto.read_object(grp_data, f_peak)  # filtered file
        # tb_data = sto.read_object(tb_raw, f_raw) # orig file
        df_grid = tb_grid.get_dataframe()

    # check shape of input table
    print('input table attributes:')
    for key in tb_data.keys():
        obj = tb_data[key]
        if isinstance(obj, lh5.Table):
            for key2 in obj.keys():
                obj2 = obj[key2]
                print(' ', key, key2, obj2.nda.shape, obj2.attrs)
        else:
            print(' ', key, obj.nda.shape, obj.attrs)

    # clear new columns if they exist.  NOTE: the fourth output column keeps
    # its historical name 'xF_err'; it holds the FWHM uncertainty returned by
    # run_dsp under the key 'fwhm_err' (the assignment below is positional).
    new_cols = ['e_fit', 'fwhm_fit', 'rchisq', 'xF_err', 'fwhm_ovr_mean']
    for col in new_cols:
        if col in df_grid.columns:
            df_grid.drop(col, axis=1, inplace=True)

    t_start = time.time()

    def run_dsp(dfrow):
        """
        Run DSP on the peak table for one (rise, flat) grid point and fit
        the resulting peak.  Returns a pd.Series of fit results.

        alternate idea: generate a long list of processors with different names
        """
        # adjust dsp config dictionary (mutates the shared dsp_config)
        rise, flat = dfrow
        # dsp_config['processors']['wf_pz']['defaults']['db.pz.tau'] = f'{tau}*us'
        dsp_config['processors']['wf_trap']['args'][1] = f'{rise}*us'
        dsp_config['processors']['wf_trap']['args'][2] = f'{flat}*us'

        # run dsp
        pc, tb_out = build_processing_chain(tb_data, dsp_config, verbosity=0)
        pc.execute()

        # analyze peak
        e_peak = 1460.
        etype = 'trapEmax'
        elo, ehi, epb = 4000, 4500, 3  # the peak moves around a bunch
        energy = tb_out[etype].nda

        # get histogram
        hE, bins, vE = pgh.get_hist(energy, range=(elo, ehi), dx=epb)
        xE = bins[1:]

        # should I center the max at 1460?

        # simple numerical width
        i_max = np.argmax(hE)
        h_max = hE[i_max]
        upr_half = xE[(xE > xE[i_max]) & (hE <= h_max / 2)][0]
        bot_half = xE[(xE < xE[i_max]) & (hE >= h_max / 2)][0]
        fwhm = upr_half - bot_half
        sig = fwhm / 2.355

        # fit to gaussian: amp, mu, sig, bkg
        fit_func = pgf.gauss_bkg
        amp = h_max * fwhm
        bg0 = np.mean(hE[:20])
        x0 = [amp, xE[i_max], sig, bg0]
        xF, xF_cov = pgf.fit_hist(fit_func, hE, bins, var=vE, guess=x0)

        # collect results.  Parameter order follows x0: [amp, mu, sig, bkg],
        # so the centroid is xF[1] and the width is xF[2].
        xF_err = np.sqrt(np.diag(xF_cov))
        e_fit = xF[1]
        fwhm_fit = xF[2] * 2.355 * e_peak / e_fit
        fwhm_err = xF_err[2] * 2.355 * e_peak / e_fit

        # pearson-style chi-square per bin
        model = fit_func(xE, *xF)
        rchisq = np.sum(np.abs((model - hE)**2 / model)) / len(hE)
        fwhm_ovr_mean = fwhm_fit / e_fit

        if show_movie:
            plt.plot(xE, hE, ds='steps', c='b', lw=2,
                     label=f'{etype} {rise}--{flat}')

            # peak shape
            plt.plot(xE, fit_func(xE, *x0), '-', c='orange', alpha=0.5,
                     label='init. guess')
            plt.plot(xE, fit_func(xE, *xF), '-r', alpha=0.8,
                     label='peakshape fit')
            plt.plot(np.nan, np.nan, '-w',
                     label=f'mu={e_fit:.1f}, fwhm={fwhm_fit:.2f}')

            plt.xlabel(etype, ha='right', x=1)
            plt.ylabel('Counts', ha='right', y=1)
            plt.legend(loc=2)

            # show a little movie
            plt.show(block=False)
            plt.pause(0.01)
            plt.cla()

        # return results
        return pd.Series({
            'e_fit': e_fit,
            'fwhm_fit': fwhm_fit,
            'rchisq': rchisq,
            'fwhm_err': fwhm_err,
            'fwhm_ovr_mean': fwhm_ovr_mean
        })

    # df_grid=df_grid[:10]
    df_tmp = df_grid.progress_apply(run_dsp, axis=1)
    df_grid[new_cols] = df_tmp
    # print(df_grid)

    if show_movie:
        plt.close()

    print('elapsed:', time.time() - t_start)
    if write_output:
        df_grid.to_hdf(f_results, key=grp_grid)
        print(f"Wrote output file: {f_results}")
const=1, dest='writemode', help= "Update existing file with new values. Useful with the --outpar option. Mutually exclusive with --recreate and --append THIS IS NOT IMPLEMENTED YET!" ) arg('-a', '--append', action='store_const', const=1, dest='writemode', help= "Append values to existing file. Mutually exclusive with --recreate and --update THIS IS NOT IMPLEMENTED YET!" ) args = parser.parse_args() lh5_in = lh5.Store() groups = lh5_in.ls(args.file, args.group) out = args.output if out is None: out = 't2_' + args.file[args.file.rfind('/') + 1:].replace('t1_', '') for group in groups: print("Processing: " + args.file + '/' + group) #data = lh5_in.read_object(args.group, args.file, 0, args.chunk) data = lh5_in.read_object(group, args.file) wf_in = data['waveform']['values'].nda chan_in = data['channel'].nda dt = data['waveform']['dt'].nda[0] * unit_parser.parse_unit( data['waveform']['dt'].attrs['units'])
def get_resolution():
    """
    Fit the peak near 1460.8 in the calibrated hit spectrum and save a plot.

    Loads the hit table from a hard-coded LH5 file, histograms 'trapE_cal'
    between 1445 and 1475, estimates the width numerically, fits
    pgf.radford_peak, prints the numerical and fitted FWHM, and writes
    './plots/resolution_1460_<etype>.pdf'.
    """
    # load hit file
    hit_path = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'
    table = 'ORSIS3302DecoderForEnergy/raw'
    store = lh5.Store()
    store.ls(hit_path)  # group listing (result not used)
    df_hit = store.read_object(table, hit_path).get_dataframe()

    # load parameters
    e_peak = 1460.8
    etype = 'trapE_cal'
    # etype = 'energy_cal'
    e_range = (1445, 1475)
    bin_width = 0.2

    # get histogram
    counts, edges, variances = pgh.get_hist(df_hit[etype], range=e_range,
                                            dx=bin_width)
    centers = edges[1:]

    # simple numerical width: first bin at/below half-max above the peak,
    # first bin at/above half-max below it
    peak_bin = np.argmax(counts)
    peak_height = counts[peak_bin]
    half = peak_height / 2
    above = (centers > centers[peak_bin]) & (counts <= half)
    below = (centers < centers[peak_bin]) & (counts >= half)
    fwhm = centers[above][0] - centers[below][0]
    sigma0 = fwhm / 2.355

    # fit to radford peak: mu, sigma, hstep, htail, tau, bg0, amp
    fit_func = pgf.radford_peak
    guess = [
        centers[peak_bin],         # mu
        sigma0,                    # sigma
        0.001,                     # hstep: fraction that the step contributes
        0.1,                       # htail
        10,                        # tau
        np.mean(counts[:20]),      # bg0
        peak_height * fwhm,        # amp
    ]
    xF, xF_cov = pgf.fit_hist(fit_func, counts, edges, var=variances,
                              guess=guess)
    xF_err = np.sqrt(np.diag(xF_cov))

    # per-bin chi-square terms, evaluated one bin at a time
    chisq = []
    for x_i, h_i in zip(centers, counts):
        expected = fit_func(x_i, *xF)
        chisq.append(abs((expected - h_i)**2 / expected))

    # collect results (for output, should use a dict or DataFrame)
    e_fit = xF[0]
    fwhm_fit = xF[1] * 2.355  # * e_peak / e_fit
    print(fwhm, fwhm_fit)
    fwhmerr = xF_err[1] * 2.355 * e_peak / e_fit
    rchisq = sum(np.array(chisq) / len(counts))

    # plotting: data, initial guess, fit result, and a text-only legend entry
    plt.plot(centers, counts, ds='steps', c='b', lw=2, label=etype)
    plt.plot(centers, fit_func(centers, *guess), '-', c='orange', alpha=0.5,
             label='init. guess')
    plt.plot(centers, fit_func(centers, *xF), '-r', alpha=0.8,
             label='peakshape fit')
    plt.plot(np.nan, np.nan, '-w',
             label=f'mu={e_fit:.1f}, fwhm={fwhm_fit:.2f}')

    plt.xlabel(etype, ha='right', x=1)
    plt.ylabel('Counts', ha='right', y=1)
    plt.legend(loc=2)
    plt.tight_layout()
    # plt.show()
    plt.savefig(f'./plots/resolution_1460_{etype}.pdf')
    plt.cla()
def pole_zero(dg):
    """
    Estimate the RC decay constant from a few waveforms in the 1455-1465
    window of 'trapEmax_cal', using a two-point slope of log(waveform).

    Parameters
    ----------
    dg
        Object providing ``dg.config['lh5_dir']`` and a ``dg.file_keys``
        table with 'hit_path', 'hit_file', 'raw_path', 'raw_file' columns.
        Only the first raw file is read.

    Prints the mean and standard deviation of the per-waveform constant.
    This is a simple test; the final version should become a dsp processor.
    """
    # selection settings
    etype = 'trapEmax_cal'
    nwfs = 20
    elo, ehi = 1455, 1465

    # load hit data.  The column used for the selection must be the one that
    # is loaded (loading only 'trapEmax' made df_hit[etype] a KeyError).
    lh5_dir = os.path.expandvars(dg.config['lh5_dir'])
    hit_list = lh5_dir + dg.file_keys['hit_path'] + '/' + dg.file_keys[
        'hit_file']
    df_hit = lh5.load_dfs(hit_list, [etype], 'ORSIS3302DecoderForEnergy/hit')
    df_hit.reset_index(inplace=True)

    # select waveforms
    in_window = (df_hit[etype] >= elo) & (df_hit[etype] <= ehi)
    idx = df_hit[etype].loc[in_window].index[:nwfs]
    if len(idx) == 0:
        print(f'no events with {elo} <= {etype} <= {ehi}, nothing to do')
        return

    # load waveforms: read only up to the last selected event
    raw_store = lh5.Store()
    tb_name = 'ORSIS3302DecoderForEnergy/raw'
    raw_list = lh5_dir + dg.file_keys['raw_path'] + '/' + dg.file_keys[
        'raw_file']
    f_raw = raw_list.values[0]  # fixme, only works for one file rn
    data_raw = raw_store.read_object(tb_name, f_raw, start_row=0,
                                     n_rows=idx[-1] + 1)
    wfs_all = data_raw['waveform']['values'].nda
    wfs = wfs_all[idx.values, :]
    df_wfs = pd.DataFrame(wfs)

    # simple test function to compute pole-zero constant for a few wfs.
    # the final one should become a dsp processor
    istart = 5000  # first sample of the decaying tail
    iwinlo, iwinhi, iwid = 500, 2500, 20  # two-point slope windows

    # NOTE(review): the time axis is sample index / 1e3 and labelled usec;
    # elsewhere this digitizer is treated as 100 MHz (index / 1e2 usec).
    # Kept as-is to preserve the printed numbers -- confirm the intended unit.
    ts = np.arange(0, df_wfs.shape[1] - 1 - istart, 1) / 1e3  # usec

    def get_rc(row):
        """Two-point slope of log(wf) between two averaging windows."""
        samples = row.values
        lo, hi = istart + iwinlo, istart + iwinhi
        win1 = np.mean(np.log(samples[lo:lo + iwid]))
        win2 = np.mean(np.log(samples[hi:hi + iwid]))
        slope = (win2 - win1) / (ts[iwinhi] - ts[iwinlo])
        # NOTE(review): for a decaying tail the slope is negative, so this
        # constant comes out negative -- confirm the intended sign.
        return 1 / slope

    res = df_wfs.apply(get_rc, axis=1)
    tau_avg, tau_std = res.mean(), res.std()
    print(f'average RC decay constant: {tau_avg:.2f} pm {tau_std:.2f}')
) arg('-B', '--block', default=16, type=int, help="Number of waveforms to process simultaneously. Default is 8") arg('-C', '--chunk', default=3200, type=int, help="Number of waveforms to read from disk at a time. Default is 256.") args = parser.parse_args() lh5_st = lh5.Store() chans = lh5_st.ls(args.file, args.channel) rc_range = tuple([round(float(tc), 1) for tc in args.range.split('-')]) if len(rc_range) != 2: print("Range must have exactly two values") n_bins = int((rc_range[1] - rc_range[0]) / 0.1) rc_const_lib = {} np.seterr(all='ignore') for chan_name in chans: group = chan_name + '/raw' print("Processing: " + args.file + '/' + group)
def raw_to_dsp(ds, overwrite=False, nevt=None, test=False, verbose=2, block=8,
               group='daqdata'):
    """
    Run raw_to_dsp on a set of runs.
    [raw file] ---> [dsp_run{}.lh5] (digital signal processing results)

    Parameters
    ----------
    ds
        Object providing ``ds.runs`` and ``ds.paths[run]`` with 'raw_path'
        and 'dsp_path' entries.
    overwrite : bool
        If False, runs that already have a dsp path are skipped.
    nevt, group
        Currently unused.
    test : bool
        Dry run: print the raw file that would be processed and move on.
    verbose : int
        Passed to ProcessingChain as ``verbosity``.
    block : int
        Passed to ProcessingChain as ``block_width``.
    """
    for run in ds.runs:
        raw_file = ds.paths[run]["raw_path"]
        dsp_file = ds.paths[run]["dsp_path"]
        if dsp_file is not None and overwrite is False:
            continue

        if dsp_file is None:
            # declare new file name
            dsp_file = raw_file.replace('raw', 'dsp')

        if test:
            print("test mode (dry run), processing raw file:", raw_file)
            continue

        # new LH5 version
        lh5_in = lh5.Store()
        data = lh5_in.read_object("/ORSIS3302DecoderForEnergy", raw_file)

        wf_in = data['waveform']['values'].nda
        dt = data['waveform']['dt'].nda[0] * unit_parser.parse_unit(
            data['waveform']['dt'].attrs['units'])

        # Parameters for DCR calculation
        dcr_trap_int = 200
        dcr_trap_flat = 1000
        dcr_trap_startSample = 1200

        # Set up processing chain
        proc = ProcessingChain(block_width=block, clock_unit=dt,
                               verbosity=verbose)
        proc.add_input_buffer("wf", wf_in, dtype='float32')

        # baseline, pole-zero correction
        proc.add_processor(mean_stdev, "wf[0:1000]", "bl", "bl_sig")
        proc.add_processor(np.subtract, "wf", "bl", "wf_blsub")
        proc.add_processor(pole_zero, "wf_blsub", 70 * us, "wf_pz")

        # asymmetric trapezoid energy
        proc.add_processor(asymTrapFilter, "wf_pz", 10 * us, 5 * us, 10 * us,
                           "wf_atrap")
        proc.add_processor(np.amax, "wf_atrap", 1, "atrapE",
                           signature='(n),()->()', types=['fi->f'])
        # proc.add_processor(np.divide, "atrapmax", 10*us, "atrapE")

        # symmetric trapezoid energy
        proc.add_processor(trap_norm, "wf_pz", 10 * us, 5 * us, "wf_trap")
        proc.add_processor(np.amax, "wf_trap", 1, "trapE",
                           signature='(n),()->()', types=['fi->f'])

        # current amplitude and A/E
        proc.add_processor(avg_current, "wf_pz", 10, "curr")
        proc.add_processor(np.amax, "curr", 1, "A_10",
                           signature='(n),()->()', types=['fi->f'])
        proc.add_processor(np.divide, "A_10", "trapE", "AoE")

        # DCR
        proc.add_processor(trap_pickoff, "wf_pz", dcr_trap_int, dcr_trap_flat,
                           dcr_trap_startSample, "dcr")

        # Set up the LH5 output: (output column, processing-chain buffer)
        lh5_out = lh5.Table(size=proc._buffer_len)
        out_fields = [
            ("trapE", "trapE"),
            ("bl", "bl"),
            ("bl_sig", "bl_sig"),
            ("A", "A_10"),
            ("AoE", "AoE"),
            ("dcr", "dcr"),
        ]
        for col, buf in out_fields:
            lh5_out.add_field(
                col,
                lh5.Array(proc.get_output_buffer(buf),
                          attrs={"units": "ADC"}))

        print("Processing:\n", proc)
        proc.execute()

        print("Writing to: ", dsp_file)
        # the store created above does the writing; the previous name
        # 'f_lh5' is never defined in this function
        lh5_in.write_object(lh5_out, "data", dsp_file)
def dsp_to_hit_cage(f_dsp, f_hit, dg, n_max=None, verbose=False, t_start=None):
    """
    non-general placeholder for creating a pygama 'hit' file.  uses pandas.
    for every file, apply:
    - energy calibration (peakfit results)
    - timestamp correction
    for a more general dsp_to_hit, maybe each function could be given in terms
    of an 'apply' on a dsp dataframe ...

    Parameters
    ----------
    f_dsp : str
        Input dsp file; the table ``dg.config['input_table']`` is read.
    f_hit : str
        Output hit file.  Removed first if it already exists.
    dg
        Object providing ``dg.config`` ('input_table', 'ecaldb') and
        ``dg.file_keys`` with a 'run' column (must contain a single run).
    n_max, verbose
        Currently unused.
    t_start : float, optional
        If given, added to 'ts_sec' to produce the 'ts_glo' column.

    TODO: create entry config['rawe'] with list of energy pars to calibrate,
    as in energy_cal.py
    """
    rawe = ['trapEmax']

    # create initial 'hit' DataFrame from dsp data
    sto = lh5.Store()
    data = sto.read_object(dg.config['input_table'], f_dsp)
    df_hit = data.get_dataframe()

    # 1. get energy calibration for this run from peakfit
    cal_db = db.TinyDB(storage=MemoryStorage)
    with open(dg.config['ecaldb']) as f:
        raw_db = json.load(f)
        cal_db.storage.write(raw_db)
    runs = dg.file_keys.run.unique()
    if len(runs) > 1:
        print("sorry, I can't do combined runs yet")
        exit()
    run = runs[0]
    for etype in rawe:
        tb = cal_db.table(f'peakfit_{etype}').all()
        df_cal = pd.DataFrame(tb)
        df_cal['run'] = df_cal['run'].astype(int)
        df_run = df_cal.loc[df_cal.run == run]
        cal_pars = df_run.iloc[0][['cal0', 'cal1', 'cal2']]
        pol = np.poly1d(cal_pars)  # handy numpy polynomial object
        df_hit[f'{etype}_cal'] = pol(df_hit[f'{etype}'])

    # 2. compute timestamp rollover correction (specific to struck 3302)
    clock = 100e6  # 100 MHz
    UINT_MAX = 4294967295  # (0xffffffff)
    t_max = UINT_MAX / clock
    ts = df_hit['timestamp'].values / clock
    # every backwards step marks a counter wrap; each event is shifted by
    # t_max times the number of wraps seen so far.  (The former per-block
    # loop added exactly this offset, but indexed ts[-1] on its first pass
    # and failed on an empty table.)
    wrapped = np.zeros(len(ts), dtype=bool)
    wrapped[1:] = ts[1:] < ts[:-1]
    df_hit['ts_sec'] = ts + t_max * np.cumsum(wrapped)

    # 3. compute global timestamp
    if t_start is not None:
        df_hit['ts_glo'] = df_hit['ts_sec'] + t_start

    # write to LH5 file
    if os.path.exists(f_hit):
        os.remove(f_hit)
    tb_name = dg.config['input_table'].replace('dsp', 'hit')
    tb_lh5 = lh5.Table(size=len(df_hit))
    for col in df_hit.columns:
        tb_lh5.add_field(col,
                         lh5.Array(df_hit[col].values, attrs={'units': ''}))
        print(col)
    print(f'Writing table: {tb_name} in file:\n {f_hit}')
    sto.write_object(tb_lh5, tb_name, f_hit)
def show_wfs():
    """
    Plot low-energy waveforms selected from two 'trapE_cal' windows.

    Uses the hit file to pick up to 20 events in a "noise" window (25-30)
    and a "phys" window (40-45), reads the raw waveforms up to the last
    selected event, and saves the noise waveforms to './plots/noise_wfs.png'.
    """
    raw_path = '/Users/wisecg/Data/OPPI/raw/oppi_run0_cyc2027_raw.lh5'
    hit_path = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'

    # use the hit file to select events
    table = 'ORSIS3302DecoderForEnergy/raw'
    df_hit = lh5.Store().read_object(table, hit_path).get_dataframe()

    # settings
    max_wfs = 20
    elo, ehi, epb = 0, 100, 0.2  # only used by the (disabled) diagnostic plot
    # etype = 'energy_cal' # noise stops @ 18 keV
    # noise_lo, noise_hi, phys_lo, phys_hi = 10, 15, 25, 30
    etype = 'trapE_cal'  # noise stops @ 35 keV
    noise_win = (25, 30)
    phys_win = (40, 45)

    def first_in_window(lo, hi):
        # index labels of the first max_wfs events strictly inside (lo, hi)
        ene = df_hit[etype]
        return ene.loc[(ene > lo) & (ene < hi)].index[:max_wfs]

    # select noise and phys events
    idx_noise = first_in_window(*noise_win)
    idx_phys = first_in_window(*phys_win)

    # read raw waveforms only as far as the last event needed
    last_row = max(idx_noise[-1], idx_phys[-1])
    raw_data = lh5.Store().read_object(table, raw_path, start_row=0,
                                       n_rows=last_row + 1)
    all_wfs = raw_data['waveform']['values'].nda
    noise_wfs = all_wfs[idx_noise.values, :]
    phys_wfs = all_wfs[idx_phys.values, :]  # not plotted at the moment

    ticks = np.arange(0, noise_wfs.shape[1], 1)

    # noise wfs
    for wf in noise_wfs:
        plt.plot(ticks, wf, lw=1)

    # # phys wfs
    # for wf in phys_wfs:
    #     plt.plot(ticks, wf, lw=1)

    plt.xlabel('time (clock ticks)', ha='right', x=1)
    plt.ylabel('ADC', ha='right', y=1)
    # plt.show()
    plt.savefig('./plots/noise_wfs.png', dpi=300)
    plt.cla()
def get_runtimes(dg):
    """
    Requires DSP files.  compute runtime (# minutes in run) and stopTime
    (unix timestamp) using the timestamps in the dsp file.

    Parameters
    ----------
    dg
        Object providing ``dg.config['fileDB']`` (path of the HDF5 file key
        table, which must contain 'dsp_path', 'dsp_file' and 'startTime')
        and ``dg.lh5_dir``.

    The 'stopTime' and 'runtime' columns are (re)computed for every row and
    the table is written back to the same file under key 'file_keys'.
    Relies on ``DataFrame.progress_apply`` being registered elsewhere (tqdm).
    """
    write_output = True

    df_keys = pd.read_hdf(dg.config['fileDB'])

    # clear new columns if they exist
    new_cols = ['stopTime', 'runtime']
    stale = [col for col in new_cols if col in df_keys.columns]
    if stale:
        df_keys.drop(stale, axis=1, inplace=True)

    sto = lh5.Store()

    def get_runtime(df_row):
        """Return stopTime and runtime for one row of the file key table."""
        # load timestamps from dsp file
        f_dsp = dg.lh5_dir + df_row['dsp_path'] + '/' + df_row['dsp_file']
        data = sto.read_object('ORSIS3302DecoderForEnergy/dsp', f_dsp)

        # correct for timestamp rollover
        clock = 100e6  # 100 MHz
        UINT_MAX = 4294967295  # (0xffffffff)
        t_max = UINT_MAX / clock

        ts = data['timestamp'].nda / clock  # converts to float
        # every backwards step marks a counter wrap; shift each event by
        # t_max times the number of wraps seen so far (same offset the
        # former per-block loop accumulated)
        wrapped = np.zeros(len(ts), dtype=bool)
        wrapped[1:] = ts[1:] < ts[:-1]
        ts_corr = ts + t_max * np.cumsum(wrapped)

        # calculate runtime and unix stopTime
        rt = ts_corr[-1] / 60  # minutes
        st = int(np.ceil(df_row['startTime'] + rt * 60))

        return pd.Series({'stopTime': st, 'runtime': rt})

    df_tmp = df_keys.progress_apply(get_runtime, axis=1)
    df_keys[new_cols] = df_tmp
    print(df_keys)

    if write_output:
        df_keys.to_hdf(dg.config['fileDB'], key='file_keys')
        print(f"Wrote output file: {dg.config['fileDB']}")
def data_cleaning():
    """
    using parameters in the hit file, plot 1d and 2d spectra to find cut
    values.

    columns in file:
        ['trapE', 'bl', 'bl_sig', 'A_10', 'AoE', 'packet_id', 'ievt',
        'energy', 'energy_first', 'timestamp', 'crate', 'card', 'channel',
        'energy_cal', 'trapE_cal']
    note, 'energy_first' from first value of energy gate.

    Each plot is saved under './plots/'.  Plots are numbered; only those
    whose number is >= ``i_plot`` are produced.
    """
    i_plot = 3  # run all plots after this number

    f_hit = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5'
    tb_name = 'ORSIS3302DecoderForEnergy/raw'
    hit_store = lh5.Store()
    data = hit_store.read_object(tb_name, f_hit)
    df_hit = data.get_dataframe()

    # correct energy_first to allow negative values.  Values above 4e9 are
    # treated as wrapped 32-bit negatives, so the shift is 2**32 (not
    # UINT_MAX, which is off by one).  The corrected array is assigned back
    # explicitly instead of writing through `.values`, which only works when
    # pandas happens to return a writable view.
    efirst = df_hit['energy_first'].to_numpy(dtype=np.int64)
    df_hit['energy_first'] = np.where(efirst > 4e9, efirst - 2**32, efirst)
    # print(df_hit[['energy','energy_first','bl']])

    if i_plot <= 0:
        # bl vs energy
        elo, ehi, epb = 0, 250, 1
        blo, bhi, bpb = 54700, 61400, 100
        nbx = int((ehi - elo) / epb)
        nby = int((bhi - blo) / bpb)

        h = plt.hist2d(df_hit['trapE_cal'], df_hit['bl'], bins=[nbx, nby],
                       range=[[elo, ehi], [blo, bhi]], cmap='jet')

        cb = plt.colorbar(h[3], ax=plt.gca())
        plt.xlabel('trapE_cal', ha='right', x=1)
        plt.ylabel('bl', ha='right', y=1)
        plt.tight_layout()
        # plt.show()
        plt.savefig('./plots/bl_vs_e.png', dpi=300)
        cb.remove()
        plt.cla()

        # make a formal baseline cut from 1d histogram
        hE, bins, vE = pgh.get_hist(df_hit['bl'], range=(blo, bhi), dx=bpb)
        xE = bins[1:]
        plt.semilogy(xE, hE, c='b', ds='steps')

        bl_cut_lo, bl_cut_hi = 57700, 58500
        plt.axvline(bl_cut_lo, c='r', lw=1)
        plt.axvline(bl_cut_hi, c='r', lw=1)

        plt.xlabel('bl', ha='right', x=1)
        plt.ylabel('counts', ha='right', y=1)
        # plt.show()
        plt.savefig('./plots/bl_cut.pdf')
        plt.cla()

    if i_plot <= 1:
        # energy_first vs. E
        flo, fhi, fpb = -565534, 70000, 1000
        elo, ehi, epb = 0, 250, 1

        nbx = int((ehi - elo) / epb)
        nby = int((fhi - flo) / fpb)

        h = plt.hist2d(df_hit['trapE_cal'], df_hit['energy_first'],
                       bins=[nbx, nby], range=[[elo, ehi], [flo, fhi]],
                       cmap='jet', norm=LogNorm())

        cb = plt.colorbar(h[3], ax=plt.gca())
        plt.xlabel('trapE_cal', ha='right', x=1)
        plt.ylabel('energy_first', ha='right', y=1)
        plt.tight_layout()
        # plt.show()
        plt.savefig('./plots/efirst_vs_e.png', dpi=300)
        cb.remove()
        plt.cla()

        # make a formal energy_first cut from 1d histogram
        flo, fhi, fpb = -20000, 20000, 100
        hE, xE, vE = pgh.get_hist(df_hit['energy_first'], range=(flo, fhi),
                                  dx=fpb)
        xE = xE[1:]
        plt.semilogy(xE, hE, c='b', ds='steps')

        ef_cut_lo, ef_cut_hi = -5000, 4000
        plt.axvline(ef_cut_lo, c='r', lw=1)
        plt.axvline(ef_cut_hi, c='r', lw=1)

        plt.xlabel('energy_first', ha='right', x=1)
        plt.ylabel('counts', ha='right', y=1)
        # plt.show()
        plt.savefig('./plots/efirst_cut.pdf')
        plt.cla()

    if i_plot <= 3:
        # trapE_cal - energy_cal vs trapE_cal

        # use baseline cut
        df_cut = df_hit.query('bl > 57700 and bl < 58500').copy()

        # add new diffE column
        df_cut['diffE'] = df_cut['trapE_cal'] - df_cut['energy_cal']

        elo, ehi, epb = 0, 3000, 1
        dlo, dhi, dpb = -10, 10, 0.1

        nbx = int((ehi - elo) / epb)
        nby = int((dhi - dlo) / dpb)

        h = plt.hist2d(df_cut['trapE_cal'], df_cut['diffE'], bins=[nbx, nby],
                       range=[[elo, ehi], [dlo, dhi]], cmap='jet',
                       norm=LogNorm())

        plt.xlabel('trapE_cal', ha='right', x=1)
        plt.ylabel('diffE (trap-onbd)', ha='right', y=1)
        plt.tight_layout()
        # plt.show()
        plt.savefig('./plots/diffE.png', dpi=300)
        plt.cla()

    if i_plot <= 4:
        # A_10/trapE_cal vs trapE_cal (A/E vs E)

        # i doubt we want to introduce a pulse shape cut at this point,
        # since i'm tuning on bkg data and we don't know a priori what (if any)
        # features the Kr waveforms will have.  also, the efficiency as a
        # function of energy would have to be determined, which is hard.
        # so this is just for fun.

        # use baseline cut
        df_cut = df_hit.query('bl > 57700 and bl < 58500').copy()

        # add new A/E column
        df_cut['aoe'] = df_cut['A_10'] / df_cut['trapE_cal']

        # alo, ahi, apb = -1300, 350, 1
        # elo, ehi, epb = 0, 250, 1
        alo, ahi, apb = -0.5, 5, 0.05
        elo, ehi, epb = 0, 50, 0.2

        nbx = int((ehi - elo) / epb)
        nby = int((ahi - alo) / apb)

        h = plt.hist2d(df_cut['trapE_cal'], df_cut['aoe'], bins=[nbx, nby],
                       range=[[elo, ehi], [alo, ahi]], cmap='jet',
                       norm=LogNorm())

        plt.xlabel('trapE_cal', ha='right', x=1)
        plt.ylabel('A/E', ha='right', y=1)
        plt.tight_layout()
        # plt.show()
        plt.savefig('./plots/aoe_vs_e_lowe.png', dpi=300)
        plt.cla()

    if i_plot <= 5:
        # show effect of cuts on energy spectrum

        # baseline cut and efirst cut are very similar
        df_cut = df_hit.query('bl > 57700 and bl < 58500')
        # df_cut = df_hit.query('energy_first > -5000 and energy_first < 4000')

        etype = 'trapE_cal'
        elo, ehi, epb = 0, 250, 0.5

        # no cuts
        h1, x1, v1 = pgh.get_hist(df_hit[etype], range=(elo, ehi), dx=epb)
        x1 = x1[1:]
        plt.plot(x1, h1, c='k', lw=1, ds='steps', label='raw')

        # baseline cut (same binning, so it shares the x axis of the raw hist)
        h2, x2, v2 = pgh.get_hist(df_cut[etype], range=(elo, ehi), dx=epb)
        plt.plot(x1, h2, c='b', lw=1, ds='steps', label='bl cut')

        plt.xlabel(etype, ha='right', x=1)
        plt.ylabel('counts', ha='right', y=1)
        plt.legend()
        # plt.show()
        plt.savefig('./plots/cut_spectrum.pdf')
        plt.cla()
def process_ds(f_grid, f_opt, f_tier1, d_out, efilter):
    """
    process the windowed raw file 'f_tier1' and create the DSP file 'f_opt'

    Parameters
    ----------
    f_grid : str
        HDF5 file holding the parameter grid (read with pd.read_hdf); one
        output column is produced per grid row.
    f_opt : str
        Output LH5 file.  Removed first if it already exists.
    f_tier1 : str
        Input raw file; every top-level group is treated as one detector
        with a 'raw' sub-group.
    d_out : str
        Directory holding '<filter>_results.h5', read only when `efilter`
        contains 'corr'.
    efilter : str
        Name of the energy filter; the branches test for the substrings
        'trapE', 'zacE', 'cuspE' and 'corr'.

    Returns None.  If `efilter` contains 'corr' and the results file of the
    base filter cannot be read, a message is printed and nothing is done.
    """
    print("Grid file:",f_grid)
    df_grid = pd.read_hdf(f_grid)

    if os.path.exists(f_opt):
        os.remove(f_opt)

    if 'corr' in efilter:
        bfilter = efilter.split('corr')[0]
        try:
            df_res = pd.read_hdf(f'{d_out}/{bfilter}_results.h5',key='results')
        except Exception:
            # best effort: no usable results file means the base filter has
            # not been optimized yet.  (Was a bare except, which also caught
            # KeyboardInterrupt / SystemExit.)
            print(bfilter,"not optimized")
            return
        print("Extraction of best parameters for", bfilter)

    lh5_in = lh5.Store()
    t_start = time.time()

    # open raw file; the context manager closes it even if processing fails
    with h5py.File(f_tier1,'r') as f:
        for idx, ged in enumerate(f.keys()):
            if idx == 4:
                diff = time.time() - t_start
                tot = diff/5 * len(df_grid) / 60
                tot -= diff / 60
                print(f"Estimated remaining time: {tot:.2f} mins")

            print("Detector:",ged)
            data = f[ged]['raw']

            # read everything needed into memory
            wf_in = data['waveform']['values'][()]
            dt = data['waveform']['dt'][0] * unit_parser.parse_unit(data['waveform']['dt'].attrs['units'])
            bl_in = data['baseline'][()] #flashcam baseline values

            # Set up DSP processing chain -- very minimal
            block = 8 #waveforms to process simultaneously
            proc = ProcessingChain(block_width=block, clock_unit=dt, verbosity=False)
            proc.add_input_buffer("wf", wf_in, dtype='float32')
            proc.add_input_buffer("bl", bl_in, dtype='float32')
            wsize = wf_in.shape[1]
            dt0 = data['waveform']['dt'][0]*0.001
            #proc.add_processor(mean_stdev, "wf[0:1000]", "bl", "bl_sig")
            proc.add_processor(np.subtract, "wf", "bl", "wf_blsub")

            # one set of processors per grid row
            for i, row in df_grid.iterrows():
                if 'corr' in efilter:
                    ct_const = row
                if 'trapE' in efilter:
                    if 'corr' in efilter:
                        rise, flat, rc = float(df_res['rise'][idx]), float(df_res['flat'][idx]), float(df_res['rc'][idx])
                    else:
                        rise, flat, rc = row
                    proc.add_processor(pole_zero, "wf_blsub", rc*us, "wf_pz")
                    proc.add_processor(trap_norm, "wf_pz", rise*us, flat*us, f"wf_trap_{i}")
                    proc.add_processor(asymTrapFilter, "wf_pz", 0.05*us, 4*us, 4*us, "wf_atrap")
                    proc.add_processor(time_point_thresh, "wf_pz", 0, "tp_0")
                    proc.add_processor(np.amax, f"wf_trap_{i}", 1, f"trapE_{i}", signature='(n),()->()', types=['fi->f'])
                    proc.add_processor(fixed_time_pickoff, f"wf_trap_{i}", f"tp_0+({rise*us}+{flat*us})", f"trapEftp_{i}")
                if 'zacE' in efilter:
                    if 'corr' in efilter:
                        sigma, flat, decay = float(df_res['sigma'][idx]), float(df_res['flat'][idx]), float(df_res['decay'][idx])
                    else:
                        sigma, flat, decay = row
                    proc.add_processor(zac_filter(wsize, sigma/dt0, flat/dt0, decay/dt0),"wf", f"wf_zac_{i}(101, f)")
                    proc.add_processor(np.amax, f"wf_zac_{i}", 1, f"zacE_{i}", signature='(n),()->()', types=['fi->f'])
                if 'cuspE' in efilter:
                    if 'corr' in efilter:
                        sigma, flat, decay = float(df_res['sigma'][idx]), float(df_res['flat'][idx]), float(df_res['decay'][idx])
                    else:
                        sigma, flat, decay = row
                    proc.add_processor(cusp_filter(wsize, sigma/dt0, flat/dt0, decay/dt0),"wf_blsub", f"wf_cusp_{i}(101, f)")
                    proc.add_processor(np.amax, f"wf_cusp_{i}", 1, f"cuspE_{i}", signature='(n),()->()', types=['fi->f'])
                if 'corr' in efilter:
                    proc.add_processor(trap_pickoff, "wf_pz", 1.5*us, 0, "tp_0", "ct_corr")
                    #proc.add_processor(trap_pickoff, "wf_pz", rise*us, flat*us, "tp_0", "ct_corr")
                    proc.add_processor(np.multiply, ct_const, "ct_corr", f"ct_corr_cal_{i}")
                    proc.add_processor(np.add, f"ct_corr_cal_{i}", f"{bfilter}_{i}", f"{efilter}_{i}")

            # Set up the LH5 output: one column per grid row
            lh5_out = lh5.Table(size=proc._buffer_len)
            for i, row in df_grid.iterrows():
                lh5_out.add_field(f"{efilter}_{i}", lh5.Array(proc.get_output_buffer(f"{efilter}_{i}"), attrs={"units":"ADC"}))

            print("Processing:\n",proc)
            proc.execute()

            groupname = ged+"/data"
            print("Writing to: " + f_opt + "/" + groupname)
            lh5_in.write_object(lh5_out, groupname, f_opt)
            print("")

    diff = time.time() - t_start
    print(f"Time to process: {diff:.2f} s")
def main():
    """
    Clone of pygama/apps/raw_to_dsp.py.  Intended for quick prototyping of
    dsp_to_hit processors.  Heavy lifting with many input/output files should
    be moved to a more specialized processing app, with raw_to_dsp and
    dsp_to_hit both moved to functions in pygama.io.

    Command-line arguments:
      file          input LH5 file
      -o/--output   output file name (default: ./d2h_test.lh5)
      -g/--group    group name / wildcard passed to `lh5.Store.ls`

    Prototype limitations (as written here):
      - only the first group returned by `ls` is processed (`groups[:1]`)
      - the ProcessingChain is configured but never executed, and nothing is
        written to the output file; its name is only printed.
    """
    # single source of truth for the default, so the help text can't drift
    default_out = './d2h_test.lh5'

    parser = argparse.ArgumentParser(
        description=
        """Process a 'pygama DSP LH5' file and produce a 'pygama HIT LH5' file."""
    )
    parser.add_argument('file', help="Input (dsp) LH5 file.")
    parser.add_argument(
        '-o',
        '--output',
        help=f"Name of output file. By default, output to {default_out}.")
    parser.add_argument(
        '-g',
        '--group',
        default='',
        help=
        "Name of group in LH5 file. By default process all base groups. Supports wildcards."
    )
    args = parser.parse_args()

    # debugging hint: h5py's `File.visititems` with a callback that prints
    # `obj.attrs` is a quick way to dump the groups/attributes of the input.

    lh5_in = lh5.Store()
    groups = lh5_in.ls(args.file, args.group)

    out = args.output if args.output is not None else default_out
    print('output file:', out)

    # prototype: only look at the first matching group
    for group in groups[:1]:
        print(group)
        print("Processing: " + args.file + '/' + group)

        data = lh5_in.read_object(group + '/raw', args.file)

        # waveform samples; only needed by the disabled "wf" input buffer below
        wf_in = data['waveform']['values'].nda

        # clock unit for the chain: first entry of the 'dt' array, scaled by
        # the unit parsed from that dataset's 'units' attribute
        dt = data['waveform']['dt'].nda[0] * unit_parser.parse_unit(
            data['waveform']['dt'].attrs['units'])

        ene_in = data['energy'].nda

        n_block = 8
        verbose = 1

        proc = ProcessingChain(block_width=n_block,
                               clock_unit=dt,
                               verbosity=verbose)
        # proc.add_input_buffer("wf", wf_in, dtype='float32')
        proc.add_input_buffer("ene_in", ene_in, dtype='uint16')
        proc.add_processor(energy_cal, "ene_in")
def get_superpulses(dfp, dg, f_super):
    """
    Calculate average waveforms ("superpulses") for each set of pulser data,
    and save an output file with the superpulses for further analysis.

    Parameters
    ----------
    dfp : pandas.DataFrame
        One row per run.  Columns read here: 'E_keV', 'runtime', 'V_pulser',
        'cycle', 'dsp_path', 'dsp_file', 'raw_path', 'raw_file'.
        A 'superpulse' column is added to this frame in place.
        NOTE(review): 'runtime' is multiplied by 60 to get seconds, so it is
        presumably given in minutes -- confirm against the caller.
    dg : object
        Only `dg.lh5_dir` (base directory of the LH5 files) is used.
    f_super : str
        Output file name handed to `DataFrame.to_hdf` (key 'superpulses').

    Returns
    -------
    None.  Results are written to `f_super`; when plotting is enabled, one
    PNG per pulser run is also written to ./plots/.  Rows that are skipped
    (background run with E_keV == 0, or no usable waveforms) get an empty
    list in the 'superpulse' column.
    """
    # find this with the show_spectra function above
    # ecal = 1460.8 / 2.005e6 # TODO: find the const for oct 2020
    ecal = 1460.8 / 2.005e6  # works for pulser dataset 2 (dec 2020)

    # more settings
    show_plots = True  # default True
    write_output = True
    nwfs = 1000  # limit number to go fast. 1000 is enough for a good measurement
    tp_align = 0.5  # pct timepoint to align wfs at
    e_window = 10  # plot (in keV) this window around each pulser peak
    n_pre, n_post = 50, 100  # num samples before/after tp_align
    bl_thresh = 10  # allowable baseline ADC deviation

    dsp_name = 'ORSIS3302DecoderForEnergy/dsp'
    raw_name = 'ORSIS3302DecoderForEnergy/raw/waveform'
    sto = lh5.Store()
    t_start = time.time()

    def analyze_pulser_run(df_row):
        """
        Build the superpulse for one row of dfp.  Returns the normalized
        superpulse (1d array of n_pre + n_post samples), or [] if the row
        is skipped.
        """
        epk, rt, vp, cyc = df_row[['E_keV', 'runtime', 'V_pulser', 'cycle']]
        rt *= 60  # sec
        if epk == 0:
            return []  # skip the bkg run

        # load pulser energies
        f_dsp = dg.lh5_dir + '/' + df_row.dsp_path + '/' + df_row.dsp_file
        pdata = lh5.load_nda([f_dsp], ['energy'], dsp_name)['energy'] * ecal

        # auto-narrow the window around the max pulser peak in two steps
        elo, ehi, epb = epk - 50, epk + 50, 0.5
        pdata_all = pdata[(pdata > elo) & (pdata < ehi)]
        hp, bp, _ = pgh.get_hist(pdata_all, range=(elo, ehi), dx=epb)
        pctr = bp[np.argmax(hp)]

        plo, phi, ppb = pctr - e_window, pctr + e_window, 0.1
        pdata_pk = pdata[(pdata > plo) & (pdata < phi)]
        hp, bp, _ = pgh.get_hist(pdata_pk, range=(plo, phi), dx=ppb)
        hp_rt = np.divide(hp, rt)
        hp_var = np.sqrt(np.divide(hp, rt))

        # initial guess for the gaussian fit: walk out from the peak to the
        # first bins at or below half maximum on either side
        ibin_bkg = 50
        bkg0 = np.mean(hp_rt[:ibin_bkg])
        b, h = bp[1:], hp_rt
        imax = np.argmax(h)
        half_max = np.amax(h) / 2
        upr_half = b[(b > b[imax]) & (h <= half_max)][0]
        bot_half = b[(b < b[imax]) & (h <= half_max)][-1]
        fwhm = upr_half - bot_half
        sig0 = fwhm / 2.355
        amp0 = np.amax(hp_rt) * fwhm
        p_init = [amp0, bp[imax], sig0, bkg0]

        # fit a gaussian to get 1 sigma e-values
        p_fit, _ = pgf.fit_hist(pgf.gauss_bkg, hp_rt, bp,
                                var=hp_var, guess=p_init)
        _, mu, sigma, _ = p_fit
        # a fit may converge on a negative width; only its size matters here
        sigma = abs(sigma)

        # select events within 1 sigma of the maximum
        # and pull the waveforms from the raw file to make a superpulse.
        # NB: take element [0] of the np.where tuple right away, so `idx` is
        # always a 1d index array, whether or not it gets truncated to nwfs.
        idx = np.where((pdata >= mu - sigma) & (pdata <= mu + sigma))[0]
        print(f'Pulser at {epk} keV, {len(idx)} events. Limiting to {nwfs}.')
        if len(idx) == 0:
            return []  # nothing inside the 1 sigma window
        idx = idx[:nwfs]

        # grab the 2d numpy array of pulser wfs
        n_rows = int(idx[-1]) + 1  # read up to this event and stop
        f_raw = dg.lh5_dir + '/' + df_row.raw_path + '/' + df_row.raw_file
        tb_wfs, _ = sto.read_object(raw_name, f_raw, n_rows=n_rows)
        pwfs = tb_wfs['values'].nda[idx, :]

        # data cleaning step: remove events with outlier baselines.
        # the mode of the integer-truncated baseline means is found with
        # np.unique (smallest value wins ties), which does not depend on the
        # return shape of scipy.stats.mode.
        bl_means = pwfs[:, :500].mean(axis=1)
        bl_vals, bl_counts = np.unique(bl_means.astype(int),
                                       return_counts=True)
        bl_mode = bl_vals[np.argmax(bl_counts)]
        bl_ok = np.abs(bl_means - bl_mode) < bl_thresh
        pwfs = pwfs[bl_ok, :]
        bl_means = bl_means[bl_ok]

        # baseline subtract (one baseline value per waveform / row)
        wfs = pwfs - bl_means[:, None]

        # time-align all wfs at their 50% timepoint (tricky!).
        # adapted from pygama/sandbox/old_dsp/[calculators,transforms].py
        # an alternate approach would be to use ProcessingChain here
        wf_maxes = np.amax(wfs, axis=1)
        timepoints = np.argmax(wfs >= wf_maxes[:, None] * tp_align, axis=1)

        # drop wfs whose [tp - n_pre, tp + n_post) window leaves the trace;
        # otherwise negative indices wrap around silently, or numpy raises.
        in_range = ((timepoints >= n_pre)
                    & (timepoints + n_post <= wfs.shape[1]))
        wfs, timepoints = wfs[in_range], timepoints[in_range]
        if wfs.shape[0] == 0:
            return []  # no waveform has a complete alignment window

        wf_idxs = timepoints[:, None] + np.arange(-n_pre, n_post)
        row_idxs = np.arange(wfs.shape[0])[:, None]
        wfs = wfs[row_idxs, wf_idxs]

        # take the average to get the superpulse
        superpulse = np.mean(wfs, axis=0)

        # normalize all wfs to the superpulse maximum
        wfmax = np.amax(superpulse)
        superpulse = np.divide(superpulse, wfmax)
        wfs = np.divide(wfs, wfmax)

        # -- plot results --
        if show_plots:
            fig, (p0, p1) = plt.subplots(2, figsize=(7, 8))

            # plot fit result (top), and waveforms + superpulse (bottom)
            xfit = np.arange(plo, phi, ppb * 0.1)
            p0.plot(xfit, pgf.gauss_bkg(xfit, *p_init), '-', c='orange',
                    label='init')
            p0.plot(xfit, pgf.gauss_bkg(xfit, *p_fit), '-', c='red',
                    label='fit')

            # plot 1 sigma window
            p0.axvspan(mu - sigma, mu + sigma, color='m', alpha=0.2,
                       label='1 sigma')

            # plot data
            p0.plot(bp[1:], hp_rt, ds='steps', c='k', lw=1,
                    label=f'{vp:.2f} V')
            p0.set_xlabel(f'onboard energy (keV, c={ecal:.2e})', ha='right',
                          x=1)
            p0.set_ylabel('cts / s', ha='right', y=1)
            p0.legend(fontsize=10)

            # plot individ. wfs
            ts = np.arange(wfs.shape[1])
            for wf in wfs:
                p1.plot(ts, wf, '-k', lw=2, alpha=0.5)
            # empty trace, just to get a single legend entry for all wfs
            p1.plot(np.nan, np.nan, '-k', label=f'wfs, {epk:.0f} keV')

            # plot superpulse
            p1.plot(ts, superpulse, '-r', lw=2,
                    label=f'superpulse, {vp:.2f} V')
            p1.set_xlabel('time (10 ns)', ha='right', x=1)
            p1.set_ylabel('amplitude', ha='right', y=1)
            p1.legend(fontsize=10)

            # plt.show()
            fig.savefig(f'./plots/superpulse_cyc{cyc}.png', dpi=150)
            # one figure is created per run: close it so they don't pile up
            plt.close(fig)

        # save the superpulse to our output file
        return superpulse

    dfp['superpulse'] = dfp.apply(analyze_pulser_run, axis=1)

    # drop duplicated columns (e.g. a repeated 'run') before saving
    dfp = dfp.loc[:, ~dfp.columns.duplicated()]
    print(dfp)

    if write_output:
        print('Saving output file: ', f_super)
        dfp.to_hdf(f_super, key='superpulses')

    t_elap = (time.time() - t_start) / 60
    print(f'Done. Elapsed: {t_elap:.2f} min.')