def runtime_cycle(df_row):
    # load raw file path (with {these} in it)
    f_raw = f'{dg.lh5_dir}/{df_row.raw_path}/{df_row.raw_file}'
    f_raw = f_raw.format_map({'sysn': 'geds'})  # always look for Ge
    f_key = df_row.raw_file.format_map({'sysn': 'geds'})

    if not os.path.exists(f_raw):
        # print(f'no Ge data: {f_key}')
        return pd.Series({'runtime': 0, 'rt_std': 0})

    # for PGT, compare the first three channels (for redundancy)
    rts = []
    ge_groups = sto.ls(f_raw)
    for ge in ge_groups[:3]:
        ts = lh5.load_nda([f_raw], ['timestamp'], ge + '/raw/')['timestamp']
        rts.append(ts[-1])

    # take the largest value & compute the uncertainty
    runtime = max(rts) / 60
    rt_std = np.std(np.array(rts))
    # print(f_key, runtime, rt_std)

    return pd.Series({'runtime': runtime, 'rt_std': rt_std})
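# Usage sketch (illustrative, not part of the original pipeline): since
# runtime_cycle returns a pd.Series, it can be applied row-wise to a
# DataGroup fileDB that carries the raw_path / raw_file columns used above:
#
#   dg.fileDB[['runtime', 'rt_std']] = dg.fileDB.apply(runtime_cycle, axis=1)
#   print(dg.fileDB[['runtime', 'rt_std']].describe())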
def show_cal_spectrum(dg):
    """
    apply calibration to dsp file
    """
    # get file list and load energy data (numpy array)
    lh5_dir = os.path.expandvars(dg.config['lh5_dir'])
    dsp_list = lh5_dir + dg.file_keys['dsp_path'] + '/' + dg.file_keys['dsp_file']
    edata = lh5.load_nda(dsp_list, ['trapEmax'], 'ORSIS3302DecoderForEnergy/dsp')
    rt_min = dg.file_keys['runtime'].sum()
    u_start = dg.file_keys.iloc[0]['startTime']
    t_start = pd.to_datetime(u_start, unit='s')  # pd.Timestamp
    print('Found energy data:', [(et, len(ev)) for et, ev in edata.items()])
    print(f'Runtime (min): {rt_min:.2f}')

    # load calibration from peakfit
    cal_db = db.TinyDB(storage=MemoryStorage)
    with open('ecalDB.json') as f:
        raw_db = json.load(f)
        cal_db.storage.write(raw_db)

    runs = dg.file_keys.run.unique()
    if len(runs) > 1:
        print("sorry, I can't do combined runs yet")
        exit()
    run = runs[0]

    tb = cal_db.table("peakfit_trapEmax").all()
    df_cal = pd.DataFrame(tb)
    df_cal['run'] = df_cal['run'].astype(int)
    df_run = df_cal.loc[df_cal.run == run]
    cal_pars = df_run.iloc[0][['cal0', 'cal1', 'cal2']]

    # compute calibrated energy
    pol = np.poly1d(cal_pars)  # handy numpy polynomial object
    cal_data = pol(edata['trapEmax'])

    # pick the region of interest (only the uncommented line takes effect)
    # elo, ehi, epb, etype = 0, 3000, 1, 'trapEmax_cal'  # gamma region
    elo, ehi, epb, etype = 2500, 8000, 10, 'trapEmax_cal'  # overflow region
    # elo, ehi, epb, etype = 0, 250, 1, 'trapEmax_cal'  # low-e region

    hist, bins, _ = pgh.get_hist(cal_data, range=(elo, ehi), dx=epb)

    # normalize by runtime
    hist_rt = np.divide(hist, rt_min * 60)

    plt.plot(np.nan, np.nan, '-w', lw=1, label=f'start: {t_start}')
    plt.plot(bins[1:], hist_rt, ds='steps', c='b', lw=1,
             label=f'{etype}, {rt_min:.2f} mins')
    plt.xlabel(etype, ha='right', x=1)
    plt.ylabel('cts / sec', ha='right', y=1)
    plt.legend(loc=1, fontsize=12)
    plt.tight_layout()
    plt.savefig('./plots/CalSpectrum.png')
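# For reference, a minimal sketch of the ecalDB.json layout this function
# expects: TinyDB serializes each table as {doc_id: record}.  The field
# names come from the code above; the record values here are placeholders.
#
#   {
#     "_default": {},
#     "peakfit_trapEmax": {
#       "1": {"run": "110", "cal0": 0.0, "cal1": 0.4, "cal2": 0.0}
#     }
#   }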
def check_raw_spectrum(dg, config, db_ecal):
    """
    $ ./energy_cal.py -q 'query' --raw
    """
    import h5py

    # load energy data
    lh5_dir = os.path.expandvars(config['lh5_dir'])
    dsp_list = lh5_dir + dg.file_keys['dsp_path'] + '/' + dg.file_keys['dsp_file']
    raw_data = lh5.load_nda(dsp_list, config['rawe'], config['input_table'])
    runtime_min = dg.file_keys['runtime'].sum()

    print('\nShowing raw spectra ...')
    for etype in config['rawe']:
        xlo, xhi, xpb = config['init_vals'][etype]["raw_range"]

        # load energy data for this estimator
        data = raw_data[etype]

        # print columns of the input table
        file_info = db_ecal.table('_file_info').all()[0]
        tb_in = file_info['input_table']
        with h5py.File(dsp_list.iloc[0], 'r') as hf:
            print("LH5 columns:", list(hf[f'{tb_in}'].keys()))

        # generate histogram
        hist, bins, var = pgh.get_hist(data, range=(xlo, xhi), dx=xpb)
        bins = bins[1:]  # trim zero bin, not needed with ds='steps'

        # normalize by runtime
        hist_rt = np.divide(hist, runtime_min * 60)

        print('\nPlease determine the following parameters for the ecal config file:\n'
              " - 'raw_range': optimal binning, and hi/lo raw energy limits\n"
              " - 'peakdet_thresh': ~1/2 the height of a target peak\n"
              " - 'lowe_cut': energy threshold for peak detection")
        print(f'\nRaw E: {etype}, {len(data)} cts, runtime: {runtime_min:.2f} min')

        plt.plot(bins, hist_rt, ds='steps', c='b', lw=1, label=etype)
        plt.xlabel(etype, ha='right', x=1)
        plt.ylabel(f'cts/sec, {xpb}/bin', ha='right', y=1)

        if config['batch_mode']:
            plt.savefig('./plots/cal_spec_test.png')
        else:
            plt.show()
        plt.close()
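# Illustrative fragment of the ecal config this function reads.  The key
# names are taken from the code above; the numeric values are placeholders
# to be tuned using the printed instructions.
#
#   config = {
#       'rawe': ['trapEmax'],
#       'input_table': 'ORSIS3302DecoderForEnergy/dsp',
#       'init_vals': {
#           'trapEmax': {
#               'raw_range': [0, 10000, 10],  # xlo, xhi, xpb
#               'peakdet_thresh': 100,        # ~1/2 height of a target peak
#               'lowe_cut': 1000,             # threshold for peak detection
#           }
#       },
#       'batch_mode': False,
#   }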
def check_timestamps(f_raw):
    """
    fc daq timestamps are in seconds, from the beginning of the file:
    https://github.com/legend-exp/pygama/blob/master/pygama/io/fcdaq.py#L27
    """
    ts = lh5.load_nda([f_raw], ['timestamp'], 'g024/raw')['timestamp']
    print(ts)
    print(ts.shape)
    print(f'first: {ts[0]} {min(ts)}  last: {ts[-1]} {max(ts)}')

    rt = ts[-1] / 60  # runtime in min
    plt.plot(np.arange(len(ts)), ts, '.b', label=f'runtime: {rt:.1f} min')
    plt.xlabel('entry', ha='right', x=1)
    plt.ylabel('timestamp', ha='right', y=1)
    plt.legend()
    plt.savefig('./plots/ts_check.png', dpi=100)
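# A sanity check one could add here (a sketch: per the docstring, flashcam
# timestamps count seconds from the start of the file, so within one file
# they should be non-decreasing):
#
#   assert np.all(np.diff(ts) >= 0), 'timestamps are not monotonic'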
def show_raw_spectrum(dg):
    """
    show spectrum w/ onboard energy and trapE
    - get calibration constants for onboard energy and 'trapE' energy
    - TODO: fit each expected peak and get resolution vs. energy
    """
    # get file list and load energy data (numpy array)
    lh5_dir = os.path.expandvars(dg.config['lh5_dir'])
    dsp_list = lh5_dir + dg.file_keys['dsp_path'] + '/' + dg.file_keys['dsp_file']
    edata = lh5.load_nda(dsp_list, ['trapEmax'], 'ORSIS3302DecoderForEnergy/dsp')
    rt_min = dg.file_keys['runtime'].sum()
    u_start = dg.file_keys.iloc[0]['startTime']
    t_start = pd.to_datetime(u_start, unit='s')  # pd.Timestamp
    print('Found energy data:', [(et, len(ev)) for et, ev in edata.items()])
    print(f'Runtime (min): {rt_min:.2f}')

    elo, ehi, epb, etype = 0, 25000, 10, 'trapEmax'
    ene_uncal = edata[etype]
    hist, bins, _ = pgh.get_hist(ene_uncal, range=(elo, ehi), dx=epb)

    # normalize by runtime
    hist_rt = np.divide(hist, rt_min * 60)

    plt.plot(np.nan, np.nan, '-w', lw=1, label=t_start)
    plt.semilogy(bins[1:], hist_rt, ds='steps', c='b', lw=1,
                 label=f'{etype}, {rt_min:.2f} mins')
    plt.xlabel(etype, ha='right', x=1)
    plt.ylabel('cts / sec', ha='right', y=1)
    plt.legend()
    plt.tight_layout()
    # plt.show()
    plt.savefig('./plots/uncalSpectrum.png')
def peakfit_group(df_group, config, db_ecal):
    """
    fit the expected peaks in each energy estimator, then compute the
    energy calibration by weighted matrix inversion.
    """
    # get list of peaks to look for
    epeaks = config['expected_peaks'] + config['test_peaks']
    epeaks = np.array(sorted(epeaks))

    # right now a lookup by 'run' is hardcoded.
    # in principle the lookup should stay general using the gb_cols,
    # but it's kind of hard to see right now how to write the right db queries
    gb_run = df_group['run'].unique()
    if len(gb_run) > 1:
        print("Multi-run (or other) groupbys aren't supported yet, sorry")
        exit()

    # load data
    lh5_dir = os.path.expandvars(config['lh5_dir'])
    dsp_list = lh5_dir + df_group['dsp_path'] + '/' + df_group['dsp_file']
    raw_data = lh5.load_nda(dsp_list, config['rawe'], config['input_table'])
    runtime_min = df_group['runtime'].sum()

    # loop over energy estimators of interest
    pf_results = {}
    for et in config['rawe']:

        # load first-guess calibration constant from its table in the DB
        db_table = db_ecal.table(f'peakdet_{et}').all()
        df_cal = pd.DataFrame(db_table)
        lin_cal = df_cal.loc[df_cal.run == str(gb_run[0])]['lincal'].values[0]
        cal_data = raw_data[et] * lin_cal

        # compute expected peak locations and widths (fit to Gaussians)
        fit_results = {}
        for ie, epk in enumerate(epeaks):

            # adjust the window. resolution goes as roughly sqrt(energy)
            window = np.sqrt(epk) * 0.8
            xlo, xhi = epk - window/2, epk + window/2
            nbins = int(window) * 5
            xpb = (xhi - xlo) / nbins
            ibin_bkg = int(nbins * 0.2)

            # get histogram, error, normalize by runtime
            pk_data = cal_data[(cal_data >= xlo) & (cal_data <= xhi)]
            hist, bins, _ = pgh.get_hist(pk_data, range=(xlo, xhi), dx=xpb)
            hist_norm = np.divide(hist, runtime_min * 60)
            hist_var = np.array([np.sqrt(h / (runtime_min * 60)) for h in hist])

            # compute expected peak location and width (simple Gaussian)
            bkg0 = np.mean(hist_norm[:ibin_bkg])
            b, h = bins[1:], hist_norm - bkg0
            imax = np.argmax(h)
            upr_half = b[np.where((b > b[imax]) & (h <= np.amax(h)/2))][0]
            bot_half = b[np.where((b < b[imax]) & (h <= np.amax(h)/2))][-1]
            fwhm = upr_half - bot_half
            sig0 = fwhm / 2.355

            # # fit to simple gaussian
            # amp0 = np.amax(h) * fwhm
            # p_init = [amp0, bins[imax], sig0, bkg0]  # a, mu, sigma, bkg
            # p_fit, p_cov = pgf.fit_hist(pgf.gauss_bkg, hist_norm, bins,
            #                             var=hist_var, guess=p_init)
            # fit_func = pgf.gauss_bkg
            # p_err = np.sqrt(np.diag(p_cov))

            # # goodness of fit
            # chisq = []
            # for i, h in enumerate(hist_norm):
            #     model = fit_func(b[i], *p_fit)
            #     diff = (model - h)**2 / model
            #     chisq.append(abs(diff))
            # rchisq = sum(np.array(chisq) / len(hist_norm))
            # # fwhm_err = p_err[1] * 2.355 * e_peak / e_fit

            # # collect interesting results for this row
            # fit_results[ie] = {
            #     'epk': epk,
            #     'mu': p_fit[1], 'fwhm': p_fit[2]*2.355, 'sig': p_fit[2],
            #     'amp': p_fit[0], 'bkg': p_fit[3], 'rchisq': rchisq,
            #     'mu_raw': p_fit[1] / lin_cal,  # <-- this is in terms of raw E
            #     'mu_unc': p_err[1] / lin_cal
            # }
            # print(fit_results[ie])

            # fit to radford peak: mu, sigma, hstep, htail, tau, bg0, amp
            amp0 = np.amax(h) * fwhm
            hstep = 0.001  # fraction that the step contributes
            htail = 0.1
            tau = 10
            p_init = [bins[imax], sig0, hstep, htail, tau, bkg0, amp0]
            p_fit, p_cov = pgf.fit_hist(pgf.radford_peak, hist_norm, bins,
                                        var=hist_var, guess=p_init)
            fit_func = pgf.radford_peak

            # just for debugging
            print('Len Fit params:', len(p_fit))

            p_err = np.sqrt(np.diag(p_cov))

            # goodness of fit
            chisq = []
            for i, h in enumerate(hist_norm):
                model = fit_func(b[i], *p_fit)
                diff = (model - h)**2 / model
                chisq.append(abs(diff))
            rchisq = sum(np.array(chisq) / len(hist_norm))
            # fwhm_err = p_err[1] * 2.355 * e_peak / e_fit

            # collect interesting results for this row
            # note: index by radford_peak's parameter order, which matches
            # p_init above: (mu, sigma, hstep, htail, tau, bkg, amp)
            fit_results[ie] = {
                'epk': epk,
                'mu': p_fit[0], 'fwhm': p_fit[1] * 2.355, 'sig': p_fit[1],
                'amp': p_fit[6], 'bkg': p_fit[5], 'rchisq': rchisq,
                'mu_raw': p_fit[0] / lin_cal,  # <-- this is in terms of raw E
                'mu_unc': p_err[0] / lin_cal
            }
            # print('Len Fit params:', len(p_fit))
            print('Fit results:', fit_results[ie])

            # diagnostic plot, don't delete
            if config['show_plot']:
                plt.axvline(bins[ibin_bkg], c='m', label='bkg region')
                xfit = np.arange(xlo, xhi, xpb * 0.1)
                plt.plot(xfit, fit_func(xfit, *p_init), '-', c='orange',
                         label='init')
                plt.plot(xfit, fit_func(xfit, *p_fit), '-', c='red',
                         label='fit')
                plt.plot(bins[1:], hist_norm, c='b', lw=1.5, ds='steps')
                plt.xlabel('pass-1 energy (keV)', ha='right', x=1)
                plt.legend(fontsize=12)
                if config['batch_mode']:
                    plt.savefig('./plots/fit%d_peakfit.png' % ie)
                else:
                    plt.show()
                plt.close()
                # exit()  # debug leftover: would stop after the first peak

        # ----------------------------------------------------------------------
        # compute energy calibration by matrix inversion (thanks Tim and Jason!)

        view_cols = ['epk', 'mu', 'fwhm', 'bkg', 'rchisq', 'mu_raw']
        df_fits = pd.DataFrame(fit_results).T
        print(df_fits[view_cols])

        true_peaks = df_fits['epk']
        raw_peaks, raw_error = df_fits['mu_raw'], df_fits['mu_unc']

        error = raw_error / raw_peaks * true_peaks
        cov = np.diag(error**2)
        weights = np.diag(1 / error**2)

        degree = config['pol_order']
        raw_peaks_matrix = np.zeros((len(raw_peaks), degree + 1))
        for i, pk in enumerate(raw_peaks):
            temp_degree = degree
            row = np.array([])
            while temp_degree >= 0:
                row = np.append(row, pk**temp_degree)
                temp_degree -= 1
            raw_peaks_matrix[i] += row
        print(raw_peaks_matrix)

        # perform matrix inversion
        xTWX = np.dot(np.dot(raw_peaks_matrix.T, weights), raw_peaks_matrix)
        xTWY = np.dot(np.dot(raw_peaks_matrix.T, weights), true_peaks)
        if np.linalg.det(xTWX) == 0:
            print("singular matrix, determinant is 0, can't get cal constants")
            exit()
        xTWX_inv = np.linalg.inv(xTWX)

        # get polynomial coefficients and errors
        cal_pars = np.dot(xTWX_inv, xTWY)
        cal_errs = np.sqrt(np.diag(xTWX_inv))
        n = len(cal_pars)
        print('Fit:', ' '.join([f'p{i}:{cal_pars[i]:.4e}' for i in range(n)]))
        print('Unc:', ' '.join([f'p{i}:{cal_errs[i]:.4e}' for i in range(n)]))

        # ----------------------------------------------------------------------
        # repeat the peak fit with the calibrated energy (affects widths)

        # compute calibrated energy
        pol = np.poly1d(cal_pars)  # handy numpy polynomial object
        cal_data = pol(raw_data[et])

        fit_results = {}
        # print('fit_results', fit_results)
        print('cal_data', cal_data)
        for ie, epk in enumerate(epeaks):
            print('epk:', epk, '\n epeaks:', epeaks)

            # adjust the window.
            # resolution goes as roughly sqrt(energy)
            window = np.sqrt(epk) * 0.5
            xlo, xhi = epk - window/2, epk + window/2
            nbins = int(window) * 5
            xpb = (xhi - xlo) / nbins
            ibin_bkg = int(nbins * 0.2)
            print('xhi:', xhi, 'xlo:', xlo)

            # get histogram, error, normalize by runtime
            pk_data = cal_data[(cal_data >= xlo) & (cal_data <= xhi)]
            hist, bins, _ = pgh.get_hist(pk_data, range=(xlo, xhi), dx=xpb)
            hist_norm = np.divide(hist, runtime_min * 60)
            hist_var = np.array([np.sqrt(h / (runtime_min * 60)) for h in hist])
            print('cal_data:', cal_data)
            print('bins:', bins)

            # compute expected peak location and width (simple Gaussian)
            bkg0 = np.mean(hist_norm[:ibin_bkg])
            # print(bkg0)
            b, h = bins[1:], hist_norm - bkg0
            imax = np.argmax(h)
            upr_half = b[np.where((b > b[imax]) & (h <= np.amax(h)/2))][0]
            bot_half = b[np.where((b < b[imax]) & (h <= np.amax(h)/2))][-1]
            fwhm = upr_half - bot_half
            sig0 = fwhm / 2.355
            amp0 = np.amax(h) * fwhm
            p_init = [amp0, bins[imax], sig0, bkg0]  # a, mu, sigma, bkg
            p_fit, p_cov = pgf.fit_hist(pgf.gauss_bkg, hist_norm, bins,
                                        var=hist_var, guess=p_init)
            p_err = np.sqrt(np.diag(p_cov))
            print('p_err:', p_err)

            # save results
            fit_results[ie] = {
                'epk': epk,
                'mu': p_fit[1], 'fwhm': p_fit[2] * 2.355, 'sig': p_fit[2],
                'amp': p_fit[0], 'bkg': p_fit[3],
            }
            print('fit results:', fit_results[ie])

        # consolidate results again
        view_cols = ['epk', 'mu', 'fwhm', 'residual']
        df_fits = pd.DataFrame(fit_results).T

        # compute the difference between lit and measured values
        cal_peaks = pol(raw_peaks)
        df_fits['residual'] = true_peaks - cal_peaks
        print(df_fits[view_cols])

        # fit fwhm vs. energy
        # FWHM(E) = sqrt(A_noise^2 + A_fano^2 * E + A_qcol^2 * E^2)
        # Ref: Eq. 3 of https://arxiv.org/abs/1902.02299
        # TODO: fix error handling
        def sqrt_fwhm(x, a_n, a_f, a_c):
            return np.sqrt(a_n**2 + a_f**2 * x + a_c**2 * x**2)

        p_guess = [0.3, 0.05, 0.001]
        p_fit, p_cov = curve_fit(sqrt_fwhm, df_fits['mu'], df_fits['fwhm'],
                                 p0=p_guess)  # , sigma=np.sqrt(h), absolute_sigma=True
        p_err = np.sqrt(np.diag(p_cov))

        if config['show_plot']:
            # show a split figure with calibrated spectrum + used peaks on top,
            # and calib. function and resolution vs.
            # energy on the bottom
            fig, (p0, p1) = plt.subplots(2, 1, figsize=(8, 8), sharex=True)
            # gridspec_kw={'height_ratios':[2, 1]}

            # get histogram (cts / keV / d)
            xlo, xhi, xpb = config['cal_range']
            hist, bins, _ = pgh.get_hist(cal_data, range=(xlo, xhi), dx=xpb)
            hist_norm = np.divide(hist, runtime_min * 60 * xpb)

            # show peaks
            cmap = plt.cm.get_cmap('brg', len(df_fits) + 1)
            for i, row in df_fits.iterrows():

                # get a pretty label for the isotope
                lbl = config['pks'][str(row['epk'])]
                iso = ''.join(r for r in re.findall('[0-9]+', lbl))
                ele = ''.join(r for r in re.findall('[a-z]', lbl, re.I))
                pk_lbl = r'$^{%s}$%s' % (iso, ele)

                pk_diff = row['epk'] - row['mu']
                p0.axvline(row['epk'], ls='--', c=cmap(i), lw=1,
                           label=f"{pk_lbl} : {row['epk']} + {pk_diff:.3f}")

            p0.semilogy(bins[1:], hist_norm, ds='steps', c='b', lw=1)
            p0.set_ylabel('cts / s / keV', ha='right', y=1)
            p0.legend(loc=3, fontsize=11)

            # TODO: add fwhm errorbar
            x_fit = np.arange(xlo, xhi, xpb)
            y_init = sqrt_fwhm(x_fit, *p_guess)
            p1.plot(x_fit, y_init, '-', lw=1, c='orange', label='guess')

            y_fit = sqrt_fwhm(x_fit, *p_fit)
            a_n, a_f, a_c = p_fit
            fit_label = r'$\sqrt{(%.2f)^2 + (%.3f)^2 E + (%.4f)^2 E^2}$' % (a_n, a_f, a_c)
            p1.plot(x_fit, y_fit, '-r', lw=1, label=f'fit: {fit_label}')

            p1.plot(df_fits['mu'], df_fits['fwhm'], '.b')

            p1.set_xlabel('Energy (keV)', ha='right', x=1)
            p1.set_ylabel('FWHM (keV)', ha='right', y=1)
            p1.legend(fontsize=11)

            if config['batch_mode']:
                plt.savefig('./plots/peakfit.png')
            else:
                plt.show()

        # the order of the polynomial should be in the table name
        pf_results[f'{et}_Anoise'] = p_fit[0]
        pf_results[f'{et}_Afano'] = p_fit[1]
        pf_results[f'{et}_Aqcol'] = p_fit[2]
        for i in range(len(cal_pars)):
            pf_results[f'{et}_cal{i}'] = cal_pars[i]
        for i in range(len(cal_pars)):
            pf_results[f'{et}_unc{i}'] = cal_errs[i]

    return pd.Series(pf_results)
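# Cross-check sketch for the matrix-inversion calibration above: numpy's
# polyfit performs the same weighted least squares when given w = 1/error
# (polyfit minimizes sum((w * (y - p(x)))**2)), and returns coefficients
# highest-order first, the same convention as cal_pars:
#
#   cal_check = np.polyfit(raw_peaks, true_peaks, deg=config['pol_order'],
#                          w=1/error)
#   print('np.polyfit cross-check:', cal_check)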
def peakdet_group(df_group, config):
    """
    Access all files in this group, load energy histograms, and find
    the "first guess" linear calibration constant.
    Return the value, and a bool indicating success.
    """
    # get file list and load energy data
    lh5_dir = os.path.expandvars(config['lh5_dir'])
    dsp_list = lh5_dir + df_group['dsp_path'] + '/' + df_group['dsp_file']
    edata = lh5.load_nda(dsp_list, config['rawe'], config['input_table'])
    print('Found energy data:', [(et, len(ev)) for et, ev in edata.items()])

    runtime_min = df_group['runtime'].sum()
    print(f'Runtime (min): {runtime_min:.2f}')

    # loop over energy estimators of interest
    pd_results = {}
    for et in config['rawe']:

        # get histogram, error, normalize by runtime, and derivative
        xlo, xhi, xpb = config['init_vals'][et]['raw_range']
        hist, bins, var = pgh.get_hist(edata[et], range=(xlo, xhi), dx=xpb)
        hist_norm = np.divide(hist, runtime_min * 60)
        hist_err = np.array([np.sqrt(hbin / (runtime_min * 60)) for hbin in hist])
        # plt.plot(bins[1:], hist_norm, ds='steps')
        # plt.show()
        # hist_deriv = np.diff(hist_norm)
        # hist_deriv = np.insert(hist_deriv, 0, 0)

        # run peakdet
        pd_thresh = config['init_vals'][et]['peakdet_thresh']
        lowe_cut = config['init_vals'][et]['lowe_cut']
        ctr_bins = (bins[:-1] + bins[1:]) / 2.
        idx = np.where(ctr_bins > lowe_cut)

        maxes, mins = pgc.peakdet(hist_norm[idx], pd_thresh, ctr_bins[idx])
        # maxes, mins = pgc.peakdet(hist_deriv[idx], pd_thresh, ctr_bins[idx])
        if len(maxes) == 0:
            print('warning, no maxima! adjust peakdet threshold')
        # print(maxes)  # x (energy) [:,0], y (counts) [:,1]

        # run peak matching
        exp_pks = config['expected_peaks']
        tst_pks = config['test_peaks']
        mode = config['match_mode']
        etol = config['raw_ene_tol']
        lin_cal, mp_success = match_peaks(maxes, exp_pks, tst_pks, mode, etol)

        if config['show_plot']:

            # plot uncalibrated and calibrated energy spectrum, w/ maxima
            fig, (p0, p1) = plt.subplots(2, 1, figsize=(8, 8))

            idx = np.where(bins[1:] > lowe_cut)
            imaxes = [np.where(np.isclose(ctr_bins, x[0]))[0][0] for x in maxes]
            imaxes = np.asarray(imaxes)

            # energy, uncalibrated
            p0.plot(bins[imaxes], hist_norm[imaxes], '.m')
            p0.plot(bins[idx], hist_norm[idx], ds='steps', c='b', lw=1, label=et)
            p0.set_ylabel(f'cts/s, {xpb}/bin', ha='right', y=1)
            p0.set_xlabel(et, ha='right', x=1)

            # energy, with rough calibration
            bins_cal = bins[1:] * lin_cal
            p1.plot(bins_cal, hist_norm, ds='steps', c='b', lw=1,
                    label=f'E = {lin_cal:.3f}*{et}')

            # compute best-guess location of all peaks, assuming rough calibration
            cal_maxes = lin_cal * maxes[:, 0]
            all_pks = np.concatenate((exp_pks, tst_pks))
            raw_guesses = []
            for pk in all_pks:
                imatch = np.isclose(cal_maxes, pk, atol=config['mp_tol'])
                if imatch.any():
                    # print(pk, cal_maxes[imatch], maxes[:,0][imatch])
                    raw_guesses.append([pk, maxes[:, 0][imatch][0]])
            rg = np.asarray(raw_guesses)
            rg = rg[rg[:, 0].argsort()]  # sort by energy

            cmap = plt.cm.get_cmap('jet', len(rg))
            for i, epk in enumerate(rg):
                idx_nearest = (np.abs(bins_cal - epk[0])).argmin()
                cts_nearest = hist_norm[idx_nearest]
                p1.plot(epk[0], cts_nearest, '.r', c=cmap(i),
                        label=f'{epk[0]:.1f} keV')

            p1.set_xlabel(f'{et}, pass-1 cal', ha='right', x=1)
            p1.set_ylabel(f'cts/s, {xpb} keV/bin', ha='right', y=1)
            p1.legend(fontsize=10)

            if config['batch_mode']:
                plt.savefig(f'./plots/peakdet_cal_{et}.pdf')
            else:
                plt.show()

        pd_results[f'{et}_lincal'] = lin_cal
        pd_results[f'{et}_lcpass'] = str(mp_success)

    return pd.Series(pd_results)
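# The real match_peaks lives elsewhere in this module; the following is a
# minimal, illustrative sketch of the idea it implements: ratio the most
# prominent detected maximum against each expected peak, and keep the
# candidate linear constant that lines up the most expected peaks.
def match_peaks_sketch(maxes, exp_pks, etol=10):
    """simplified first-guess linear calibration, returns (lin_cal, success)"""
    import numpy as np
    # maxes: the Nx2 [energy, counts] array returned by pgc.peakdet
    xmax = maxes[np.argmax(maxes[:, 1]), 0]  # most prominent peak (raw E)
    best_cal, best_n = None, -1
    for epk in exp_pks:
        lin_cal = epk / xmax              # candidate: xmax is this peak
        cal = maxes[:, 0] * lin_cal
        n_match = sum(np.isclose(cal, pk, atol=etol).any() for pk in exp_pks)
        if n_match > best_n:
            best_cal, best_n = lin_cal, n_match
    return best_cal, best_n >= 2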
def plot_dsp(dg):
    """
    create a DataFrame from the dsp files and make some 1d and 2d
    diagnostic plots.
    for reference, current 12/30/20 dsp parameters:
      ['channel', 'timestamp', 'energy', 'bl', 'bl_sig', 'trapEftp',
       'trapEmax', 'triE', 'tp_max', 'tp_0', 'tp_10', 'tp_50', 'tp_80',
       'tp_90', 'A_10', 'AoE', 'dcr_raw', 'dcr_max', 'dcr_ftp', 'hf_max']
    columns added by this code:
      ['run', 'cycle', 'ts_sec', 'ts_glo']
    """
    sto = lh5.Store()
    dsp_name = 'ORSIS3302DecoderForEnergy/dsp'
    wfs_name = 'ORSIS3302DecoderForEnergy/raw/waveform'

    def get_dsp_dfs(df_row):
        """
        grab the dsp df, add some columns, and return it
        """
        f_dsp = dg.lh5_dir + '/' + df_row.dsp_path + '/' + df_row.dsp_file
        if len(f_dsp) > 1:
            print('Error, this part is supposed to only load individual files')
            exit()
        f_dsp = f_dsp.iloc[0]
        run, cyc = df_row.run.iloc[0], df_row.cycle.iloc[0]
        # print(run, cyc, f_dsp)

        # grab the dataframe and add some columns
        tb, nr = sto.read_object(dsp_name, f_dsp)
        df = tb.get_dataframe()
        df['run'] = run
        df['cycle'] = cyc

        # need global timestamp. just calculate here instead of making hit files
        clock = 100e6  # 100 MHz
        UINT_MAX = 4294967295  # (0xffffffff)
        t_max = UINT_MAX / clock
        ts = df['timestamp'].values / clock
        tdiff = np.diff(ts)
        tdiff = np.insert(tdiff, 0, 0)
        iwrap = np.where(tdiff < 0)
        iloop = np.append(iwrap[0], len(ts))
        ts_new, t_roll = [], 0
        for i, idx in enumerate(iloop):
            ilo = 0 if i == 0 else iwrap[0][i - 1]
            ihi = idx
            ts_block = ts[ilo:ihi]
            t_last = ts[ilo - 1]
            t_diff = t_max - t_last
            ts_new.append(ts_block + t_roll)
            t_roll += t_last + t_diff
        df['ts_sec'] = np.concatenate(ts_new)
        t_start = df_row.startTime.iloc[0]
        df['ts_glo'] = df['ts_sec'] + t_start
        # print(df)
        return df

    # create the multi-cycle DataFrame
    df_dsp = dg.fileDB.groupby(['cycle']).apply(get_dsp_dfs)
    df_dsp.reset_index(inplace=True, drop=True)  # << VERY IMPORTANT!
    print(df_dsp)
    print(df_dsp.columns)

    # 1. 1d energy histogram -- use this to select energy range of interest
    et = 'trapEmax'
    elo, ehi, epb = 0, 10000, 10
    edata = df_dsp.trapEmax.values
    hist, bins, _ = pgh.get_hist(edata, range=(elo, ehi), dx=epb)
    plt.semilogy(bins[1:], hist, ds='steps', c='b', lw=1)
    plt.xlabel(et, ha='right', x=1)
    plt.ylabel('Counts', ha='right', y=1)
    # plt.show()
    plt.savefig('./plots/risingedge_1dspec.pdf')
    plt.cla()

    # 2. 2d histo: show risetime vs. time for wfs in an energy range

    # choose risetime range (usec)
    # rlo, rhi, rpb = 0, 5, 0.1  # run 110 (good)
    rlo, rhi, rpb = 0, 50, 1     # run 111 (bad)

    # select energy range
    elo, ehi, epb = 1500, 1600, 0.5
    df = df_dsp.query(f'trapEmax > {elo} and trapEmax < {ehi}').copy()

    # calculate timestamp range
    t0 = df_dsp.iloc[0]['ts_glo']
    df['ts_adj'] = (df.ts_glo - t0) / 60  # minutes after t0
    tlo, thi, tpb = 0, df.ts_adj.max(), 1

    # compute t50-100 risetime
    df['rt_us'] = (df.tp_max - df.tp_50) / 1e3  # convert ns to us
    # print(df[['tp_max', 'tp_50', 'rt_us']])

    nbx, nby = int((thi - tlo) / tpb), int((rhi - rlo) / rpb)
    plt.hist2d(df['ts_adj'], df['rt_us'], bins=[nbx, nby],
               range=[[tlo, thi], [rlo, rhi]], cmap='jet')
    plt.xlabel('Time (min)', ha='right', x=1)
    plt.ylabel('Rise Time (t50-100), usec', ha='right', y=1)
    # plt.show()
    plt.savefig('./plots/risingedge_2dRisetime.png', dpi=150)
    plt.cla()
    # 3. first 10 wfs from the energy region selection (requires the raw file).
    # this assumes the first file has 10 events
    db = dg.fileDB.iloc[0]
    cyc = db.cycle
    f_raw = dg.lh5_dir + '/' + db.raw_path + '/' + db.raw_file
    f_dsp = dg.lh5_dir + '/' + db.dsp_path + '/' + db.dsp_file
    edata = lh5.load_nda([f_dsp], ['trapEmax'], dsp_name)['trapEmax']
    idx = np.where((edata >= elo) & (edata <= ehi))

    nwfs = 10
    idx_sel = idx[0][:nwfs]
    n_rows = idx_sel[-1] + 1  # read up to this event and stop
    tb_wfs, n_wfs = sto.read_object(wfs_name, f_raw, n_rows=n_rows)

    # grab the 2d numpy array of waveforms
    wfs = tb_wfs['values'].nda[idx_sel, :]

    ts = np.arange(0, len(wfs[0, :-2])) / 1e2  # usec
    for iwf in range(wfs.shape[0]):
        plt.plot(ts, wfs[iwf, :-2], lw=2, alpha=0.5)
    plt.xlabel('Time (us)', ha='right', x=1)
    plt.ylabel('ADC', ha='right', y=1)
    plt.show()
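# The rollover handling in plot_dsp, as a compact vectorized sketch: the
# SIS3302 timestamp is a 32-bit counter at 100 MHz, so it wraps every
# UINT_MAX / clock ~ 42.9 s.  Counting the wraps with cumsum and adding one
# full counter period per wrap gives the same continuous time axis as the
# loop above (illustrative alternative, not the original code):
def correct_rollover_sketch(ts_raw, clock=100e6, uint_max=4294967295):
    """return a continuous time axis (sec) from a wrapping 32-bit counter"""
    import numpy as np
    ts = np.asarray(ts_raw) / clock  # seconds; drops back to ~0 at each wrap
    n_wraps = np.insert(np.cumsum(np.diff(ts) < 0), 0, 0)
    return ts + n_wraps * (uint_max / clock)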
def analyze_pulser_run(df_row):
    """
    loop over each row of dfp and save the superpulse
    """
    epk, rt, vp, cyc = df_row[['E_keV', 'runtime', 'V_pulser', 'cycle']]
    rt *= 60  # sec
    if epk == 0:
        return []  # skip the bkg run

    # load pulser energies
    f_dsp = dg.lh5_dir + '/' + df_row.dsp_path + '/' + df_row.dsp_file
    pdata = lh5.load_nda([f_dsp], ['energy'], dsp_name)['energy'] * ecal

    # auto-narrow the window around the max pulser peak in two steps
    elo, ehi, epb = epk - 50, epk + 50, 0.5
    pdata_all = pdata[(pdata > elo) & (pdata < ehi)]
    hp, bp, _ = pgh.get_hist(pdata_all, range=(elo, ehi), dx=epb)
    pctr = bp[np.argmax(hp)]

    plo, phi, ppb = pctr - e_window, pctr + e_window, 0.1
    pdata_pk = pdata[(pdata > plo) & (pdata < phi)]
    hp, bp, _ = pgh.get_hist(pdata_pk, range=(plo, phi), dx=ppb)
    hp_rt = np.divide(hp, rt)
    hp_var = np.array([np.sqrt(h / rt) for h in hp])

    # fit a gaussian to get 1 sigma e-values
    ibin_bkg = 50
    bkg0 = np.mean(hp_rt[:ibin_bkg])
    b, h = bp[1:], hp_rt
    imax = np.argmax(h)
    upr_half = b[np.where((b > b[imax]) & (h <= np.amax(h) / 2))][0]
    bot_half = b[np.where((b < b[imax]) & (h <= np.amax(h) / 2))][-1]
    fwhm = upr_half - bot_half
    sig0 = fwhm / 2.355
    amp0 = np.amax(hp_rt) * fwhm
    p_init = [amp0, bp[imax], sig0, bkg0]
    p_fit, p_cov = pgf.fit_hist(pgf.gauss_bkg, hp_rt, bp, var=hp_var,
                                guess=p_init)
    amp, mu, sigma, bkg = p_fit

    # select events within 1 sigma of the maximum
    # and pull the waveforms from the raw file to make a superpulse.
    idx = np.where((pdata >= mu - sigma) & (pdata <= mu + sigma))
    print(f'Pulser at {epk} keV, {len(idx[0])} events. Limiting to {nwfs}.')
    if len(idx[0]) > nwfs:
        idx = idx[0][:nwfs]

    # grab the 2d numpy array of pulser wfs
    n_rows = idx[-1] + 1  # read up to this event and stop
    f_raw = dg.lh5_dir + '/' + df_row.raw_path + '/' + df_row.raw_file
    tb_wfs, n_wfs = sto.read_object(raw_name, f_raw, n_rows=n_rows)
    pwfs = tb_wfs['values'].nda[idx, :]
    # print(idx, len(idx), pwfs.shape, '\n', pwfs)

    # data cleaning step: remove events with outlier baselines
    bl_means = pwfs[:, :500].mean(axis=1)
    bl_mode = mode(bl_means.astype(int))[0][0]
    bl_ctr = np.subtract(bl_means, bl_mode)
    idx_dc = np.where(np.abs(bl_ctr) < bl_thresh)
    pwfs = pwfs[idx_dc[0], :]
    bl_means = bl_means[idx_dc]
    # print(pwfs.shape, bl_means.shape)

    # baseline subtract (transpose so the leading dims match for broadcasting)
    wfs = (pwfs.transpose() - bl_means).transpose()

    # time-align all wfs at their 50% timepoint (tricky!).
    # adapted from pygama/sandbox/old_dsp/[calculators,transforms].py
    # an alternate approach would be to use ProcessingChain here
    wf_maxes = np.amax(wfs, axis=1)
    timepoints = np.argmax(wfs >= wf_maxes[:, None] * tp_align, axis=1)
    wf_idxs = np.zeros([wfs.shape[0], n_pre + n_post], dtype=int)
    row_idxs = np.zeros_like(wf_idxs)
    for i, tp in enumerate(timepoints):
        wf_idxs[i, :] = np.arange(tp - n_pre, tp + n_post)
        row_idxs[i, :] = i
    wfs = wfs[row_idxs, wf_idxs]

    # take the average to get the superpulse
    superpulse = np.mean(wfs, axis=0)

    # normalize all wfs to the superpulse maximum
    wfmax, tmax = np.amax(superpulse), np.argmax(superpulse)
    superpulse = np.divide(superpulse, wfmax)
    wfs = np.divide(wfs, wfmax)

    # -- plot results --
    if show_plots:
        fig, (p0, p1) = plt.subplots(2, figsize=(7, 8))

        # plot fit result (top), and waveforms + superpulse (bottom)
        xfit = np.arange(plo, phi, ppb * 0.1)
        p0.plot(xfit, pgf.gauss_bkg(xfit, *p_init), '-', c='orange',
                label='init')
        p0.plot(xfit, pgf.gauss_bkg(xfit, *p_fit), '-', c='red', label='fit')

        # plot 1 sigma window
        p0.axvspan(mu - sigma, mu + sigma, color='m', alpha=0.2,
                   label='1 sigma')

        # plot data
        p0.plot(bp[1:], hp_rt, ds='steps', c='k', lw=1, label=f'{vp:.2f} V')
        p0.set_xlabel(f'onboard energy (keV, c={ecal:.2e})', ha='right', x=1)
        p0.set_ylabel('cts / s', ha='right', y=1)
        p0.legend(fontsize=10)

        # plot individual wfs
        ts = np.arange(0, len(wfs[0, :]))
        for iwf in range(wfs.shape[0]):
            p1.plot(ts, wfs[iwf, :], '-k', lw=2, alpha=0.5)
        p1.plot(np.nan, np.nan, '-k', label=f'wfs, {epk:.0f} keV')

        # plot superpulse
        p1.plot(ts, superpulse, '-r', lw=2, label=f'superpulse, {vp:.2f} V')

        p1.set_xlabel('time (10 ns)', ha='right', x=1)
        p1.set_ylabel('amplitude', ha='right', y=1)
        p1.legend(fontsize=10)
        # plt.show()
        plt.savefig(f'./plots/superpulse_cyc{cyc}.png', dpi=150)
        plt.cla()

    # save the superpulse to our output file
    return superpulse
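# Usage sketch (illustrative): with the pulser runs listed in a DataFrame
# dfp carrying the columns read above, the superpulses can be collected
# row-wise; the background row (E_keV == 0) returns an empty list.
#
#   superpulses = dfp.apply(analyze_pulser_run, axis=1)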
def show_spectra(dfp, dg):
    """
    plot events from each pulser peak on top of a background spectrum run,
    to show where in the spectrum we sampled from.
    let's use the E_keV column to find the pulser peaks.
    we need to figure out the proper calibration constant (use onboard
    energy), so load the bkg run and figure out the calibration constant.
    that's the parameter we need for get_superpulses.
    """
    run_diagnostic = False

    f_dsp = dg.lh5_dir + '/' + dfp.dsp_path + '/' + dfp.dsp_file
    f_bkg = f_dsp.iloc[0]  # bkg run is 0 by definition
    print('Background run:', f_bkg)

    # dataframe method - pulls all values from the table
    # sto = lh5.Store()
    # tb_data, n_rows = sto.read_object('ORSIS3302DecoderForEnergy/dsp', f_bkg)
    # df_data = tb_data.get_dataframe()

    # load_nda method - just grab onboard energy
    tb_name = 'ORSIS3302DecoderForEnergy/dsp'
    edata = lh5.load_nda([f_bkg], ['energy'], tb_name)['energy']

    # use this flag to figure out the calibration of the 1460 line
    if run_diagnostic:
        elo, ehi, epb = 0, 1e7, 10000
        hist, bins, _ = pgh.get_hist(edata, range=(elo, ehi), dx=epb)
        plt.semilogy(bins[1:], hist, ds='steps', c='b', lw=1)
        plt.show()
        exit()

    ecal = 1460.8 / 2.005e6  # works for pulser dataset 2 (dec 2020)
    elo, ehi, epb = 0, 5000, 10
    hist, bins, _ = pgh.get_hist(edata * ecal, range=(elo, ehi), dx=epb)
    runtime = dfp.iloc[0].runtime * 60  # sec
    hist_rt = np.divide(hist, runtime)
    print(f'bkg runtime: {runtime:.2f} sec')

    cmap = plt.cm.get_cmap('jet', len(dfp))
    for i, df_row in dfp.iterrows():

        epk, rt, vp = df_row[['E_keV', 'runtime', 'V_pulser']]
        rt *= 60  # sec
        if epk == 0:
            continue  # skip the bkg run

        # draw the expected peak location based on our input table
        plt.axvline(epk, lw=1, alpha=0.5)

        # load pulser data
        f_dsp = dg.lh5_dir + '/' + df_row.dsp_path + '/' + df_row.dsp_file
        pdata = lh5.load_nda([f_dsp], ['energy'], tb_name)['energy'] * ecal

        # take a wide window around where we expect the pulser peak
        pdata = pdata[(pdata > epk - 50) & (pdata < epk + 50)]
        hp, bp, _ = pgh.get_hist(pdata, range=(elo, ehi), dx=epb)
        hp_rt = np.divide(hp, rt)
        plt.semilogy(bp[1:], hp_rt, ds='steps', lw=1, c=cmap(i),
                     label=f'{vp:.2f} V')

    plt.semilogy(bins[1:], hist_rt, ds='steps', c='k', lw=1, label='bkg data')

    plt.xlabel(f'onboard energy (keV, c={ecal:.2e})', ha='right', x=1)
    plt.ylabel('cts / s', ha='right', y=1)
    plt.legend(fontsize=10)
    plt.savefig('./plots/transferfn_peaks.pdf')
    plt.show()
    plt.clf()
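# Sketch of how the hardcoded ecal above can be re-derived for a new
# dataset: enable run_diagnostic, read the onboard-energy location of the
# 1460.8 keV K-40 line off the histogram, and take the ratio.  The 2.005e6
# value is what that diagnostic showed for this dataset.
#
#   e_onboard_1460 = 2.005e6          # from the run_diagnostic histogram
#   ecal = 1460.8 / e_onboard_1460    # keV per onboard-energy unit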
#!/usr/bin/env python3
import pandas as pd
import pygama.io.lh5 as lh5

fin = '/global/project/projectdirs/legend/users/gothman/CAGE'
fin += '/dsp/cage_run110_cyc1186_dsp.lh5'  # run 110 is cycles 1184--1190

name = 'ORSIS3302DecoderForEnergy/dsp'
energy = lh5.load_nda([fin], ['trapEmax'], name)['trapEmax']
energy = pd.Series(energy)

print(energy, len(energy))
# print(energy.loc[(energy > 3597) & (energy < 3617)])

emask = (energy > 3597) & (energy < 3617)
print(emask.value_counts())
# print(energy.max())

# sto = lh5.Store()
# tb, n = sto.read_object(name, fin)