def window_ds(ds, f_tier1): """ Take a DataSet and window it so that the output file only contains events near the calibration peak at 2614.5 keV. """ print("Creating windowed raw file:",f_tier1) f_win = h5py.File(f_tier1, 'w') raw_dir = ds.config["raw_dir"] geds = ds.config["daq_to_raw"]["ch_groups"]["g{ch:0>3d}"]["ch_range"] cols = ['energy','baseline','ievt','numtraces','timestamp','wf_max','wf_std','waveform/values','waveform/dt'] for ged in range(geds[0],geds[1]+1): ged = f"g{ged:0>3d}" count = 0 for p, d, files in os.walk(raw_dir): for f in files: if (f.endswith(".lh5")) & ("calib" in f): print("Opening raw file:",f) f_raw = h5py.File(f"{raw_dir}/{f}",'r') if count == 0: cdate, ctime = f.split('run')[-1].split('-')[1], f.split('run')[-1].split('-')[2] dsets = [ f_raw[ged]['raw'][col][()] for col in cols ] else: for i, col in enumerate(cols): dsets[i] = np.append(dsets[i],f_raw[ged]['raw'][col][()],axis=0) count += 1 # search for 2.6 MeV peak energies = dsets[0] maxe = np.amax(energies) h, b, v = ph.get_hist(energies, bins=3500, range=(maxe/4,maxe)) xp = b[np.where(h > h.max()*0.1)][-1] h, b = h[np.where(b < xp-200)], b[np.where(b < xp-200)] bin_max = b[np.where(h == h.max())][0] min_ene = int(bin_max*0.95) max_ene = int(bin_max*1.05) hist, bins, var = ph.get_hist(energies, bins=500, range=(min_ene, max_ene)) print(ged,"Raw energy max",maxe,"histogram max",h.max(),"at",bin_max ) # windowing for i, col in enumerate(cols): dsets[i] = dsets[i][(energies>min_ene) & (energies<max_ene)] d_dt = f_win.create_dataset(ged+"/raw/"+col,dtype='f',data=dsets[i]) d_dt.attrs['units'] = 'ns' f_win.attrs['datatype'] = 'table{cols}' print("Created datasets",ged+"/raw") f_win.close() print("wrote file:", f_tier1)
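# --- illustrative sketch, not part of the original script ---------------------
# Read back a windowed file written by window_ds() and re-plot the energy
# distribution near the 2614.5 keV calibration peak.  Assumes the HDF5 layout
# "gNNN/raw/<column>" created above; the file path and channel name are
# placeholders.
import h5py
import numpy as np
import matplotlib.pyplot as plt

def check_windowed_file(f_win_path, channel="g000"):
    with h5py.File(f_win_path, "r") as f:
        energies = f[f"{channel}/raw/energy"][()]            # windowed energies
        n_wfs = f[f"{channel}/raw/waveform/values"].shape[0]
    print(f"{channel}: {len(energies)} events, {n_wfs} waveforms kept")
    hist, edges = np.histogram(energies, bins=200)
    plt.step(edges[1:], hist, where="pre", color="b")
    plt.xlabel("Uncalibrated energy", ha="right", x=1)
    plt.ylabel("Counts", ha="right", y=1)
    plt.show()

# check_windowed_file("raw_windowed.lh5", channel="g000")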
def show_spectrum(ds, etype="e_ftp"):
    """
    Display the raw spectrum of an (uncalibrated) energy estimator.
    Use it to tune the x-axis range and binning in preparation for the
    first-pass calibration.
    TODO -- it would be neat to use this function to display an estimate for
    the peakdet threshold we need later in the code, based on the number of
    counts in each bin or something ...
    """
    t2df = ds.get_t2df()
    print(t2df.columns)

    # built-in pandas histogram
    # t2df.hist(etype, bins=1000)

    # pygama histogram
    xlo, xhi, xpb = 0, 6000, 10  # gamma spectrum
    hE, xE = ph.get_hist(t2df[etype], range=(xlo, xhi), dx=xpb)

    plt.semilogy(xE, hE, ls='steps', lw=1, c='r')
    plt.xlabel("Energy (uncal.)", ha='right', x=1)
    plt.ylabel("Counts", ha='right', y=1)
    plt.show()
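# --- illustrative sketch of the TODO above -------------------------------------
# One possible heuristic for the peakdet threshold mentioned in the docstring:
# take a fraction of the tallest bin above a low-energy cut.  This is only an
# assumption about what "based on the number of counts in each bin" could mean,
# not a method used elsewhere in this code.
import numpy as np

def guess_peakdet_thresh(energies, xlo=0, xhi=6000, xpb=10, lowe_cut=500, frac=0.5):
    nb = int((xhi - xlo) / xpb)
    hist, edges = np.histogram(energies, bins=nb, range=(xlo, xhi))
    ctrs = (edges[:-1] + edges[1:]) / 2
    hmax = hist[ctrs > lowe_cut].max()      # tallest bin above the low-E cut
    return frac * hmax

# thresh = guess_peakdet_thresh(t2df["e_ftp"].values)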
def main():
    """
    An example of loading an LH5 DSP file and converting it to a pandas DataFrame.
    """
    # we will probably make this part simpler in the near future
    f = '/Users/wisecg/Data/lh5/hades_I02160A_r1_191021T162944_th_HS2_top_psa_dsp.lh5'
    sto = lh5.Store()
    groups = sto.ls(f)  # the example file only has one group, 'raw'
    data = sto.read_object('raw', f)
    df_dsp = data.get_dataframe()

    # from here, we can use standard pandas to work with data
    print(df_dsp)

    # one example: create an uncalibrated energy spectrum,
    # using a pygama helper function to get the histogram
    elo, ehi, epb = 0, 100000, 10
    ene_uncal = df_dsp['trapE']
    hist, bins, _ = pgh.get_hist(ene_uncal, range=(elo, ehi), dx=epb)
    bins = bins[1:]  # trim zero bin, not needed with ds='steps'

    plt.semilogy(bins, hist, ds='steps', c='b', label='trapE')
    plt.xlabel('trapE', ha='right', x=1)
    plt.ylabel('Counts', ha='right', y=1)
    plt.legend()
    plt.tight_layout()
    plt.show()
def show_cal_spectrum(): """ """ f_hit = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5' tb_name = 'ORSIS3302DecoderForEnergy/raw' sto = lh5.Store() groups = sto.ls(f_hit) data = sto.read_object(tb_name, f_hit) df_hit = data.get_dataframe() print(df_hit) # energy in keV elo, ehi, epb = 0, 3000, 0.5 # choose energy estimator etype = 'energy_cal' # etype = 'trapE_cal' hist, bins, _ = pgh.get_hist(df_hit[etype], range=(elo, ehi), dx=epb) bins = bins[1:] # trim zero bin, not needed with ds='steps' plt.plot(bins, hist, ds='steps', c='b', lw=2, label=etype) plt.xlabel(etype, ha='right', x=1) plt.ylabel('Counts', ha='right', y=1) plt.legend() plt.tight_layout() plt.show()
def show_raw_spectrum(): """ show spectrum w/ onbd energy and trapE - get calibration constants for onbd energy and 'trapE' energy - TODO: fit each expected peak and get resolution vs energy """ f_dsp = '/Users/wisecg/Data/OPPI/dsp/oppi_run0_cyc2027_dsp_test.lh5' # we will probably make this part simpler in the near future sto = lh5.Store() groups = sto.ls(f_dsp) data = sto.read_object('ORSIS3302DecoderForEnergy/raw', f_dsp) df_dsp = data.get_dataframe() # from here, we can use standard pandas to work with data print(df_dsp) # elo, ehi, epb, etype = 0, 8e6, 1000, 'energy' # elo, ehi, epb, etype = 0, 8e6, 1000, 'energy' # whole spectrum # elo, ehi, epb, etype = 0, 800000, 1000, 'energy' # < 250 keV elo, ehi, epb, etype = 0, 10000, 10, 'trapE' ene_uncal = df_dsp[etype] hist, bins, _ = pgh.get_hist(ene_uncal, range=(elo, ehi), dx=epb) bins = bins[1:] # trim zero bin, not needed with ds='steps' plt.plot(bins, hist, ds='steps', c='b', lw=2, label=etype) plt.xlabel(etype, ha='right', x=1) plt.ylabel('Counts', ha='right', y=1) plt.legend() plt.tight_layout() plt.show()
def show_cal_spectrum(dg): """ apply calibration to dsp file """ # get file list and load energy data (numpy array) lh5_dir = os.path.expandvars(dg.config['lh5_dir']) dsp_list = lh5_dir + dg.file_keys['dsp_path'] + '/' + dg.file_keys[ 'dsp_file'] edata = lh5.load_nda(dsp_list, ['trapEmax'], 'ORSIS3302DecoderForEnergy/dsp') rt_min = dg.file_keys['runtime'].sum() u_start = dg.file_keys.iloc[0]['startTime'] t_start = pd.to_datetime(u_start, unit='s') # str print('Found energy data:', [(et, len(ev)) for et, ev in edata.items()]) print(f'Runtime (min): {rt_min:.2f}') # load calibration from peakfit cal_db = db.TinyDB(storage=MemoryStorage) with open('ecalDB.json') as f: raw_db = json.load(f) cal_db.storage.write(raw_db) runs = dg.file_keys.run.unique() if len(runs) > 1: print("sorry, I can't do combined runs yet") exit() run = runs[0] tb = cal_db.table("peakfit_trapEmax").all() df_cal = pd.DataFrame(tb) df_cal['run'] = df_cal['run'].astype(int) df_run = df_cal.loc[df_cal.run == run] cal_pars = df_run.iloc[0][['cal0', 'cal1', 'cal2']] # compute calibrated energy pol = np.poly1d(cal_pars) # handy numpy polynomial object cal_data = pol(edata['trapEmax']) elo, ehi, epb, etype = 0, 3000, 1, 'trapEmax_cal' # gamma region elo, ehi, epb, etype = 2500, 8000, 10, 'trapEmax_cal' # overflow region # elo, ehi, epb, etype = 0, 250, 1, 'trapEmax_cal' # low-e region hist, bins, _ = pgh.get_hist(cal_data, range=(elo, ehi), dx=epb) # normalize by runtime hist_rt = np.divide(hist, rt_min * 60) plt.plot(np.nan, np.nan, '-w', lw=1, label=f'start: {t_start}') plt.plot(bins[1:], hist_rt, ds='steps', c='b', lw=1, label=f'{etype}, {rt_min:.2f} mins') plt.xlabel(etype, ha='right', x=1) plt.ylabel('cts / sec', ha='right', y=1) plt.legend(loc=1, fontsize=12) plt.tight_layout() plt.savefig('./plots/CalSpectrum.png')
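# --- illustrative sketch of the calibration step used above --------------------
# np.poly1d treats the first coefficient as the highest power, i.e.
# np.poly1d([c2, c1, c0]) gives E_cal = c2*x**2 + c1*x + c0, so the ordering of
# constants pulled from the DB has to match.  The numbers below are placeholders,
# not real values from ecalDB.json.
import numpy as np

cal_pars = [1.2e-9, 0.40, 0.05]            # [c2, c1, c0], placeholders
pol = np.poly1d(cal_pars)
trapEmax_raw = np.array([3650.0, 6540.0, 1.1e4])
print(pol(trapEmax_raw))                   # calibrated energies (keV)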
def resolution(par, e_array, peaks, initial_guesses, degree): params = initial_guesses ecal = np.zeros((1, len(e_array))) for i in range(len(par)): ecal += e_array**degree * par[i] degree -= 1 resolution = np.array([]) res_error = np.array([]) for i, pk in enumerate(peaks): h, e_range, var = ph.get_hist(ecal, range=[ pk - (1.2 * params[i][2] * 2.355), pk + (1.2 * params[i][2] * 2.355) ], dx=.5) i_max = np.argmax(h) h_max = h[i_max] amp = h_max * params[i][2] * 2.355 # hstep = 0.01 # fraction that the step contributes # htail = 0.1 # tau = 10 # bg0 = params[i][4] + params[i][3]*e_range[0] # x0 = [params[i][0], params[i][2], hstep, htail, tau, bg0, amp] # radford_par, radford_cov = pf.fit_hist(pf.radford_peak, h, e_range, var=np.sqrt(h), guess=x0) # radford_err = np.sqrt(np.diag(radford_cov)) # fit_func = pf.radford_peak p0 = [e_range[i_max], h_max, params[i][2], e_range[2]] par1, pcov = curve_fit( gauss, e_range[1:], h, p0=p0) #, sigma = np.sqrt(h), absolute_sigma=True) perr = np.sqrt(np.diag(pcov)) # plt.plot(e_range[1:], h, ls='steps', lw=1, c='r') # plt.plot(e_range[1:], gauss(e_range[1:], *par1)) # plt.show() resolution = np.append(resolution, par1[2] * 2.355) res_error = np.append(res_error, perr[2] * 2.355) # exit() plt.errorbar(peaks, resolution, yerr=res_error, ls='none', capsize=5, marker=".", ms=10) plt.title("Resolution vs E") plt.xlabel("keV") plt.ylabel("FWHM") # plt.show() plt.savefig('e_resolution.png')
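# --- illustrative sketch: fit the resolution curve from the arrays above -------
# FWHM(E) = sqrt(A_noise^2 + A_fano^2 * E + A_qcol^2 * E^2), the same functional
# form used in the peakfit routines below (Eq. 3 of arXiv:1902.02299).  Assumes
# the 'peaks', 'resolution' and 'res_error' arrays built in resolution() above.
import numpy as np
from scipy.optimize import curve_fit

def sqrt_fwhm(x, a_n, a_f, a_c):
    return np.sqrt(a_n**2 + a_f**2 * x + a_c**2 * x**2)

def fit_resolution_curve(peaks, fwhm, fwhm_err):
    p0 = [0.3, 0.05, 0.001]
    pars, cov = curve_fit(sqrt_fwhm, peaks, fwhm, p0=p0,
                          sigma=fwhm_err, absolute_sigma=True)
    return pars, np.sqrt(np.diag(cov))

# pars, errs = fit_resolution_curve(peaks, resolution, res_error)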
def histo_data(array, elo, ehi, epb):
    """
    Return the histogram of an array: counts, bin edges, and variances.
    """
    hE, xE, var = ph.get_hist(array, range=[elo, ehi], dx=epb)
    return hE, xE, var
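# --- illustrative sketch: numpy-only stand-in for histo_data -------------------
# For contexts without pygama, this returns counts, bin edges and Poisson
# variances, analogous to the (hE, xE, var) triple that ph.get_hist returns above.
import numpy as np

def histo_data_np(array, elo, ehi, epb):
    nbins = int((ehi - elo) / epb)
    hE, xE = np.histogram(array, bins=nbins, range=(elo, ehi))
    var = hE.astype(float)       # Poisson: variance = counts
    return hE, xE, var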
def window_ds(): """ Take a single DataSet and window it so that the file only contains events near an expected peak location. Create some temporary in/out files s/t the originals aren't overwritten. """ # run = 42 # ds = DataSet(run=run, md="runDB.json") ds_num = 3 ds = DataSet(ds_num, md="runDB.json") # specify temporary I/O locations p_tmp = "~/Data/cage" f_tier1 = "~/Data/cage/cage_ds3_t1.h5" f_tier2 = "~/Data/cage/cage_ds3_t2.h5" # figure out the uncalibrated energy range of the K40 peak # xlo, xhi, xpb = 0, 2e6, 2000 # show phys. spectrum (top feature is 2615 pk) xlo, xhi, xpb = 990000, 1030000, 250 # k40 peak, ds 3 t2df = ds.get_t2df() hE, xE = ph.get_hist(t2df["energy"], range=(xlo, xhi), dx=xpb) plt.semilogy(xE, hE, ls='steps', lw=1, c='r') import matplotlib.ticker as ticker plt.gca().xaxis.set_major_formatter(ticker.FormatStrFormatter('%0.4e')) plt.locator_params(axis='x', nbins=5) plt.xlabel("Energy (uncal.)", ha='right', x=1) plt.ylabel("Counts", ha='right', y=1) plt.savefig(f"./plots/cage_ds{ds_num}_winK40.pdf") # exit() # write a windowed tier 1 file containing only waveforms near the peak t1df = pd.DataFrame() for run in ds.paths: ft1 = ds.paths[run]["t1_path"] print(f"Scanning ds {ds_num}, run {run}\n file: {ft1}") for chunk in pd.read_hdf(ft1, 'ORSIS3302DecoderForEnergy', chunksize=5e4): t1df_win = chunk.loc[(chunk.energy > xlo) & (chunk.energy < xhi)] print(t1df_win.shape) t1df = pd.concat([t1df, t1df_win], ignore_index=True) # -- save to HDF5 output file -- h5_opts = { "mode":"w", # overwrite existing "append":False, "format":"table", "complib":"blosc:zlib", "complevel":1, "data_columns":["ievt"] } t1df.reset_index(inplace=True) t1df.to_hdf(f_tier1, key="df_windowed", **h5_opts) print("wrote file:", f_tier1)
def mode_hist(df, param, a_bins=1000, alo=0.005, ahi=0.075, cut=False, cut_str=''):
    """
    Get the mode of a section of a histogram.
    Default parameters are based on typical A/E values.
    """
    if cut:
        print(f'Using cut before finding mode: {cut_str}')
        df_plot = df.query(cut_str)
    else:
        df_plot = df

    hist, bins, vars = pgh.get_hist(df_plot[param], bins=a_bins, range=[alo, ahi])
    pars, cov = pgf.gauss_mode_width_max(hist, bins, vars)
    mode = pars[0]
    return mode
def check_raw_spectrum(dg, config, db_ecal): """ $ ./energy_cal.py -q 'query' --raw """ # load energy data dsp_list = config['lh5_dir'] + dg.fileDB['dsp_path'] + '/' + dg.fileDB[ 'dsp_file'] raw_data = lh5.load_nda(dsp_list, config['rawe'], config['input_table'], verbose=False) runtime_min = dg.fileDB['runtime'].sum() print('\nShowing raw spectra ...') for etype in config['rawe']: xlo, xhi, xpb = config['init_vals'][etype]["raw_range"] # load energy data for this estimator data = raw_data[etype] # print columns of table file_info = db_ecal.table('_file_info').all()[0] tb_in = file_info['input_table'] with h5py.File(dsp_list.iloc[0], 'r') as hf: print("LH5 columns:", list(hf[f'{tb_in}'].keys())) # generate histogram hist, bins, var = pgh.get_hist(data, range=(xlo, xhi), dx=xpb) bins = bins[1:] # trim zero bin, not needed with ds='steps' # normalize by runtime hist_rt = np.divide(hist, runtime_min * 60) print( '\nPlease determine the following parameters for ecal config file:\n' " - 'raw_range': Optimal binning, and hi/lo raw energy limits\n" " - 'peakdet_thresh': ~1/2 the height of a target peak\n" " - 'lowe_cut' energy threshold for peak detection") print( f'\nRaw E: {etype}, {len(data)} cts, runtime: {runtime_min:.2f} min' ) plt.semilogy(bins, hist_rt, ds='steps', c='b', lw=1, label=etype) plt.xlabel(etype, ha='right', x=1) plt.ylabel(f'cts/sec, {xpb}/bin', ha='right', y=1) if config['batch_mode']: plt.savefig('./plots/energy_cal/cal_spec_test.png') else: plt.show() plt.close()
def peak_drift(dg): """ show any drift of the 1460 peak (5 minute bins) """ cols = ['trapEmax', 'ts_glo'] lh5_dir = os.path.expandvars(dg.config['lh5_dir']) hit_list = lh5_dir + dg.fileDB['hit_path'] + '/' + dg.fileDB['hit_file'] df_hit = lh5.load_dfs(hit_list, cols, 'ORSIS3302DecoderForEnergy/hit') df_hit.reset_index(inplace=True) rt_min = dg.fileDB['runtime'].sum() print(f'runtime: {rt_min:.2f} min') # settings # use uncalibrated energy elo, ehi, epb, etype = 3400, 3800, 1, 'trapEmax' df_hit = df_hit.query(f'trapEmax > {elo} and trapEmax < {ehi}').copy() # use calibrated energy (hit file) # elo, ehi, epb, etype = 1450, 1470, 1, 'trapEmax_cal' # df_hit = df_hit.query(f'trapEmax_cal > {elo} and trapEmax_cal < {ehi}').copy() # # diagnostic plot hE, xE, vE = pgh.get_hist(df_hit[etype], range=(elo, ehi), dx=epb) plt.plot(xE[1:], hE, c='b', ds='steps', lw=1) # plt.show() plt.savefig('./plots/oppi_1460_hist.pdf') plt.cla() t0 = df_hit['ts_glo'].values[0] df_hit['ts_adj'] = (df_hit['ts_glo'] - t0) / 60 # minutes after 0 tlo, thi, tpb = 0, df_hit['ts_adj'].max(), 1 nbx = int((thi - tlo) / tpb) nby = int((ehi - elo) / epb) h = plt.hist2d(df_hit['ts_adj'], df_hit['trapEmax'], bins=[nbx, nby], range=[[tlo, thi], [elo, ehi]], cmap='jet') plt.xlabel(f'Time ({tpb:.1f} min/bin)', ha='right', x=1) plt.ylabel('trapEmax', ha='right', y=1) plt.tight_layout() # plt.show() plt.savefig('./plots/oppi_1460_drift.png', dpi=300)
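# --- illustrative sketch: track the peak position vs. time ---------------------
# One way to quantify the drift shown in the 2D histogram above: slice the data
# into time bins and take the mode (tallest energy bin) of the 1460 keV region in
# each slice.  Assumes a df_hit with 'ts_adj' (minutes) and 'trapEmax' as built
# in peak_drift(); the slice width is a placeholder.
import numpy as np

def peak_position_vs_time(df_hit, elo=3400, ehi=3800, epb=1, t_slice=5):
    t_edges = np.arange(0, df_hit['ts_adj'].max() + t_slice, t_slice)
    centroids = []
    for t0, t1 in zip(t_edges[:-1], t_edges[1:]):
        sel = df_hit.query(f'{t0} <= ts_adj < {t1}')['trapEmax']
        if len(sel) == 0:
            centroids.append(np.nan)
            continue
        h, edges = np.histogram(sel, bins=int((ehi - elo) / epb), range=(elo, ehi))
        centroids.append(edges[np.argmax(h)])
    return t_edges[:-1], np.array(centroids)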
def show_raw_spectrum(dg): """ show spectrum w/ onbd energy and trapE - get calibration constants for onbd energy and 'trapE' energy - TODO: fit each expected peak and get resolution vs energy """ # get file list and load energy data (numpy array) # lh5_dir = os.path.expandvars(dg.config['lh5_dir']) lh5_dir = dg.lh5_dir dsp_list = lh5_dir + dg.fileDB['dsp_path'] + '/' + dg.fileDB['dsp_file'] edata = lh5.load_nda(dsp_list, ['trapEmax'], 'ORSIS3302DecoderForEnergy/dsp') rt_min = dg.fileDB['runtime'].sum() u_start = dg.fileDB.iloc[0]['startTime'] t_start = pd.to_datetime(u_start, unit='s') # str print('Found energy data:', [(et, len(ev)) for et, ev in edata.items()]) print(f'Runtime (min): {rt_min:.2f}') elo, ehi, epb, etype = 6000, 8000, 10, 'trapEmax' ene_uncal = edata[etype] hist, bins, _ = pgh.get_hist(ene_uncal, range=(elo, ehi), dx=epb) # normalize by runtime hist_rt = np.divide(hist, rt_min * 60) plt.plot(np.nan, np.nan, '-w', lw=1, label=t_start) plt.semilogy(bins[1:], hist_rt, ds='steps', c='b', lw=1, label=f'{etype}, {rt_min:.2f} mins') plt.xlabel(etype, ha='right', x=1) plt.ylabel('cts / sec', ha='right', y=1) plt.legend() plt.tight_layout() # plt.show() plt.savefig('./plots/normScan/e_zoom.png', dpi=200)
def tier2_spec(): """ show a few examples of energy spectra (onboard E and offline E) """ run = 42 ds = DataSet(run=run, md="runDB.json") t2df = ds.get_t2df() # print(t2df.columns) # onboard E ene = "energy" # xlo, xhi, xpb = 0, 20e6, 5000 # show muon peak (full dyn. range) xlo, xhi, xpb = 0, 2e6, 2000 # show phys. spectrum (top feature is 2615 pk) # # trap_max E # ene = "etrap_max" # xlo, xhi, xpb = 0, 50000, 100 # muon peak # xlo, xhi, xpb = 0, 6000, 10 # gamma spectrum # # fixed time pickoff E # ene = "e_ftp" # # xlo, xhi, xpb = 0, 50000, 100 # muon peak # xlo, xhi, xpb = 0, 6000, 10 # gamma spectrum # get histogram hE, xE = ph.get_hist(t2df[ene], range=(xlo, xhi), dx=xpb) # make the plot plt.semilogy(xE, hE, ls='steps', lw=1, c='r', label=f'run {run}') plt.xlabel("Energy (uncal.)", ha='right', x=1) plt.ylabel("Counts", ha='right', y=1) # show a couple formatting tricks import matplotlib.ticker as ticker plt.gca().xaxis.set_major_formatter(ticker.FormatStrFormatter('%0.1e')) plt.locator_params(axis='x', nbins=5) plt.grid(linestyle=':') plt.legend() # plt.show() plt.savefig(f"./plots/cage_run{run}_{ene}.pdf")
def show_calspectrum(ds, df, fdb, run, etype="e_ftp", p1=True, p2=False): """ display the linearly calibrated energy spectrum """ calDB = db.TinyDB(fdb) query = db.Query() table = calDB.table("cal_pass1") vals1 = table.all() if (p2): table = calDB.table("cal_pass2") vals2 = table.all() print("CalVals=", vals1) energy = df[etype] * vals1[0]['p1cal'] if (p2): energy = energy / vals2[0]['p2acal'] - vals2[0]['p2bcal'] hist, bins, var = pgh.get_hist(energy, range=[0, 4000], dx=1) plt.plot(hist) plt.yscale('log') plt.savefig(path_to_files + 'plots/calEnergy_spectrum_' + str(run) + '.png', bbox_inches='tight', transparent=True)
def ct_corr_E_var(tb_out, verbosity): EE = tb_out['trapEftp'].nda cc = tb_out['ct_corr'].nda / EE # bad gretina waveforms: need to cull them # first remove crazy ct_corr values idx = np.where((cc > 0) & (cc < 1) & (EE > 0)) EE = EE[idx] cc = cc[idx] # now zoom in on energy twice E_ave = np.average(EE) idx = np.where((EE > 0.9 * E_ave) & (EE < 1.1 * E_ave)) EE = EE[idx] cc = cc[idx] E_ave = np.average(EE) idx = np.where((EE > 0.99 * E_ave) & (EE < 1.01 * E_ave)) EE = EE[idx] cc = cc[idx] # now go to +/- 3*sigma # it's non-gaus so this should be wide enough E_ave = np.average(EE) E_3sig = 3. * np.sqrt(np.var(EE)) idx = np.where((EE > E_ave - E_3sig) & (EE < E_ave + E_3sig)) EE = EE[idx] cc = cc[idx] # do a PCA for a first guess E_ave = np.average(EE) c_ave = np.average(cc) pca = PCA(n_components=2) pca.fit(np.vstack((EE - E_ave, cc - c_ave)).T) i_max = np.argmax(pca.explained_variance_) dE, dc = pca.components_[i_max] EEc = EE - dE / dc * cc # now cut in EEc Ec_ave = np.average(EEc) Ec_3sig = 3. * np.sqrt(np.var(EEc)) idx = np.where((EEc > Ec_ave - Ec_3sig) & (EEc < Ec_ave + Ec_3sig)) EE = EE[idx] cc = cc[idx] # now move to histograms, and vary dE until the peak is sharpest dEs = np.linspace(0, 2, 21) * dE bins = 100 hrange = (Ec_ave - 3 * Ec_3sig, Ec_ave + 3 * Ec_3sig) max_height = 0 max_dE = 0 for dE_i in dEs: hist, bins, var = pgh.get_hist(EE - dE_i / dc * cc, bins, hrange) height = np.amax(hist) if height > max_height: max_dE = dE_i max_height = height EEc = EE - max_dE / dc * cc Ec_ave = np.average(EEc) Ec_sig = np.sqrt(np.var(EEc)) if verbosity > 0: print(f'var: {E_3sig} -> {3*Ec_sig}') return Ec_sig / Ec_ave
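# --- illustrative sketch: the PCA-based slope guess used above -----------------
# Isolates the first-guess step of ct_corr_E_var: fit a 2-component PCA to the
# (energy, charge-trapping) scatter and use the dominant component's direction to
# flatten the correlation.  The arrays here are toy data, not detector data.
import numpy as np
from sklearn.decomposition import PCA

rng = np.random.default_rng(0)
cc = rng.normal(0, 0.5, 5000)                    # toy charge-trapping variable
EE = 1000 + 50 * cc + rng.normal(0, 2, 5000)     # energy correlated with cc

pca = PCA(n_components=2)
pca.fit(np.vstack((EE - EE.mean(), cc - cc.mean())).T)
dE, dc = pca.components_[np.argmax(pca.explained_variance_)]
EE_corr = EE - dE / dc * cc                      # remove the correlated spread
print(f'std before: {np.std(EE):.2f}  after: {np.std(EE_corr):.2f}')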
def run_dsp(dfrow): """ run dsp on the test file, editing the processor list alternate idea: generate a long list of processors with different names """ # adjust dsp config dictionary rise, flat = dfrow # dsp_config['processors']['wf_pz']['defaults']['db.pz.tau'] = f'{tau}*us' dsp_config['processors']['wf_trap']['args'][1] = f'{rise}*us' dsp_config['processors']['wf_trap']['args'][2] = f'{flat}*us' # pprint(dsp_config) # run dsp pc, tb_out = build_processing_chain(tb_data, dsp_config, verbosity=0) pc.execute() # analyze peak e_peak = 1460. etype = 'trapEmax' elo, ehi, epb = 4000, 4500, 3 # the peak moves around a bunch energy = tb_out[etype].nda # get histogram hE, bins, vE = pgh.get_hist(energy, range=(elo, ehi), dx=epb) xE = bins[1:] # should I center the max at 1460? # simple numerical width i_max = np.argmax(hE) h_max = hE[i_max] upr_half = xE[(xE > xE[i_max]) & (hE <= h_max / 2)][0] bot_half = xE[(xE < xE[i_max]) & (hE >= h_max / 2)][0] fwhm = upr_half - bot_half sig = fwhm / 2.355 # fit to gaussian: amp, mu, sig, bkg fit_func = pgf.gauss_bkg amp = h_max * fwhm bg0 = np.mean(hE[:20]) x0 = [amp, xE[i_max], sig, bg0] xF, xF_cov = pgf.fit_hist(fit_func, hE, bins, var=vE, guess=x0) # collect results e_fit = xF[0] xF_err = np.sqrt(np.diag(xF_cov)) e_err = xF fwhm_fit = xF[1] * 2.355 * 1460. / e_fit fwhm_err = xF_err[2] * 2.355 * 1460. / e_fit chisq = [] for i, h in enumerate(hE): model = fit_func(xE[i], *xF) diff = (model - h)**2 / model chisq.append(abs(diff)) rchisq = sum(np.array(chisq) / len(hE)) fwhm_ovr_mean = fwhm_fit / e_fit if show_movie: plt.plot(xE, hE, ds='steps', c='b', lw=2, label=f'{etype} {rise}--{flat}') # peak shape plt.plot(xE, fit_func(xE, *x0), '-', c='orange', alpha=0.5, label='init. guess') plt.plot(xE, fit_func(xE, *xF), '-r', alpha=0.8, label='peakshape fit') plt.plot(np.nan, np.nan, '-w', label=f'mu={e_fit:.1f}, fwhm={fwhm_fit:.2f}') plt.xlabel(etype, ha='right', x=1) plt.ylabel('Counts', ha='right', y=1) plt.legend(loc=2) # show a little movie plt.show(block=False) plt.pause(0.01) plt.cla() # return results return pd.Series({ 'e_fit': e_fit, 'fwhm_fit': fwhm_fit, 'rchisq': rchisq, 'fwhm_err': xF_err[0], 'fwhm_ovr_mean': fwhm_ovr_mean })
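# --- illustrative sketch: drive run_dsp over a (rise, flat) grid ----------------
# run_dsp() returns a pd.Series, so it maps naturally onto a DataFrame of settings
# via apply().  Assumes run_dsp and its globals (tb_data, dsp_config, show_movie)
# are set up as above; the grid values below are placeholders.
from itertools import product
import pandas as pd

rise_times = [2, 4, 6, 8, 10]       # us, placeholders
flat_times = [1.5, 2.5, 3.5]        # us, placeholders

df_grid = pd.DataFrame(list(product(rise_times, flat_times)), columns=['rise', 'flat'])
# df_results = df_grid.apply(run_dsp, axis=1)
# df_out = pd.concat([df_grid, df_results], axis=1)
# print(df_out.sort_values('fwhm_ovr_mean').head())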
def plot_fwhm(f_grid,f_opt,d_out,efilter, verbose=False): """ select the best energy resolution, plot best result fit and fwhm vs parameters """ print("Grid file:",f_grid) df_grid = pd.read_hdf(f_grid) f_res = f"{d_out}/{efilter}_results.h5" if 'trapE' in efilter: df = pd.DataFrame(columns=['ged','rise','flat','rc','fwhm','fwhmerr']) if efilter == 'zacE' or efilter == 'cuspE': df = pd.DataFrame(columns=['ged','sigma','flat','decay','fwhm','fwhmerr']) f = h5py.File(f_opt,'r') for chn, ged in enumerate(f.keys()): d_det = f"{d_out}/{ged}" try: os.mkdir(d_det) except: pass d_det = f"{d_det}/{efilter}" try: os.mkdir(d_det) except: pass data = f[ged]['data'] try: # find fwhm minimum values df_grid = df_grid.loc[(df_grid[f"rchi2_{ged}"]<100)&(df_grid[f"fwhm_{ged}"]>0)] minidx = df_grid[f'fwhm_{ged}'].idxmin() df_min = df_grid.loc[minidx] #plot best result fit energies = data[f"{efilter}_{minidx}"][()] mean = np.mean(energies) bins = 12000 hE, xE, vE = ph.get_hist(energies,bins,(mean/2,mean*2)) mu = xE[np.argmax(hE)] hmax = hE[np.argmax(hE)] idx = np.where(hE > hmax/2) ilo, ihi = idx[0][0], idx[0][-1] sig = (xE[ihi] - xE[ilo]) / 2.355 idx = np.where(((xE-mu) > -8 * sig) & ((xE-mu) < 8 * sig)) ilo, ihi = idx[0][0], idx[0][-1] xE, hE, vE = xE[ilo:ihi+1], hE[ilo:ihi], vE[ilo:ihi] x0 = [hmax, mu, sig, 1, 0] xF, xF_cov = pf.fit_hist(pf.gauss_step, hE, xE, var=vE, guess=x0) xF_err = np.sqrt(np.diag(xF_cov)) fwhm = xF[2] * 2.355 * 2614.5 / mu fwhmerr = xF_err[2] * 2.355 * 2614.5 / mu plt.plot(xE, pf.gauss_step(xE, *xF), c='r', label='peakshape') gaus, step = pf.gauss_step(xE, *xF, components=True) gaus = np.array(gaus) step = np.array(step) plt.plot(xE, gaus, ls="--", lw=2, c='g', label="gaus") plt.plot(xE, step, ls='--', lw=2, c='m', label='step + bg') plt.plot(xE[1:], hE, lw=1, c='b', label=f"data {ged}") plt.xlabel(f"ADC channels", ha='right', x=1) plt.ylabel("Counts", ha='right', y=1) plt.legend(loc=2, fontsize=10,title=f"FWHM = {fwhm:.2f} $\pm$ {fwhmerr:.2f} keV") plt.savefig(f"{d_det}/Fit_{ged}-{efilter}.pdf") plt.cla() except: print("FWHM minimum not find for detector",ged) continue if efilter=='zacE' or efilter=='cuspE': #try: sigma, flat, decay = df_min[:3] results = [ged, f'{sigma:.2f}', f'{flat:.2f}', f'{decay:.2f}', f'{fwhm:.2f}', f'{fwhmerr:.2f}'] # 1. vary the sigma cusp df_sigma = df_grid.loc[(df_grid.flat==flat)&(df_grid.decay==decay)&(df_grid.decay==decay)] x, y, err = df_sigma['sigma'], df_sigma[f'fwhm_{ged}'], df_sigma[f'fwhmerr_{ged}'] plt.errorbar(x,y,err,fmt='o') plt.xlabel("Sigma Cusp ($\mu$s)", ha='right', x=1) plt.ylabel(r"FWHM (keV)", ha='right', y=1) plt.savefig(f"{d_det}/FWHM_vs_Sigma_{ged}-{efilter}.pdf") plt.cla() # 2. vary the flat time df_flat = df_grid.loc[(df_grid.sigma==sigma)&(df_grid.decay==decay)] x, y, err = df_flat['flat'], df_flat[f'fwhm_{ged}'], df_flat[f'fwhmerr_{ged}'] plt.errorbar(x,y,err,fmt='o') plt.xlabel("Flat Top ($\mu$s)", ha='right', x=1) plt.ylabel("FWHM (keV)", ha='right', y=1) plt.savefig(f"{d_det}/FWHM_vs_Flat_{ged}-{efilter}.pdf") plt.cla() # 3. 
vary the rc constant df_decay = df_grid.loc[(df_grid.sigma==sigma)&(df_grid.flat==flat)] x, y, err = df_decay[f'decay'], df_decay[f'fwhm_{ged}'], df_decay[f'fwhmerr_{ged}'] plt.errorbar(x,y,err,fmt='o') plt.xlabel("Decay constant ($\mu$s)", ha='right', x=1) plt.ylabel(r"FWHM (keV)", ha='right', y=1) plt.savefig(f"{d_det}/FWHM_vs_Decay_{ged}-{efilter}.pdf") plt.cla() #except: #print("") if 'trapE' in efilter: rise, flat, rc = df_min[:3] results = [ged, f'{rise:.2f}', f'{flat:.2f}', f'{rc:.2f}', f'{fwhm:.2f}', f'{fwhmerr:.2f}'] # 1. vary the rise time df_rise = df_grid.loc[(df_grid.flat==flat)&(df_grid.rc==rc)] x, y, err = df_rise['rise'], df_rise[f'fwhm_{ged}'], df_rise[f'fwhmerr_{ged}'] #plt.plot(x,y,".b") plt.errorbar(x,y,err,fmt='o') plt.xlabel("Ramp time ($\mu$s)", ha='right', x=1) plt.ylabel(r"FWHM (kev)", ha='right', y=1) # plt.ylabel(r"FWHM", ha='right', y=1) plt.savefig(f"{d_det}/FWHM_vs_Rise_{ged}-{efilter}.pdf") plt.cla() # 2. vary the flat time df_flat = df_grid.loc[(df_grid.rise==rise)&(df_grid.rc==rc)] x, y, err = df_flat['flat'], df_flat[f'fwhm_{ged}'], df_flat[f'fwhmerr_{ged}'] #plt.plot(x,y,'.b') plt.errorbar(x,y,err,fmt='o') plt.xlabel("Flat time ($\mu$s)", ha='right', x=1) plt.ylabel("FWHM (keV)", ha='right', y=1) plt.savefig(f"{d_det}/FWHM_vs_Flat_{ged}-{efilter}.pdf") plt.cla() # 3. vary the rc constant df_rc = df_grid.loc[(df_grid.rise==rise)&(df_grid.flat==flat)] x, y, err = df_rc['rc'], df_rc[f'fwhm_{ged}'], df_rc[f'fwhmerr_{ged}'] #plt.plot(x,y,'.b') plt.errorbar(x,y,err,fmt='o') plt.xlabel("RC constant ($\mu$s)", ha='right', x=1) plt.ylabel(r"FWHM (keV)", ha='right', y=1) plt.savefig(f"{d_det}/FWHM_vs_RC_{ged}-{efilter}.pdf") plt.cla() df.loc[chn] = results print("Results file:",f_res) df.to_hdf(f_res, key='results',mode='w') print(df) dets = range(len(df['fwhm'])) fwhm = np.array([float(df['fwhm'][i]) for i in dets]) fwhm_err = np.array([float(df['fwhmerr'][i]) for i in dets]) plt.cla() plt.errorbar(dets,fwhm,fwhm_err,fmt='o',c='red',label=f'{efilter} filter') plt.xlabel("detector number", ha='right', x=1) plt.ylabel("FWHM (keV)", ha='right', y=1) plt.legend() plt.savefig(f"{d_out}/FWHM_{efilter}.pdf")
def peakCounts_60(run, campaign, df, runtype, rt_min, radius, angle_det, rotary, energy_par='trapEftp_cal', bins=50, erange=[54, 65], bkg_sub=True, plot=False, writeParams=False): """ Get the number of counts in the 60 keV peak, make plots. Can be sideband-subtracted or raw. Taken partially from cage_utils.py, adapted to be specific for 60 keV analysis """ if len(erange) < 2: print('Must specify an energy range for the fit!') exit() # First use gauss_mode_width_max to use for initial guesses in fit_hist ehist, ebins, evars = pgh.get_hist(df[energy_par], bins=bins, range=erange) pars, cov = pgf.gauss_mode_width_max(ehist, ebins, evars) mode = pars[0] width = pars[1] amp = pars[2] print(f'Guess: {pars}') # print(f'mode: {mode}') # print(f'width: {width}') # print(f'amp: {amp}') e_pars, ecov = pgf.fit_hist(cage_utils.gauss_fit_func, ehist, ebins, evars, guess=(amp, mode, width, 1)) chi_2 = pgf.goodness_of_fit(ehist, ebins, cage_utils.gauss_fit_func, e_pars) mean = e_pars[1] mean_err = ecov[1] sig = e_pars[2] sig_err = ecov[2] en_amp_fit = e_pars[0] en_const_fit = e_pars[3] fwhm = sig * 2.355 print(f'chi square: {chi_2}') print(f'mean: {mean}') print(f'width: {sig}') print(f'amp: {en_amp_fit}') print(f'C: {en_const_fit}') print(f'FWHM: {fwhm} \n{(fwhm/mean)*100}%') cut_3sig = f'({mean-3*sig} <= {energy_par} <= {mean+3*sig})' counts_peak = len(df.query(cut_3sig).copy()) err_peak = np.sqrt(counts_peak) print(f'peak counts: {counts_peak}') print(f'error: {err_peak}') if plot == True: fig, ax = plt.subplots() plt.plot(ebins[1:], cage_utils.gauss_fit_func(ebins[1:], *e_pars), c='r', lw=0.8, label='gaussian fit') plt.plot(ebins[1:], ehist, ds='steps', c='b', lw=1.) plt.axvline(mean - 3 * sig, c='g', lw=1, label='Peak region (3 sigma)') plt.axvline(mean + 3 * sig, c='g', lw=1) plt.xlabel('Energy (keV)', fontsize=14) plt.ylabel('counts', fontsize=14) plt.title(f'60 keV peak with gaussian fit', fontsize=14) plt.setp(ax.get_xticklabels(), fontsize=12) plt.setp(ax.get_yticklabels(), fontsize=12) ax.text( 0.03, 0.8, f'r = {radius} mm \ntheta = {angle_det} deg \nruntime {rt_min:.2f}', verticalalignment='bottom', horizontalalignment='left', transform=ax.transAxes, color='black', fontsize=10, bbox={ 'facecolor': 'white', 'alpha': 0.8, 'pad': 8 }) ax.text( 0.95, 0.8, f'mean: {mean:.2f} \nsigma: {sig:.3f} \nchi square: {chi_2:.2f}', verticalalignment='bottom', horizontalalignment='right', transform=ax.transAxes, color='black', fontsize=10, bbox={ 'facecolor': 'white', 'alpha': 0.8, 'pad': 8 }) plt.legend(loc='center right') plt.tight_layout() plt.savefig(f'./plots/{campaign}60keV_analysis/run{run}_fit_60keV.png', dpi=200) plt.clf() plt.close() if bkg_sub == True: bkg_left_min = mean - 7. * sig bkg_left_max = mean - 4 * sig bkg_right_min = mean + 4 * sig bkg_right_max = mean + 7. 
* sig bkg_left = f'({bkg_left_min} <= {energy_par} < {bkg_left_max})' bkg_right = f'({bkg_right_min} < {energy_par} <= {bkg_right_max})' bkg = f'{bkg_left} or {bkg_right}' left_counts = len(df.query(bkg_left).copy()) right_counts = len(df.query(bkg_right).copy()) total_bkg = left_counts + right_counts err_bkg = np.sqrt(total_bkg) bkg_sub_counts = counts_peak - total_bkg err = np.sqrt(counts_peak + total_bkg) print(f'peak counts: {counts_peak}') print(f'bkg left: {left_counts}') print(f'bkg right: {right_counts}') print(f'total bkg: {total_bkg}') print(f'bkg_subtracted counts: {bkg_sub_counts}') print(f'error: {err}') print(f'{(err/bkg_sub_counts)*100:.3f}%') if plot == True: fig, ax = plt.subplots() full_hist, full_bins, full_evars = pgh.get_hist( df[{energy_par}], bins=bins, range=[mean - 9. * sig, mean + 9. * sig]) plt.plot(full_bins[1:], full_hist, ds='steps', c='b', lw=1) # plt.axvline(mean-3*sig, c='g', lw=1, label ='Peak region') # plt.axvline(mean+3*sig, c='g', lw=1) ax.axvspan(mean - 3 * sig, mean + 3 * sig, alpha=0.1, color='g', label='peak region (3 sigma)') # plt.axvline(bkg_left_min, c='r', lw=1, label='Background region') # plt.axvline(bkg_left_max, c='r', lw=1) # plt.axvline(bkg_right_min, c='r', lw=1) # plt.axvline(bkg_right_max, c='r', lw=1) ax.axvspan(bkg_left_min, bkg_left_max, alpha=0.2, color='r', label='background region (3 sigma)') ax.axvspan(bkg_right_min, bkg_right_max, alpha=0.2, color='r') plt.title('60 keV peak with background subtraction region', fontsize=14) plt.xlabel(f'{energy_par} (keV)', fontsize=14) plt.ylabel('counts', fontsize=14) plt.setp(ax.get_xticklabels(), fontsize=12) plt.setp(ax.get_yticklabels(), fontsize=12) ax.text( 0.03, 0.8, f'r = {radius} mm \ntheta = {angle_det} deg \nruntime {rt_min:.2f}', verticalalignment='bottom', horizontalalignment='left', transform=ax.transAxes, color='black', fontsize=10, bbox={ 'facecolor': 'white', 'alpha': 0.8, 'pad': 8 }) plt.legend(loc='upper right') plt.tight_layout() plt.savefig( f'./plots/{campaign}60keV_analysis/run{run}_bkgRegion_60keV.png', dpi=200) plt.clf() plt.close() # For Joule's 60keV analysis. Generally don't do this if writeParams == True: param_keys = [ 'mean_60', 'sig_60', 'chiSquare_fit_60', 'cut_60_3sig', 'bkg_60_left', 'bkg_60_right', 'bkg_60' ] param_list = [mean, sig, chi_2, cut_3sig, bkg_left, bkg_right, bkg] for key, cut in zip(param_keys, param_list): cage_utils.writeJson('./analysis_60keV.json', run, key, cut) return (bkg_sub_counts, err) else: return (counts_peak, err_peak)
def data_cleaning(): """ using parameters in the hit file, plot 1d and 2d spectra to find cut values. columns in file: ['trapE', 'bl', 'bl_sig', 'A_10', 'AoE', 'packet_id', 'ievt', 'energy', 'energy_first', 'timestamp', 'crate', 'card', 'channel', 'energy_cal', 'trapE_cal'] note, 'energy_first' from first value of energy gate. """ i_plot = 3 # run all plots after this number f_hit = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5' tb_name = 'ORSIS3302DecoderForEnergy/raw' hit_store = lh5.Store() data = hit_store.read_object(tb_name, f_hit) df_hit = data.get_dataframe() # get info about df -- 'describe' is very convenient dsc = df_hit[['bl', 'bl_sig', 'A_10', 'energy_first', 'timestamp']].describe() # print(dsc) # print(dsc.loc['min','bl']) # correct energy_first (inplace) to allow negative values df_hit['energy_first'] = df_hit['energy_first'].astype(np.int64) efirst = df_hit['energy_first'].values idx = np.where(efirst > 4e9) eshift = efirst[idx] - 4294967295 efirst[idx] = eshift # print(df_hit[['energy','energy_first','bl']]) if i_plot <= 0: # bl vs energy elo, ehi, epb = 0, 250, 1 blo, bhi, bpb = 54700, 61400, 100 nbx = int((ehi - elo) / epb) nby = int((bhi - blo) / bpb) h = plt.hist2d(df_hit['trapE_cal'], df_hit['bl'], bins=[nbx, nby], range=[[elo, ehi], [blo, bhi]], cmap='jet') cb = plt.colorbar(h[3], ax=plt.gca()) plt.xlabel('trapE_cal', ha='right', x=1) plt.ylabel('bl', ha='right', y=1) plt.tight_layout() # plt.show() plt.savefig('./plots/bl_vs_e.png', dpi=300) cb.remove() plt.cla() # make a formal baseline cut from 1d histogram hE, bins, vE = pgh.get_hist(df_hit['bl'], range=(blo, bhi), dx=bpb) xE = bins[1:] plt.semilogy(xE, hE, c='b', ds='steps') bl_cut_lo, bl_cut_hi = 57700, 58500 plt.axvline(bl_cut_lo, c='r', lw=1) plt.axvline(bl_cut_hi, c='r', lw=1) plt.xlabel('bl', ha='right', x=1) plt.ylabel('counts', ha='right', y=1) # plt.show() plt.savefig('./plots/bl_cut.pdf') plt.cla() if i_plot <= 1: # energy_first vs. 
E flo, fhi, fpb = -565534, 70000, 1000 elo, ehi, epb = 0, 250, 1 nbx = int((ehi - elo) / epb) nby = int((fhi - flo) / fpb) h = plt.hist2d(df_hit['trapE_cal'], df_hit['energy_first'], bins=[nbx, nby], range=[[elo, ehi], [flo, fhi]], cmap='jet', norm=LogNorm()) cb = plt.colorbar(h[3], ax=plt.gca()) plt.xlabel('trapE_cal', ha='right', x=1) plt.ylabel('energy_first', ha='right', y=1) plt.tight_layout() # plt.show() plt.savefig('./plots/efirst_vs_e.png', dpi=300) cb.remove() plt.cla() # make a formal baseline cut from 1d histogram flo, fhi, fpb = -20000, 20000, 100 hE, xE, vE = pgh.get_hist(df_hit['energy_first'], range=(flo, fhi), dx=fpb) xE = xE[1:] plt.semilogy(xE, hE, c='b', ds='steps') ef_cut_lo, ef_cut_hi = -5000, 4000 plt.axvline(ef_cut_lo, c='r', lw=1) plt.axvline(ef_cut_hi, c='r', lw=1) plt.xlabel('energy_first', ha='right', x=1) plt.ylabel('counts', ha='right', y=1) # plt.show() plt.savefig('./plots/efirst_cut.pdf') plt.cla() if i_plot <= 3: # trapE_cal - energy_cal vs trapE_cal # use baseline cut df_cut = df_hit.query('bl > 57700 and bl < 58500').copy() # add new diffE column df_cut['diffE'] = df_cut['trapE_cal'] - df_cut['energy_cal'] elo, ehi, epb = 0, 3000, 1 dlo, dhi, dpb = -10, 10, 0.1 nbx = int((ehi - elo) / epb) nby = int((dhi - dlo) / dpb) h = plt.hist2d(df_cut['trapE_cal'], df_cut['diffE'], bins=[nbx, nby], range=[[elo, ehi], [dlo, dhi]], cmap='jet', norm=LogNorm()) plt.xlabel('trapE_cal', ha='right', x=1) plt.ylabel('diffE (trap-onbd)', ha='right', y=1) plt.tight_layout() # plt.show() plt.savefig('./plots/diffE.png', dpi=300) plt.cla() if i_plot <= 4: # A_10/trapE_cal vs trapE_cal (A/E vs E) # i doubt we want to introduce a pulse shape cut at this point, # since i'm tuning on bkg data and we don't know a priori what (if any) # features the Kr waveforms will have. also, the efficiency as a # function of energy would have to be determined, which is hard. # so this is just for fun. # use baseline cut df_cut = df_hit.query('bl > 57700 and bl < 58500').copy() # add new A/E column df_cut['aoe'] = df_cut['A_10'] / df_cut['trapE_cal'] # alo, ahi, apb = -1300, 350, 1 # elo, ehi, epb = 0, 250, 1 alo, ahi, apb = -0.5, 5, 0.05 elo, ehi, epb = 0, 50, 0.2 nbx = int((ehi - elo) / epb) nby = int((ahi - alo) / apb) h = plt.hist2d(df_cut['trapE_cal'], df_cut['aoe'], bins=[nbx, nby], range=[[elo, ehi], [alo, ahi]], cmap='jet', norm=LogNorm()) plt.xlabel('trapE_cal', ha='right', x=1) plt.ylabel('A/E', ha='right', y=1) plt.tight_layout() # plt.show() plt.savefig('./plots/aoe_vs_e_lowe.png', dpi=300) plt.cla() if i_plot <= 5: # show effect of cuts on energy spectrum # baseline cut and efirst cut are very similar df_cut = df_hit.query('bl > 57700 and bl < 58500') # df_cut = df_hit.query('energy_first > -5000 and energy_first < 4000') etype = 'trapE_cal' elo, ehi, epb = 0, 250, 0.5 # no cuts h1, x1, v1 = pgh.get_hist(df_hit[etype], range=(elo, ehi), dx=epb) x1 = x1[1:] plt.plot(x1, h1, c='k', lw=1, ds='steps', label='raw') # baseline cut h2, x2, v2 = pgh.get_hist(df_cut[etype], range=(elo, ehi), dx=epb) plt.plot(x1, h2, c='b', lw=1, ds='steps', label='bl cut') plt.xlabel(etype, ha='right', x=1) plt.ylabel('counts', ha='right', y=1) plt.legend() # plt.show() plt.savefig('./plots/cut_spectrum.pdf') plt.cla()
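# --- illustrative sketch: check survival fraction of the baseline cut ----------
# A quick way to verify that the bl cut above is not energy-dependent: compare
# the number of events before/after the cut in coarse energy bins.  Assumes a
# df_hit with 'bl' and 'trapE_cal' columns as above; the cut values are the ones
# used in data_cleaning().
import numpy as np

def cut_survival(df_hit, elo=0, ehi=250, epb=10, bl_lo=57700, bl_hi=58500):
    df_cut = df_hit.query(f'bl > {bl_lo} and bl < {bl_hi}')
    nb = int((ehi - elo) / epb)
    h_all, edges = np.histogram(df_hit['trapE_cal'], bins=nb, range=(elo, ehi))
    h_cut, _ = np.histogram(df_cut['trapE_cal'], bins=nb, range=(elo, ehi))
    with np.errstate(divide='ignore', invalid='ignore'):
        frac = np.where(h_all > 0, h_cut / h_all, np.nan)
    return edges[1:], frac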
def get_resolution(): """ """ # load hit file f_hit = '/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5' tb_name = 'ORSIS3302DecoderForEnergy/raw' sto = lh5.Store() groups = sto.ls(f_hit) data = sto.read_object(tb_name, f_hit) df_hit = data.get_dataframe() # load parameters e_peak = 1460.8 etype = 'trapE_cal' # etype = 'energy_cal' elo, ehi, epb = 1445, 1475, 0.2 # get histogram hE, bins, vE = pgh.get_hist(df_hit[etype], range=(elo, ehi), dx=epb) xE = bins[1:] # simple numerical width i_max = np.argmax(hE) h_max = hE[i_max] upr_half = xE[(xE > xE[i_max]) & (hE <= h_max / 2)][0] bot_half = xE[(xE < xE[i_max]) & (hE >= h_max / 2)][0] fwhm = upr_half - bot_half sig = fwhm / 2.355 # # fit to gaussian: amp, mu, sig, bkg # amp = h_max * fwhm # bg0 = np.mean(hE[:20]) # x0 = [amp, xE[i_max], sig, bg0] # xF, xF_cov = pgf.fit_hist(pgf.gauss_bkg, hE, bins, var=vE, guess=x0) # fit_func = pgf.gauss_bkg # fit to radford peak: mu, sigma, hstep, htail, tau, bg0, amp amp = h_max * fwhm hstep = 0.001 # fraction that the step contributes htail = 0.1 tau = 10 bg0 = np.mean(hE[:20]) x0 = [xE[i_max], sig, hstep, htail, tau, bg0, amp] xF, xF_cov = pgf.fit_hist(pgf.radford_peak, hE, bins, var=vE, guess=x0) fit_func = pgf.radford_peak xF_err = np.sqrt(np.diag(xF_cov)) chisq = [] for i, h in enumerate(hE): model = fit_func(xE[i], *xF) diff = (model - h)**2 / model chisq.append(abs(diff)) # collect results (for output, should use a dict or DataFrame) e_fit = xF[0] fwhm_fit = xF[1] * 2.355 # * e_peak / e_fit print(fwhm, fwhm_fit) fwhmerr = xF_err[1] * 2.355 * e_peak / e_fit rchisq = sum(np.array(chisq) / len(hE)) # plotting plt.plot(xE, hE, ds='steps', c='b', lw=2, label=etype) # peak shape plt.plot(xE, fit_func(xE, *x0), '-', c='orange', alpha=0.5, label='init. guess') plt.plot(xE, fit_func(xE, *xF), '-r', alpha=0.8, label='peakshape fit') plt.plot(np.nan, np.nan, '-w', label=f'mu={e_fit:.1f}, fwhm={fwhm_fit:.2f}') plt.xlabel(etype, ha='right', x=1) plt.ylabel('Counts', ha='right', y=1) plt.legend(loc=2) plt.tight_layout() # plt.show() plt.savefig(f'./plots/resolution_1460_{etype}.pdf') plt.cla()
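# --- illustrative sketch: the "simple numerical width" block as a helper -------
# Same half-max crossing logic used in get_resolution() and run_dsp() above,
# packaged as a standalone seed for the peakshape fit.
import numpy as np

def numerical_fwhm(hE, xE):
    """hE: bin counts, xE: bin centers (same length)."""
    i_max = np.argmax(hE)
    h_max = hE[i_max]
    upr_half = xE[(xE > xE[i_max]) & (hE <= h_max / 2)][0]
    bot_half = xE[(xE < xE[i_max]) & (hE >= h_max / 2)][0]
    return upr_half - bot_half

# fwhm0 = numerical_fwhm(hE, xE); sig0 = fwhm0 / 2.355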
def peakfit(df_group, config, db_ecal): """ Example: $ ./energy_cal.py -q 'run==117' -pf [-pi 002 : use peakinput] [-p : show plot] """ # choose the mode of peakdet to look up constants from if 'input_id' in config.keys(): pol = config['pol'][0] print(' Using 1st-pass constants from peakdet_input') input_peaks = True else: print(' Using 1st-pass constants from peakdet_auto') input_peaks = False pol = 1 # and p0==0 always run = int(df_group.run.iloc[0]) cyclo, cychi = df_group.cycle.iloc[0], df_group.cycle.iloc[-1] gb_run = df_group['run'].unique() if len(gb_run) > 1: print("Multi-run queries aren't supported yet, sorry!") exit() # load data and compute runtime dsp_list = config['lh5_dir'] + df_group['dsp_path'] + '/' + df_group[ 'dsp_file'] raw_data = lh5.load_nda(dsp_list, config['rawe'], config['input_table'], verbose=False) runtime_min = df_group['runtime'].sum() print(f' Runtime: {runtime_min:.1f} min. Calibrating:', [f'{et}:{len(ev)} events' for et, ev in raw_data.items()]) print(f' Fitting to:', config['fit_func']) # get list of peaks to look for epeaks = config['expected_peaks'] + config['test_peaks'] epeaks = np.array(sorted(epeaks)) # loop over energy estimators of interest pf_results = {} for et in config['rawe']: # load first-guess calibration constants from tables in the ecalDB # convention for p_i : p0 + p1 * x + p2 * x**2 + ... tb_name = f'peakinp_{et}' if input_peaks else f'peakdet_{et}' db_table = db_ecal.table(tb_name).all() df_cal = pd.DataFrame(db_table) if len(df_cal) == 0: print("Error, couldn't load cal constants for table:", tb_name) print("Try running: ./energy_cal.py -q '[query]' -s", tb_name) exit() que = f'run=={run} and cyclo=={cyclo} and cychi=={cychi}' p1cal = df_cal.query(que) if len(p1cal) != 1: print( f"Can't load a unique set of cal constants!\n Full cal DF, '{tb_name}':" ) print(df_cal) print('Result of query:', que) print(p1cal) exit() cal_pars_init = [p1cal[f'pol{p}'].iloc[0] for p in range(pol, -1, -1)] # p2, p1, p0 cal_pars_init[-1] = 45 # deleteme # NOTE: polyfit reverses the coefficients, putting highest order first cp = [f'p{i} {cp:.4e} ' for i, cp in enumerate(cal_pars_init[::-1])] print(f' First pass inputs:', ' '.join(cp)) # 1. use the first-guess constants to compute the expected mu_raw locations. # 2. run the peak fit on the raw peaks, compute new constants # 3. run the peak fit on the calibrated peaks, compute final constants f1 = fit_peaks(epeaks, cal_pars_init, raw_data[et], runtime_min, ff_name=config['fit_func'], show_plot=False, batch=config['batch_mode']) df1 = pd.DataFrame(f1).T # # xv - uncal, yval - calib. 
pfit, pcov = np.polyfit(df1['mu_raw'], df1['epk'], config['pol'][0], cov=True) # perr = np.sqrt(np.diag(pcov)) print("pass 1", pfit) # # print(perr) f2 = fit_peaks(df1['mu_raw'], [0, 1, 0], raw_data[et], runtime_min, range=config['init_vals'][et]['raw_range'], ff_name=config['fit_func'], show_plot=True, batch=config['batch_mode']) df2 = pd.DataFrame(f2).T pfit, pcov = np.polyfit(df2['mu'], df2['epk'], config['pol'][0], cov=True) print("pass 2", pfit) exit() # compute the difference between lit and measured values pfunc = np.poly1d(cpar) cal_data = pfunc(raw_data[et]) cal_peaks = pfunc(df_fits['mu_raw']) df_fits['residual'] = df_fits['epk'] - df_fits['mu'] res_uncertainty = df_fits['mu_err'] cp = [f'p{i} {cp:.4e} ' for i, cp in enumerate(cpar[::-1])] print(f' Peakfit outputs:', ' '.join(cp)) print(df_fits) exit() # TODO: save this output to a SEPARATE output file (don't muck up pf_results, # which is intended to be just for the constants p0, p1, p2 ... etc. # print(df_fits) # fit fwhm vs. energy # FWHM(E) = sqrt(A_noise^2 + A_fano^2 * E + A_qcol^2 E^2) # Ref: Eq. 3 of https://arxiv.org/abs/1902.02299 # TODO: fix error handling def sqrt_fwhm(x, a_n, a_f, a_c): return np.sqrt(a_n**2 + a_f**2 * x + a_c**2 * x**2) p_guess = [0.3, 0.05, 0.001] p_fit, p_cov = curve_fit( sqrt_fwhm, df_fits['mu'], df_fits['fwhm'], p0=p_guess) #, sigma = np.sqrt(h), absolute_sigma=True) p_err = np.sqrt(np.diag(p_cov)) # show a split figure with calibrated spectrum + used peaks on top, # and calib.function and resolution vs. energy on bottom if config['show_plot']: fig = plt.figure(figsize=(8, 8)) p0 = plt.subplot(2, 1, 1) # calibrated spectrum p1 = plt.subplot(2, 2, 3) # resolution vs energy p2 = plt.subplot(2, 2, 4) # fit_mu vs energy # 0. show calibrated spectrum with gamma lines # get histogram (cts / keV / d) xlo, xhi, xpb = config['cal_range'] hist, bins, _ = pgh.get_hist(cal_data, range=(xlo, xhi), dx=xpb) hist_norm = np.divide(hist, runtime_min * 60 * xpb) # show peaks cmap = plt.cm.get_cmap('brg', len(df_fits) + 1) for i, row in df_fits.iterrows(): # get a pretty label for the isotope lbl = config['pks'][str(row['epk'])] iso = ''.join(r for r in re.findall('[0-9]+', lbl)) ele = ''.join(r for r in re.findall('[a-z]', lbl, re.I)) pk_lbl = r'$^{%s}$%s' % (iso, ele) pk_diff = row['epk'] - row['mu'] p0.axvline(row['epk'], ls='--', c=cmap(i), lw=1, label=f"{pk_lbl} : {row['epk']} + {pk_diff:.3f}") p0.semilogy(bins[1:], hist_norm, ds='steps', c='b', lw=1) p0.set_ylim(1e-4) p0.set_xlabel('Energy (keV)', ha='right', x=1) p0.set_ylabel('cts / s / keV', ha='right', y=1) p0.legend(loc=3, fontsize=11) # 1. resolution vs. energy # TODO: add fwhm errorbar x_fit = np.arange(xlo, xhi, xpb) y_init = sqrt_fwhm(x_fit, *p_guess) # p1.plot(x_fit, y_init, '-', lw=1, c='orange', label='guess') y_fit = sqrt_fwhm(x_fit, *p_fit) a_n, a_f, a_c = p_fit fit_label = r'$\sqrt{(%.2f)^2 + (%.3f)^2 E + (%.4f)^2 E^2}$' % ( a_n, a_f, a_c) p1.plot(x_fit, y_fit, '-r', lw=1, label=f'fit: {fit_label}') p1.errorbar( df_fits['mu'], df_fits['fwhm'], yerr=df_fits.fwhm_err, marker='.', mfc='b', ls='none', ) p1.set_xlabel('Energy (keV)', ha='right', x=1) p1.set_ylabel('FWHM (keV)', ha='right', y=1) p1.legend(fontsize=11) # 2. fit_mu vs. 
energy p2.errorbar(df_fits.epk, df_fits.epk - df_fits.mu, yerr=df_fits.sig, marker='.', mfc='b', ls='none', label=r'$E_{true}$ - $E_{fit}$') p2.set_xlabel('Energy (keV)', ha='right', x=1) p2.set_ylabel('Residual (keV)', ha='right', y=1) p2.legend(fontsize=13) if config['batch_mode']: plt.savefig( f'./plots/energy_cal/peakfit_{et}_run{run}_clo{cyclo}_chi{cychi}.pdf' ) else: plt.show() plt.close('all') # fill in the peakfit results and return # cycle range pf_results[f'{et}_cyclo'] = cyclo pf_results[f'{et}_cychi'] = cychi # energy calibration constants for i, p in enumerate(cpar[::-1]): # remember to flip the coeffs! pf_results[f'{et}_cal{i}'] = p # uncertainties in cal constants for i, pe in enumerate(cerr[::-1]): pf_results[f'{et}_unc{i}'] = pe # resolution curve parameters pf_results[f'{et}_Anoise'] = p_fit[0] pf_results[f'{et}_Afano'] = p_fit[1] pf_results[f'{et}_Aqcol'] = p_fit[2] pf_results[f'{et}_runtime'] = runtime_min return pd.Series(pf_results)
def peakCounts(df, energy_par='trapEftp_cal', bins=50, erange=[], bkg_sub=True, writeParams=False): """ Get the number of counts in a peak. Can be sideband-subtracted or raw. Recommend getting pgfenergy_hist, pgfebins, evars using pgh.get_hist() """ if len(erange) < 2: print('Must specify an energy range for the fit!') exit() # First use gauss_mode_width_max to use for initial guesses in fit_hist ehist, ebins, evars = pgh.get_hist(df[energy_par], bins=bins, range=erange) pars, cov = pgf.gauss_mode_width_max(ehist, ebins, evars) mode = pars[0] width = pars[1] amp = pars[2] print('Guess: {pars}') # print(f'mode: {mode}') # print(f'width: {width}') # print(f'amp: {amp}') e_pars, ecov = pgf.fit_hist(gauss_fit_func, ehist, ebins, evars, guess=(amp, mode, width, 1)) chi_2 = pgf.goodness_of_fit(ehist, ebins, gauss_fit_func, e_pars) mean = e_pars[1] mean_err = ecov[1] sig = e_pars[2] sig_err = ecov[2] en_amp_fit = e_pars[0] en_const_fit = e_pars[3] fwhm = sig * 2.355 print(f'chi square: {chi_2}') print(f'mean: {mean}') print(f'width: {sig}') print(f'amp: {en_amp_fit}') print(f'C: {en_const_fit}') print(f'FWHM: {fwhm} \n{(fwhm/mean)*100}%') cut_3sig = f'({mean-3*sig} <= {energy_par} <= {mean+3*sig})' counts_peak = len(df.query(cut_3sig).copy()) err_peak = np.sqrt(counts_peak) print(f'peak counts: {counts_peak}') print(f'error: {err_peak}') if bkg_sub == True: bkg_left_min = mean - 7. * sig bkg_left_max = mean - 4 * sig bkg_right_min = mean + 4 * sig bkg_right_max = mean + 7. * sig bkg_left = f'({bkg_left_min} <= {energy_par} < {bkg_left_max})' bkg_right = f'({bkg_right_min} < {energy_par} <= {bkg_right_max})' bkg = f'{bkg_left} or {bkg_right}' left_counts = len(df.query(bkg_left).copy()) right_counts = len(df.query(bkg_right).copy()) total_bkg = left_counts + right_counts err_bkg = np.sqrt(total_bkg) bkg_sub_counts = counts_peak - total_bkg err = np.sqrt(counts_peak + total_bkg) print(f'peak counts: {counts_peak}') print(f'bkg left: {left_counts}') print(f'bkg right: {right_counts}') print(f'total bkg: {total_bkg}') print(f'bkg_subtracted counts: {bkg_sub_counts}') print(f'error: {err}') print(f'{(err/bkg_sub_counts)*100:.3f}%') return (bkg_sub_counts, err) else: return (counts_peak, err_peak)
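# --- illustrative sketch: the sideband-subtraction arithmetic used above -------
# With independent Poisson counts, the background-subtracted signal is
# S = N_peak - N_bkg with uncertainty sqrt(N_peak + N_bkg), the error formula
# used in peakCounts().  Note the two sidebands (each 3 sigma wide) together
# match the 6-sigma peak window, so no width scaling is needed.  The numbers in
# the example call are placeholders.
import numpy as np

def sideband_subtract(n_peak, n_bkg_left, n_bkg_right):
    n_bkg = n_bkg_left + n_bkg_right
    signal = n_peak - n_bkg
    err = np.sqrt(n_peak + n_bkg)
    return signal, err

print(sideband_subtract(1250, 180, 195))   # -> (875, ~40.3)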
f = h5py.File(filename, 'r') print("File info: ", f.keys()) chn = 0 for ged in f.keys(): try: dset = f[ged]['raw'] print("key: ", ged, "Data info: ", dset.keys()) except: conf = f[ged] print("Header info: ", conf.keys()) energies = dset['energy'][()] #energies = f[ged]['raw/energy'][()] #energies = dset['energy'][dset['channel'][()]==chn] maxe = np.amax(energies) h, b, v = pgh.get_hist(energies, bins=3500, range=(maxe / 4, maxe)) pgh.plot_hist(h, b, label=ged) #bin_max = b[np.where(h == h.max())][0] #print("chn %d Raw energy max %d, histogram max %d at %d " % (chn,maxe,h.max(),bin_max )) #min_ene = int(bin_max*0.95) #max_ene = int(bin_max*1.05) #hist, bins, var = pgh.get_hist(energies, bins=500, range=(min_ene, max_ene)) #pgh.plot_hist(hist, bins, label="chn %d" % chn ) #chn = chn + 1 plt.xlabel("uncalibrated energy", ha='right', x=1) plt.ylabel("counts", ha='right', y=1) plt.yscale('log') plt.legend() plt.show() #plt.savefig("./peak_chn%d.pdf" % chn )
def optimize_trap(rise_times, test=False):
    """
    Duplicate the plot from Figure 2.7 of Kris Vorren's thesis:
    fit the e_ftp peak to the HPGe peakshape function (same as in calibration.py)
    and plot the resulting FWHM^2 vs. the ramp time.
    """
    out_dir = "~/Data/cage"
    opt_file = f"{out_dir}/cage_ds3_optimize.h5"
    print("input file:", opt_file)

    # match keys to settings; should maybe do this in prev function as attrs.
    with pd.HDFStore(opt_file, 'r') as store:
        keys = [key[1:] for key in store.keys()]  # remove leading '/'
        settings = {keys[i]: rise_times[i] for i in range(len(keys))}

    # loop over the keys and fit each e_ftp spectrum to the peakshape function
    fwhms = {}
    for key, rt in settings.items():
        t2df = pd.read_hdf(opt_file, key=key)

        # histogram spectrum near the uncalibrated peak -- have to be careful here
        xlo, xhi, xpb = 2550, 2660, 1
        hE, xE, vE = ph.get_hist(t2df["e_ftp"], range=(xlo, xhi), dx=xpb, trim=False)

        # set initial guesses for the peakshape function. most are pretty rough
        mu = xE[np.argmax(hE)]
        sigma = 5
        hstep = 0.001
        htail = 0.5
        tau = 10
        bg0 = np.mean(hE[:20])
        amp = np.sum(hE)
        x0 = [mu, sigma, hstep, htail, tau, bg0, amp]

        xF, xF_cov = pf.fit_hist(pf.radford_peak, hE, xE, var=vE, guess=x0)
        fwhms[key] = xF[1] * 2.355

        if test:
            plt.cla()

            # peakshape function
            plt.plot(xE, pf.radford_peak(xE, *x0), c='orange', label='guess')
            plt.plot(xE, pf.radford_peak(xE, *xF), c='r', label='peakshape')
            plt.axvline(mu, c='g')

            # plot individual components
            # tail_hi, gaus, bg, step, tail_lo = pf.radford_peak(xE, *xF, components=True)
            # gaus = np.array(gaus)
            # step = np.array(step)
            # tail_lo = np.array(tail_lo)
            # plt.plot(xE, gaus * tail_hi, ls="--", lw=2, c='g', label="gaus+hi_tail")
            # plt.plot(xE, step + bg, ls='--', lw=2, c='m', label='step + bg')
            # plt.plot(xE, tail_lo, ls='--', lw=2, c='k', label='tail_lo')

            plt.plot(xE[1:], hE, ls='steps', lw=1, c='b', label="data")
            plt.plot(np.nan, np.nan, c='w', label=f"fwhm = {fwhms[key]:.2f} uncal.")
            plt.xlabel("Energy (uncal.)", ha='right', x=1)
            plt.ylabel("Counts", ha='right', y=1)
            plt.legend(loc=2)
            plt.show()
def peakdet_group(df_group, config): """ Access all files in this group, load energy histograms, and find the "first guess" linear calibration constant. Return the value, and a bool indicating success. """ # get file list and load energy data lh5_dir = os.path.expandvars(config['lh5_dir']) dsp_list = lh5_dir + df_group['dsp_path'] + '/' + df_group['dsp_file'] edata = lh5.load_nda(dsp_list, config['rawe'], config['input_table']) print('Found energy data:', [(et, len(ev)) for et, ev in edata.items()]) runtime_min = df_group['runtime'].sum() print(f'Runtime (min): {runtime_min:.2f}') # loop over energy estimators of interest pd_results = {} for et in config['rawe']: # get histogram, error, normalize by runtime, and derivative xlo, xhi, xpb = config['init_vals'][et]['raw_range'] hist, bins, var = pgh.get_hist(edata[et], range=(xlo, xhi), dx=xpb) hist_norm = np.divide(hist, runtime_min * 60) hist_err = np.array( [np.sqrt(hbin / (runtime_min * 60)) for hbin in hist]) # plt.plot(bins[1:], hist_norm, ds='steps') # plt.show() # hist_deriv = np.diff(hist_norm) # hist_deriv = np.insert(hist_deriv, 0, 0) # run peakdet pd_thresh = config['init_vals'][et]['peakdet_thresh'] lowe_cut = config['init_vals'][et]['lowe_cut'] ctr_bins = (bins[:-1] + bins[1:]) / 2. idx = np.where(ctr_bins > lowe_cut) maxes, mins = pgc.peakdet(hist_norm[idx], pd_thresh, ctr_bins[idx]) # maxes, mins = pgc.peakdet(hist_deriv[idx], pd_thresh, ctr_bins[idx]) if len(maxes) == 0: print('warning, no maxima! adjust peakdet threshold') # print(maxes) # x (energy) [:,0], y (counts) [:,1] # run peak matching exp_pks = config['expected_peaks'] tst_pks = config['test_peaks'] mode = config['match_mode'] etol = config['raw_ene_tol'] lin_cal, mp_success = match_peaks(maxes, exp_pks, tst_pks, mode, etol) if config['show_plot']: # plot uncalibrated and calibrated energy spectrum, w/ maxima fig, (p0, p1) = plt.subplots(2, 1, figsize=(8, 8)) idx = np.where(bins[1:] > lowe_cut) imaxes = [ np.where(np.isclose(ctr_bins, x[0]))[0][0] for x in maxes ] imaxes = np.asarray(imaxes) # energy, uncalibrated p0.plot(bins[imaxes], hist_norm[imaxes], '.m') p0.plot(bins[idx], hist_norm[idx], ds='steps', c='b', lw=1, label=et) p0.set_ylabel(f'cts/s, {xpb}/bin', ha='right', y=1) p0.set_xlabel(et, ha='right', x=1) # energy, with rough calibration bins_cal = bins[1:] * lin_cal p1.plot(bins_cal, hist_norm, ds='steps', c='b', lw=1, label=f'E = {lin_cal:.3f}*{et}') # compute best-guess location of all peaks, assuming rough calibration cal_maxes = lin_cal * maxes[:, 0] all_pks = np.concatenate((exp_pks, tst_pks)) raw_guesses = [] for pk in all_pks: imatch = np.isclose(cal_maxes, pk, atol=config['mp_tol']) if imatch.any(): # print(pk, cal_maxes[imatch], maxes[:,0][imatch]) raw_guesses.append([pk, maxes[:, 0][imatch][0]]) rg = np.asarray(raw_guesses) rg = rg[rg[:, 0].argsort()] # sort by energy cmap = plt.cm.get_cmap('jet', len(rg)) for i, epk in enumerate(rg): idx_nearest = (np.abs(bins_cal - epk[0])).argmin() cts_nearest = hist_norm[idx_nearest] p1.plot(epk[0], cts_nearest, '.r', c=cmap(i), label=f'{epk[0]:.1f} keV') p1.set_xlabel(f'{et}, pass-1 cal', ha='right', x=1) p1.set_ylabel(f'cts/s, {xpb} kev/bin', ha='right', y=1) p1.legend(fontsize=10) if config['batch_mode']: plt.savefig('./plots/peakdet_cal_{et}.pdf') else: plt.show() pd_results[f'{et}_lincal'] = lin_cal pd_results[f'{et}_lcpass'] = str(mp_success) return pd.Series(pd_results)
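# --- illustrative sketch: simplest version of the first-guess linear constant --
# With a single dominant line (e.g. 40K at 1460.8 keV), the pass-1 constant is
# just the ratio of the known energy to the detected peak position.  match_peaks
# above generalizes this to several candidate lines; the energy here is an
# example value, not tied to any particular detector.
import numpy as np

def simple_lincal(maxes, e_known=1460.8):
    """maxes: array of (uncal_position, counts) pairs, as returned by peakdet."""
    maxes = np.asarray(maxes)
    x_peak = maxes[np.argmax(maxes[:, 1]), 0]   # position of the tallest maximum
    return e_known / x_peak

# lin_cal = simple_lincal(maxes)   # E_cal ~= lin_cal * E_raw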
def peak(df, runDB, calDB, r, line, p=[1, 0], plotit=False): cal = 0.04998 # calDB["cal_pass1"]["1"]["p1cal"] meta_dir = os.path.expandvars(runDB["meta_dir"]) tier_dir = os.path.expandvars(runDB["tier2_dir"]) df['e_cal'] = p[0] * (cal * df['e_ftp']) + p[1] #h = df.hist('e_cal',bins=2000) #plt.yscale('log') df = df.loc[(df.index > 1000) & (df.index < 500000)] def gauss(x, mu, sigma, A=1): """ define a gaussian distribution, w/ args: mu, sigma, area (optional). """ return A * (1. / sigma / np.sqrt(2 * np.pi)) * np.exp(-(x - mu)**2 / (2. * sigma**2)) line_min = 0.995 * line line_max = 1.005 * line nbin = 60 res = 6.3e-4 * line + 0.85 # empirical energy resolution curve from experience hist, bins, var = pgh.get_hist(df['e_cal'], range=(line_min, line_max), dx=(line_max - line_min) / nbin) if plotit: pgh.plot_hist(hist, bins, var=hist, label="data", color='blue') pars, cov = pga.fit_hist(gauss, hist, bins, var=hist, guess=[line, res, 50]) pgu.print_fit_results(pars, cov, gauss) if plotit: pgu.plot_func(gauss, pars, label="chi2 fit", color='red') FWHM = '%.2f' % Decimal( pars[1] * 2. * np.sqrt(2. * np.log(2))) # convert sigma to FWHM FWHM_uncertainty = '%.2f' % Decimal( np.sqrt(cov[1][1]) * 2. * np.sqrt(2. * np.log(2))) peak = '%.2f' % Decimal(pars[0]) peak_uncertainty = '%.2f' % Decimal(np.sqrt(cov[0][0])) residual = '%.2f' % abs(line - float(peak)) if plotit: label_01 = 'Peak = ' + str(peak) + r' $\pm$ ' + str(peak_uncertainty) label_02 = 'FWHM = ' + str(FWHM) + r' $\pm$ ' + str(FWHM_uncertainty) labels = [ label_01, label_02, ] plt.xlim(line_min, line_max) plt.xlabel('Energy (keV)', ha='right', x=1.0) plt.ylabel('Counts', ha='right', y=1.0) plt.tight_layout() plt.hist(df['e_cal'], range=(line_min, line_max), bins=nbin) plt.legend(labels, frameon=False, loc='upper right', fontsize='small') plt.savefig(meta_dir + '/plots/lineFit_' + str(r) + '.png') return peak, FWHM
def peakfit_group(df_group, config, db_ecal): """ """ # get list of peaks to look for epeaks = config['expected_peaks'] + config['test_peaks'] epeaks = np.array(sorted(epeaks)) # right now a lookup by 'run' is hardcoded. # in principle the lookup should stay general using the gb_cols, # but it's kind of hard to see right now how to write the right db queries gb_run = df_group['run'].unique() if len(gb_run) > 1: print("Multi-run (or other) groupbys aren't supported yet, sorry") exit() # load data lh5_dir = os.path.expandvars(config['lh5_dir']) dsp_list = lh5_dir + df_group['dsp_path'] + '/' + df_group['dsp_file'] raw_data = lh5.load_nda(dsp_list, config['rawe'], config['input_table']) runtime_min = df_group['runtime'].sum() # loop over energy estimators of interest pf_results = {} for et in config['rawe']: # load first-guess calibration constant from its table in the DB db_table = db_ecal.table(f'peakdet_{et}').all() df_cal = pd.DataFrame(db_table) lin_cal = df_cal.loc[df_cal.run == str(gb_run[0])]['lincal'].values[0] cal_data = raw_data[et] * lin_cal # compute expected peak locations and widths (fit to Gaussians) fit_results = {} for ie, epk in enumerate(epeaks): # adjust the window. resolution goes as roughly sqrt(energy) window = np.sqrt(epk) * 0.5 xlo, xhi = epk - window / 2, epk + window / 2 nbins = int(window) * 5 xpb = (xhi - xlo) / nbins ibin_bkg = int(nbins * 0.2) # get histogram, error, normalize by runtime pk_data = cal_data[(cal_data >= xlo) & (cal_data <= xhi)] hist, bins, _ = pgh.get_hist(pk_data, range=(xlo, xhi), dx=xpb) hist_norm = np.divide(hist, runtime_min * 60) hist_var = np.array( [np.sqrt(h / (runtime_min * 60)) for h in hist]) # compute expected peak location and width (simple Gaussian) bkg0 = np.mean(hist_norm[:ibin_bkg]) b, h = bins[1:], hist_norm - bkg0 imax = np.argmax(h) upr_half = b[np.where((b > b[imax]) & (h <= np.amax(h) / 2))][0] bot_half = b[np.where((b < b[imax]) & (h <= np.amax(h) / 2))][-1] fwhm = upr_half - bot_half sig0 = fwhm / 2.355 amp0 = np.amax(h) * fwhm p_init = [amp0, bins[imax], sig0, bkg0] # a, mu, sigma, bkg p_fit, p_cov = pgf.fit_hist(pgf.gauss_bkg, hist_norm, bins, var=hist_var, guess=p_init) p_err = np.sqrt(np.diag(p_cov)) # diagnostic plot, don't delete if config['show_plot']: plt.axvline(bins[ibin_bkg], c='m', label='bkg region') xfit = np.arange(xlo, xhi, xpb * 0.1) plt.plot(xfit, pgf.gauss_bkg(xfit, *p_init), '-', c='orange', label='init') plt.plot(xfit, pgf.gauss_bkg(xfit, *p_fit), '-', c='red', label='fit') plt.plot(bins[1:], hist_norm, c='b', lw=1.5, ds='steps') plt.xlabel('pass-1 energy (kev)', ha='right', x=1) plt.legend(fontsize=12) plt.show() plt.close() # goodness of fit chisq = [] for i, h in enumerate(hist_norm): model = pgf.gauss_bkg(b[i], *p_fit) diff = (model - h)**2 / model chisq.append(abs(diff)) rchisq = sum(np.array(chisq) / len(hist_norm)) # fwhm_err = p_err[1] * 2.355 * e_peak / e_fit # collect interesting results for this row fit_results[ie] = { 'epk': epk, 'mu': p_fit[1], 'fwhm': p_fit[2] * 2.355, 'sig': p_fit[2], 'amp': p_fit[0], 'bkg': p_fit[3], 'rchisq': rchisq, 'mu_raw': p_fit[1] / lin_cal, # <-- this is in terms of raw E 'mu_unc': p_err[1] / lin_cal } # ---------------------------------------------------------------------- # compute energy calibration by matrix inversion (thanks Tim and Jason!) 
        view_cols = ['epk', 'mu', 'fwhm', 'bkg', 'rchisq', 'mu_raw']
        df_fits = pd.DataFrame(fit_results).T
        print(df_fits[view_cols])

        true_peaks = df_fits['epk']
        raw_peaks, raw_error = df_fits['mu_raw'], df_fits['mu_unc']

        error = raw_error / raw_peaks * true_peaks
        cov = np.diag(error**2)
        weights = np.diag(1 / error**2)

        degree = config['pol_order']
        raw_peaks_matrix = np.zeros((len(raw_peaks), degree + 1))
        for i, pk in enumerate(raw_peaks):
            temp_degree = degree
            row = np.array([])
            while temp_degree >= 0:
                row = np.append(row, pk**temp_degree)
                temp_degree -= 1
            raw_peaks_matrix[i] += row
        print(raw_peaks_matrix)

        # perform matrix inversion
        xTWX = np.dot(np.dot(raw_peaks_matrix.T, weights), raw_peaks_matrix)
        xTWY = np.dot(np.dot(raw_peaks_matrix.T, weights), true_peaks)
        if np.linalg.det(xTWX) == 0:
            print("singular matrix, determinant is 0, can't get cal constants")
            exit()
        xTWX_inv = np.linalg.inv(xTWX)

        # get polynomial coefficients and error
        cal_pars = np.dot(xTWX_inv, xTWY)
        cal_errs = np.sqrt(np.diag(xTWX_inv))
        n = len(cal_pars)
        print('Fit:', ' '.join([f'p{i}:{cal_pars[i]:.4e}' for i in range(n)]))
        print('Unc:', ' '.join([f'p{i}:{cal_errs[i]:.4e}' for i in range(n)]))

        # ----------------------------------------------------------------------
        # repeat the peak fit with the calibrated energy (affects widths)

        # compute calibrated energy
        pol = np.poly1d(cal_pars)  # handy numpy polynomial object
        cal_data = pol(raw_data[et])

        fit_results = {}
        for ie, epk in enumerate(epeaks):

            # adjust the window. resolution goes as roughly sqrt(energy)
            window = np.sqrt(epk) * 0.5
            xlo, xhi = epk - window / 2, epk + window / 2
            nbins = int(window) * 5
            xpb = (xhi - xlo) / nbins
            ibin_bkg = int(nbins * 0.2)

            # get histogram, error, normalize by runtime
            pk_data = cal_data[(cal_data >= xlo) & (cal_data <= xhi)]
            hist, bins, _ = pgh.get_hist(pk_data, range=(xlo, xhi), dx=xpb)
            hist_norm = np.divide(hist, runtime_min * 60)
            hist_var = np.array([np.sqrt(h / (runtime_min * 60)) for h in hist])

            # compute expected peak location and width (simple Gaussian)
            bkg0 = np.mean(hist_norm[:ibin_bkg])
            b, h = bins[1:], hist_norm - bkg0
            imax = np.argmax(h)
            upr_half = b[np.where((b > b[imax]) & (h <= np.amax(h) / 2))][0]
            bot_half = b[np.where((b < b[imax]) & (h <= np.amax(h) / 2))][-1]
            fwhm = upr_half - bot_half
            sig0 = fwhm / 2.355
            amp0 = np.amax(h) * fwhm
            p_init = [amp0, bins[imax], sig0, bkg0]  # a, mu, sigma, bkg

            p_fit, p_cov = pgf.fit_hist(pgf.gauss_bkg, hist_norm, bins,
                                        var=hist_var, guess=p_init)
            p_err = np.sqrt(np.diag(p_cov))

            # save results
            fit_results[ie] = {
                'epk': epk,
                'mu': p_fit[1],
                'fwhm': p_fit[2] * 2.355,
                'sig': p_fit[2],
                'amp': p_fit[0],
                'bkg': p_fit[3],
            }

        # consolidate results again
        view_cols = ['epk', 'mu', 'fwhm', 'residual']
        df_fits = pd.DataFrame(fit_results).T

        # compute the difference between lit and measured values
        cal_peaks = pol(raw_peaks)
        df_fits['residual'] = true_peaks - cal_peaks
        print(df_fits[view_cols])

        # fit fwhm vs. energy
        # FWHM(E) = sqrt(A_noise^2 + A_fano^2 * E + A_qcol^2 * E^2)
        # Ref: Eq. 3 of https://arxiv.org/abs/1902.02299
        # TODO: fix error handling
        def sqrt_fwhm(x, a_n, a_f, a_c):
            return np.sqrt(a_n**2 + a_f**2 * x + a_c**2 * x**2)

        p_guess = [0.3, 0.05, 0.001]
        p_fit, p_cov = curve_fit(sqrt_fwhm, df_fits['mu'], df_fits['fwhm'],
                                 p0=p_guess)  # sigma=np.sqrt(h), absolute_sigma=True
        p_err = np.sqrt(np.diag(p_cov))

        if config['show_plot']:
            # show a split figure with calibrated spectrum + used peaks on top,
            # and calib. function and resolution vs. energy on bottom
            fig, (p0, p1) = plt.subplots(2, 1, figsize=(8, 8), sharex=True)
            # gridspec_kw={'height_ratios':[2, 1]}

            # get histogram (cts / s / keV)
            xlo, xhi, xpb = config['cal_range']
            hist, bins, _ = pgh.get_hist(cal_data, range=(xlo, xhi), dx=xpb)
            hist_norm = np.divide(hist, runtime_min * 60 * xpb)

            # show peaks
            cmap = plt.cm.get_cmap('brg', len(df_fits) + 1)
            for i, row in df_fits.iterrows():

                # get a pretty label for the isotope
                lbl = config['pks'][str(row['epk'])]
                iso = ''.join(r for r in re.findall('[0-9]+', lbl))
                ele = ''.join(r for r in re.findall('[a-z]', lbl, re.I))
                pk_lbl = r'$^{%s}$%s' % (iso, ele)

                pk_diff = row['epk'] - row['mu']
                p0.axvline(row['epk'], ls='--', c=cmap(i), lw=1,
                           label=f"{pk_lbl} : {row['epk']} + {pk_diff:.3f}")

            p0.semilogy(bins[1:], hist_norm, ds='steps', c='b', lw=1)
            p0.set_ylabel('cts / s / keV', ha='right', y=1)
            p0.legend(loc=3, fontsize=11)

            # TODO: add fwhm errorbar
            x_fit = np.arange(xlo, xhi, xpb)
            y_init = sqrt_fwhm(x_fit, *p_guess)
            p1.plot(x_fit, y_init, '-', lw=1, c='orange', label='guess')

            y_fit = sqrt_fwhm(x_fit, *p_fit)
            a_n, a_f, a_c = p_fit
            fit_label = r'$\sqrt{(%.2f)^2 + (%.3f)^2 E + (%.4f)^2 E^2}$' % (a_n, a_f, a_c)
            p1.plot(x_fit, y_fit, '-r', lw=1, label=f'fit: {fit_label}')

            p1.plot(df_fits['mu'], df_fits['fwhm'], '.b')
            p1.set_xlabel('Energy (keV)', ha='right', x=1)
            p1.set_ylabel('FWHM (keV)', ha='right', y=1)
            p1.legend(fontsize=11)

            if config['batch_mode']:
                plt.savefig('./plots/peakdet_test.png')
            else:
                plt.show()

        # the order of the polynomial should be in the table name
        pf_results[f'{et}_Anoise'] = p_fit[0]
        pf_results[f'{et}_Afano'] = p_fit[1]
        pf_results[f'{et}_Aqcol'] = p_fit[2]
        for i in range(len(cal_pars)):
            pf_results[f'{et}_cal{i}'] = cal_pars[i]
        for i in range(len(cal_pars)):
            pf_results[f'{et}_unc{i}'] = cal_errs[i]

    return pd.Series(pf_results)
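# Sanity-check sketch (an addition, not part of peakfit_group): the matrix
# inversion above is an ordinary weighted least-squares fit, solving
#   cal_pars = (X^T W X)^-1 X^T W y,  with W = diag(1 / sigma_i^2),
# so it should agree with numpy's weighted polyfit when the weights are 1/sigma.
# The raw peak positions and uncertainties below are made-up numbers, used only
# to illustrate the check; numpy is assumed imported as np, as elsewhere here.
def check_cal_fit():
    raw_peaks = np.array([3900., 11500., 17400., 25900., 51800.])  # raw E (e.g. ADC)
    true_peaks = np.array([238.6, 583.2, 860.6, 1460.8, 2614.5])   # literature E (keV)
    sigma = np.array([0.5, 0.3, 0.4, 0.2, 0.3])                    # illustrative uncertainties

    X = np.vander(raw_peaks, 2)        # columns [x, 1] for a linear calibration
    W = np.diag(1 / sigma**2)
    xTWX = X.T @ W @ X
    xTWY = X.T @ W @ true_peaks
    cal_pars = np.linalg.inv(xTWX) @ xTWY
    cal_errs = np.sqrt(np.diag(np.linalg.inv(xTWX)))

    ref = np.polyfit(raw_peaks, true_peaks, 1, w=1/sigma)  # same WLS solution
    print("matrix inversion:", cal_pars, "+/-", cal_errs)
    print("np.polyfit check:", ref)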
""" Example code by Jason demonstrating some pygama convenience functions. """ import numpy as np import matplotlib.pyplot as plt import pygama.analysis.histograms as pgh import pygama.analysis.peak_fitting as pga np.random.seed(0) # fix the seed s/t we can reproduce the plot n = 10 data = np.random.normal(0, 1, n) hist, bins, var = pgh.get_hist(data, range=(-5, 5), dx=1) pgh.plot_hist(hist, bins, var, label="data") pars, cov = pga.fit_hist(pga.gauss, hist, bins, var=var, guess=[0, 1, n]) pgh.print_fit_results(pars, cov, ['mu', 'sig', 'A']) pgh.plot_func(pga.gauss, pars, label="chi2 fit") nbnd = (-np.inf, np.inf) pos = (0, np.inf) pars, cov = pga.fit_hist(pga.gauss, hist, bins, var=var, guess=[0, 1, n], bounds=[nbnd, pos, pos], poissonLL=True) pgh.print_fit_results(pars, cov, ['mu', 'sig', 'A']) pgh.plot_func(pga.gauss, pars, label="poissonLL fit")
def get_fwhm(f_grid, f_opt, efilter, verbose=False):
    """
    this code fits the 2.6 MeV peak using the gauss+step function
    and writes new columns to df_grid: "fwhm", "fwhmerr"
    """
    print("Grid file:", f_grid)
    print("DSP file:", f_opt)
    df_grid = pd.read_hdf(f_grid)

    f = h5py.File(f_opt, 'r')
    for ged in f.keys():
        print("Detector:", ged)
        data = f[ged]['data']

        # declare some new columns for df_grid
        cols = [f"fwhm_{ged}", f"fwhmerr_{ged}", f"rchi2_{ged}"]
        for col in cols:
            df_grid[col] = np.nan

        for i, row in df_grid.iterrows():
            try:
                energies = data[f"{efilter}_{i}"][()]
                mean = np.mean(energies)
                bins = 12000
                hE, xE, vE = ph.get_hist(energies, bins, (mean/2, mean*2))
            except:
                print("Energy not found in", ged, "for entry", i)
                continue

            # set histogram centered and symmetric on the peak
            try:
                mu = xE[np.argmax(hE)]
                imax = np.argmax(hE)
                hmax = hE[imax]
                idx = np.where(hE > hmax/2)  # fwhm
                ilo, ihi = idx[0][0], idx[0][-1]
                sig = (xE[ihi] - xE[ilo]) / 2.355
                idx = np.where(((xE-mu) > -8 * sig) & ((xE-mu) < 8 * sig))
                idx0 = np.where(((xE-mu) > -4.5 * sig) & ((xE-mu) < 4.5 * sig))
                ilo, ihi = idx[0][0], idx[0][-1]
                ilo0, ihi0 = idx0[0][0], idx0[0][-1]
                xE, hE, vE = xE[ilo:ihi+1], hE[ilo:ihi], vE[ilo:ihi]
            except:
                continue

            # set initial guesses for the peakshape function
            hstep = 0
            tau = np.mean(hE[:10])
            bg0 = 1
            x0 = [hmax, mu, sig, bg0, hstep]

            try:
                xF, xF_cov = pf.fit_hist(pf.gauss_step, hE, xE, var=vE, guess=x0)
                xF_err = np.sqrt(np.diag(xF_cov))

                # goodness of fit
                chisq = []
                for j, h in enumerate(hE):
                    model = pf.gauss_step(xE[j], *xF)
                    diff = (model - h)**2 / model
                    chisq.append(abs(diff))

                # update the master dataframe, scaling sigma to FWHM in keV
                fwhm = xF[2] * 2.355 * 2614.5 / mu
                fwhmerr = xF_err[2] * 2.355 * 2614.5 / mu
                rchi2 = sum(np.array(chisq) / len(hE))
                df_grid.at[i, f"fwhm_{ged}"] = fwhm
                df_grid.at[i, f"fwhmerr_{ged}"] = fwhmerr
                df_grid.at[i, f"rchi2_{ged}"] = rchi2
                print(fwhm, fwhmerr, rchi2)
            except:
                print("Fit not computed for detector", ged, "and entry", i)

            if verbose:
                plt.cla()
                plt.plot(xE, pf.gauss_step(xE, *xF), c='r', label='peakshape')
                gaus, step = pf.gauss_step(xE, *xF, components=True)
                gaus = np.array(gaus)
                step = np.array(step)
                plt.plot(xE, gaus, ls="--", lw=2, c='g', label="gaus")
                plt.plot(xE, step, ls='--', lw=2, c='m', label='step + bg')
                plt.plot(xE[1:], hE, lw=1, c='b', label=f"data {ged}")
                plt.xlabel("ADC channels", ha='right', x=1)
                plt.ylabel("Counts", ha='right', y=1)
                plt.legend(loc=2, fontsize=10,
                           title=rf"FWHM = {fwhm:.2f} $\pm$ {fwhmerr:.2f} keV")
                plt.show()

        # write the updated df_grid to the output file
        if not verbose:
            df_grid.to_hdf(f_grid, key="pygama_optimization")
            print("Updated grid file:", f_grid, "with detector", ged)
        print(df_grid)
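# Hedged follow-up sketch (an assumption about the workflow, not part of the
# original code): once get_fwhm() has filled the per-detector columns, one
# might pick the optimal DSP grid point as the row with the smallest FWHM.
# The HDF key and column names are taken from the code above; pandas is
# assumed imported as pd, as elsewhere in this file.
def get_best_grid_point(f_grid, ged):
    df_grid = pd.read_hdf(f_grid, key="pygama_optimization")
    i_best = df_grid[f"fwhm_{ged}"].idxmin()  # rows with NaN are skipped
    best = df_grid.loc[i_best]
    print(f"{ged}: best grid point {i_best}, "
          f"FWHM = {best[f'fwhm_{ged}']:.2f} +/- {best[f'fwhmerr_{ged}']:.2f} keV")
    return i_best, best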