def plot_fwhm(f_grid, f_opt, d_out, efilter, verbose=False):
    """
    Select the best energy resolution per detector, plot the best fit and
    the FWHM as a function of each filter parameter.

    For every detector group found in `f_opt` the grid row with the smallest
    `fwhm_{ged}` (among rows with `rchi2_{ged}` < 100 and `fwhm_{ged}` > 0) is
    selected, the matching energy spectrum is re-fit with `pf.gauss_step`,
    and three scan plots are written in which one filter parameter varies
    while the other two are fixed to their best values.  A summary table is
    written to `{d_out}/{efilter}_results.h5` (key 'results') and a summary
    plot to `{d_out}/FWHM_{efilter}.pdf`.

    Parameters
    ----------
    f_grid : str
        HDF file read with `pd.read_hdf`; must hold the filter parameter
        columns plus `fwhm_{ged}`, `fwhmerr_{ged}` and `rchi2_{ged}`.
    f_opt : str
        HDF5 file with one group per detector; each group has a 'data'
        member holding datasets named `{efilter}_{grid index}`.
    d_out : str
        Output directory; `{d_out}/{ged}/{efilter}` is created as needed.
    efilter : str
        'zacE', 'cuspE' (parameters sigma/flat/decay) or any name containing
        'trapE' (parameters rise/flat/rc).
    verbose : bool
        Unused; kept for interface compatibility.

    Raises
    ------
    ValueError
        If `efilter` is not one of the supported filters.
    OSError
        If an output directory cannot be created.
    """
    print("Grid file:", f_grid)
    df_grid = pd.read_hdf(f_grid)
    f_res = f"{d_out}/{efilter}_results.h5"

    # (grid column, file tag, x label, y label) for each parameter scan
    if efilter in ('zacE', 'cuspE'):
        scans = [
            ('sigma', 'Sigma', r"Sigma Cusp ($\mu$s)", "FWHM (keV)"),
            ('flat', 'Flat', r"Flat Top ($\mu$s)", "FWHM (keV)"),
            ('decay', 'Decay', r"Decay constant ($\mu$s)", "FWHM (keV)"),
        ]
    elif 'trapE' in efilter:
        scans = [
            ('rise', 'Rise', r"Ramp time ($\mu$s)", "FWHM (kev)"),
            ('flat', 'Flat', r"Flat time ($\mu$s)", "FWHM (keV)"),
            ('rc', 'RC', r"RC constant ($\mu$s)", "FWHM (keV)"),
        ]
    else:
        # fail before doing any work instead of hitting a NameError later
        raise ValueError(f"unsupported energy filter: {efilter!r}")
    par_cols = [scan[0] for scan in scans]
    df = pd.DataFrame(columns=['ged'] + par_cols + ['fwhm', 'fwhmerr'])

    with h5py.File(f_opt, 'r') as f:
        for chn, ged in enumerate(f.keys()):
            d_det = f"{d_out}/{ged}/{efilter}"
            os.makedirs(d_det, exist_ok=True)
            data = f[ged]['data']
            try:
                # find fwhm minimum values.  The cut is kept in a
                # per-detector frame so that it never shrinks the grid seen
                # by the detectors processed afterwards.
                good = ((df_grid[f"rchi2_{ged}"] < 100)
                        & (df_grid[f"fwhm_{ged}"] > 0))
                df_det = df_grid.loc[good]
                minidx = df_det[f'fwhm_{ged}'].idxmin()
                df_min = df_det.loc[minidx]
                best = {col: df_min[col] for col in par_cols}

                # plot best result fit
                energies = data[f"{efilter}_{minidx}"][()]
                mean = np.mean(energies)
                hE, xE, vE = ph.get_hist(energies, 12000, (mean / 2, mean * 2))
                imax = np.argmax(hE)
                mu, hmax = xE[imax], hE[imax]
                # numerical FWHM -> first guess for sigma
                above = np.where(hE > hmax / 2)[0]
                sig = (xE[above[-1]] - xE[above[0]]) / 2.355
                # restrict the fit to +/- 8 sigma around the maximum
                near = np.where(((xE - mu) > -8 * sig)
                                & ((xE - mu) < 8 * sig))[0]
                ilo, ihi = near[0], near[-1]
                xE, hE, vE = xE[ilo:ihi + 1], hE[ilo:ihi], vE[ilo:ihi]
                x0 = [hmax, mu, sig, 1, 0]
                xF, xF_cov = pf.fit_hist(pf.gauss_step, hE, xE, var=vE,
                                         guess=x0)
                xF_err = np.sqrt(np.diag(xF_cov))
                # convert the fitted width to keV, taking the peak at 2614.5
                fwhm = xF[2] * 2.355 * 2614.5 / mu
                fwhmerr = xF_err[2] * 2.355 * 2614.5 / mu

                plt.plot(xE, pf.gauss_step(xE, *xF), c='r', label='peakshape')
                gaus, step = pf.gauss_step(xE, *xF, components=True)
                plt.plot(xE, np.array(gaus), ls="--", lw=2, c='g',
                         label="gaus")
                plt.plot(xE, np.array(step), ls='--', lw=2, c='m',
                         label='step + bg')
                plt.plot(xE[1:], hE, lw=1, c='b', label=f"data {ged}")
                plt.xlabel("ADC channels", ha='right', x=1)
                plt.ylabel("Counts", ha='right', y=1)
                plt.legend(
                    loc=2, fontsize=10,
                    title=rf"FWHM = {fwhm:.2f} $\pm$ {fwhmerr:.2f} keV")
                plt.savefig(f"{d_det}/Fit_{ged}-{efilter}.pdf")
                plt.cla()
            except Exception as exc:
                # best effort: skip this detector but say why
                print("FWHM minimum not find for detector", ged,
                      f"({type(exc).__name__}: {exc})")
                plt.cla()
                continue

            results = ([ged] + [f'{best[col]:.2f}' for col in par_cols]
                       + [f'{fwhm:.2f}', f'{fwhmerr:.2f}'])

            # vary one parameter at a time, the other two fixed at their best
            for col, tag, xlabel, ylabel in scans:
                fixed = [c for c in par_cols if c != col]
                sel = ((df_det[fixed[0]] == best[fixed[0]])
                       & (df_det[fixed[1]] == best[fixed[1]]))
                _plot_fwhm_scan(
                    df_det.loc[sel], col, ged, xlabel, ylabel,
                    f"{d_det}/FWHM_vs_{tag}_{ged}-{efilter}.pdf")

            df.loc[chn] = results

    print("Results file:", f_res)
    df.to_hdf(f_res, key='results', mode='w')
    print(df)

    # Rows are stored under the detector's position in the file, so read the
    # values positionally; label lookup fails when a detector was skipped.
    dets = df.index.to_numpy()
    fwhm = df['fwhm'].astype(float).to_numpy()
    fwhm_err = df['fwhmerr'].astype(float).to_numpy()
    plt.cla()
    plt.errorbar(dets, fwhm, fwhm_err, fmt='o', c='red',
                 label=f'{efilter} filter')
    plt.xlabel("detector number", ha='right', x=1)
    plt.ylabel("FWHM (keV)", ha='right', y=1)
    plt.legend()
    plt.savefig(f"{d_out}/FWHM_{efilter}.pdf")


def _plot_fwhm_scan(df_scan, xcol, ged, xlabel, ylabel, f_out):
    """Plot FWHM with error bars versus one grid column and save to f_out."""
    plt.errorbar(df_scan[xcol], df_scan[f'fwhm_{ged}'],
                 df_scan[f'fwhmerr_{ged}'], fmt='o')
    plt.xlabel(xlabel, ha='right', x=1)
    plt.ylabel(ylabel, ha='right', y=1)
    plt.savefig(f_out)
    plt.cla()
def run_dsp(dfrow):
    """
    Run the DSP on the test file for one trapezoid setting and fit the peak.

    The module-level `dsp_config` is modified in place (arguments 1 and 2 of
    the 'wf_trap' processor are set to the requested rise and flat times),
    the processing chain is built on the module-level `tb_data` and executed,
    and the 'trapEmax' spectrum between 4000 and 4500 is fit with
    `pgf.gauss_bkg` (parameters: amp, mu, sig, bkg).  When the module-level
    `show_movie` is true, each fit is briefly displayed.

    alternate idea: generate a long list of processors with different names

    Parameters
    ----------
    dfrow : iterable of two numbers
        (rise, flat) times, written into the config as '<value>*us'.

    Returns
    -------
    pd.Series
        'e_fit' (fitted centroid in uncalibrated units), 'fwhm_fit' and
        'fwhm_err' (scaled by 1460 / e_fit), 'rchisq' and 'fwhm_ovr_mean'.
    """
    # adjust dsp config dictionary
    rise, flat = dfrow
    trap_args = dsp_config['processors']['wf_trap']['args']
    trap_args[1] = f'{rise}*us'
    trap_args[2] = f'{flat}*us'

    # run dsp
    pc, tb_out = build_processing_chain(tb_data, dsp_config, verbosity=0)
    pc.execute()

    # analyze peak
    e_peak = 1460.
    etype = 'trapEmax'
    elo, ehi, epb = 4000, 4500, 3  # the peak moves around a bunch
    energy = tb_out[etype].nda

    # get histogram
    hE, bins, vE = pgh.get_hist(energy, range=(elo, ehi), dx=epb)
    xE = bins[1:]

    # simple numerical width
    i_max = np.argmax(hE)
    h_max = hE[i_max]
    upr_half = xE[(xE > xE[i_max]) & (hE <= h_max / 2)][0]
    bot_half = xE[(xE < xE[i_max]) & (hE >= h_max / 2)][0]
    fwhm = upr_half - bot_half
    sig = fwhm / 2.355

    # fit to gaussian: amp, mu, sig, bkg
    fit_func = pgf.gauss_bkg
    amp = h_max * fwhm
    bg0 = np.mean(hE[:20])
    x0 = [amp, xE[i_max], sig, bg0]
    xF, xF_cov = pgf.fit_hist(fit_func, hE, bins, var=vE, guess=x0)
    xF_err = np.sqrt(np.diag(xF_cov))

    # collect results.  With the parameter order [amp, mu, sig, bkg] the
    # centroid is xF[1] and the width is xF[2]; index 0 is the amplitude.
    e_fit = xF[1]
    fwhm_fit = xF[2] * 2.355 * e_peak / e_fit
    fwhm_err = xF_err[2] * 2.355 * e_peak / e_fit

    # Pearson-like chi2 per bin, evaluated on the same x values as the plot
    model = fit_func(xE, *xF)
    rchisq = np.sum(np.abs((model - hE)**2 / model)) / len(hE)
    fwhm_ovr_mean = fwhm_fit / e_fit

    if show_movie:

        plt.plot(xE, hE, ds='steps', c='b', lw=2,
                 label=f'{etype} {rise}--{flat}')

        # peak shape
        plt.plot(xE, fit_func(xE, *x0), '-', c='orange', alpha=0.5,
                 label='init. guess')
        plt.plot(xE, fit_func(xE, *xF), '-r', alpha=0.8,
                 label='peakshape fit')
        plt.plot(np.nan, np.nan, '-w',
                 label=f'mu={e_fit:.1f}, fwhm={fwhm_fit:.2f}')

        plt.xlabel(etype, ha='right', x=1)
        plt.ylabel('Counts', ha='right', y=1)
        plt.legend(loc=2)

        # show a little movie
        plt.show(block=False)
        plt.pause(0.01)
        plt.cla()

    # return results
    return pd.Series({
        'e_fit': e_fit,
        'fwhm_fit': fwhm_fit,
        'rchisq': rchisq,
        'fwhm_err': fwhm_err,
        'fwhm_ovr_mean': fwhm_ovr_mean
    })
def peakfit_group(df_group, config, db_ecal):
    """
    Fit the expected peaks of one run and derive a polynomial energy
    calibration plus a resolution curve for each energy estimator.

    For every estimator in `config['rawe']`:
      1. apply the first-guess linear constant stored in the `peakdet_<et>`
         table of `db_ecal` and fit each peak with a Gaussian + flat
         background;
      2. solve the weighted least-squares problem (matrix inversion) for a
         polynomial of order `config['pol_order']` mapping raw peak
         positions to the true energies;
      3. refit the peaks on the calibrated data and fit
         FWHM(E) = sqrt(A_noise^2 + A_fano^2 * E + A_qcol^2 * E^2).

    Parameters
    ----------
    df_group : pd.DataFrame
        Rows of one group; the columns 'run', 'dsp_path', 'dsp_file' and
        'runtime' are read.  Only a single run per group is supported.
    config : dict
        Keys read here: 'expected_peaks', 'test_peaks', 'lh5_dir', 'rawe',
        'input_table', 'pol_order', 'show_plot', and, when plotting,
        'cal_range', 'pks' and 'batch_mode'.
    db_ecal : database handle
        Must provide `.table(name).all()` returning records with 'run' and
        'lincal' fields.

    Returns
    -------
    pd.Series
        `<et>_Anoise`, `<et>_Afano`, `<et>_Aqcol`, `<et>_cal<i>` and
        `<et>_unc<i>` for every estimator.

    Raises
    ------
    SystemExit
        With status 1 if the group spans several runs or the normal matrix
        is singular (a message is printed first).
    """
    # get list of peaks to look for
    epeaks = np.array(sorted(config['expected_peaks'] + config['test_peaks']))

    # right now a lookup by 'run' is hardcoded.
    # in principle the lookup should stay general using the gb_cols,
    # but it's kind of hard to see right now how to write the right db queries
    gb_run = df_group['run'].unique()
    if len(gb_run) > 1:
        print("Multi-run (or other) groupbys aren't supported yet, sorry")
        raise SystemExit(1)

    # load data
    lh5_dir = os.path.expandvars(config['lh5_dir'])
    dsp_list = lh5_dir + df_group['dsp_path'] + '/' + df_group['dsp_file']
    raw_data = lh5.load_nda(dsp_list, config['rawe'], config['input_table'])
    runtime_min = df_group['runtime'].sum()

    # loop over energy estimators of interest
    pf_results = {}
    for et in config['rawe']:

        # load first-guess calibration constant from its table in the DB
        df_cal = pd.DataFrame(db_ecal.table(f'peakdet_{et}').all())
        lin_cal = df_cal.loc[df_cal['run'] == str(gb_run[0])]['lincal'].values[0]
        cal_data = raw_data[et] * lin_cal

        # first pass: fit every peak on the linearly calibrated data
        fit_results = {}
        for ie, epk in enumerate(epeaks):
            summary, diag = _fit_peak_window(cal_data, epk, runtime_min)
            bins, hist_norm = diag['bins'], diag['hist_norm']
            p_fit, p_err = diag['p_fit'], diag['p_err']

            # diagnostic plot, don't delete
            if config['show_plot']:
                plt.axvline(bins[diag['ibin_bkg']], c='m', label='bkg region')
                xfit = np.arange(diag['xlo'], diag['xhi'], diag['xpb'] * 0.1)
                plt.plot(xfit, pgf.gauss_bkg(xfit, *diag['p_init']), '-',
                         c='orange', label='init')
                plt.plot(xfit, pgf.gauss_bkg(xfit, *p_fit), '-', c='red',
                         label='fit')
                plt.plot(bins[1:], hist_norm, c='b', lw=1.5, ds='steps')
                plt.xlabel('pass-1 energy (kev)', ha='right', x=1)
                plt.legend(fontsize=12)
                plt.show()
                plt.close()

            # goodness of fit, evaluated at the upper bin edges
            model = pgf.gauss_bkg(bins[1:], *p_fit)
            rchisq = (np.sum(np.abs((model - hist_norm)**2 / model))
                      / len(hist_norm))

            # collect interesting results for this row
            summary['rchisq'] = rchisq
            summary['mu_raw'] = p_fit[1] / lin_cal  # in terms of raw E
            summary['mu_unc'] = p_err[1] / lin_cal
            fit_results[ie] = summary

        # ----------------------------------------------------------------------
        # compute energy calibration by matrix inversion (thanks Tim and Jason!)

        view_cols = ['epk', 'mu', 'fwhm', 'bkg', 'rchisq', 'mu_raw']
        df_fits = pd.DataFrame(fit_results).T
        print(df_fits[view_cols])

        true_peaks = df_fits['epk'].to_numpy(dtype=float)
        raw_peaks = df_fits['mu_raw'].to_numpy(dtype=float)
        raw_error = df_fits['mu_unc'].to_numpy(dtype=float)

        error = raw_error / raw_peaks * true_peaks
        weights = np.diag(1 / error**2)

        # design matrix with columns pk**degree, ..., pk**1, pk**0
        degree = config['pol_order']
        raw_peaks_matrix = np.vander(raw_peaks, degree + 1)
        print(raw_peaks_matrix)

        # perform matrix inversion
        xTW = np.dot(raw_peaks_matrix.T, weights)
        xTWX = np.dot(xTW, raw_peaks_matrix)
        xTWY = np.dot(xTW, true_peaks)
        if np.linalg.det(xTWX) == 0:
            print("singular matrix, determinant is 0, can't get cal constants")
            raise SystemExit(1)
        xTWX_inv = np.linalg.inv(xTWX)

        # get polynomial coefficients and error
        cal_pars = np.dot(xTWX_inv, xTWY)
        cal_errs = np.sqrt(np.diag(xTWX_inv))
        n = len(cal_pars)
        print('Fit:', ' '.join([f'p{i}:{cal_pars[i]:.4e}' for i in range(n)]))
        print('Unc:', ' '.join([f'p{i}:{cal_errs[i]:.4e}' for i in range(n)]))

        # ----------------------------------------------------------------------
        # repeat the peak fit with the calibrated energy (affects widths)

        # compute calibrated energy
        pol = np.poly1d(cal_pars)  # handy numpy polynomial object
        cal_data = pol(raw_data[et])

        fit_results = {}
        for ie, epk in enumerate(epeaks):
            fit_results[ie], _ = _fit_peak_window(cal_data, epk, runtime_min)

        # consolidate results again
        view_cols = ['epk', 'mu', 'fwhm', 'residual']
        df_fits = pd.DataFrame(fit_results).T

        # compute the difference between lit and measured values
        cal_peaks = pol(raw_peaks)
        df_fits['residual'] = true_peaks - cal_peaks
        print(df_fits[view_cols])

        # fit fwhm vs. energy
        # FWHM(E) = sqrt(A_noise^2 + A_fano^2 * E + A_qcol^2 E^2)
        # Ref: Eq. 3 of https://arxiv.org/abs/1902.02299
        # TODO: fix error handling
        def sqrt_fwhm(x, a_n, a_f, a_c):
            return np.sqrt(a_n**2 + a_f**2 * x + a_c**2 * x**2)

        p_guess = [0.3, 0.05, 0.001]
        p_fit, p_cov = curve_fit(sqrt_fwhm, df_fits['mu'], df_fits['fwhm'],
                                 p0=p_guess)

        if config['show_plot']:

            # show a split figure with calibrated spectrum + used peaks on
            # top, and calib.function and resolution vs. energy on bottom
            fig, (p0, p1) = plt.subplots(2, 1, figsize=(8, 8), sharex=True)

            # get histogram (cts / keV / d)
            xlo, xhi, xpb = config['cal_range']
            hist, bins, _ = pgh.get_hist(cal_data, range=(xlo, xhi), dx=xpb)
            hist_norm = np.divide(hist, runtime_min * 60 * xpb)

            # show peaks.  pyplot.get_cmap is used because cm.get_cmap is
            # gone from recent matplotlib releases.
            cmap = plt.get_cmap('brg', len(df_fits) + 1)
            for i, row in df_fits.iterrows():

                # get a pretty label for the isotope
                lbl = config['pks'][str(row['epk'])]
                iso = ''.join(re.findall('[0-9]+', lbl))
                ele = ''.join(re.findall('[a-z]', lbl, re.I))
                pk_lbl = r'$^{%s}$%s' % (iso, ele)

                pk_diff = row['epk'] - row['mu']
                p0.axvline(row['epk'], ls='--', c=cmap(i), lw=1,
                           label=f"{pk_lbl} : {row['epk']} + {pk_diff:.3f}")

            p0.semilogy(bins[1:], hist_norm, ds='steps', c='b', lw=1)

            p0.set_ylabel('cts / s / keV', ha='right', y=1)
            p0.legend(loc=3, fontsize=11)

            # TODO: add fwhm errorbar
            x_fit = np.arange(xlo, xhi, xpb)
            y_init = sqrt_fwhm(x_fit, *p_guess)
            p1.plot(x_fit, y_init, '-', lw=1, c='orange', label='guess')

            y_fit = sqrt_fwhm(x_fit, *p_fit)
            a_n, a_f, a_c = p_fit
            fit_label = r'$\sqrt{(%.2f)^2 + (%.3f)^2 E + (%.4f)^2 E^2}$' % (
                a_n, a_f, a_c)
            p1.plot(x_fit, y_fit, '-r', lw=1, label=f'fit: {fit_label}')

            p1.plot(df_fits['mu'], df_fits['fwhm'], '.b')

            p1.set_xlabel('Energy (keV)', ha='right', x=1)
            p1.set_ylabel('FWHM (keV)', ha='right', y=1)
            p1.legend(fontsize=11)

            if config['batch_mode']:
                plt.savefig('./plots/peakdet_test.png')
            else:
                plt.show()

        # the order of the polynomial should be in the table name
        pf_results[f'{et}_Anoise'] = p_fit[0]
        pf_results[f'{et}_Afano'] = p_fit[1]
        pf_results[f'{et}_Aqcol'] = p_fit[2]
        for i, par in enumerate(cal_pars):
            pf_results[f'{et}_cal{i}'] = par
        for i, err in enumerate(cal_errs):
            pf_results[f'{et}_unc{i}'] = err

    return pd.Series(pf_results)


def _fit_peak_window(cal_data, epk, runtime_min):
    """
    Histogram `cal_data` around `epk` and fit a Gaussian + flat background.

    Returns `(summary, diag)`: `summary` holds the fitted 'epk', 'mu',
    'fwhm', 'sig', 'amp' and 'bkg'; `diag` holds the histogram, window and
    fit arrays needed for diagnostic plots and goodness-of-fit.
    """
    # adjust the window.  resolution goes as roughly sqrt(energy)
    window = np.sqrt(epk) * 0.5
    xlo, xhi = epk - window / 2, epk + window / 2
    nbins = int(window) * 5
    xpb = (xhi - xlo) / nbins
    ibin_bkg = int(nbins * 0.2)

    # get histogram, error, normalize by runtime
    pk_data = cal_data[(cal_data >= xlo) & (cal_data <= xhi)]
    hist, bins, _ = pgh.get_hist(pk_data, range=(xlo, xhi), dx=xpb)
    livetime = runtime_min * 60
    hist_norm = np.divide(hist, livetime)
    # NOTE(review): passed to fit_hist as `var` although it is a square
    # root; kept exactly as the original computed it - confirm intent.
    hist_var = np.sqrt(np.asarray(hist) / livetime)

    # compute expected peak location and width (simple Gaussian)
    bkg0 = np.mean(hist_norm[:ibin_bkg])
    b, h = bins[1:], hist_norm - bkg0
    imax = np.argmax(h)
    half_max = np.amax(h) / 2
    upr_half = b[(b > b[imax]) & (h <= half_max)][0]
    bot_half = b[(b < b[imax]) & (h <= half_max)][-1]
    fwhm = upr_half - bot_half
    sig0 = fwhm / 2.355
    amp0 = np.amax(h) * fwhm
    p_init = [amp0, bins[imax], sig0, bkg0]  # a, mu, sigma, bkg
    p_fit, p_cov = pgf.fit_hist(pgf.gauss_bkg, hist_norm, bins,
                                var=hist_var, guess=p_init)
    p_err = np.sqrt(np.diag(p_cov))

    summary = {
        'epk': epk,
        'mu': p_fit[1],
        'fwhm': p_fit[2] * 2.355,
        'sig': p_fit[2],
        'amp': p_fit[0],
        'bkg': p_fit[3],
    }
    diag = {
        'xlo': xlo, 'xhi': xhi, 'xpb': xpb, 'ibin_bkg': ibin_bkg,
        'bins': bins, 'hist_norm': hist_norm,
        'p_init': p_init, 'p_fit': p_fit, 'p_err': p_err,
    }
    return summary, diag
def get_fwhm(f_grid, f_opt, efilter, verbose=False):
    """
    Fit the 2.6 MeV peak with the gauss+step function for every grid entry
    and write the new columns "fwhm", "fwhmerr" and "rchi2" to df_grid.

    For each detector group in `f_opt` and each row index `i` of the grid,
    the dataset `{efilter}_{i}` is histogrammed, trimmed to +/- 8 sigma
    around its maximum and fit with `pf.gauss_step`.  Entries whose data are
    missing or whose fit fails keep NaN in the result columns.

    Parameters
    ----------
    f_grid : str
        HDF file holding the parameter grid (read with `pd.read_hdf`).  When
        `verbose` is false it is updated in place under the key
        "pygama_optimization" after each detector.
    f_opt : str
        HDF5 file with one group per detector, each with a 'data' member.
    efilter : str
        Name prefix of the energy datasets.
    verbose : bool
        If true, show every fit interactively and do not write the grid.
    """
    print("Grid file:", f_grid)
    print("DSP file:", f_opt)
    df_grid = pd.read_hdf(f_grid)

    with h5py.File(f_opt, 'r') as f:
        for ged in f.keys():
            print("Detector:", ged)
            data = f[ged]['data']

            # declare some new columns for df_grid
            for col in (f"fwhm_{ged}", f"fwhmerr_{ged}", f"rchi2_{ged}"):
                df_grid[col] = np.nan

            for i in df_grid.index:
                try:
                    energies = data[f"{efilter}_{i}"][()]
                    mean = np.mean(energies)
                    hE, xE, vE = ph.get_hist(energies, 12000,
                                             (mean / 2, mean * 2))
                except Exception:
                    print("Energy not find in", ged, "and entry", i)
                    # without this the previous entry's histogram would be
                    # refit and stored under this entry
                    continue

                # set histogram centered and symmetric on the peak
                try:
                    imax = np.argmax(hE)
                    mu, hmax = xE[imax], hE[imax]
                    above = np.where(hE > hmax / 2)[0]  # fwhm
                    sig = (xE[above[-1]] - xE[above[0]]) / 2.355
                    near = np.where(((xE - mu) > -8 * sig)
                                    & ((xE - mu) < 8 * sig))[0]
                    ilo, ihi = near[0], near[-1]
                    xE, hE, vE = xE[ilo:ihi + 1], hE[ilo:ihi], vE[ilo:ihi]
                except Exception:
                    continue

                # set initial guesses for the peakshape function
                x0 = [hmax, mu, sig, 1, 0]

                try:
                    xF, xF_cov = pf.fit_hist(pf.gauss_step, hE, xE, var=vE,
                                             guess=x0)
                    xF_err = np.sqrt(np.diag(xF_cov))
                    # goodness of fit (xE holds one more edge than hE)
                    model = pf.gauss_step(xE[:len(hE)], *xF)
                    rchi2 = (np.sum(np.abs((model - hE)**2 / model))
                             / len(hE))
                    # update the master dataframe
                    fwhm = xF[2] * 2.355 * 2614.5 / mu
                    fwhmerr = xF_err[2] * 2.355 * 2614.5 / mu
                    df_grid.at[i, f"fwhm_{ged}"] = fwhm
                    df_grid.at[i, f"fwhmerr_{ged}"] = fwhmerr
                    df_grid.at[i, f"rchi2_{ged}"] = rchi2
                    print(fwhm, fwhmerr, rchi2)
                except Exception:
                    print("Fit not computed for detector", ged,
                          "and entry", i)
                    # nothing valid to plot for this entry
                    continue

                if verbose:
                    plt.cla()
                    plt.plot(xE, pf.gauss_step(xE, *xF), c='r',
                             label='peakshape')
                    gaus, step = pf.gauss_step(xE, *xF, components=True)
                    plt.plot(xE, np.array(gaus), ls="--", lw=2, c='g',
                             label="gaus")
                    plt.plot(xE, np.array(step), ls='--', lw=2, c='m',
                             label='step + bg')
                    plt.plot(xE[1:], hE, lw=1, c='b', label=f"data {ged}")
                    plt.xlabel("ADC channels", ha='right', x=1)
                    plt.ylabel("Counts", ha='right', y=1)
                    plt.legend(
                        loc=2, fontsize=10,
                        title=rf"FWHM = {fwhm:.2f} $\pm$ {fwhmerr:.2f} keV")
                    plt.show()

            # write the updated df_grid to the output file.
            if not verbose:
                df_grid.to_hdf(f_grid, key="pygama_optimization")
                print("Update grid file:", f_grid, "with detector", ged)
                print(df_grid)
def peak(df, runDB, calDB, r, line, p=(1, 0), plotit=False):
    """
    Fit a Gaussian to one calibration line and return its position and FWHM.

    The calibrated energy `p[0] * (cal * e_ftp) + p[1]` (with a hard-coded
    first-pass constant `cal`) is stored in the 'e_cal' column of the
    caller's `df`.  Rows with index in (1000, 500000) are histogrammed in a
    +/- 0.5 % window around `line` (60 bins) and fit with `pga.fit_hist`.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain an 'e_ftp' column; gains an 'e_cal' column.
    runDB : mapping
        'meta_dir' is read; plots go to `<meta_dir>/plots/`.
    calDB : any
        Unused; kept for interface compatibility.
    r : any
        Run identifier, used only in the plot file name.
    line : float
        Expected line energy.
    p : sequence of two numbers
        Slope and offset applied on top of the first-pass calibration.
    plotit : bool
        If true, draw the data and fit and save `lineFit_<r>.png`.

    Returns
    -------
    (str, str)
        Fitted peak position and FWHM, both formatted with two decimals.
    """
    cal = 0.04998  # calDB["cal_pass1"]["1"]["p1cal"]
    meta_dir = os.path.expandvars(runDB["meta_dir"])

    df['e_cal'] = p[0] * (cal * df['e_ftp']) + p[1]
    df = df.loc[(df.index > 1000) & (df.index < 500000)]

    def gauss(x, mu, sigma, A=1):
        """
        define a gaussian distribution, w/ args: mu, sigma, area (optional).
        """
        return A * (1. / sigma / np.sqrt(2 * np.pi)) * np.exp(
            -(x - mu)**2 / (2. * sigma**2))

    line_min = 0.995 * line
    line_max = 1.005 * line
    nbin = 60
    # empirical energy resolution curve from experience
    res = 6.3e-4 * line + 0.85

    hist, bins, var = pgh.get_hist(df['e_cal'],
                                   range=(line_min, line_max),
                                   dx=(line_max - line_min) / nbin)
    if plotit:
        pgh.plot_hist(hist, bins, var=hist, label="data", color='blue')

    pars, cov = pga.fit_hist(gauss, hist, bins, var=hist,
                             guess=[line, res, 50])
    pgu.print_fit_results(pars, cov, gauss)
    if plotit:
        pgu.plot_func(gauss, pars, label="chi2 fit", color='red')

    # convert sigma to FWHM
    sigma_to_fwhm = 2. * np.sqrt(2. * np.log(2))
    FWHM = '%.2f' % (pars[1] * sigma_to_fwhm)
    FWHM_uncertainty = '%.2f' % (np.sqrt(cov[1][1]) * sigma_to_fwhm)
    # named peak_pos so the function's own name is not shadowed
    peak_pos = '%.2f' % pars[0]
    peak_uncertainty = '%.2f' % np.sqrt(cov[0][0])

    if plotit:
        labels = [
            'Peak = ' + peak_pos + r' $\pm$ ' + peak_uncertainty,
            'FWHM = ' + FWHM + r' $\pm$ ' + FWHM_uncertainty,
        ]
        plt.xlim(line_min, line_max)
        plt.xlabel('Energy (keV)', ha='right', x=1.0)
        plt.ylabel('Counts', ha='right', y=1.0)

        plt.tight_layout()
        plt.hist(df['e_cal'], range=(line_min, line_max), bins=nbin)
        plt.legend(labels, frameon=False, loc='upper right',
                   fontsize='small')
        plt.savefig(meta_dir + '/plots/lineFit_' + str(r) + '.png')

    return peak_pos, FWHM
def analyze_pulser_run(df_row):
    """
    Build the superpulse (average waveform) for one pulser run.

    Pulser events are selected within 1 sigma of the fitted pulser peak,
    their waveforms are read from the raw file, cleaned on baseline,
    baseline-subtracted, time-aligned and averaged.

    Module-level settings read here: `dg`, `dsp_name`, `raw_name`, `sto`,
    `ecal`, `e_window`, `nwfs`, `bl_thresh`, `tp_align`, `n_pre`, `n_post`
    and `show_plots`.

    Parameters
    ----------
    df_row : pd.Series
        One row of the pulser table; the fields 'E_keV', 'runtime'
        (minutes), 'V_pulser', 'cycle', 'dsp_path', 'dsp_file', 'raw_path'
        and 'raw_file' are read.

    Returns
    -------
    np.ndarray or list
        The superpulse normalized to its maximum, or an empty list for the
        background run (E_keV == 0) and when no event falls in the 1 sigma
        window.
    """
    epk, rt, vp, cyc = df_row[['E_keV', 'runtime', 'V_pulser', 'cycle']]
    rt *= 60  # sec
    if epk == 0:
        return []  # skip the bkg run

    # load pulser energies
    f_dsp = dg.lh5_dir + '/' + df_row.dsp_path + '/' + df_row.dsp_file
    pdata = lh5.load_nda([f_dsp], ['energy'], dsp_name)['energy'] * ecal

    # auto-narrow the window around the max pulser peak in two steps
    elo, ehi, epb = epk - 50, epk + 50, 0.5
    pdata_all = pdata[(pdata > elo) & (pdata < ehi)]
    hp, bp, _ = pgh.get_hist(pdata_all, range=(elo, ehi), dx=epb)
    pctr = bp[np.argmax(hp)]

    plo, phi, ppb = pctr - e_window, pctr + e_window, 0.1
    pdata_pk = pdata[(pdata > plo) & (pdata < phi)]
    hp, bp, _ = pgh.get_hist(pdata_pk, range=(plo, phi), dx=ppb)
    hp_rt = np.divide(hp, rt)
    hp_var = np.sqrt(np.asarray(hp) / rt)

    # fit a gaussian to get 1 sigma e-values
    ibin_bkg = 50
    bkg0 = np.mean(hp_rt[:ibin_bkg])
    b, h = bp[1:], hp_rt
    imax = np.argmax(h)
    half_max = np.amax(h) / 2
    upr_half = b[(b > b[imax]) & (h <= half_max)][0]
    bot_half = b[(b < b[imax]) & (h <= half_max)][-1]
    fwhm = upr_half - bot_half
    sig0 = fwhm / 2.355
    amp0 = np.amax(hp_rt) * fwhm
    p_init = [amp0, bp[imax], sig0, bkg0]
    p_fit, p_cov = pgf.fit_hist(pgf.gauss_bkg, hp_rt, bp, var=hp_var,
                                guess=p_init)
    amp, mu, sigma, bkg = p_fit

    # select events within 1 sigma of the maximum
    # and pull the waveforms from the raw file to make a superpulse.
    # Always reduce np.where's tuple to a flat index array, so the code below
    # works whether or not the selection exceeds nwfs.
    idx = np.where((pdata >= mu - sigma) & (pdata <= mu + sigma))[0]
    print(f'Pulser at {epk} keV, {len(idx)} events. Limiting to {nwfs}.')
    idx = idx[:nwfs]
    if len(idx) == 0:
        print(f'No pulser events selected at {epk} keV, skipping.')
        return []

    # grab the 2d numpy array of pulser wfs
    n_rows = idx[-1] + 1  # read up to this event and stop
    f_raw = dg.lh5_dir + '/' + df_row.raw_path + '/' + df_row.raw_file
    tb_wfs, n_wfs = sto.read_object(raw_name, f_raw, n_rows=n_rows)
    pwfs = tb_wfs['values'].nda[idx, :]

    # data cleaning step: remove events with outlier baselines.
    # The most frequent integer baseline is taken from np.unique (smallest
    # value on ties), which does not depend on the SciPy mode() return shape.
    bl_means = pwfs[:, :500].mean(axis=1)
    bl_vals, bl_counts = np.unique(bl_means.astype(int), return_counts=True)
    bl_mode = bl_vals[np.argmax(bl_counts)]
    bl_ctr = np.subtract(bl_means, bl_mode)
    idx_dc = np.where(np.abs(bl_ctr) < bl_thresh)
    pwfs = pwfs[idx_dc[0], :]
    bl_means = bl_means[idx_dc]

    # baseline subtract (broadcast the per-waveform baseline over samples)
    wfs = pwfs - bl_means[:, None]

    # time-align all wfs at their 50% timepoint (tricky!).
    # adapted from pygama/sandbox/old_dsp/[calculators,transforms].py
    # an alternate approach would be to use ProcessingChain here
    wf_maxes = np.amax(wfs, axis=1)
    timepoints = np.argmax(wfs >= wf_maxes[:, None] * tp_align, axis=1)
    # sample window [tp - n_pre, tp + n_post) for every waveform
    wf_idxs = timepoints[:, None] + np.arange(-n_pre, n_post)
    row_idxs = np.arange(wfs.shape[0])[:, None]
    wfs = wfs[row_idxs, wf_idxs]

    # take the average to get the superpulse
    superpulse = np.mean(wfs, axis=0)

    # normalize all wfs to the superpulse maximum
    wfmax = np.amax(superpulse)
    superpulse = np.divide(superpulse, wfmax)
    wfs = np.divide(wfs, wfmax)

    # -- plot results --
    if show_plots:
        fig, (p0, p1) = plt.subplots(2, figsize=(7, 8))

        # plot fit result (top), and waveforms + superpulse (bottom)
        xfit = np.arange(plo, phi, ppb * 0.1)
        p0.plot(xfit, pgf.gauss_bkg(xfit, *p_init), '-', c='orange',
                label='init')
        p0.plot(xfit, pgf.gauss_bkg(xfit, *p_fit), '-', c='red', label='fit')

        # plot 1 sigma window
        p0.axvspan(mu - sigma, mu + sigma, color='m', alpha=0.2,
                   label='1 sigma')

        # plot data
        p0.plot(bp[1:], hp_rt, ds='steps', c='k', lw=1, label=f'{vp:.2f} V')
        p0.set_xlabel(f'onboard energy (keV, c={ecal:.2e})', ha='right', x=1)
        p0.set_ylabel('cts / s', ha='right', y=1)
        p0.legend(fontsize=10)

        # plot individ. wfs
        ts = np.arange(0, wfs.shape[1])
        for wf in wfs:
            p1.plot(ts, wf, '-k', lw=2, alpha=0.5)
        p1.plot(np.nan, np.nan, '-k', label=f'wfs, {epk:.0f} keV')

        # plot superpulse
        p1.plot(ts, superpulse, '-r', lw=2, label=f'superpulse, {vp:.2f} V')

        p1.set_xlabel('time (10 ns)', ha='right', x=1)
        p1.set_ylabel('amplitude', ha='right', y=1)
        p1.legend(fontsize=10)
        plt.savefig(f'./plots/superpulse_cyc{cyc}.png', dpi=150)
        # release the figure; one is created for every analyzed row
        plt.close(fig)

    # save the superpulse to our output file
    return superpulse
def Ba_lines(ds, t2, display=False, write_DB=True):
    """
    Fit the 356, 81, 302 and 384 keV lines and print their areas.

    The first "cal_pass3" record matching `ds.ds_list` is applied to the
    'e_ftp' column of `t2` and stored as 'e_cal' (the caller's frame is
    modified).  Each line is histogrammed with 0.08 keV bins and fit with
    `pga.gauss_cdf` (356, 302, 384) or `pga.double_gauss` (81); a plot per
    line is saved to `<meta_dir>/plots/<line>_line_run<run>.png` and shown.
    The line areas and the 356/81 area ratio are printed.

    Parameters
    ----------
    ds : dataset object
        `calDB`, `ds_list` and `runDB["meta_dir"]` are read.
    t2 : pd.DataFrame
        Tier-2 data with an 'e_ftp' column.
    display, write_DB : bool
        Unused; kept for interface compatibility.
    """
    inf = np.inf
    etype, ecal = "e_ftp", "e_cal"

    # load calibration database file with tinyDB and convert to pandas
    df_cal = pd.DataFrame(ds.calDB.table("cal_pass3").all())

    # apply calibration from db to tier 2 dataframe
    df_cal = df_cal.loc[df_cal['ds'].isin(ds.ds_list)]
    cal_row = df_cal.iloc[0]
    isLin, slope, offset = cal_row["lin"], cal_row["slope"], cal_row["offset"]

    # check for linearity and apply calibration
    eraw = t2[etype]
    if isLin:
        t2[ecal] = eraw * (eraw * slope + offset)
    else:
        t2[ecal] = eraw * (slope + (offset / eraw**2))

    meta_dir = os.path.expandvars(ds.runDB["meta_dir"])
    runNum = ds.ds_list[0]
    plot_dir = meta_dir + "/plots/"
    suffix = "_line_run" + str(runNum) + ".png"

    def tail_guess(mu):
        # a, mu, sigma, tail, tau, bkg, step
        return [150000, mu, 0.3, 50000, 0.5, 4000, 3500]

    def tail_bounds(mu_lo, mu_hi):
        return ([10, mu_lo, 0.001, 0.0, 0.001, 10, 10],
                [inf, mu_hi, 0.8, inf, inf, 10000000, 1000000])

    # gauss_cdf components are (line, tail, step, peak): signal = peak + tail
    # The 356 line
    fit = _fit_ba_line(t2[ecal], (345, 365), pga.gauss_cdf,
                       tail_guess(356), tail_bounds(353, 358), (3, 1), 2)
    area = fit['area']
    _plot_ba_line(fit, pga.gauss_cdf, 'Gauss+Tail', plot_dir + "356" + suffix)

    # The 81 line (with its 80 keV neighbour).  double_gauss components are
    # (fit, gaus1, gaus2, step).  Saved under its own name so it no longer
    # overwrites the 356 keV plot.
    guess_81 = [150000, 81, 0.3, 15000, 80, 0.3, 4000, 3500]
    bounds_81 = ([10, 80, 0.001, 10.0, 75, 0.00010, 0.10, 0.1],
                 [inf, 82, 0.8, inf, 80, 0.8, 1e9, 1e9])
    fit = _fit_ba_line(t2[ecal], (76, 84), pga.double_gauss,
                       guess_81, bounds_81, (1, 2), 3)
    area2 = fit['area']
    _plot_ba_line(fit, pga.double_gauss, 'Gauss+Gauss',
                  plot_dir + "81" + suffix)

    # The 302 line
    fit = _fit_ba_line(t2[ecal], (296, 306), pga.gauss_cdf,
                       tail_guess(302), tail_bounds(300, 305), (3, 1), 2)
    area30 = fit['area']
    _plot_ba_line(fit, pga.gauss_cdf, 'Gauss+Tail', plot_dir + "302" + suffix)

    # The 384 line
    fit = _fit_ba_line(t2[ecal], (379, 389), pga.gauss_cdf,
                       tail_guess(384), tail_bounds(382, 386), (3, 1), 2)
    area38 = fit['area']

    print("a1", area)
    print("a2", area2)
    print("a38", area38)
    print("a30", area30)
    print("ratio", area / area2)

    _plot_ba_line(fit, pga.gauss_cdf, 'Gauss+Tail', plot_dir + "384" + suffix)


def _fit_ba_line(energies, erange, fit_func, guess, bounds, signal_idx,
                 step_idx):
    """
    Histogram one line, fit it, print its chi2 per bin and integrate it.

    `signal_idx` are the indices of the components (as returned by
    `fit_func(..., components=True)`) summed into the signal; `step_idx` is
    the index of the step component.
    """
    hE, xE, vE = pgh.get_hist(energies, range=erange, dx=0.08)
    xF, xF_cov = pga.fit_hist(fit_func, hE, xE, var=np.ones(len(hE)),
                              guess=guess, bounds=bounds)
    comps = fit_func(xE, *xF, components=True)
    signal = comps[signal_idx[0]] + comps[signal_idx[1]]
    area = simps(signal, dx=0.08)

    print("Calculating the chi^2")
    # xE holds one more edge than hE; compare bin i with the model at xE[i]
    model = fit_func(xE[:len(hE)], *xF)
    chisq_ndf = np.sum(np.abs((model - hE)**2 / model)) / len(hE)
    print("chi", chisq_ndf)

    return {'hE': hE, 'xE': xE, 'xF': xF, 'signal': signal,
            'step': comps[step_idx], 'area': area}


def _plot_ba_line(fit, fit_func, signal_label, f_out):
    """Draw data, total fit, signal and step of one line; save and show."""
    xE, hE = fit['xE'], fit['hE']
    # drawstyle (ds) is the supported way to request a step histogram
    plt.plot(xE[1:], hE, ds='steps', lw=1, c='b', label="data")
    plt.plot(xE, fit_func(xE, *fit['xF']), c='r', label='Fit')
    plt.plot(xE, fit['signal'], c='m', label=signal_label)
    plt.plot(xE, fit['step'], c='g', label='Step')
    plt.xlabel("Energy [keV]", ha='right', x=1.0)
    plt.ylabel("Counts", ha='right', y=1.0)
    plt.legend()
    plt.savefig(f_out)
    plt.show()
def peakCounts_60(run, campaign, df, runtype, rt_min, radius, angle_det,
                  rotary, energy_par='trapEftp_cal', bins=50,
                  erange=[54, 65], bkg_sub=True, plot=False,
                  writeParams=False):
    """
    Get the number of counts in the 60 keV peak, make plots.

    Can be sideband-subtracted or raw. Taken partially from cage_utils.py,
    adapted to be specific for the 60 keV analysis.

    The spectrum of ``df[energy_par]`` is histogrammed over ``erange``,
    fitted with ``cage_utils.gauss_fit_func`` (seeded by
    ``pgf.gauss_mode_width_max``), and the events within +/- 3 sigma of the
    fitted mean are counted.  With ``bkg_sub`` the events in the two
    sidebands [mean-7s, mean-4s) and (mean+4s, mean+7s] are subtracted; the
    sidebands together are as wide as the peak window, so no scaling is
    applied.

    Parameters
    ----------
    run : run identifier, used in plot file names and as the JSON key
    campaign : str, inserted verbatim into the plot output directory
    df : pandas.DataFrame holding the column ``energy_par``
    runtype, rotary : not used by this function (kept for the interface)
    rt_min, radius, angle_det : only printed in the plot annotation boxes
    energy_par : str, name of the energy column
    bins : number of histogram bins
    erange : sequence of (low, high) energy limits for the fit histogram
    bkg_sub : if true, return sideband-subtracted counts
    plot : if true, save diagnostic plots under ``./plots/``
    writeParams : if true (and ``bkg_sub`` is true), write the fit results
        and cut strings to ``./analysis_60keV.json``

    Returns
    -------
    (counts, err) : the (optionally background-subtracted) peak counts and
        their Poisson uncertainty
    """
    if len(erange) < 2:
        print('Must specify an energy range for the fit!')
        # Same exception the interactive-only `exit()` helper raises, but
        # available even when the `site` module is not loaded.
        raise SystemExit

    # First use gauss_mode_width_max to use for initial guesses in fit_hist
    ehist, ebins, evars = pgh.get_hist(df[energy_par], bins=bins, range=erange)
    pars, cov = pgf.gauss_mode_width_max(ehist, ebins, evars)
    mode, width, amp = pars[0], pars[1], pars[2]
    print(f'Guess: {pars}')

    e_pars, ecov = pgf.fit_hist(cage_utils.gauss_fit_func, ehist, ebins,
                                evars, guess=(amp, mode, width, 1))
    chi_2 = pgf.goodness_of_fit(ehist, ebins, cage_utils.gauss_fit_func,
                                e_pars)

    en_amp_fit = e_pars[0]
    mean = e_pars[1]
    sig = e_pars[2]
    en_const_fit = e_pars[3]
    fwhm = sig * 2.355

    print(f'chi square: {chi_2}')
    print(f'mean: {mean}')
    print(f'width: {sig}')
    print(f'amp: {en_amp_fit}')
    print(f'C: {en_const_fit}')
    print(f'FWHM: {fwhm} \n{(fwhm/mean)*100}%')

    cut_3sig = f'({mean-3*sig} <= {energy_par} <= {mean+3*sig})'
    counts_peak = len(df.query(cut_3sig))
    err_peak = np.sqrt(counts_peak)

    print(f'peak counts: {counts_peak}')
    print(f'error: {err_peak}')

    # Annotation shared by both diagnostic plots.
    run_info = (f'r = {radius} mm \ntheta = {angle_det} deg '
                f'\nruntime {rt_min:.2f}')
    box_style = {'facecolor': 'white', 'alpha': 0.8, 'pad': 8}

    if plot:
        fig, ax = plt.subplots()

        plt.plot(ebins[1:], cage_utils.gauss_fit_func(ebins[1:], *e_pars),
                 c='r', lw=0.8, label='gaussian fit')
        plt.plot(ebins[1:], ehist, ds='steps', c='b', lw=1.)

        plt.axvline(mean - 3 * sig, c='g', lw=1,
                    label='Peak region (3 sigma)')
        plt.axvline(mean + 3 * sig, c='g', lw=1)

        plt.xlabel('Energy (keV)', fontsize=14)
        plt.ylabel('counts', fontsize=14)
        plt.title(f'60 keV peak with gaussian fit', fontsize=14)

        plt.setp(ax.get_xticklabels(), fontsize=12)
        plt.setp(ax.get_yticklabels(), fontsize=12)

        ax.text(0.03, 0.8, run_info,
                verticalalignment='bottom', horizontalalignment='left',
                transform=ax.transAxes, color='black', fontsize=10,
                bbox=box_style)
        ax.text(0.95, 0.8,
                f'mean: {mean:.2f} \nsigma: {sig:.3f} \nchi square: {chi_2:.2f}',
                verticalalignment='bottom', horizontalalignment='right',
                transform=ax.transAxes, color='black', fontsize=10,
                bbox=box_style)

        plt.legend(loc='center right')
        plt.tight_layout()
        plt.savefig(
            f'./plots/{campaign}60keV_analysis/run{run}_fit_60keV.png',
            dpi=200)
        plt.clf()
        plt.close()

    if not bkg_sub:
        return (counts_peak, err_peak)

    bkg_left_min = mean - 7. * sig
    bkg_left_max = mean - 4 * sig
    bkg_right_min = mean + 4 * sig
    bkg_right_max = mean + 7. * sig

    bkg_left = f'({bkg_left_min} <= {energy_par} < {bkg_left_max})'
    bkg_right = f'({bkg_right_min} < {energy_par} <= {bkg_right_max})'
    bkg = f'{bkg_left} or {bkg_right}'

    left_counts = len(df.query(bkg_left))
    right_counts = len(df.query(bkg_right))
    total_bkg = left_counts + right_counts

    bkg_sub_counts = counts_peak - total_bkg
    # Poisson errors of peak and sideband counts added in quadrature.
    err = np.sqrt(counts_peak + total_bkg)

    print(f'peak counts: {counts_peak}')
    print(f'bkg left: {left_counts}')
    print(f'bkg right: {right_counts}')
    print(f'total bkg: {total_bkg}')
    print(f'bkg_subtracted counts: {bkg_sub_counts}')
    print(f'error: {err}')
    print(f'{(err/bkg_sub_counts)*100:.3f}%')

    if plot:
        fig, ax = plt.subplots()

        # NOTE: the column must be selected with the plain name; the
        # previous `df[{energy_par}]` passed a set, which pandas rejects.
        full_hist, full_bins, full_evars = pgh.get_hist(
            df[energy_par], bins=bins,
            range=[mean - 9. * sig, mean + 9. * sig])

        plt.plot(full_bins[1:], full_hist, ds='steps', c='b', lw=1)

        ax.axvspan(mean - 3 * sig, mean + 3 * sig, alpha=0.1, color='g',
                   label='peak region (3 sigma)')
        ax.axvspan(bkg_left_min, bkg_left_max, alpha=0.2, color='r',
                   label='background region (3 sigma)')
        ax.axvspan(bkg_right_min, bkg_right_max, alpha=0.2, color='r')

        plt.title('60 keV peak with background subtraction region',
                  fontsize=14)
        plt.xlabel(f'{energy_par} (keV)', fontsize=14)
        plt.ylabel('counts', fontsize=14)

        plt.setp(ax.get_xticklabels(), fontsize=12)
        plt.setp(ax.get_yticklabels(), fontsize=12)

        ax.text(0.03, 0.8, run_info,
                verticalalignment='bottom', horizontalalignment='left',
                transform=ax.transAxes, color='black', fontsize=10,
                bbox=box_style)

        plt.legend(loc='upper right')
        plt.tight_layout()
        plt.savefig(
            f'./plots/{campaign}60keV_analysis/run{run}_bkgRegion_60keV.png',
            dpi=200)
        plt.clf()
        plt.close()

    # For Joule's 60keV analysis. Generally don't do this
    if writeParams:
        param_keys = [
            'mean_60', 'sig_60', 'chiSquare_fit_60', 'cut_60_3sig',
            'bkg_60_left', 'bkg_60_right', 'bkg_60'
        ]
        param_list = [mean, sig, chi_2, cut_3sig, bkg_left, bkg_right, bkg]

        for key, cut in zip(param_keys, param_list):
            cage_utils.writeJson('./analysis_60keV.json', run, key, cut)

    return (bkg_sub_counts, err)
def _n_minus_1_hist(df, df_cut, column, hist_range, n_bins, cut_lines,
                    xlabel, title, suptitle, info_text, text_x, text_ha,
                    legend_loc, out_path):
    """Save one before/after-cuts 1D histogram (log y) with its cut lines."""
    fig, ax = plt.subplots()
    fig.suptitle(suptitle, horizontalalignment='center', fontsize=14)

    hist_cut, edges = np.histogram(df_cut[column], bins=n_bins,
                                   range=hist_range)
    hist_raw, edges = np.histogram(df[column], bins=n_bins,
                                   range=hist_range)

    plt.semilogy(edges[1:], hist_raw, ds='steps', c='k', alpha=0.3, lw=1,
                 label='before cuts')
    plt.semilogy(edges[1:], hist_cut, ds='steps', c='b', lw=1,
                 label='after cuts')

    plt.axvline(cut_lines[0], c='r', lw=1, label='95% cut lines')
    plt.axvline(cut_lines[1], c='r', lw=1)

    plt.xlabel(xlabel, fontsize=14)
    plt.ylabel('counts', fontsize=14)
    plt.title(title, fontsize=14)

    plt.setp(ax.get_xticklabels(), fontsize=12)
    plt.setp(ax.get_yticklabels(), fontsize=12)

    ax.text(text_x, 0.75, info_text,
            verticalalignment='bottom', horizontalalignment=text_ha,
            transform=ax.transAxes, color='black', fontsize=12,
            bbox={'facecolor': 'white', 'alpha': 0.5, 'pad': 10})

    if legend_loc is None:
        plt.legend()
    else:
        plt.legend(loc=legend_loc)
    plt.tight_layout()
    plt.savefig(out_path, dpi=200)
    plt.clf()
    plt.close()


def n_minus_1(run, campaign, df, dg, runtype, rt_min, radius, angle_det,
              rotary, cut_keys):
    """
    Make "N-1" data-cleaning plots: for each cut in ``cut_keys``, apply all
    the *other* cuts and plot the cut parameters before and after.

    Cut strings and cut-line values are read from ``./cuts.json`` under the
    key ``str(run)``.  The muon cut stored there is always applied first and
    defines the "before cuts" sample.  For every left-out cut the function
    saves histograms of bl, bl_slope, bl_sig and ftp_max, a 2D wf_max vs
    trapEftp_cal map at three zoom levels, and a Gaussian fit of the 60 keV
    peak, all under
    ``./plots/{campaign}dataCleaning/N_minus_1/raw/{run}/``.

    Parameters
    ----------
    run : run identifier (key into cuts.json, also used in output paths)
    campaign : str, inserted verbatim into the plot output directory
    df : pandas.DataFrame with columns bl, bl_slope, bl_sig, ftp_max,
        wf_max, trapEftp_cal plus whatever the cut strings reference
    dg, runtype, rotary : not used by this function (kept for the interface)
    rt_min, radius, angle_det : only printed in the plot annotation boxes
    cut_keys : iterable of cut names (keys of the run's entry in cuts.json)

    Returns
    -------
    None
    """
    with open('./cuts.json') as f:
        cuts = json.load(f)
    run_cuts = cuts[str(run)]

    e_res_const = [run_cuts['e_res_const0'],
                   run_cuts['e_res_const1'],
                   run_cuts['e_res_const2']]

    bl_cut_lo_raw = run_cuts['bl_cut_lo_raw']
    bl_cut_hi_raw = run_cuts['bl_cut_hi_raw']
    bl_slope_lo_raw = run_cuts['bl_slope_lo_raw']
    bl_slope_hi_raw = run_cuts['bl_slope_hi_raw']
    bl_sig_lo_raw = run_cuts['bl_sig_lo_raw']
    bl_sig_hi_raw = run_cuts['bl_sig_hi_raw']
    ftp_max_lo_raw = run_cuts['ftp_max_lo_raw']
    ftp_max_hi_raw = run_cuts['ftp_max_hi_raw']
    wf_max_fit_const = run_cuts['wf_max_fit_const']
    wf_max_fit_offset = run_cuts['wf_max_fit_offset']

    df = df.query(run_cuts['muon_cut']).copy()
    total_counts = len(df)
    print(f'total counts: {total_counts}')

    # Accept any iterable of names, not only a set.
    all_cuts = set(cut_keys)
    info_text = (f'r = {radius} mm \ntheta = {angle_det} deg '
                 f'\nruntime {rt_min:.2f}')
    # All plots of one run go to the same directory.  The baseline-mean plot
    # used to be written to a path lacking "dataCleaning/".
    out_dir = f'./plots/{campaign}dataCleaning/N_minus_1/raw/{str(run)}'

    for cut_out in cut_keys:
        df_cut = df
        cut_set = all_cuts - {cut_out}
        # Report the cut that is actually applied, i.e. without `cut_out`.
        cut_full = " and ".join([run_cuts[c] for c in cut_set])
        print(f'Leaving out {cut_out}. \nfull cut: {cut_full}\n')

        # have to apply cuts individually instead of using `cut_full`
        # because the total cut string is too long for the query :'(
        for cut in cut_set:
            print(f'applying cut: {cut}')
            df_cut = df_cut.query(run_cuts[cut]).copy()
            cut_counts = len(df.query(run_cuts[cut]))
            percent_surviving = (cut_counts/total_counts)*100.
            print(f'Percentage surviving {cut} cut: {percent_surviving:.2f}')

        cut_counts_total = len(df_cut)
        percent_surviving_total = (cut_counts_total/total_counts)*100.
        print(f'Percentage surviving cuts: {percent_surviving_total:.2f}')

        suptitle = (f'Run {run}; All cuts except: {cut_out}'
                    f'\n{percent_surviving_total:.2f}% surviving cuts')

        # ____________baseline mean_____________________________________
        blo, bhi, bpb = 9000, 9400, 1
        _n_minus_1_hist(
            df, df_cut, 'bl', [blo, bhi], int((bhi-blo)/bpb),
            (bl_cut_lo_raw, bl_cut_hi_raw),
            'bl', f'Baseline Mean', suptitle, info_text,
            0.05, 'left', 'center left',
            f'{out_dir}/except_{cut_out}_bl_mean_raw.png')

        # ____________baseline slope____________________________________
        blo, bhi, bpb = -10., 10., 0.005
        _n_minus_1_hist(
            df, df_cut, 'bl_slope', [blo, bhi], int((bhi-blo)/bpb),
            (bl_slope_lo_raw, bl_slope_hi_raw),
            'bl_slope', f'Baseline Slope', suptitle, info_text,
            0.05, 'left', None,
            f'{out_dir}/except_{cut_out}_bl_slope_raw.png')

        # ____________baseline sigma____________________________________
        blo, bhi, bpb = 2., 12., 0.005
        _n_minus_1_hist(
            df, df_cut, 'bl_sig', [blo, bhi], int((bhi-blo)/bpb),
            (bl_sig_lo_raw, bl_sig_hi_raw),
            'bl_sigma', f'Baseline Sigma', suptitle, info_text,
            0.9, 'right', 'center right',
            f'{out_dir}/except_{cut_out}_bl_sig_raw.png')

        # ____________trapEftp/trapEmax_________________________________
        # Use the binning computed for this range; the previous code
        # reused the bin count left over from the baseline-sigma plot.
        elo, ehi = 0.925, 1.01
        e_bins = int((ehi - elo)/0.001)
        _n_minus_1_hist(
            df, df_cut, 'ftp_max', [elo, ehi], e_bins,
            (ftp_max_lo_raw, ftp_max_hi_raw),
            'trapEftp/trapEmax', f'trapEftp/trapEmax', suptitle, info_text,
            0.1, 'left', 'center left',
            f'{out_dir}/except_{cut_out}_ftp_max_raw.png')

        # ____________wf_maxVtrapEftp_cal_______________________________
        fig, ax = plt.subplots()
        fig.suptitle(suptitle, horizontalalignment='center', fontsize=14)

        elo, ehi = 0, 5500
        e_bins = 2000
        wflo, wfhi = 0, 15000
        wf_bins = 2000

        wf_maxVEnergy, xedges, yedges = np.histogram2d(
            df_cut['wf_max'], df_cut['trapEftp_cal'],
            bins=[wf_bins, e_bins], range=([wflo, wfhi], [elo, ehi]))
        X, Y = np.mgrid[wflo:wfhi:wf_bins*1j, elo:ehi:e_bins*1j]

        plt.pcolormesh(X, Y, wf_maxVEnergy, norm=LogNorm())
        cb = plt.colorbar()
        cb.set_label("counts", ha='right', va='center', rotation=270,
                     fontsize=14)
        cb.ax.tick_params(labelsize=12)

        ax.text(0.1, 0.75, info_text,
                verticalalignment='bottom', horizontalalignment='left',
                transform=ax.transAxes, color='black', fontsize=12,
                bbox={'facecolor': 'white', 'alpha': 0.5, 'pad': 10})

        # note: plotting the fit lines is only reliable if you used the
        # same binning as when the fit was done!
        en_bin_centers = pgh.get_bin_centers(xedges)
        cal_en_bin_centers = pgh.get_bin_centers(yedges)

        central = wf_max_fit_const*en_bin_centers + wf_max_fit_offset
        half_band = 2.*np.sqrt(e_res_const[0]
                               + e_res_const[1]*cal_en_bin_centers
                               + e_res_const[2]*cal_en_bin_centers**2)
        plt.plot(en_bin_centers, central + half_band, 'r', lw=0.7,
                 label='cut lines')
        plt.plot(en_bin_centers, central - half_band, 'r', lw=0.7)

        ax.set_xlabel('wf_max', fontsize=14)
        ax.set_ylabel('trapEftp_cal (keV)', fontsize=14)
        plt.setp(ax.get_xticklabels(), fontsize=12)
        plt.setp(ax.get_yticklabels(), fontsize=12)

        plt.title(f'wf_max vs Energy', horizontalalignment='center',
                  fontsize=14)
        plt.legend(loc='lower right')
        plt.tight_layout()

        # Same figure saved at three zoom levels.
        plt.ylim(0, 300)
        plt.xlim(0, 800)
        plt.savefig(f'{out_dir}/except_{cut_out}_wf_max_raw_lowE.png',
                    dpi=200)

        plt.ylim(1200, 1550)
        plt.xlim(3300, 4300)
        plt.savefig(f'{out_dir}/except_{cut_out}_wf_max_raw_1460.png',
                    dpi=200)

        plt.ylim(2400, 2750)
        plt.xlim(6600, 8000)
        plt.savefig(f'{out_dir}/except_{cut_out}_wf_max_raw_2615.png',
                    dpi=200)

        plt.clf()
        plt.close()

        # ____________60 keV with fit___________________________________
        pgfenergy_hist, pgfebins, evars = pgh.get_hist(
            df_cut['trapEftp_cal'], bins=50, range=[54, 65])
        raw_pgfenergy_hist, pgfebins, evars = pgh.get_hist(
            df['trapEftp_cal'], bins=50, range=[54, 65])

        # NOTE: as before, `evars` passed to the fit below is the variance
        # of the *uncut* histogram (the second get_hist call overwrites it).
        pars, cov = pgf.gauss_mode_width_max(pgfenergy_hist, pgfebins, evars)
        mode, width, amp = pars[0], pars[1], pars[2]
        print(f'mode: {mode}')
        print(f'width: {width}')
        print(f'amp: {amp}')

        e_pars, ecov = pgf.fit_hist(cage_utils.gauss_fit_func,
                                    pgfenergy_hist, pgfebins, evars,
                                    guess=(amp, mode, width, 1))

        amp_fit = e_pars[0]
        mean_fit = e_pars[1]
        width_fit = e_pars[2]
        const_fit = e_pars[3]
        fwhm = width_fit*2.355

        print(f'mean: {mean_fit}')
        print(f'width: {width_fit}')
        print(f'amp: {amp_fit}')
        print(f'C: {const_fit}')
        print(f'FWHM at 60 keV: {fwhm} \n{(fwhm/mean_fit)*100}%')

        fig, ax = plt.subplots()
        fig.suptitle(suptitle, horizontalalignment='center', fontsize=14)

        plt.plot(pgfebins[1:],
                 cage_utils.gauss_fit_func(pgfebins[1:], *e_pars),
                 c='r', lw=0.8, label='gaussian fit')
        plt.plot(pgfebins[1:], pgfenergy_hist, ds='steps', c='b', lw=1.,
                 label='after cuts')
        plt.plot(pgfebins[1:], raw_pgfenergy_hist, ds='steps', c='k',
                 alpha=0.3, lw=1., label='before cuts')

        plt.xlabel('Energy (keV)', fontsize=14)
        plt.ylabel('counts', fontsize=14)
        plt.title(f'60 keV peak with gaussian fit', fontsize=14)

        plt.setp(ax.get_xticklabels(), fontsize=12)
        plt.setp(ax.get_yticklabels(), fontsize=12)

        ax.text(0.05, 0.75, info_text,
                verticalalignment='bottom', horizontalalignment='left',
                transform=ax.transAxes, color='black', fontsize=10,
                bbox={'facecolor': 'white', 'alpha': 0.5, 'pad': 8})
        ax.text(0.95, 0.72,
                f'mean: {mean_fit:.2f} \nsigma: {width_fit:.3f} \nFWHM at 60 keV: {fwhm:.2f} keV\n({(fwhm/mean_fit)*100:.2f}%)',
                verticalalignment='bottom', horizontalalignment='right',
                transform=ax.transAxes, color='black', fontsize=10,
                bbox={'facecolor': 'white', 'alpha': 0.5, 'pad': 8})

        plt.legend(loc='center right')
        plt.tight_layout()
        plt.savefig(f'{out_dir}/except_{cut_out}_fit_60keV_raw.png', dpi=200)
        plt.clf()
        plt.close()
def fit_Am_lines(ds, t2, display=False, write_DB=True):
    """
    Fit the 60 keV and the 99/103 keV Am lines and compare their integrals.

    The raw energy ``t2["e_ftp"]`` is calibrated with the first entry of the
    "cal_pass3" table of ``ds.calDB`` as ``e * (e * slope + offset)``.  The
    fits are then repeated for a list of histogram bin sizes to study how
    the bin size affects the chi^2 and the peak-integral ratio
    n60 / (n99 + n103); both are plotted against the bin size at the end.

    Parameters
    ----------
    ds : dataset object providing ``calDB``, ``config["meta_dir"]`` and
        ``ds_list``
    t2 : table/DataFrame with an "e_ftp" column
    display : if true, plot (and save) every individual fit
    write_DB : if true, upsert the results of the LAST scanned bin size into
        ``<meta_dir>/PeakRatios_100evbin.json`` for every dataset in
        ``ds.ds_list``

    Returns
    -------
    None
    """
    print("Fit Am lines")

    etype = "e_ftp"

    # Load calibration Values
    calDB = ds.calDB
    table = calDB.table("cal_pass3").all()
    df_cal = pd.DataFrame(table)

    slope = df_cal.iloc[0]["slope"]
    offset = df_cal.iloc[0]["offset"]

    # load in the energy and apply (linear) calibration
    ene = t2[etype]
    e_cal = ene * (ene * slope + offset)

    green_line = slope * 500 + offset

    meta_dir = os.path.expandvars(ds.config["meta_dir"])
    runNum = ds.ds_list[0]
    inf = np.inf

    # Here I did a quick study on the impact of the bin size on the integral
    # and the chi2 (this is the next for loop)
    ar = []
    chic = []
    scan = [0.1, 0.09, 0.08, 0.07, 0.06, 0.05, 0.04, 0.03, 0.02, 0.01]
    aq = 1500000

    # For loop over different bin sizes
    for bi in scan:

        # Do the 100keV lines first
        xlo, xhi, xpb = 90, 110, bi
        hE, xE, vE = pgh.get_hist(e_cal, range=(xlo, xhi), dx=xpb)

        # Set up initial values and limits
        guess_100 = [100000, 99, 0.5, 11000, 103, 0.5, 4050, 101, 0.5,
                     400000, 39000, 400, 20000]
        bounds_100 = ([-inf, 97, -inf, -inf, 102, -inf, -inf, 100.1, 0.001,
                       -inf, -inf, -inf, -inf],
                      [inf, 100, inf, inf, 104, inf, inf, 101.7, 0.8,
                       inf, inf, inf, inf])

        # Do the fit (Am_double function from PeakFitting.py)
        xF, xF_cov = pga.fit_hist(pga.Am_double, hE, xE,
                                  var=np.ones(len(hE)), guess=guess_100,
                                  bounds=bounds_100)
        dg_fit, gaus1, gaus2, gaus3, step1, step2 = pga.Am_double(
            xE, *xF, components=True)

        # calculate the integral
        area_g1 = simps(gaus1, dx=bi)
        area_g2 = simps(gaus2, dx=bi)

        # per-bin chi^2 contribution, normalised to the observed counts
        chisq = [abs((pga.Am_double(xE[i], *xF) - h)**2 / h)
                 for i, h in enumerate(hE)]
        # 13 = number of free parameters of Am_double
        chisq_ndf_100 = sum(np.array(chisq) / (len(hE)-13))

        # Plot it if wanted
        if display:
            # `ds` (drawstyle) is the supported way to request a step plot;
            # ls='steps' is rejected by current Matplotlib.
            plt.plot(xE[1:], hE, ds='steps', lw=1, c='b', label="data")
            plt.plot(xE, pga.Am_double(xE, *xF), c='r', label='Fit')
            plt.plot(xE, gaus1+gaus2, c='m', label='Gauss 99 keV + 103 keV')
            plt.plot(xE, gaus3, c='y', label='Gauss bkg')
            plt.plot(xE, step1+step2, c='g', label='Step')
            plt.xlabel("Energy [keV]", ha='right', x=1.0)
            plt.ylabel("Counts", ha='right', y=1.0)
            plt.legend()
            plt.savefig(meta_dir+"/plots/100keV_100ev_bin_lines_run" + str(runNum)+".png")
            plt.show()

        # Do the 60 keV line
        xlo, xhi, xpb = 50, 70, bi
        hE, xE, vE = pgh.get_hist(e_cal, range=(xlo, xhi), dx=xpb)

        # [amplitude, mu, sigma, tail, tau, bkg, step]
        guess_60 = [aq, 59.5, 0.3, 50000, 0.5, 4000, 3500]
        bounds_60 = ([10, 59, 0.001, 0.0, 0.001, 10, 10],
                     [inf, 60.5, 0.8, inf, inf, 10000000, 1000000])

        # The fit Function is a gauss_cdf
        xF, xF_cov = pga.fit_hist(pga.gauss_cdf, hE, xE,
                                  var=np.ones(len(hE)), guess=guess_60,
                                  bounds=bounds_60)
        line, tail, step, peak = pga.gauss_cdf(xE, *xF, components=True)

        print("Calculating the chi^2")
        chisq_60 = []
        for i, h in enumerate(hE):
            func = pga.gauss_cdf(xE[i], *xF)
            chisq_60.append(abs((func - h)**2 / func))

        chi_60 = sum(np.array(chisq_60))
        chisq_ndf_60 = chi_60/(len(hE))

        if display:
            plt.plot(xE[1:], hE, ds='steps', lw=1, c='b', label="data")
            plt.plot(xE, pga.gauss_cdf(xE, *xF), c='r', label='Fit')
            plt.plot(xE, (peak+tail), c='m', label='Gauss+Tail')
            plt.plot(xE, step, c='g', label='Step')
            plt.xlabel("Energy [keV]", ha='right', x=1.0)
            plt.ylabel("Counts", ha='right', y=1.0)
            plt.legend()
            plt.savefig(meta_dir+"/plots/60keV_lines_100ev_bin__run" + str(runNum) +".png")
            plt.show()

        area = simps(peak+tail, dx=bi)

        print("xF\n", xF)
        print("chi_60", chisq_ndf_60)
        print("chi_100", chisq_ndf_100)
        print("Peak Integrals:")
        print("60 keV = ", area)
        print("99 keV = ", area_g1)
        print("10 3keV = ", area_g2)
        print("ratio 1 = ", area/area_g1)
        print("ratio 2 = ", area/area_g2)
        print("ratio 3 = ", area/(area_g1+area_g2))

        ar.append(area/(area_g1+area_g2))
        chic.append(chisq_ndf_60)

    plt.subplot(211)
    plt.plot(scan, chic, 'bx', ms=15, label='chi^2/f')
    plt.grid()
    plt.axvline(green_line, c='g', lw=1, label="calibration value at 100 keV")
    plt.legend()
    plt.subplot(212)
    plt.plot(scan, ar, 'kx', ms=15, label='ratio "n60/(n99+n103)"')
    plt.axvline(green_line, c='g', lw=1, label="calibration value at 100 keV")
    plt.xlabel("bin size [keV]")
    plt.grid()
    plt.legend()
    plt.show()

    if write_DB:
        # The values written here are those of the last bin size in `scan`.
        res_db = meta_dir+"/PeakRatios_100evbin.json"
        resDB = db.TinyDB(res_db)
        query = db.Query()
        ratiotable = resDB.table("Peak_Ratios")

        for dset in ds.ds_list:
            row = {
                "ds": dset,
                "chi2_ndf_60": chisq_ndf_60,
                "chi2_ndf_100": chisq_ndf_100,
                "60_keV": area,
                "99_keV": area_g1,
                "103_keV": area_g2,
                "r1": area/area_g1,
                "r2": area/area_g2,
                "r3": area/(area_g1+area_g2)
            }
            ratiotable.upsert(row, query.ds == dset)
def hpge_fit_E_peaks(E_uncal, mode_guesses, wwidths, n_bins=50,
                     funcs=pgp.gauss_step, uncal_is_int=False,
                     getbounds=True):
    """
    Fit a peak-shape function to a window around each guessed peak position.

    Parameters
    ----------
    E_uncal : array
        unbinned energy data to be fit
    mode_guesses : array
        array of guesses for modes of each peak
    wwidths : float, (left, right) pair, or list of those
        widths to use for the fit windows (in units of E_uncal), typically
        on the order of 10 sigma where sigma is the peak width.  A scalar
        is split evenly around the mode guess; a non-scalar entry is read
        as (left width, right width).  Only a ``list`` is indexed per peak.
    n_bins : int or array of ints
        number of bins to use for the fit window histogramming
    funcs : function or array of functions
        funcs to be used to fit each region
    uncal_is_int : bool
        NOTE(review): not used in this function; the window is always passed
        through ``pgh.better_int_binning`` -- confirm whether that call was
        meant to be conditional on this flag.
    getbounds : bool or array of bools
        whether to compute parameter bounds (via get_hpge_E_peak_bounds)
        and pass them to the fit, for all peaks or per peak

    Returns
    -------
    pars : object array
        best-fit parameters for each peak fit (None where the fit failed)
    covs : object array
        covariance matrix for each entry of pars (None where it failed)
    binwidths : array
        bin width used for each peak fit
    ranges : array
        [Euc_min, Euc_max] used for each peak fit
    """
    pars = []
    covs = []
    binws = []
    ranges = []

    for i_peak, mode_guess in enumerate(mode_guesses):
        # get args for this peak
        wwidth_i = wwidths[i_peak] if isinstance(wwidths, list) else wwidths
        n_bins_i = n_bins if np.isscalar(n_bins) else n_bins[i_peak]
        func_i = funcs[i_peak] if hasattr(funcs, '__len__') else funcs
        if np.isscalar(wwidth_i):
            wleft_i = wright_i = wwidth_i/2
        else:
            wleft_i, wright_i = wwidth_i[0], wwidth_i[1]
        # Index by the current peak (this used to read element 1 for every
        # peak whenever a sequence was given).
        getbounds_i = getbounds if np.isscalar(getbounds) else getbounds[i_peak]

        # bin a histogram
        Euc_min = mode_guess - wleft_i
        Euc_max = mode_guess + wright_i
        Euc_min, Euc_max, n_bins_i = pgh.better_int_binning(
            x_lo=Euc_min, x_hi=Euc_max, n_bins=n_bins_i)
        hist, bins, var = pgh.get_hist(E_uncal, bins=n_bins_i,
                                       range=(Euc_min, Euc_max))

        # get parameters guesses
        par_guesses = get_hpge_E_peak_par_guess(hist, bins, var, func_i)
        if getbounds_i:
            bounds = get_hpge_E_peak_bounds(hist, bins, var, func_i,
                                            par_guesses)
        else:
            bounds = None

        try:
            pars_i, cov_i = pgp.fit_hist(func_i, hist, bins, var=var,
                                         guess=par_guesses, bounds=bounds)
            cov_sum = sum([sum(c) if c is not None else 0 for c in cov_i])
            # Reject fits whose covariance is infinite, identically zero or
            # implausibly small relative to the parameter values.
            if (cov_sum == np.inf or cov_sum == 0
                    or (np.sqrt(np.diagonal(cov_i))/pars_i < 1e-7).any()):
                print(f'hpge_fit_E_peaks: cov estimation failed for i_peak={i_peak} at loc {mode_guesses[i_peak]:g}')
                pars_i, cov_i = None, None
        except Exception:
            # Deliberately best-effort: a failed fit is reported as None so
            # the remaining peaks are still processed.  Unlike a bare
            # `except:`, this lets KeyboardInterrupt/SystemExit through.
            pars_i, cov_i = None, None

        # get binning
        binw_1 = (bins[-1]-bins[0])/(len(bins)-1)

        pars.append(pars_i)
        covs.append(cov_i)
        binws.append(binw_1)
        ranges.append([Euc_min, Euc_max])

    return (np.array(pars, dtype=object), np.array(covs, dtype=object),
            np.array(binws), np.array(ranges))
def get_fwhm(f_grid, f_opt, verbose=False):
    """
    duplicate the plot from Figure 2.7 of Kris Vorren's thesis (and much
    more!)

    For every row ``i`` of the parameter grid stored in ``f_grid`` this
    reads the table ``opt_{i}`` from ``f_opt``, fits its "e_ftp" spectrum
    to the HPGe peakshape function ``pf.radford_peak`` and stores the
    (uncalibrated) FWHM and a reduced chi^2 in the new grid columns "fwhm"
    and "rchi2".

    Parameters
    ----------
    f_grid : path of the HDF5 file holding the parameter grid; its first
        three columns are read as (rise, flat, rc)
    f_opt : path of the HDF5 file holding one table per grid row
    verbose : if true, show every fit and do NOT write the grid back;
        otherwise the updated grid is written to ``f_grid`` under the key
        "pygama_optimization"

    Returns
    -------
    None
    """
    df_grid = pd.read_hdf(f_grid)

    # declare some new columns for df_grid
    for col in ["fwhm", "rchi2"]:
        df_grid[col] = np.nan

    # loop over the keys and fit each e_ftp spectrum to the peakshape
    # function
    print("i  rise  flat  rc  fwhm  rchi2")

    for i, row in df_grid.iterrows():

        key = f"opt_{i}"
        t2df = pd.read_hdf(f_opt, key=key)

        # auto-histogram spectrum near the uncalibrated peak
        hE, xE, vE = ph.get_hist(t2df["e_ftp"], bins=1000, trim=False)

        # shift the histogram to be roughly centered at 0 and symmetric
        imax = np.argmax(hE)
        xE -= xE[imax]
        hmax = hE[imax]

        # crude width estimate from the bins above half maximum
        above_half = np.where(hE > hmax / 2)[0]
        sig = (xE[above_half[-1]] - xE[above_half[0]]) / 2.355

        # keep +/- 8 sigma around the peak; xE carries one more entry than
        # hE (plotted below as xE[1:] vs hE)
        window = np.where((xE > -8 * sig) & (xE < 8 * sig))[0]
        ilo, ihi = window[0], window[-1] - 1
        xE = xE[ilo - 1:ihi]
        hE, vE = hE[ilo:ihi], vE[ilo:ihi]

        # set initial guesses for the peakshape function.  could all be
        # improved
        mu = 0
        sigma = 5  # radford uses an input linear function
        hstep = 0.001
        htail = 0.5
        tau = 10
        bg0 = np.mean(hE[:20])
        amp = np.sum(hE)
        x0 = [mu, sigma, hstep, htail, tau, bg0, amp]

        xF, xF_cov = pf.fit_hist(pf.radford_peak, hE, xE, var=vE, guess=x0)

        # goodness of fit
        chisq = []
        for j, h in enumerate(hE):
            model = pf.radford_peak(xE[j], *xF)
            chisq.append(abs((model - h)**2 / model))

        # update the master dataframe
        fwhm = xF[1] * 2.355
        rchi2 = sum(np.array(chisq) / len(hE))

        df_grid.at[i, "fwhm"] = fwhm
        df_grid.at[i, "rchi2"] = rchi2

        # first three grid columns, selected by position
        rise, flat, rc = row.iloc[:3]
        label = f"{i}  {rise:.2f}  {flat:.2f}  {rc:.0f}  {fwhm:.2f}  {rchi2:.2f}"
        print(label)

        if verbose:
            # plot every dang fit
            plt.cla()

            # peakshape function
            plt.plot(xE, pf.radford_peak(xE, *x0), c='orange', label='guess')
            plt.plot(xE, pf.radford_peak(xE, *xF), c='r', label='peakshape')

            plt.axvline(mu, c='g')

            # `ds` (drawstyle) requests the step plot; ls='steps' is
            # rejected by current Matplotlib.
            plt.plot(xE[1:], hE, ds='steps', lw=1, c='b', label="data")

            # invisible entries: put the numbers into the legend
            plt.plot(np.nan, np.nan, c='w', label=f"fwhm = {fwhm:.2f} uncal.")
            plt.plot(np.nan, np.nan, c='w', label=label)

            plt.xlabel("Energy (uncal.)", ha='right', x=1)
            plt.ylabel("Counts", ha='right', y=1)
            plt.legend(loc=2, fontsize=12)
            plt.show()

    # write the updated df_grid to the output file.
    if not verbose:
        df_grid.to_hdf(f_grid, key="pygama_optimization")
        print("wrote output file")
def peak(line):
    """
    Fit a Gaussian to the calibrated spectrum around ``line`` and plot it.

    The run number is taken from ``sys.argv[1]``; the calibration constant
    is read from ``calDB.json`` and the data directories from
    ``runDB.json`` (both in the current directory).  The histogram covers
    +/- 0.5 % around ``line``.

    Parameters
    ----------
    line : float
        expected peak position, in the units of the calibrated energy

    Returns
    -------
    (peak, FWHM) : tuple of str
        fitted peak position and full width at half maximum
        (2.355 * sigma), each formatted with two decimals
    """
    if len(sys.argv) != 2:
        print('Usage: fit_calibrated_peaks.py [run number]')
        sys.exit()

    # take calibration parameter for the 'calibration.py' output
    with open("calDB.json") as f:
        calDB = json.load(f)
    cal = calDB["cal_pass1"]["1"]["p1cal"]

    with open("runDB.json") as f:
        runDB = json.load(f)
    meta_dir = os.path.expandvars(runDB["meta_dir"])
    tier_dir = os.path.expandvars(runDB["tier_dir"])

    df = pd.read_hdf("{}/t2_run{}.h5".format(tier_dir, sys.argv[1]))

    df['e_cal'] = cal * df['e_ftp']

    df = df.loc[(df.index > 1000) & (df.index < 500000)]

    def gauss(x, mu, sigma, A=1):
        """
        define a gaussian distribution, w/ args: mu, sigma, area (optional).
        """
        return A * (1. / sigma / np.sqrt(2 * np.pi)) * np.exp(
            -(x - mu)**2 / (2. * sigma**2))

    line_min = 0.995 * line
    line_max = 1.005 * line
    nbin = 60

    # empirical energy resolution curve from experience
    res = 6.3e-4 * line + 0.85

    hist, bins, var = pgh.get_hist(df['e_cal'],
                                   range=(line_min, line_max),
                                   dx=(line_max - line_min) / nbin)
    pgh.plot_hist(hist, bins, var=hist, label="data", color='blue')
    pars, cov = pga.fit_hist(gauss, hist, bins, var=hist,
                             guess=[line, res, 50])
    pgu.print_fit_results(pars, cov, gauss)
    pgu.plot_func(gauss, pars, label="chi2 fit", color='red')

    # FWHM of a Gaussian is 2*sqrt(2 ln 2) ~ 2.355 times sigma (the factor
    # used here before was 2, which is not the FWHM).
    sigma_to_fwhm = 2.355
    fwhm_str = f'{pars[1] * sigma_to_fwhm:.2f}'
    fwhm_err_str = f'{np.sqrt(cov[1][1]) * sigma_to_fwhm:.2f}'

    peak_str = f'{pars[0]:.2f}'
    peak_err_str = f'{np.sqrt(cov[0][0]):.2f}'

    labels = [
        'Peak = ' + peak_str + r' $\pm$ ' + peak_err_str,
        'FWHM = ' + fwhm_str + r' $\pm$ ' + fwhm_err_str,
    ]

    plt.xlim(line_min, line_max)
    plt.xlabel('Energy (keV)', ha='right', x=1.0)
    plt.ylabel('Counts', ha='right', y=1.0)

    plt.tight_layout()
    plt.hist(df['e_cal'], range=(line_min, line_max), bins=nbin)
    plt.legend(labels, frameon=False, loc='upper right', fontsize='small')

    return peak_str, fwhm_str
def get_resolution(f_hit='/Users/wisecg/Data/OPPI/hit/oppi_run0_cyc2027_hit.lh5',
                   tb_name='ORSIS3302DecoderForEnergy/raw',
                   etype='trapE_cal'):
    """
    Fit the peak in the 1445-1475 window of a hit file and plot the result.

    The spectrum of column ``etype`` is histogrammed with 0.2-wide bins, a
    simple numerical FWHM is taken from the half-maximum crossings, and the
    histogram is then fitted with ``pgf.radford_peak``.  The plot is saved
    to ``./plots/resolution_1460_{etype}.pdf``.

    Parameters
    ----------
    f_hit : str
        path of the LH5 hit file (default: the file previously hard-coded)
    tb_name : str
        name of the table to read from ``f_hit``
    etype : str
        name of the energy column to analyse (e.g. 'trapE_cal',
        'energy_cal')

    Returns
    -------
    dict with keys
        'e_fit'    : fitted peak position
        'fwhm'     : numerical FWHM from the histogram
        'fwhm_fit' : 2.355 * fitted sigma
        'fwhm_err' : 2.355 * sigma uncertainty, scaled by e_peak / e_fit
                     (note: 'fwhm_fit' itself is not rescaled)
        'rchisq'   : sum of per-bin chi^2 terms divided by the bin count
    """
    # load hit file
    sto = lh5.Store()
    data = sto.read_object(tb_name, f_hit)
    df_hit = data.get_dataframe()

    # load parameters
    e_peak = 1460.8
    elo, ehi, epb = 1445, 1475, 0.2

    # get histogram
    hE, bins, vE = pgh.get_hist(df_hit[etype], range=(elo, ehi), dx=epb)
    xE = bins[1:]

    # simple numerical width
    i_max = np.argmax(hE)
    h_max = hE[i_max]
    upr_half = xE[(xE > xE[i_max]) & (hE <= h_max / 2)][0]
    bot_half = xE[(xE < xE[i_max]) & (hE >= h_max / 2)][0]
    fwhm = upr_half - bot_half
    sig = fwhm / 2.355

    # fit to radford peak: mu, sigma, hstep, htail, tau, bg0, amp
    amp = h_max * fwhm
    hstep = 0.001  # fraction that the step contributes
    htail = 0.1
    tau = 10
    bg0 = np.mean(hE[:20])
    x0 = [xE[i_max], sig, hstep, htail, tau, bg0, amp]
    fit_func = pgf.radford_peak
    xF, xF_cov = pgf.fit_hist(fit_func, hE, bins, var=vE, guess=x0)

    xF_err = np.sqrt(np.diag(xF_cov))
    chisq = []
    for i, h in enumerate(hE):
        model = fit_func(xE[i], *xF)
        chisq.append(abs((model - h)**2 / model))

    # collect results
    e_fit = xF[0]
    fwhm_fit = xF[1] * 2.355  # * e_peak / e_fit
    print(fwhm, fwhm_fit)
    fwhmerr = xF_err[1] * 2.355 * e_peak / e_fit
    rchisq = sum(np.array(chisq) / len(hE))

    # plotting
    plt.plot(xE, hE, ds='steps', c='b', lw=2, label=etype)

    # peak shape
    plt.plot(xE, fit_func(xE, *x0), '-', c='orange', alpha=0.5,
             label='init. guess')
    plt.plot(xE, fit_func(xE, *xF), '-r', alpha=0.8, label='peakshape fit')
    # invisible entry: put the fit numbers into the legend
    plt.plot(np.nan, np.nan, '-w',
             label=f'mu={e_fit:.1f}, fwhm={fwhm_fit:.2f}')

    plt.xlabel(etype, ha='right', x=1)
    plt.ylabel('Counts', ha='right', y=1)
    plt.legend(loc=2)
    plt.tight_layout()
    plt.savefig(f'./plots/resolution_1460_{etype}.pdf')
    plt.cla()

    return {
        'e_fit': e_fit,
        'fwhm': fwhm,
        'fwhm_fit': fwhm_fit,
        'fwhm_err': fwhmerr,
        'rchisq': rchisq,
    }
def calibrate_pass3(ds, df, etype="e_ftp", write_db=False, display=False, linfit=True):
    """
    Energy-dependent calibration used for the HADES ICPC characterization.

    You have to look at the raw spectrum once per dataset, where one dataset
    means one detector setup, one source and one DAQ setting.  Choose at
    least two raw lines (e.g. 208Tl, 40K, Bi, ...) and a range around them,
    and add them to the config (runDB.json in the HADES work) under
    "pass3_peaks" and "pass3_lim".  The literature values go under
    "cal_peaks".

    For every raw line a histogram is built around the raw position and fit
    with pf.gauss_bkg; the calibration value is c = literature / fitted mean.
    The values c are then fit against the fitted means, by default with a
    first-order polynomial (a*x + b), otherwise with pf.cal_slope.  The
    calibrated energy is ecal = eraw * (a * eraw + b).

    Parameters
    ----------
    ds : object providing runDB (dict) and ds_list
    df : table holding the raw energy column
    etype : name of the raw energy column in df
    write_db : accepted for interface compatibility; not used here
    display : show each peak fit and save/show the summary plots
    linfit : True for the linear model, False for pf.cal_slope

    Raises
    ------
    ValueError
        if "pass3_peaks" and "cal_peaks" differ in length, since the two
        sorted lists are paired entry by entry.

    A.Zschocke
    """
    # raw peak positions, half-width of the fit window, literature energies
    epeaks = np.array(sorted(ds.runDB["pass3_peaks"]))
    e_lim = ds.runDB["pass3_lim"]
    true_peaks = np.array(sorted(ds.runDB["cal_peaks"]))
    if len(true_peaks) != len(epeaks):
        raise ValueError(
            f"pass3_peaks has {len(epeaks)} entries but cal_peaks has "
            f"{len(true_peaks)}; they must match one to one")

    # raw energy (the column named by the caller)
    ene = df[etype]

    means = []
    cals = []

    # first loop: fit every line of interest around its raw position
    for peak, e_true in zip(epeaks, true_peaks):
        xlo, xhi, xpb = peak - e_lim, peak + e_lim, 0.25
        hE, xE, vE = ph.get_hist(ene, range=(xlo, xhi), dx=xpb)

        guess = [100000, peak, 0.7, 1000]
        guess_lims = ([0, peak - 20, 0, 0], [1e9, peak + 20, 1e9, 1e9])
        xF, xF_cov = pf.fit_hist(pf.gauss_bkg, hE, xE, var=np.ones(len(hE)),
                                 guess=guess, bounds=guess_lims)

        if display:
            # 'steps' is a draw style, not a line style
            plt.plot(xE[1:], hE, ds='steps', lw=1.5, c='b')
            plt.plot(xE, pf.gauss_bkg(xE, *xF), 'r')
            plt.show()

        mean = xF[1]
        means.append(mean)
        cals.append(e_true / mean)

    # energy dependence of the calibration value
    cals = np.array(cals)
    means = np.array(means)
    if linfit:
        coef = np.polyfit(means, cals, 1)
        cal_peaks = means * (means * coef[0] + coef[1])
    else:
        # NOTE(review): this branch fits against the configured raw
        # positions (epeaks) while the linear branch uses the fitted means.
        coef, _ = curve_fit(pf.cal_slope, epeaks, cals)
        cal_peaks = means * np.sqrt(coef[0] + (coef[1] / means**2))

    # report the fitted coefficients themselves
    print(f"Calibration values:\n a={coef[0]:.5f} b={coef[1]:.5f}")

    # absolute deviation, in the units of cal_peaks
    residuals = np.abs(cal_peaks - true_peaks)
    if np.any(residuals > 1):
        print("\nWarning! No proper calibration\nThere is a deviation of",
              residuals[residuals > 1], "keV")

    if display:
        meta_dir = os.path.expandvars(ds.runDB["meta_dir"])
        runNum = ds.ds_list[0]

        x = np.arange(1, 60000, 1)
        if linfit:
            e_cal = ene * (ene * coef[0] + coef[1])
            pfit = x * coef[0] + coef[1]
        else:
            e_cal = ene * np.sqrt(coef[0] + (coef[1] / (ene**2)))
            pfit = pf.cal_slope(x, *coef)

        # calibration values and the fitted energy dependence
        plt.plot(epeaks, cals, 'kx', ms=10, label='calibration values')
        plt.plot(x, pfit, 'r', label='Fit')
        plt.xlim(0, 60010)
        plt.xlabel("raw Energy")
        plt.ylabel("cal. value")
        plt.legend()
        plt.savefig(meta_dir + "/calVals_pass3_" + str(runNum) + ".png")
        plt.show()

        # calibrated spectrum with the residuals underneath
        hE, xE, vE = ph.get_hist(e_cal, range=(0, 3000), dx=1)
        plt.figure(1, (12.00, 10.00))
        plt.subplot(211)
        plt.semilogy(xE[1:], hE, ds='steps', lw=1.5, c='b')
        plt.xlabel("Energy [keV]")
        plt.xlim(-10, 3100)

        plt.subplot(212)
        plt.plot(true_peaks, residuals, 'kx', ms=10, label='Residuals')
        plt.grid()
        plt.xlabel("Energy [keV]")
        plt.ylabel("Residuals [keV]")
        plt.xlim(-10, 3100)
        plt.legend()
        plt.savefig(meta_dir + "/calibratedSpectrum_pass3_" + str(runNum) + ".png")
        plt.show()
def peakCounts(df, energy_par='trapEftp_cal', bins=50, erange=(), bkg_sub=True, writeParams=False):
    """
    Count the events in a peak, either raw or sideband-subtracted.

    The column `energy_par` of `df` is histogrammed over `erange` and fit
    with gauss_fit_func, seeded by pgf.gauss_mode_width_max.  The peak is
    the +/- 3 sigma window around the fitted mean.  With `bkg_sub`, the
    events in the two sidebands [mean - 7 sigma, mean - 4 sigma) and
    (mean + 4 sigma, mean + 7 sigma] are subtracted; together they are as
    wide as the peak window, so no scaling is applied.

    Parameters
    ----------
    df : pandas DataFrame holding the energy column
    energy_par : name of the energy column
    bins : number of histogram bins
    erange : (lo, hi) histogram range; required
    bkg_sub : subtract the sideband counts when true
    writeParams : accepted for interface compatibility; not used here

    Returns
    -------
    (counts, error) : sideband-subtracted counts with
        sqrt(peak + background), or raw counts with sqrt(counts).

    Exits via SystemExit when `erange` has fewer than two entries.
    """
    if len(erange) < 2:
        print('Must specify an energy range for the fit!')
        # same effect as the interactive-only exit() helper
        raise SystemExit

    # gauss_mode_width_max supplies the initial guesses for fit_hist
    ehist, ebins, evars = pgh.get_hist(df[energy_par], bins=bins, range=erange)
    pars, cov = pgf.gauss_mode_width_max(ehist, ebins, evars)
    mode, width, amp = pars[0], pars[1], pars[2]
    print(f'Guess: {pars}')

    e_pars, ecov = pgf.fit_hist(gauss_fit_func, ehist, ebins, evars,
                                guess=(amp, mode, width, 1))
    chi_2 = pgf.goodness_of_fit(ehist, ebins, gauss_fit_func, e_pars)

    en_amp_fit = e_pars[0]
    mean = e_pars[1]
    # a fit may return sigma with either sign; the windows need a width
    sig = abs(e_pars[2])
    en_const_fit = e_pars[3]
    fwhm = sig * 2.355

    print(f'chi square: {chi_2}')
    print(f'mean: {mean}')
    print(f'width: {sig}')
    print(f'amp: {en_amp_fit}')
    print(f'C: {en_const_fit}')
    print(f'FWHM: {fwhm} \n{(fwhm/mean)*100}%')

    cut_3sig = f'({mean-3*sig} <= {energy_par} <= {mean+3*sig})'
    counts_peak = len(df.query(cut_3sig))
    err_peak = np.sqrt(counts_peak)

    print(f'peak counts: {counts_peak}')
    print(f'error: {err_peak}')

    if not bkg_sub:
        return (counts_peak, err_peak)

    bkg_left_min = mean - 7. * sig
    bkg_left_max = mean - 4 * sig
    bkg_right_min = mean + 4 * sig
    bkg_right_max = mean + 7. * sig

    bkg_left = f'({bkg_left_min} <= {energy_par} < {bkg_left_max})'
    bkg_right = f'({bkg_right_min} < {energy_par} <= {bkg_right_max})'

    left_counts = len(df.query(bkg_left))
    right_counts = len(df.query(bkg_right))
    total_bkg = left_counts + right_counts

    bkg_sub_counts = counts_peak - total_bkg
    err = np.sqrt(counts_peak + total_bkg)

    print(f'peak counts: {counts_peak}')
    print(f'bkg left: {left_counts}')
    print(f'bkg right: {right_counts}')
    print(f'total bkg: {total_bkg}')
    print(f'bkg_subtracted counts: {bkg_sub_counts}')
    print(f'error: {err}')
    print(f'{(err/bkg_sub_counts)*100:.3f}%')

    return (bkg_sub_counts, err)
def calibrate_pass2(ds, mode, write_db=False):
    """
    Fit each peak of interest in the pass-1 calibrated spectrum.

    Loads the first-pass constant from the "cal_pass1" table of the calDB
    for this DataSet and the list of peaks from the config, and fits every
    peak with the fit functions from pygama.analysis.peak_fitting.

    Parameters
    ----------
    ds : DataSet providing calDB, ds_list, config, get_t2df() and
        get_p1cal_pars()
    mode : 0 for a gaussian + linear background (pf.gauss_lin),
        1 for the peakshape function (pf.radford_peak)
    write_db : write the collapsed fit results to the "cal_pass2" table

    Raises
    ------
    ValueError
        if `mode` is neither 0 nor 1.

    TODO: Make a new table in the calDB for each DataSet, "cal_pass2", that
    holds fit results, etc.  These should be used as inputs for the
    MultiPeakFitter calibration code (pass 3).
    """
    if mode not in (0, 1):
        raise ValueError(
            f"unknown fit mode {mode!r}; expected 0 (gauss_lin) or 1 (radford_peak)")

    etype, ecal = "e_ftp", "e_cal"

    # load the calibration table with tinyDB and convert it to pandas
    calDB = ds.calDB
    df_cal = pd.DataFrame(calDB.table("cal_pass1").all())

    # apply the pass-1 constant to the tier 2 dataframe
    df_cal = df_cal.loc[df_cal.ds.isin(ds.ds_list)]
    p1cal = df_cal.iloc[0]["p1cal"]
    t2df = ds.get_t2df()
    t2df[ecal] = t2df[etype] * p1cal  # creates a new column

    # additional options from the config file
    cal_opts = ds.get_p1cal_pars(etype)
    pk_lim = cal_opts["peak_lim_keV"]

    fits = {}
    pk_names = ds.config["pks"]

    # loop over a list of peaks we assume are always present
    for e_peak in sorted(ds.config["main_peaks"], reverse=True):

        # histogram the spectrum near the peak
        xlo, xhi, xpb = e_peak - pk_lim, e_peak + pk_lim, 1
        hE, xE, vE = ph.get_hist(t2df[ecal], range=(xlo, xhi), dx=xpb, trim=False)

        if mode == 0:
            # gaussian + linear background: mu, sigma, a, b, m
            # TODO: could set initial sigma w. some simple linear function
            fit_func = pf.gauss_lin
            x0 = [e_peak, 5, np.sum(hE), np.mean(hE[:50]), 1]
            xF, xF_cov = pf.fit_hist(fit_func, hE, xE, var=np.ones(len(hE)), guess=x0)
            results = {
                "e_fit": xF[0],
                "e_unc": np.sqrt(xF_cov[0][0]),
                "fwhm": xF[1] * 2.355,
                "fwhm_unc": np.sqrt(xF_cov[1][1]) * 2.355,
                "resid": abs(e_peak - xF[0]),
                "bkg0": xF[3],
                "bkg1": xF[4],
            }
        else:
            # peakshape: mu, sigma, hstep, htail, tau, bg0, a
            fit_func = pf.radford_peak
            hstep = 0.001  # fraction that the step contributes
            htail = 0.1
            tau = 10
            bg0 = np.mean(hE[:20])
            amp = np.sum(hE)
            x0 = [e_peak, 5, hstep, htail, tau, bg0, amp]
            xF, xF_cov = pf.fit_hist(fit_func, hE, xE, var=vE, guess=x0)
            results = {
                "e_fit": xF[0],
                "fwhm": xF[1] * 2.355,
            }

        # mean of |(model - data)^2 / model|, the model being evaluated at
        # the first len(hE) entries of xE
        model = fit_func(xE[:len(hE)], *xF)
        results["chisq_ndf"] = np.sum(np.abs((model - hE)**2 / model)) / len(hE)

        # update DB results
        fits[pk_names[str(e_peak)]] = results

        # -- plot the fit --
        plt.axvline(e_peak, c='g')

        if mode == 0:
            plt.plot(xE, pf.gauss_lin(xE, *xF), c='r', label='fit')
        else:
            plt.plot(xE, pf.radford_peak(xE, *xF), c='r', label='peakshape')

            # individual components; tail_hi and bg are constants
            tail_hi, gaus, bg, step, tail_lo = pf.radford_peak(xE, *xF, components=True)
            gaus = np.array(gaus)
            step = np.array(step)
            tail_lo = np.array(tail_lo)
            plt.plot(xE, gaus * tail_hi, ls="--", lw=2, c='g', label="gaus+hi_tail")
            plt.plot(xE, step + bg, ls='--', lw=2, c='m', label='step + bg')
            plt.plot(xE, tail_lo, ls='--', lw=2, c='k', label='tail_lo')

        # 'steps' is a draw style, not a line style
        plt.plot(xE[1:], hE, ds='steps', lw=1, c='b', label="data")
        # invisible entry that only carries the FWHM into the legend
        plt.plot(np.nan, np.nan, c='w', label=f"fwhm = {results['fwhm']:.2f} keV")

        plt.xlabel("Energy (keV)", ha='right', x=1)
        plt.ylabel("Counts", ha='right', y=1)
        plt.legend()
        plt.savefig("./plots/cage_ds3_pass2cal.pdf")

    if write_db:
        query = db.Query()
        table = calDB.table("cal_pass2")

        # collapse data to 1 row
        row = {f"{key}_{pk}": val
               for pk, pk_fit in fits.items()
               for key, val in pk_fit.items()}

        # write an entry for every dataset.  if we've chained together
        # multiple datasets, the values will be the same.
        # use "upsert" to avoid writing duplicate entries.
        for dset in ds.ds_list:
            table.upsert(row, query.ds == dset)

        print("wrote results to DB.")
def optimize_trap(rise_times, test=False):
    """
    Fit the e_ftp peak for every entry of the optimization file.

    Intended to duplicate the plot from Figure 2.7 of Kris Vorren's thesis:
    fit the e_ftp peak to the HPGe peakshape function (same as in
    calibration.py) and plot the resulting FWHM^2 vs. the ramp time.  Only
    the fits are done here; the FWHM values are collected per key and the
    summary plot is not produced.

    Parameters
    ----------
    rise_times : sequence with one rise time per key of the HDF5 file, in
        the order the store lists its keys
    test : show the spectrum, the initial guess and the fit for every key
    """
    # NOTE(review): the path starts with '~'; confirm that it is expanded
    # before the file is opened.
    out_dir = "~/Data/cage"
    opt_file = f"{out_dir}/cage_ds3_optimize.h5"
    print("input file:", opt_file)

    # match keys to settings; should maybe do this in prev function as attrs.
    with pd.HDFStore(opt_file, 'r') as store:
        keys = [key[1:] for key in store.keys()]  # remove leading '/'
        settings = {key: rise_times[i] for i, key in enumerate(keys)}

    # loop over the keys and fit each e_ftp spectrum to the peakshape function
    fwhms = {}
    for key, rt in settings.items():

        t2df = pd.read_hdf(opt_file, key=key)

        # histogram spectrum near the uncalibrated peak -- have to be careful here
        xlo, xhi, xpb = 2550, 2660, 1
        hE, xE, vE = ph.get_hist(t2df["e_ftp"], range=(xlo, xhi), dx=xpb, trim=False)

        # initial guesses for the peakshape function; most are pretty rough
        mu = xE[np.argmax(hE)]
        sigma = 5
        hstep = 0.001
        htail = 0.5
        tau = 10
        bg0 = np.mean(hE[:20])
        amp = np.sum(hE)
        x0 = [mu, sigma, hstep, htail, tau, bg0, amp]

        xF, xF_cov = pf.fit_hist(pf.radford_peak, hE, xE, var=vE, guess=x0)

        fwhms[key] = xF[1] * 2.355

        if test:
            plt.cla()

            # peakshape function: initial guess and fit
            plt.plot(xE, pf.radford_peak(xE, *x0), c='orange', label='guess')
            plt.plot(xE, pf.radford_peak(xE, *xF), c='r', label='peakshape')
            plt.axvline(mu, c='g')

            # 'steps' is a draw style, not a line style
            plt.plot(xE[1:], hE, ds='steps', lw=1, c='b', label="data")
            # invisible entry that only carries the FWHM into the legend
            plt.plot(np.nan, np.nan, c='w', label=f"fwhm = {fwhms[key]:.2f} uncal.")

            plt.xlabel("Energy (uncal.)", ha='right', x=1)
            plt.ylabel("Counts", ha='right', y=1)
            plt.legend(loc=2)
            plt.show()
def fit_peaks(epeaks, cal_pars, raw_data, runtime_min, range=[0, 3000, 5],
              ff_name='gauss_step', show_plot=True, batch=False):
    """
    Routine for sequential fit of peaks in a raw energy spectrum.

    Inputs:
    - epeaks: list of peak energies to calibrate, e.g. [1460, 2615, ...]
    - cal_pars: results from peakdet for the first estimate of the
      calibration.  They are passed to np.poly1d as given.
    - raw_data: numpy array of uncalibrated data.  The array is needed
      instead of a histogram because this routine tries to optimize the
      binning around each peak.
    - runtime_min: used to normalize spectra to a rate, which helps a lot
      to compute initial guesses for fit functions.
    - range: [xlo, xhi, xpb] of the quick-look spectrum
    - ff_name: 'gauss_step' (pgf.gauss_step) or 'peakshape'
      (pgf.radford_peak)
    - show_plot: draw the quick-look spectrum and every peak fit
    - batch: save the figures instead of showing them

    Returns a dict, 'fit_results', keyed by the peak's index in epeaks,
    which is easily convertible to DataFrame.

    Raises ValueError for an unknown ff_name.
    """
    if ff_name not in ('gauss_step', 'peakshape'):
        raise ValueError(
            f"unknown ff_name {ff_name!r}; expected 'gauss_step' or 'peakshape'")

    print('range is', range)

    # compute calibrated energy.
    # scale the raw data s/t the peaks in 'epeaks' are decent initial guesses
    pfunc = np.poly1d(cal_pars)
    cal_data = pfunc(raw_data)

    # quick spectrum check (are the input calibration parameters in the ballpark?)
    if show_plot:
        xlo, xhi, xpb = range
        hist, bins, _ = pgh.get_hist(cal_data, range=(xlo, xhi), dx=xpb)
        hist_norm = np.divide(hist, runtime_min * 60 * xpb)
        plt.semilogy(bins[1:], hist_norm, ds='steps', c='b', lw=1)
        if batch:
            plt.savefig('./plots/energy_cal/peakfit_test.png')
        else:
            plt.show()
        plt.cla()

    # loop over peak energies
    fit_results = {}
    for ie, epk in enumerate(epeaks):

        # adjust the window.  resolution goes as roughly sqrt(energy)
        window = np.sqrt(epk)
        xlo, xhi = epk - window / 2, epk + window / 2
        nbins = int(window) * 3  # todo, make this get smaller w/ inc energy
        xpb = (xhi - xlo) / nbins
        if show_plot:
            print(f'Fitting peak at {epk:6} keV. xlo {xlo:6.1f} xhi {xhi:6.1f} '
                  f'xpb {xpb:.3f} nbins {nbins}')

        # get histogram, error, normalize by runtime
        pk_data = cal_data[(cal_data >= xlo) & (cal_data <= xhi)]
        hist, bins, _ = pgh.get_hist(pk_data, range=(xlo, xhi), dx=xpb)
        hist_norm = np.divide(hist, runtime_min * 60)
        # NOTE(review): this is a square root but it is handed to fit_hist
        # as `var`; confirm which of the two the fit expects.
        hist_var = np.sqrt(hist / (runtime_min * 60))

        # estimate left and right sideband locations
        ibkg_lo, ibkg_hi = int(nbins * 0.2), int(nbins * 0.8)
        bkg0 = np.mean(hist_norm[:ibkg_lo])
        bkg0_hi = np.mean(hist_norm[ibkg_hi:])
        b, h = bins[1:], hist_norm - bkg0

        # robust initial guesses shared by both fit functions: the width is
        # read off the background-subtracted histogram at half maximum
        step0 = bkg0 - bkg0_hi
        imax = np.argmax(h)
        half_max = np.amax(h) / 2
        upr_half = b[(b > b[imax]) & (h <= half_max)][0]
        bot_half = b[(b < b[imax]) & (h <= half_max)][-1]
        fwhm0 = upr_half - bot_half
        sig0 = fwhm0 / 2.355
        amp0 = np.amax(h) * fwhm0

        if ff_name == 'gauss_step':
            # gaussian fit + step function : a, mu, sigma, bkg, step
            fit_func = pgf.gauss_step
            p_init = [amp0, bins[imax], sig0, bkg0, step0]
            i_amp, i_mu, i_sig, i_bkg = 0, 1, 2, 3
        else:
            # peakshape : mu, sigma, hstep, htail, tau, bg0, amp
            # this requires higher stats, doesn't work as well for smaller peaks
            fit_func = pgf.radford_peak
            htail, tau = 0.1, 10  # TODO: find a way to guess these
            p_init = [bins[imax], sig0, step0, htail, tau, bkg0, amp0]
            i_amp, i_mu, i_sig, i_bkg = 6, 0, 1, 5

        p_fit, p_cov = pgf.fit_hist(fit_func, hist_norm, bins, var=hist_var, guess=p_init)
        p_err = np.sqrt(np.diag(p_cov))

        fwhm = p_fit[i_sig] * 2.355
        fwhm_err = p_err[i_sig] * 2.355
        mu_err = p_err[i_mu]

        fit_results[ie] = {
            'epk': epk,
            'mu': p_fit[i_mu],
            'fwhm': fwhm,
            'sig': p_fit[i_sig],
            'amp': p_fit[i_amp],
            'bkg': p_fit[i_bkg],
            'fwhm_err': fwhm_err,
            'mu_err': mu_err,
        }

        # compute goodness of fit
        rchisq = pgf.goodness_of_fit(hist_norm, b, fit_func, p_fit)
        fit_results[ie]['rchisq'] = rchisq

        # Now we need to invert the cal polynomial to get the raw position
        # of the peak that we've found.  This allows us to refine an initial
        # estimate before computing a final resolution curve, just by
        # calling fit_peaks multiple times.
        # But the polynomial has multiple roots!  Trick: pick the root that
        # most closely matches what you would get by only considering our
        # 1st-order calibration term from peakdet.  For a fairly linear
        # system like a Ge detector this should work well.
        # NOTE(review): the roots are taken of (pfunc - epk), i.e. at the
        # nominal energy rather than at the fitted 'mu'; and cal_pars[1] is
        # the first-order term only for a 3-entry cal_pars.  Confirm both.
        pk_guess = fit_results[ie]['mu'] / cal_pars[1]
        pk_roots = (pfunc - epk).roots
        ipk_closest = np.abs(pk_roots - pk_guess).argmin()
        mu_raw = pk_roots[ipk_closest]
        # scale the error on mu (not on sigma) into raw units
        mu_unc = mu_err * (mu_raw / epk)
        fit_results[ie]['mu_raw'], fit_results[ie]['mu_unc'] = mu_raw, mu_unc

        if show_plot:
            xfit = np.arange(xlo, xhi, xpb * 0.1)
            plt.axvline(bins[ibkg_lo], c='m', label='bkg region')
            plt.plot(xfit, fit_func(xfit, *p_init), '-', c='orange', label='init')
            plt.plot(xfit, fit_func(xfit, *p_fit), '-', c='red', label='fit')
            plt.plot(bins[1:], hist_norm, c='b', lw=1.5, ds='steps')
            # invisible entry that only carries the FWHM into the legend
            plt.plot(np.nan, np.nan, 'w', label=f'FWHM: {fwhm:.2f}')
            plt.xlabel('pass-1 energy (kev)', ha='right', x=1)
            plt.legend(fontsize=12)
            if batch:
                plt.savefig(f'./plots/energy_cal/fit{ie}_peakfit.png')
            else:
                plt.show()
            plt.close()

    return fit_results
"""
Example code by Jason demonstrating some pygama convenience functions.

Ten standard-normal samples are histogrammed and fit twice with pga.gauss:
once with the default fit and once with bounds and poissonLL=True.  The
results of both fits are printed and drawn.
"""
import numpy as np
import matplotlib.pyplot as plt
import pygama.analysis.histograms as pgh
import pygama.analysis.peak_fitting as pga

np.random.seed(0)  # fix the seed s/t we can reproduce the plot

n = 10
data = np.random.normal(0, 1, n)

hist, bins, var = pgh.get_hist(data, range=(-5, 5), dx=1)
pgh.plot_hist(hist, bins, var, label="data")

# default fit; parameters are mu, sig, A
pars, cov = pga.fit_hist(pga.gauss, hist, bins, var=var, guess=[0, 1, n])
pgh.print_fit_results(pars, cov, ['mu', 'sig', 'A'])
pgh.plot_func(pga.gauss, pars, label="chi2 fit")

# same model with poissonLL=True: mu unbounded, sig and A kept positive
nbnd = (-np.inf, np.inf)
pos = (0, np.inf)
pars, cov = pga.fit_hist(pga.gauss, hist, bins, var=var, guess=[0, 1, n],
                         bounds=[nbnd, pos, pos], poissonLL=True)
pgh.print_fit_results(pars, cov, ['mu', 'sig', 'A'])
pgh.plot_func(pga.gauss, pars, label="poissonLL fit")
def peak_352():
    """
    Fit the 351.93 keV line of one run's calibrated spectrum and show it.

    The run number is taken from the command line (sys.argv[1]).  The
    spectrum is read from "<meta_dir>/Spectrum_<run>.hdf5", with meta_dir
    taken from runDB.json in the working directory.  The 'e_cal' column is
    histogrammed between 345 and 360 in steps of 0.5 and fit with the
    peak shape function defined below.  FWHM, peak position and the
    residual to 351.93 are shown in the legend.

    Exits after printing a usage line unless exactly one argument is given.
    """
    if len(sys.argv) != 2:
        print('Usage: fit_bkg_peaks.py [run number]')
        sys.exit()

    with open("runDB.json") as f:
        runDB = json.load(f)
    meta_dir = os.path.expandvars(runDB["meta_dir"])

    df = pd.read_hdf("{}/Spectrum_{}.hdf5".format(meta_dir, sys.argv[1]), key="df")

    def gauss(x, mu, sigma, A=1):
        """
        define a gaussian distribution, w/ args: mu, sigma, area (optional).
        """
        return A * (1. / sigma / np.sqrt(2 * np.pi)) * np.exp(-(x - mu)**2 / (2. * sigma**2))

    def radford_peak(x, mu, sigma, hstep, htail, tau, bg0, a=1):
        """
        David Radford's HPGe peak shape function
        """
        # make sure the fractional amplitude parameters stay reasonable...
        if htail < 0 or htail > 1:
            return np.zeros_like(x)
        if hstep < 0 or hstep > 1:
            return np.zeros_like(x)

        bg_term = bg0  # + x*bg1
        if np.any(bg_term < 0):
            return np.zeros_like(x)

        # compute the step and the low energy tail
        step = a * hstep * erfc((x - mu) / (sigma * np.sqrt(2)))
        le_tail = a * htail
        le_tail *= erfc((x - mu) / (sigma * np.sqrt(2)) + sigma / (tau * np.sqrt(2)))
        le_tail *= np.exp((x - mu) / tau)
        le_tail /= (2 * tau * np.exp(-(sigma / (np.sqrt(2) * tau))**2))

        # add up all the peak shape components
        return (1 - htail) * gauss(x, mu, sigma, a) + bg_term + step + le_tail

    hist, bins, var = pgh.get_hist(df['e_cal'], range=(345, 360), dx=0.5)
    # the bin contents are passed as the variance of each bin
    pgh.plot_hist(hist, bins, var=hist, label="data")

    # guess: mu, sigma, hstep, htail, tau, bg0, a
    pars, cov = pga.fit_hist(radford_peak, hist, bins, var=hist,
                             guess=[352, 1.05, 0.001, 0.02, 500, 1000, 40000])
    pgu.print_fit_results(pars, cov, radford_peak)
    pgu.plot_func(radford_peak, pars, label="chi2 fit", color='red')

    # FWHM of a gaussian is 2.355 * sigma, the factor used throughout
    sigma_to_fwhm = 2.355
    fwhm = f'{pars[1] * sigma_to_fwhm:.2f}'
    fwhm_uncertainty = f'{np.sqrt(cov[1][1]) * sigma_to_fwhm:.2f}'
    peak = f'{pars[0]:.2f}'
    peak_uncertainty = f'{np.sqrt(cov[0][0]):.2f}'
    # residual of the rounded peak position to the literature value
    residual = '%.2f' % (351.93 - float(peak))

    labels = [
        '351.93 keV peak fit',
        rf'FWHM = {fwhm} $\pm$ {fwhm_uncertainty}',
        rf'Peak = {peak} $\pm$ {peak_uncertainty}',
        rf'Residual = {residual} $\pm$ {peak_uncertainty}',
    ]
    # one red proxy line per legend entry
    lines = [Line2D([0], [0], color='red', lw=2) for _ in labels]

    plt.xlim(345, 360)
    plt.ylim(0, plt.ylim()[1])
    plt.xlabel('Energy (keV)', ha='right', x=1.0)
    plt.ylabel('Counts', ha='right', y=1.0)

    plt.tight_layout()
    plt.legend(lines, labels, frameon=False, loc='upper right', fontsize='small')
    plt.show()