def hpge_find_E_peaks(hist, bins, var, peaks_keV, n_sigma=5, deg=0, Etol_keV=None, var_zero=1, verbose=False):
    """Find uncalibrated E peaks whose E spacing matches the pattern in peaks_keV

    Note: the specialization here to units "keV" in peaks and Etol is
    unnecessary. However, it is kept so that the default value for Etol_keV has
    an unambiguous interpretation.

    Parameters
    ----------
    hist, bins, var : array, array, array
        Histogram of uncalibrated energies, see pgh.get_hist().
        var cannot contain any zero entries.
    peaks_keV : array
        Energies of peaks to search for (in keV)
    n_sigma : float
        Threshold for detecting a peak in sigma (i.e. sqrt(var))
    deg : int
        deg arg to pass to poly_match
    Etol_keV : float
        Absolute tolerance in energy for matching peaks
    var_zero : float
        Number used to replace zeros of var to avoid divide-by-zero in
        hist/sqrt(var). Default value is 1. Usually when var = 0 it's because
        hist = 0, and any value here is fine.

    Returns
    -------
    detected_peak_locations : list
        List of uncalibrated energies of detected peaks
    detected_peak_energies : list
        List of calibrated energies of detected peaks
    pars : list of floats
        The parameters for poly(peaks_uncal) = peaks_keV (polyfit convention)
    """
    # clean up var if necessary
    if np.any(var == 0):
        if verbose:
            print(f'hpge_find_E_peaks: replacing var zeros with {var_zero}')
        var[np.where(var == 0)] = var_zero
    peaks_keV = np.asarray(peaks_keV)

    # find all maxima with > n_sigma significance
    imaxes = get_i_local_maxima(hist/np.sqrt(var), n_sigma)

    # now pattern match to peaks_keV within Etol_keV using poly_match
    detected_max_locs = pgh.get_bin_centers(bins)[imaxes]

    if Etol_keV is None:
        # estimate Etol_keV
        pt_pars, pt_covs = hpge_fit_E_peak_tops(hist, bins, var, detected_max_locs, n_to_fit=15)
        if (sum(sum([sum(c) if c is not None else 0 for c in pt_covs])) == np.inf
                or sum(sum([sum(c) if c is not None else 0 for c in pt_covs])) == 0):
            print('hpge_find_E_peaks: can safely ignore previous covariance warning, not used')
        pt_pars = pt_pars[np.array([x is not None for x in pt_pars])]
        med_sigma_ratio = np.median(np.stack(pt_pars)[:, 1] / np.stack(pt_pars)[:, 0])
        Etol_keV = 5. * (med_sigma_ratio / 0.003)

    pars, ixtup, iytup = poly_match(detected_max_locs, peaks_keV, deg=deg, atol=Etol_keV)

    if verbose and len(ixtup) != len(peaks_keV):
        print(f'hpge_find_E_peaks: only found {len(ixtup)} of {len(peaks_keV)} expected peaks')

    return detected_max_locs[ixtup], peaks_keV[iytup], pars
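# Usage sketch (illustrative, not part of the original module): one way to call
# hpge_find_E_peaks on an array of uncalibrated energies. The binning and peak
# list below are placeholders; pgh.get_hist() is the pygama histogram helper
# referenced in the docstring above.
def example_hpge_find_E_peaks(energies_uncal):
    hist, bins, var = pgh.get_hist(energies_uncal, bins=3000, range=(0, 30000))
    locs, peaks, pars = hpge_find_E_peaks(hist, bins, var,
                                          peaks_keV=[583.191, 1460.822, 2614.511],
                                          n_sigma=5, verbose=True)
    # pars follows the np.polyfit convention, so np.polyval(pars, locs)
    # gives the (rough) calibrated energies of the detected peak locations
    return locs, peaks, pars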
def corrDCR(df, etype, e_bins=300, elo=0, ehi=6000, dcr_fit_lo=-30, dcr_fit_hi=30):
    """Fit a line to the binned median of 'dcr' vs. the energy estimator `etype`
    and return the slope, offset, and their fit uncertainties."""
    df_dcr_cut = df.query(
        f'dcr > {dcr_fit_lo} and dcr < {dcr_fit_hi} and {etype} > {elo} and {etype} < {ehi}').copy()

    median, xedges, binnumber = stats.binned_statistic(
        df_dcr_cut[etype], df_dcr_cut['dcr'], statistic="median", bins=e_bins)
    en_bin_centers = pgh.get_bin_centers(xedges)

    fit_raw, cov = np.polyfit(en_bin_centers, median, deg=1, cov=True)
    const = fit_raw[0]
    offset = fit_raw[1]
    err = np.sqrt(np.diag(cov))

    print(f'Fit results\n slope: {const}\n offset: {offset}')
    return (const, offset, err)
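# Usage note (illustrative, not from the original script): the (const, offset)
# pair returned above can be used to build an energy-corrected DCR column, e.g.
#
#   const, offset, err = corrDCR(df, 'trapEftp_cal')
#   df['dcr_corr'] = df['dcr'] - (const*df['trapEftp_cal'] + offset)
#
# which is the same linear correction the second corrDCR variant below applies
# in place on df_cut.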
def corrDCR(df_cut, etype, e_bins=300, elo=0, ehi=6000, dcr_fit_lo=-30, dcr_fit_hi=30,
            quad=False, dcr_fit_qlo=5000, dcr_fit_qhi=5800):
    """Correct the energy dependence of 'dcr' (linear, or quadratic if quad=True)
    and store the result in a new 'dcr_corr' column of df_cut."""
    median, xedges, binnumber = stats.binned_statistic(
        df_cut[etype], df_cut['dcr'], statistic="median",
        bins=int((dcr_fit_hi - dcr_fit_lo) / 5), range=[dcr_fit_lo, dcr_fit_hi])
    en_bin_centers = pgh.get_bin_centers(xedges)

    if quad:
        qmedian, qxedges, qbinnumber = stats.binned_statistic(
            df_cut[etype], df_cut['dcr'], statistic="median",
            bins=int((dcr_fit_qhi - dcr_fit_qlo) / 5), range=[dcr_fit_qlo, dcr_fit_qhi])
        qen_bin_centers = pgh.get_bin_centers(qxedges)

        xen = np.concatenate((en_bin_centers, qen_bin_centers))
        ymed = np.concatenate((median, qmedian))
        print(xen)
        print(ymed)
        plt.plot(xen, ymed)

        fit_raw = np.polyfit(xen, ymed, deg=2)
        qconst = fit_raw[0]
        qlin = fit_raw[1]
        qoffset = fit_raw[2]
        df_cut['dcr_corr'] = df_cut['dcr'] - (qconst*(df_cut[etype]**2) + qlin*df_cut[etype] + qoffset)
    else:
        fit_raw = np.polyfit(en_bin_centers, median, deg=1)
        const = fit_raw[0]
        offset = fit_raw[1]
        df_cut['dcr_corr'] = df_cut['dcr'] - (const*(df_cut[etype]) + offset)

    return df_cut
def fit_hist(func, hist, bins, var=None, guess=None,
             poissonLL=False, integral=None, method=None, bounds=None):
    """
    Do a binned fit to a histogram (nonlinear least squares).
    Can either do a poisson log-likelihood fit (jason's fave) or
    use curve_fit with an arbitrary function.

    - hist, bins, var : as in return value of pygama.histograms.get_hist()
    - guess : initial parameter guesses. Should be optional -- we can auto-guess
          for many common functions. But not yet implemented.
    - poissonLL : use Poisson stats instead of the Gaussian approximation in
          each bin. Requires integer stats. You must use parameter bounds to
          make sure that func does not go negative over the x-range of the
          histogram.
    - method, bounds : options to pass to scipy.optimize.minimize

    Returns
    -------
    coeff, cov_matrix : tuple(array, matrix)
    """
    if guess is None:
        print("auto-guessing not yet implemented, you must supply a guess.")
        return None, None

    if poissonLL:
        if var is not None and not np.array_equal(var, hist):
            print("variances are not appropriate for a poisson-LL fit!")
            return None, None

        if method is None:
            method = "L-BFGS-B"

        result = minimize(neg_poisson_log_like, x0=guess,
                          args=(func, hist, bins, integral),
                          method=method, bounds=bounds)
        coeff, cov_matrix = result.x, result.hess_inv.todense()

    else:
        if var is None:
            var = hist  # assume Poisson stats if variances are not provided

        # skip "okay" bins with content 0 +/- 0 to avoid div-by-0 error in curve_fit
        # if bin content is non-zero but var = 0 let the user see the warning
        zeros = (hist == 0)
        zero_errors = (var == 0)
        mask = ~(zeros & zero_errors)
        sigma = np.sqrt(var)[mask]
        hist = hist[mask]
        xvals = ph.get_bin_centers(bins)[mask]

        if bounds is None:
            bounds = (-np.inf, np.inf)

        coeff, cov_matrix = curve_fit(func, xvals, hist, p0=guess,
                                      sigma=sigma, bounds=bounds)

    return coeff, cov_matrix
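# Usage sketch (illustrative, not part of the original module): a chi-square
# fit of a simple Gaussian to a histogram with fit_hist(). The model function,
# synthetic data, and guess values are placeholders; pygama.analysis.histograms
# is imported locally so the example is self-contained.
def example_fit_hist():
    import pygama.analysis.histograms as pgh

    def gauss_model(x, mu, sigma, a):
        # simple Gaussian used only for this example
        return a * np.exp(-(x - mu)**2 / (2 * sigma**2))

    samples = np.random.normal(loc=10, scale=2, size=10000)
    hist, bins, var = pgh.get_hist(samples, bins=100, range=(0, 20))
    # a guess is required since auto-guessing is not implemented
    coeff, cov = fit_hist(gauss_model, hist, bins, var=var, guess=(10, 2, 400))
    return coeff, cov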
def taylor_mode_max(hist, bins, var=None, mode_guess=None, n_bins=5, poissonLL=False):
    """Get the max and mode of a peak based on a Taylor expansion near the max

    Returns the amplitude and position of a peak based on a poly fit over
    n_bins in the vicinity of the maximum of the hist (or the max near
    mode_guess, if provided)

    Parameters
    ----------
    hist : array-like
        The values of the histogram to be fit. Often: send in a slice around a peak
    bins : array-like
        The bin edges of the histogram to be fit
    var : array-like (optional)
        The variances of the histogram values. If not provided, square-root
        variances are assumed.
    mode_guess : float (optional)
        An x-value (not a bin index!) near which a peak is expected. The
        algorithm fits around the maximum within +/- n_bins of the guess. If
        not provided, the center of the max bin of the histogram is used.
    n_bins : int
        The number of bins (including the max bin) to be used in the fit. Also
        used for searching for a max near mode_guess

    Returns
    -------
    (pars, cov) : tuple (array, matrix)
        pars : 2-tuple with the parameters (mode, maximum) of the fit
            mode : the estimated x-position of the maximum
            maximum : the estimated maximum value of the peak
        cov : 2x2 matrix of floats
            The covariance matrix for the 2 parameters in pars

    Examples
    --------
    >>> import pygama.analysis.histograms as pgh
    >>> from numpy.random import normal
    >>> import pygama.analysis.peak_fitting as pgf
    >>> hist, bins, var = pgh.get_hist(normal(size=10000), bins=100, range=(-5,5))
    >>> pgf.taylor_mode_max(hist, bins, var, n_bins=5)
    """
    if mode_guess is not None:
        i_0 = ph.find_bin(mode_guess, bins)
    else:
        i_0 = np.argmax(hist)
    i_0 -= int(np.floor(n_bins/2))
    i_n = i_0 + n_bins
    wts = None if var is None else 1/np.sqrt(var[i_0:i_n])

    pars, cov = np.polyfit(ph.get_bin_centers(bins)[i_0:i_n], hist[i_0:i_n], 2,
                           w=wts, cov='unscaled')
    # quadratic vertex: for p(x) = a*x^2 + b*x + c, mode = -b/(2a) and
    # maximum = p(mode) = c - a*mode^2
    mode = -pars[1] / 2 / pars[0]
    maximum = pars[2] - pars[0] * mode**2

    # build the jacobian to compute the output covariance matrix
    jac = np.array(
        [[pars[1]/2/pars[0]**2,    -1/2/pars[0],       0],
         [pars[1]**2/4/pars[0]**2, -pars[1]/2/pars[0], 1]])
    cov_jact = np.matmul(cov, jac.transpose())
    cov = np.matmul(jac, cov_jact)

    return (mode, maximum), cov
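# Usage sketch (illustrative, not part of the original module): taylor_mode_max
# returns ((mode, maximum), cov); the diagonal of cov gives the variances of
# mode and maximum. The input data here are synthetic and the module import is
# local so the example is self-contained.
def example_taylor_mode_max():
    import pygama.analysis.histograms as pgh
    hist, bins, var = pgh.get_hist(np.random.normal(size=10000), bins=100, range=(-5, 5))
    (mode, maximum), cov = taylor_mode_max(hist, bins, var, n_bins=5)
    mode_err, max_err = np.sqrt(np.diag(cov))
    return mode, mode_err, maximum, max_err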
def get_bin_estimates(pars, func, hist, bins, integral=None, **kwargs):
    """
    Bin expected means are estimated by f(bin_center)*bin_width. Supply an
    integrating function to compute the integral over the bin instead.
    TODO: make default integrating function a numerical method that is off by
    default.
    """
    if integral is None:
        return func(ph.get_bin_centers(bins), *pars, **kwargs) * ph.get_bin_widths(bins)
    else:
        return integral(bins[1:], *pars, **kwargs) - integral(bins[:-1], *pars, **kwargs)
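# Usage sketch (illustrative, not part of the original module): calling
# get_bin_estimates() with an analytic integral. For a Gaussian model the
# cumulative integral can be written with erf, so the expected counts per bin
# are exact rather than f(bin_center)*bin_width. The model and integral below
# are placeholders defined locally for this example.
def example_bin_estimates(pars, hist, bins):
    from scipy.special import erf

    def gauss_model(x, mu, sigma, a):
        return a * np.exp(-(x - mu)**2 / (2 * sigma**2))

    def gauss_integral(x, mu, sigma, a):
        # integral of gauss_model from -inf to x
        return a * sigma * np.sqrt(np.pi / 2) * (1 + erf((x - mu) / (sigma * np.sqrt(2))))

    return get_bin_estimates(pars, gauss_model, hist, bins, integral=gauss_integral)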
def goodness_of_fit(hist, bins, var, func, pars, method='var'):
    """ Compute chisq and dof of fit

    Parameters
    ----------
    hist, bins, var : array, array, array or None
        Histogram data. var can be None if hist is integer counts
    func : function
        The function that was fit to the hist
    pars : array
        The best-fit pars of func. Assumes all pars are free parameters
    method : str
        Sets the choice of "denominator" in the chi2 sum
        'var': user passes in the variances in var (must not have zeros)
        'Pearson': use func (hist must contain integer counts)
        'Neyman': use hist (hist must contain integer counts and no zeros)

    Returns
    -------
    chisq : float
        The summed up value of chisquared
    dof : int
        The number of degrees of freedom
    """
    # arg checks
    if method == 'var':
        if var is None:
            print("goodness_of_fit: var must be non-None to use method 'var'")
            return 0, 0
        if np.any(var == 0):
            print("goodness_of_fit: var cannot contain zeros")
            return 0, 0
    if method == 'Neyman' and np.any(hist == 0):
        print("goodness_of_fit: hist cannot contain zeros for Neyman method")
        return 0, 0

    # compute chi2 numerator and denominator
    yy = func(ph.get_bin_centers(bins), *pars)
    numerator = (hist - yy)**2
    if method == 'var':
        denominator = var
    elif method == 'Pearson':
        denominator = yy
    elif method == 'Neyman':
        denominator = hist
    else:
        print(f"goodness_of_fit: unknown method {method}")
        return 0, 0

    # compute chi2 and dof
    chisq = np.sum(numerator / denominator)
    dof = len(hist) - len(pars)
    return chisq, dof
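# Usage sketch (illustrative, not part of the original module): chaining
# fit_hist() and goodness_of_fit() to get a reduced chi-square. The Gaussian
# model, synthetic data, and guess are placeholders; the 'Pearson' method is
# used so that empty tail bins do not cause divide-by-zero.
def example_goodness_of_fit():
    import pygama.analysis.histograms as pgh

    def gauss_model(x, mu, sigma, a):
        return a * np.exp(-(x - mu)**2 / (2 * sigma**2))

    hist, bins, var = pgh.get_hist(np.random.normal(5, 1, size=10000), bins=100, range=(0, 10))
    pars, cov = fit_hist(gauss_model, hist, bins, var=var, guess=(5, 1, 400))
    chisq, dof = goodness_of_fit(hist, bins, var, gauss_model, pars, method='Pearson')
    return chisq / dof if dof > 0 else np.nan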
def get_most_prominent_peaks(energySeries, xlo, xhi, xpb, max_num_peaks=np.inf, test=False):
    """
    Find the most prominent peaks in a spectrum by looking for spikes in the
    derivative of the spectrum.

    energySeries : array of measured energies
    xlo, xhi, xpb : range and bin width used to histogram energySeries
    max_num_peaks : maximum number of most prominent peaks to find

    Returns an array of the (uncalibrated) energies of the most prominent
    peaks found.
    """
    nb = int((xhi - xlo) / xpb)
    hist, bin_edges = np.histogram(energySeries, range=(xlo, xhi), bins=nb)
    bin_centers = get_bin_centers(bin_edges)

    # median filter along the spectrum, do this as a "baseline subtraction"
    hist_med = medfilt(hist, 21)
    hist = hist - hist_med

    # identify peaks with a scipy function (could be improved ...)
    peak_idxs = find_peaks_cwt(hist, np.arange(1, 6, 0.1), min_snr=5)
    peak_energies = bin_centers[peak_idxs]

    # pick the max_num_peaks most prominent peaks
    if max_num_peaks < len(peak_energies):
        peak_vals = hist[peak_idxs]
        sort_idxs = np.argsort(peak_vals)
        peak_idxs_max = peak_idxs[sort_idxs[-max_num_peaks:]]
        peak_energies = np.sort(bin_centers[peak_idxs_max])

    if test:
        plt.plot(bin_centers, hist, ls='steps', lw=1, c='b')
        for e in peak_energies:
            plt.axvline(e, color="r", lw=1, alpha=0.6)
        plt.xlabel("Energy [uncal]", ha='right', x=1)
        plt.ylabel("Filtered Spectrum", ha='right', y=1)
        plt.tight_layout()
        plt.show()
        exit()

    return peak_energies
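# Usage sketch (illustrative, not part of the original module): find up to the
# 10 most prominent peaks between 0 and the maximum ADC value with 1-ADC
# binning. The range and bin width choices are placeholders.
def example_prominent_peaks(energy_series):
    xhi = np.amax(energy_series)
    return get_most_prominent_peaks(energy_series, xlo=0, xhi=xhi, xpb=1, max_num_peaks=10)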
def gauss_mode_width_max(hist, bins, var=None, mode_guess=None, n_bins=5,
                         poissonLL=False, inflate_errors=False, gof_method='var'):
    """
    Get the max, mode, and width of a peak based on a gauss fit near the max

    Returns the parameters of a gaussian fit over n_bins in the vicinity of
    the maximum of the hist (or the max near mode_guess, if provided). This is
    equivalent to a Taylor expansion around the peak maximum because near its
    maximum a Gaussian can be approximated by a 2nd-order polynomial in x:

    A exp[ -(x-mu)^2 / 2 sigma^2 ] ~= A [ 1 - (x-mu)^2 / 2 sigma^2 ]
                                    = A - (1/2!) (A/sigma^2) (x-mu)^2

    The advantage of using a gaussian over a polynomial directly is that the
    gaussian parameters are the ones we care about most for a peak, whereas
    for a poly we would have to extract them after the fit, accounting for
    covariances. The gaussian also better approximates most peaks farther down
    the peak. However, the gauss fit is nonlinear and thus less stable.

    Parameters
    ----------
    hist : array-like
        The values of the histogram to be fit
    bins : array-like
        The bin edges of the histogram to be fit
    var : array-like (optional)
        The variances of the histogram values. If not provided, square-root
        variances are assumed.
    mode_guess : float (optional)
        An x-value (not a bin index!) near which a peak is expected. The
        algorithm fits around the maximum within +/- n_bins of the guess. If
        not provided, the center of the max bin of the histogram is used.
    n_bins : int (optional)
        The number of bins (including the max bin) to be used in the fit. Also
        used for searching for a max near mode_guess
    poissonLL : bool (optional)
        Flag passed to fit_hist()
    inflate_errors : bool (optional)
        If True, the parameter uncertainties are inflated by sqrt(chi2red)
        if it is greater than 1
    gof_method : str (optional)
        Method flag for goodness_of_fit

    Returns
    -------
    (pars, cov) : tuple (array, matrix)
        pars : 3-tuple containing the parameters (mode, sigma, maximum) of the
               gaussian fit
            mode : the estimated x-position of the maximum
            sigma : the estimated width of the peak. Equivalent to a gaussian
                width (sigma), but based only on the curvature within n_bins of
                the peak. Note that the Taylor-approximated curvature of the
                underlying function in the vicinity of the max is given by
                max / sigma^2
            maximum : the estimated maximum value of the peak
        cov : 3x3 matrix of floats
            The covariance matrix for the 3 parameters in pars
    """
    bin_centers = ph.get_bin_centers(bins)
    if mode_guess is not None:
        i_0 = ph.find_bin(mode_guess, bins)
    else:
        i_0 = np.argmax(hist)
        mode_guess = bin_centers[i_0]
    amp_guess = hist[i_0]
    i_0 -= int(np.floor(n_bins / 2))
    i_n = i_0 + n_bins
    width_guess = (bin_centers[i_n] - bin_centers[i_0])
    vv = None if var is None else var[i_0:i_n]
    guess = (mode_guess, width_guess, amp_guess)
    try:
        pars, cov = fit_hist(gauss_basic, hist[i_0:i_n], bins[i_0:i_n + 1], vv,
                             guess=guess, poissonLL=poissonLL)
    except:
        return None, None
    if pars[1] < 0:
        pars[1] = -pars[1]
    if inflate_errors:
        chi2, dof = goodness_of_fit(hist, bins, var, gauss_basic, pars)
        if chi2 > dof:
            cov *= chi2 / dof
    return pars, cov
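# Usage sketch (illustrative, not part of the original module): a quick peak
# width estimate with gauss_mode_width_max(). FWHM = 2*sqrt(2*ln 2)*sigma
# ~= 2.355*sigma, the same conversion used in the n_minus_1() routine further
# down this document. The n_bins choice is a placeholder.
def example_peak_fwhm(hist, bins, var, mode_guess):
    pars, cov = gauss_mode_width_max(hist, bins, var, mode_guess=mode_guess, n_bins=7)
    if pars is None:
        return None
    mode, sigma, maximum = pars
    return 2.355 * sigma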
def n_minus_1(run, campaign, df, dg, runtype, rt_min, radius, angle_det, rotary, cut_keys):
    with open('./cuts.json') as f:
        cuts = json.load(f)

    e_res_const = [0., 0., 0.]
    e_res_const[0] = cuts[str(run)]['e_res_const0']
    e_res_const[1] = cuts[str(run)]['e_res_const1']
    e_res_const[2] = cuts[str(run)]['e_res_const2']
    bl_cut_lo_raw = cuts[str(run)]['bl_cut_lo_raw']
    bl_cut_hi_raw = cuts[str(run)]['bl_cut_hi_raw']
    bl_slope_lo_raw = cuts[str(run)]['bl_slope_lo_raw']
    bl_slope_hi_raw = cuts[str(run)]['bl_slope_hi_raw']
    bl_sig_lo_raw = cuts[str(run)]['bl_sig_lo_raw']
    bl_sig_hi_raw = cuts[str(run)]['bl_sig_hi_raw']
    ftp_max_lo_raw = cuts[str(run)]['ftp_max_lo_raw']
    ftp_max_hi_raw = cuts[str(run)]['ftp_max_hi_raw']
    wf_max_fit_const = cuts[str(run)]['wf_max_fit_const']
    wf_max_fit_offset = cuts[str(run)]['wf_max_fit_offset']

    df = df.query(cuts[str(run)]['muon_cut']).copy()
    df_cut = df

    total_counts = len(df)
    print(f'total counts: {total_counts}')

    for cut_out in cut_keys:
        df_cut = df
        cut_set = cut_keys - set([cut_out])
        cut_full = " and ".join([cuts[str(run)][c] for c in cut_keys])
        print(f'Leaving out {cut_out}. \nfull cut: {cut_full}\n')

        # have to apply cuts individually instead of using `cut_full` because
        # the total cut string is too long for the query :'(
        for cut in cut_set:
            print(f'applying cut: {cut}')
            df_cut = df_cut.query((cuts[str(run)][cut])).copy()
            cut_counts = len(df.query((cuts[str(run)][cut])).copy())
            percent_surviving = (cut_counts/total_counts)*100.
            print(f'Percentage surviving {cut} cut: {percent_surviving:.2f}')

        cut_counts_total = len(df_cut)
        percent_surviving_total = (cut_counts_total/total_counts)*100.
        print(f'Percentage surviving cuts: {percent_surviving_total:.2f}')
        # exit()

        # ____________baseline mean________________________________________
        fig, ax = plt.subplots()
        # suptitle = f'Run {run}; All cuts except: {cut_out}'
        suptitle = f'Run {run}; All cuts except: {cut_out}\n{percent_surviving_total:.2f}% surviving cuts'
        fig.suptitle(suptitle, horizontalalignment='center', fontsize=14)

        blo, bhi, bpb = 9000, 9400, 1
        nbx = int((bhi-blo)/bpb)
        bl_hist, bins = np.histogram(df_cut['bl'], bins=nbx, range=[blo, bhi])
        bl_hist_raw, bins = np.histogram(df['bl'], bins=nbx, range=[blo, bhi])

        plt.semilogy(bins[1:], bl_hist_raw, c='k', alpha=0.3, ds='steps', lw=1., label='before cuts')
        plt.semilogy(bins[1:], bl_hist, ds='steps', c='b', lw=1, label='after cuts')
        plt.axvline(bl_cut_lo_raw, c='r', lw=1, label='95% cut lines')
        plt.axvline(bl_cut_hi_raw, c='r', lw=1)

        plt.xlabel('bl', fontsize=14)
        plt.ylabel('counts', fontsize=14)
        # plt.title(f'Baseline Mean \n{percent_surviving_total:.2f}% surviving cuts', fontsize=14)
        plt.title(f'Baseline Mean', fontsize=14)
        plt.setp(ax.get_xticklabels(), fontsize=12)
        plt.setp(ax.get_yticklabels(), fontsize=12)

        ax.text(0.05, 0.75, f'r = {radius} mm \ntheta = {angle_det} deg \nruntime {rt_min:.2f}',
                verticalalignment='bottom', horizontalalignment='left',
                transform=ax.transAxes, color='black', fontsize=12,
                bbox={'facecolor': 'white', 'alpha': 0.5, 'pad': 10})  # 0.1, 0.75,

        plt.legend(loc='center left')
        plt.tight_layout()
        plt.savefig(f'./plots/{campaign}N_minus_1/raw/{str(run)}/except_{cut_out}_bl_mean_raw.png', dpi=200)
        plt.clf()
        plt.close()
        # exit()

        # ____________baseline slope________________________________________
        fig, ax = plt.subplots()
        fig.suptitle(suptitle, horizontalalignment='center', fontsize=14)

        blo, bhi, bpb = -10., 10., 0.005
        nbx = int((bhi-blo)/bpb)
        bl_hist, bins = np.histogram(df_cut['bl_slope'], bins=nbx, range=[blo, bhi])
        bl_hist_raw, bins = np.histogram(df['bl_slope'], bins=nbx, range=[blo, bhi])

        plt.semilogy(bins[1:], bl_hist_raw, ds='steps', c='k', alpha=0.3, lw=1, label='before cuts')
        plt.semilogy(bins[1:], bl_hist, ds='steps', c='b', lw=1, label='after cuts')
        plt.axvline(bl_slope_lo_raw, c='r', lw=1, label='95% cut lines')
        plt.axvline(bl_slope_hi_raw, c='r', lw=1)

        plt.xlabel('bl_slope', fontsize=14)
        plt.ylabel('counts', fontsize=14)
        plt.title(f'Baseline Slope', fontsize=14)
        plt.setp(ax.get_xticklabels(), fontsize=12)
        plt.setp(ax.get_yticklabels(), fontsize=12)

        ax.text(0.05, 0.75, f'r = {radius} mm \ntheta = {angle_det} deg \nruntime {rt_min:.2f}',
                verticalalignment='bottom', horizontalalignment='left',
                transform=ax.transAxes, color='black', fontsize=12,
                bbox={'facecolor': 'white', 'alpha': 0.5, 'pad': 10})

        plt.legend()
        plt.tight_layout()
        plt.savefig(f'./plots/{campaign}dataCleaning/N_minus_1/raw/{str(run)}/except_{cut_out}_bl_slope_raw.png', dpi=200)
        plt.clf()
        plt.close()

        # ____________baseline sigma________________________________________
        fig, ax = plt.subplots()
        fig.suptitle(suptitle, horizontalalignment='center', fontsize=14)

        blo, bhi, bpb = 2., 12., 0.005
        nbx = int((bhi-blo)/bpb)
        bl_hist, bins = np.histogram(df_cut['bl_sig'], bins=nbx, range=[blo, bhi])
        bl_hist_raw, bins = np.histogram(df['bl_sig'], bins=nbx, range=[blo, bhi])

        plt.semilogy(bins[1:], bl_hist_raw, ds='steps', c='k', alpha=0.3, lw=1, label='before cuts')
        plt.semilogy(bins[1:], bl_hist, ds='steps', c='b', lw=1, label='after cuts')
        plt.axvline(bl_sig_lo_raw, c='r', lw=1, label='95% cut lines')
        plt.axvline(bl_sig_hi_raw, c='r', lw=1)

        plt.xlabel('bl_sigma', fontsize=14)
        plt.ylabel('counts', fontsize=14)
        plt.title(f'Baseline Sigma', fontsize=14)
        plt.setp(ax.get_xticklabels(), fontsize=12)
        plt.setp(ax.get_yticklabels(), fontsize=12)

        ax.text(0.9, 0.75, f'r = {radius} mm \ntheta = {angle_det} deg \nruntime {rt_min:.2f}',
                verticalalignment='bottom', horizontalalignment='right',
                transform=ax.transAxes, color='black', fontsize=12,
                bbox={'facecolor': 'white', 'alpha': 0.5, 'pad': 10})

        plt.legend(loc='center right')
        plt.tight_layout()
        plt.savefig(f'./plots/{campaign}dataCleaning/N_minus_1/raw/{str(run)}/except_{cut_out}_bl_sig_raw.png', dpi=200)
        plt.clf()
        plt.close()

        # ____________trapEftp/trapEmax________________________________________
        fig, ax = plt.subplots()
        fig.suptitle(suptitle, horizontalalignment='center', fontsize=14)

        elo, ehi = 0.925, 1.01
        e_bins = int((ehi - elo)/0.001)
        ftp_max_hist, bins = np.histogram(df_cut['ftp_max'], bins=nbx, range=[elo, ehi])
        ftp_max_hist_raw, bins = np.histogram(df['ftp_max'], bins=nbx, range=[elo, ehi])

        plt.semilogy(bins[1:], ftp_max_hist_raw, ds='steps', c='k', alpha=0.3, lw=1, label='before cuts')
        plt.semilogy(bins[1:], ftp_max_hist, ds='steps', c='b', lw=1, label='after cuts')
        plt.axvline(ftp_max_lo_raw, c='r', lw=1, label='95% cut lines')
        plt.axvline(ftp_max_hi_raw, c='r', lw=1)

        plt.xlabel('trapEftp/trapEmax', fontsize=14)
        plt.ylabel('counts', fontsize=14)
        plt.title(f'trapEftp/trapEmax', fontsize=14)
        plt.setp(ax.get_xticklabels(), fontsize=12)
        plt.setp(ax.get_yticklabels(), fontsize=12)

        ax.text(0.1, 0.75, f'r = {radius} mm \ntheta = {angle_det} deg \nruntime {rt_min:.2f}',
                verticalalignment='bottom', horizontalalignment='left',
                transform=ax.transAxes, color='black', fontsize=12,
                bbox={'facecolor': 'white', 'alpha': 0.5, 'pad': 10})

        plt.legend(loc='center left')
        plt.tight_layout()
        plt.savefig(f'./plots/{campaign}dataCleaning/N_minus_1/raw/{str(run)}/except_{cut_out}_ftp_max_raw.png', dpi=200)
        plt.clf()
        plt.close()

        # ____________wf_maxVtrapEftp_cal________________________________________
        fig, ax = plt.subplots()
        fig.suptitle(suptitle, horizontalalignment='center', fontsize=14)

        elo, ehi, epb = 0, 5500, 1
        e_bins = 2000  # int((ehi-elo)/epb)
        wflo, wfhi = 0, 15000
        wf_bins = 2000

        wf_maxVEnergy, xedges, yedges = np.histogram2d(df_cut['wf_max'], df_cut['trapEftp_cal'],
                                                       bins=[wf_bins, e_bins],
                                                       range=([wflo, wfhi], [elo, ehi]))
        X, Y = np.mgrid[wflo:wfhi:wf_bins*1j, elo:ehi:e_bins*1j]

        pcm = plt.pcolormesh(X, Y, wf_maxVEnergy, norm=LogNorm())
        cb = plt.colorbar()
        cb.set_label("counts", ha='right', va='center', rotation=270, fontsize=14)
        cb.ax.tick_params(labelsize=12)

        ax.text(0.1, 0.75, f'r = {radius} mm \ntheta = {angle_det} deg \nruntime {rt_min:.2f}',
                verticalalignment='bottom', horizontalalignment='left',
                transform=ax.transAxes, color='black', fontsize=12,
                bbox={'facecolor': 'white', 'alpha': 0.5, 'pad': 10})

        # note: plotting the fit lines is only reliable if you used the same
        # binning as when the fit was done!
        en_bin_centers = pgh.get_bin_centers(xedges)
        cal_en_bin_centers = pgh.get_bin_centers(yedges)

        z = (wf_max_fit_const*en_bin_centers + wf_max_fit_offset
             + 2.*np.sqrt(e_res_const[0] + e_res_const[1]*cal_en_bin_centers
                          + (e_res_const[2]*cal_en_bin_centers**2)))
        plt.plot(en_bin_centers, z, 'r', lw=0.7, label='cut lines')

        w = (wf_max_fit_const*en_bin_centers + wf_max_fit_offset
             - 2.*np.sqrt(e_res_const[0] + e_res_const[1]*cal_en_bin_centers
                          + e_res_const[2]*cal_en_bin_centers**2))
        plt.plot(en_bin_centers, w, 'r', lw=0.7)

        ax.set_xlabel('wf_max', fontsize=14)
        ax.set_ylabel('trapEftp_cal (keV)', fontsize=14)
        plt.setp(ax.get_xticklabels(), fontsize=12)
        plt.setp(ax.get_yticklabels(), fontsize=12)
        plt.title(f'wf_max vs Energy', horizontalalignment='center', fontsize=14)
        plt.legend(loc='lower right')
        plt.tight_layout()

        plt.ylim(0, 300)
        plt.xlim(0, 800)
        plt.savefig(f'./plots/{campaign}dataCleaning/N_minus_1/raw/{str(run)}/except_{cut_out}_wf_max_raw_lowE.png', dpi=200)

        plt.ylim(1200, 1550)
        plt.xlim(3300, 4300)
        plt.savefig(f'./plots/{campaign}dataCleaning/N_minus_1/raw/{str(run)}/except_{cut_out}_wf_max_raw_1460.png', dpi=200)

        plt.ylim(2400, 2750)
        plt.xlim(6600, 8000)
        plt.savefig(f'./plots/{campaign}dataCleaning/N_minus_1/raw/{str(run)}/except_{cut_out}_wf_max_raw_2615.png', dpi=200)

        plt.clf()
        plt.close()

        # ____________60 keV with fit________________________________________
        pgfenergy_hist, pgfebins, evars = pgh.get_hist(df_cut['trapEftp_cal'], bins=50, range=[54, 65])
        raw_pgfenergy_hist, pgfebins, evars = pgh.get_hist(df['trapEftp_cal'], bins=50, range=[54, 65])  # range=[54, 65]

        pars, cov = pgf.gauss_mode_width_max(pgfenergy_hist, pgfebins, evars)
        mode = pars[0]
        width = pars[1]
        amp = pars[2]
        print(f'mode: {mode}')
        print(f'width: {width}')
        print(f'amp: {amp}')

        e_pars, ecov = pgf.fit_hist(cage_utils.gauss_fit_func, pgfenergy_hist, pgfebins, evars,
                                    guess=(amp, mode, width, 1))
        mean_fit = e_pars[1]
        width_fit = e_pars[2]
        amp_fit = e_pars[0]
        const_fit = e_pars[3]
        fwhm = width_fit*2.355
        print(f'mean: {mean_fit}')
        print(f'width: {width_fit}')
        print(f'amp: {amp_fit}')
        print(f'C: {const_fit}')
        print(f'FWHM at 60 keV: {fwhm} \n{(fwhm/mean_fit)*100}%')

        fig, ax = plt.subplots()
        fig.suptitle(suptitle, horizontalalignment='center', fontsize=14)

        plt.plot(pgfebins[1:], cage_utils.gauss_fit_func(pgfebins[1:], *e_pars), c='r', lw=0.8, label='gaussian fit')
        plt.plot(pgfebins[1:], pgfenergy_hist, ds='steps', c='b', lw=1., label='after cuts')
        plt.plot(pgfebins[1:], raw_pgfenergy_hist, ds='steps', c='k', alpha=0.3, lw=1., label='before cuts')

        plt.xlabel('Energy (keV)', fontsize=14)
        plt.ylabel('counts', fontsize=14)
        plt.title(f'60 keV peak with gaussian fit', fontsize=14)
        plt.setp(ax.get_xticklabels(), fontsize=12)
        plt.setp(ax.get_yticklabels(), fontsize=12)

        ax.text(0.05, 0.75, f'r = {radius} mm \ntheta = {angle_det} deg \nruntime {rt_min:.2f}',
                verticalalignment='bottom', horizontalalignment='left',
                transform=ax.transAxes, color='black', fontsize=10,
                bbox={'facecolor': 'white', 'alpha': 0.5, 'pad': 8})
        ax.text(0.95, 0.72, f'mean: {mean_fit:.2f} \nsigma: {width_fit:.3f} \nFWHM at 60 keV: {fwhm:.2f} keV\n({(fwhm/mean_fit)*100:.2f}%)',
                verticalalignment='bottom', horizontalalignment='right',
                transform=ax.transAxes, color='black', fontsize=10,
                bbox={'facecolor': 'white', 'alpha': 0.5, 'pad': 8})

        plt.legend(loc='center right')
        plt.tight_layout()
        plt.savefig(f'./plots/{campaign}dataCleaning/N_minus_1/raw/{str(run)}/except_{cut_out}_fit_60keV_raw.png', dpi=200)
        plt.clf()
        plt.close()
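# Usage note (illustrative, not from the original script): n_minus_1() reads
# per-run constants and cut expressions from ./cuts.json (keys like
# 'e_res_const0', 'bl_cut_lo_raw', 'muon_cut', plus one query string per entry
# in cut_keys), then drops one cut at a time and re-plots each data-cleaning
# parameter. The cut names below are placeholders, not the actual JSON keys:
#
#   cut_keys = {'bl_cut', 'bl_slope_cut', 'bl_sig_cut', 'ftp_max_cut', 'wf_max_cut'}
#   n_minus_1(run, campaign, df, dg, runtype, rt_min, radius, angle_det, rotary, cut_keys)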
def get_hpge_E_peak_par_guess(hist, bins, var, func):
    """ Get parameter guesses for func fit to peak in hist

    Parameters
    ----------
    hist, bins, var : array, array, array
        Histogram of uncalibrated energies, see pgh.get_hist(). Should be
        windowed around the peak.
    func : function
        The function to be fit to the peak in the (windowed) hist
    """
    if func == pgp.gauss_step:
        # pars are: amp, mu, sigma, bkg, step

        # get mu and height from a gauss fit
        pars, cov = pgf.gauss_mode_max(hist, bins, var)
        if pars is None:
            print("get_hpge_E_peak_par_guess: gauss_mode_max failed")
            return []
        mu = pars[0]
        height = pars[1]

        # get bg and step from edges of hist
        bg = np.sum(hist[-5:])/5
        step = np.sum(hist[:5])/5 - bg

        # get sigma from fwfm with f = 1/sqrt(e)
        try:
            sigma = pgh.get_fwfm(0.6065, hist, bins, var, mx=height, bl=bg+step/2, method='interpolate')[0]
            if sigma == 0:
                raise ValueError
        except:
            sigma = pgh.get_fwfm(0.6065, hist, bins, var, mx=height, bl=bg+step/2, method='fit_slopes')[0]
        if sigma == 0:
            print("get_hpge_E_peak_par_guess: sigma estimation failed")
            return []

        # now compute amp and return
        height -= (bg + step/2)
        amp = height * sigma * np.sqrt(2 * np.pi)
        return [amp, mu, sigma, bg, step]

    if func == pgp.radford_peak:
        # pars are: mu, sigma, hstep, htail, tau, bg0, amp

        # guess mu, height
        i_0 = np.argmax(hist)
        mu = pgh.get_bin_centers(bins)[i_0]
        height = hist[i_0]

        # get bg and step from edges of hist
        bg0 = np.sum(hist[-5:])/5
        step = np.sum(hist[:5])/5 - bg0

        # get sigma from fwfm with f = 1/sqrt(e)
        try:
            sigma = pgh.get_fwfm(0.6065, hist, bins, var, mx=height, bl=bg0+step/2, method='interpolate')[0]
            if sigma == 0:
                raise ValueError
        except:
            sigma = pgh.get_fwfm(0.6065, hist, bins, var, mx=height, bl=bg0+step/2, method='fit_slopes')[0]
        if sigma == 0:
            print("get_hpge_E_peak_par_guess: sigma estimation failed")
            return []
        sigma = sigma*.5  # roughly remove some amount due to tail

        # for now hard-coded
        htail = 1./5
        tau = 6.*sigma

        # now compute amp and return
        height -= (bg0 + step/2)
        # numerical factors from definition of tail_func @ mu
        amp = height / (htail*0.87/35 + (1-htail)/(sigma*np.sqrt(2*np.pi)))
        hstep = step/(2*amp)

        parguess = [mu, sigma, hstep, htail, tau, bg0, amp]
        return parguess

    if func == pgp.radford_peak_wrapped:
        # pars are: mu, sigma, hstep, htail, tau, bg0, amp

        # get mu and height from a gauss fit
        # pars, cov = pgf.gauss_mode_max(hist, bins, var)

        # guess mu, height
        i_0 = np.argmax(hist)
        mu = pgh.get_bin_centers(bins)[i_0]
        height = hist[i_0]

        # get bg and step from edges of hist
        bg0 = np.sum(hist[-5:])/5
        step = np.sum(hist[:5])/5 - bg0

        # get sigma from fwfm with f = 1/sqrt(e)
        try:
            sigma = pgh.get_fwfm(0.6065, hist, bins, var, mx=height, bl=bg0+step/2, method='interpolate')[0]
            if sigma == 0:
                raise ValueError
        except:
            sigma = pgh.get_fwfm(0.6065, hist, bins, var, mx=height, bl=bg0+step/2, method='fit_slopes')[0]
        if sigma == 0:
            print("get_hpge_E_peak_par_guess: sigma estimation failed")
            return []
        sigma = sigma*.5  # roughly remove some amount due to tail

        # for now hard-coded
        htail = 1./5
        tau = 6.*sigma

        # now compute amp and return
        height -= (bg0 + step/2)
        # numerical factors from definition of tail_func @ mu
        amp = height / (htail*0.87/35 + (1-htail)/(sigma*np.sqrt(2*np.pi)))
        hstep = step/(2*amp)

        # convert to wrapped parameters
        A = amp*(1-htail)
        S = amp*2*hstep
        T = amp*htail
        parguess = [A, mu, sigma, bg0, S, T, tau]
        return parguess

    else:
        print(f'get_hpge_E_peak_par_guess not implemented for {func.__name__}')
        return []
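# Usage sketch (illustrative, not part of the original module): the parameter
# guess is intended to seed a fit of `func` to a histogram windowed around one
# peak. pgp/pgf are the peak-shape and peak-fitting module aliases already used
# in the branches above; the fitting call below is only a sketch of how the
# guess would typically be consumed.
def example_peak_par_guess(hist, bins, var):
    parguess = get_hpge_E_peak_par_guess(hist, bins, var, pgp.gauss_step)
    if len(parguess) == 0:
        return None
    pars, cov = pgf.fit_hist(pgp.gauss_step, hist, bins, var=var, guess=parguess)
    return pars, cov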
def calibrate_tl208(energy_series, cal_peaks=None, plotFigure=None):
    """
    energy_series: array of energies we want to calibrate
    cal_peaks: array of peaks to fit

    1.) we find the 2614 peak by looking for the tallest peak at >0.1 the max adc value
    2.) fit that peak to get a rough guess at a calibration to find other peaks with
    3.) fit each peak in peak_energies
    4.) do a linear fit to the peak centroids to find a calibration
    """
    if cal_peaks is None:
        cal_peaks = np.array(
            [238.632, 510.770, 583.191, 727.330, 860.564, 2614.553])  # get_calibration_energies(peak_energies)
    else:
        cal_peaks = np.array(cal_peaks)

    if len(energy_series) < 100:
        return 1, 0

    # get 10 most prominent ~high e peaks
    max_adc = np.amax(energy_series)
    energy_hi = energy_series
    # [(energy_series > np.percentile(energy_series, 20)) & (energy_series < np.percentile(energy_series, 99.9))]

    peak_energies, peak_e_err = get_most_prominent_peaks(energy_hi,)
    rough_kev_per_adc, rough_kev_offset = match_peaks(peak_energies, cal_peaks)
    e_cal_rough = rough_kev_per_adc * energy_series + rough_kev_offset

    # return rough_kev_per_adc, rough_kev_offset
    # print(energy_series)
    # plt.ion()
    # plt.figure()
    # # for peak in cal_peaks:
    # #     plt.axvline(peak, c="r", ls=":")
    # # energy_series.hist()
    # # for peak in peak_energies:
    # #     plt.axvline(peak, c="r", ls=":")
    #
    # # plt.hist(energy_series)
    # # plt.hist(e_cal_rough[e_cal_rough>100], bins=2700)
    # val = input("do i exist?")
    # exit()

    ###############################################
    # Do a real fit to every peak in peak_energies
    ###############################################
    max_adc = np.amax(energy_series)

    peak_num = len(cal_peaks)
    centers = np.zeros(peak_num)
    fit_result_map = {}
    bin_size = 0.2  # keV

    if plotFigure is not None:
        plot_map = {}

    for i, energy in enumerate(cal_peaks):
        window_width = 10  # keV
        window_width_in_adc = (window_width) / rough_kev_per_adc
        energy_in_adc = (energy - rough_kev_offset) / rough_kev_per_adc
        bin_size_adc = (bin_size) / rough_kev_per_adc

        peak_vals = energy_series[
            (energy_series > energy_in_adc - window_width_in_adc) &
            (energy_series < energy_in_adc + window_width_in_adc)]

        peak_hist, bins = np.histogram(
            peak_vals,
            bins=np.arange(energy_in_adc - window_width_in_adc,
                           energy_in_adc + window_width_in_adc + bin_size_adc,
                           bin_size_adc))
        bin_centers = pgh.get_bin_centers(bins)

        # plt.ion()
        # plt.figure()
        # plt.plot(bin_centers, peak_hist, color="k", ls="steps")
        # inpu = input("q to quit...")
        # if inpu == "q": exit()

        try:
            guess_e, guess_sigma, guess_area = get_gaussian_guess(peak_hist, bin_centers)
        except IndexError:
            print("\n\nIt looks like there may not be a peak at {} keV".format(energy))
            print("Here is a plot of the area I'm searching for a peak...")
            plt.ion()
            plt.figure(figsize=(12, 6))
            plt.subplot(121)
            plt.plot(bin_centers, peak_hist, color="k", ls="steps")
            plt.subplot(122)
            plt.hist(e_cal_rough, bins=2700, histtype="step")
            input("-->press any key to continue...")
            sys.exit()

        plt.plot(bin_centers, gauss(bin_centers, guess_e, guess_sigma, guess_area), color="b")

        # inpu = input("q to quit...")
        # if inpu == "q": exit()

        bounds = ([0.9 * guess_e, 0.5 * guess_sigma, 0, 0, 0, 0, 0],
                  [1.1 * guess_e, 2 * guess_sigma, 0.1, 0.75, window_width_in_adc, 10, 5 * guess_area])
        params = fit_binned(
            radford_peak, peak_hist, bin_centers,
            [guess_e, guess_sigma, 1E-3, 0.7, 5, 0, guess_area],
        )  # bounds=bounds)

        plt.plot(bin_centers, radford_peak(bin_centers, *params), color="r")

        # inpu = input("q to quit...")
        # if inpu == "q": exit()

        fit_result_map[energy] = params
        centers[i] = params[0]

        if plotFigure is not None:
            plot_map[energy] = (bin_centers, peak_hist)

    # Do a linear fit to find the calibration
    linear_cal = np.polyfit(centers, cal_peaks, deg=1)

    if plotFigure is not None:
        plt.figure(plotFigure.number)
        plt.clf()

        grid = gs.GridSpec(peak_num, 3)
        ax_line = plt.subplot(grid[:, 1])
        ax_spec = plt.subplot(grid[:, 2])

        for i, energy in enumerate(cal_peaks):
            ax_peak = plt.subplot(grid[i, 0])
            bin_centers, peak_hist = plot_map[energy]
            params = fit_result_map[energy]
            ax_peak.plot(bin_centers * rough_kev_per_adc + rough_kev_offset,
                         peak_hist, ls="steps-mid", color="k")
            fit = radford_peak(bin_centers, *params)
            ax_peak.plot(bin_centers * rough_kev_per_adc + rough_kev_offset, fit, color="b")
            ax_peak.set_xlabel("Energy [keV]")

        ax_line.scatter(centers, cal_peaks)
        x = np.arange(0, max_adc, 1)
        ax_line.plot(x, linear_cal[0] * x + linear_cal[1])
        ax_line.set_xlabel("ADC")
        ax_line.set_ylabel("Energy [keV]")

        energies_cal = energy_series * linear_cal[0] + linear_cal[1]
        peak_hist, bins = np.histogram(energies_cal, bins=np.arange(0, 2700))
        ax_spec.semilogy(pgh.get_bin_centers(bins), peak_hist, ls="steps-mid")
        ax_spec.set_xlabel("Energy [keV]")

    return linear_cal
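# Usage sketch (illustrative, not part of the original module): applying the
# calibration returned by calibrate_tl208. linear_cal follows the np.polyfit
# convention (slope, offset), so calibrated energies are slope*adc + offset,
# the same expression used for energies_cal inside the function above.
def example_apply_tl208_calibration(energy_series):
    linear_cal = calibrate_tl208(energy_series)
    return energy_series * linear_cal[0] + linear_cal[1]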