Esempio n. 1
0
def calibrate_pass3(ds, df,etype="e_ftp", write_db=False, display=False, linfit = True):

    """
    This is the calibration method I used for HADES ICPC characterization  
    
    You have to look at the raw spectrum for one dataset once. One dataset implies:
 
     - One detector setup
     - One source
     - One daq-setting 

    From the raw spectrum at least two raw lines of your choice (i.e. 208Tl, 40K, Bi, etc) and determin an range 
    around the lines have to be selected and added to a config file (to the runDB.json in the HADES work)
    under "pass3_peaks" and "pass3_lim".
    (Note that you also need the literature values in e.g. "cal_peaks" ) 

    This calibration function will get the raw peaks by constructing a histogram around the raw values 
    ,perfom a fit and devide the literature value by the output (c = lit/out). 
    Then plot all received cal values vs energy and do a poly1 fit to extract the energy dependence (a*x+b)
    The fit function can also be of other type (quadratic, sqrt-like or else if needed, default = linfit)

    The calibrated energy is then ecal = eraw * (eraw *a +b)  
  

    A.Zschocke    
    """

    etype = "e_ftp"

    # get the list of peaks we want  
    epeaks = np.array(sorted(ds.runDB["pass3_peaks"]))
    e_lim = ds.runDB["pass3_lim"]
    true_peaks = sorted(np.array(ds.runDB["cal_peaks"]))

    # get the raw energy 
    ene = df[etype]
    means = []
    cals = []
    # do the firts loop over all lines of interest
    for i, peak in enumerate(epeaks):

        xlo, xhi, xpb = peak - e_lim, peak + e_lim, 0.25
        nb = int((xhi-xlo)/xpb)
        hE, xE, vE = ph.get_hist(ene, range=(xlo, xhi), dx=xpb)
        guess = [100000,peak,0.7,1000]
        guess_lims = ([0,peak -20,0,0],[1e9,peak+20,1e9,1e9])

        xF, xF_cov = pf.fit_hist(pf.gauss_bkg, hE, xE, var=np.ones(len(hE)), guess=guess, bounds=guess_lims)
        fit = pf.gauss_bkg(xE,*xF)

        if display:
           plt.plot(xE[1:],hE,ls='steps',lw='1.5',c='b')
           plt.plot(xE,fit,'r')
           plt.show()

        mean = xF[1]
        means.append(mean)
        cals.append(true_peaks[i]/mean)

    # now calculate the energy dependence
    cals = np.array(cals)
    means = np.array(means)

    if linfit:
       xF = np.polyfit(means,cals,1)
       pfit = means *xF[0] + xF[1]
       cal_peaks = means*(means*xF[0]+xF[1])

    else:
       xF, xF_coev = curve_fit(pf.cal_slope, epeaks, cals)
       pfit = pf.cal_slope(epeaks,*xF)
       cal_peaks = means*(np.sqrt(xF[0]+(xF[1]/means**2)))

    print(f"Calibration values:\n a={pfit[0]:.5f} b={pfit[1]:.5f}")

    residuals = abs((cal_peaks-true_peaks))#/true_peaks*100

    if any(residuals > 1):
       r = residuals[np.where(residuals > 0)]
       print("\nWaning! No proper calibration\nThere is a deviation of",r, "%")


    if display:

       meta_dir = os.path.expandvars(ds.runDB["meta_dir"])
       runNum = ds.ds_list[0]
       x = np.arange(1,60000,1)

       if linfit:
          e_cal = ene * (ene * xF[0] + xF[1])
          pfit = x * xF[0] + xF[1]
       else:
          e_cal = ene *np.sqrt(xF[0] +(xF[1]/(ene**2)))
          pfit = pf.cal_slope(x,*xF)

       hE, xE, vE = ph.get_hist(e_cal, range=(0, 3000), dx=1)

        x = np.arange(1,60000,1)

       plt.plot(epeaks, cals, 'kx', ms=10, label='calibration values')
       plt.plot(x, pfit,'r',label='Fit')
       plt.xlim(0,60010)
       plt.xlabel("raw Energy")
       plt.ylabel("cal. value")
       plt.legend()
       plt.savefig(meta_dir+"/calVals_pass3_" + str(runNum)+".png")
       plt.show()

       hE, xE, vE = ph.get_hist(e_cal, range=(0, 3000), dx=1)
       plt.figure(1,(12.00,10.00))
       plt.subplot(211)
       plt.semilogy(xE[1:], hE, ls='steps', lw=1.5,c='b')
       plt.xlabel("Energy [keV]")
       plt.xlim(-10,3100)
       plt.subplot(212)
       plt.plot(true_peaks, residuals, 'kx',ms=10,label='Residuals')
       plt.grid()
       plt.xlabel("Energy [keV]")
       plt.ylabel("Residuals [%]")
       plt.xlim(-10,3100)
       plt.legend()
       plt.savefig(meta_dir+"/calibratedSpectrum_pass3_" + str(runNum)+".png")
       plt.show()
Esempio n. 2
0
def peakfit_group(df_group, config, db_ecal):
    """
    """
    # get list of peaks to look for
    epeaks = config['expected_peaks'] + config['test_peaks']
    epeaks = np.array(sorted(epeaks))

    # right now a lookup by 'run' is hardcoded.
    # in principle the lookup should stay general using the gb_cols,
    # but it's kind of hard to see right now how to write the right db queries
    gb_run = df_group['run'].unique()
    if len(gb_run) > 1:
        print("Multi-run (or other) groupbys aren't supported yet, sorry")
        exit()

    # load data
    lh5_dir = os.path.expandvars(config['lh5_dir'])
    dsp_list = lh5_dir + df_group['dsp_path'] + '/' + df_group['dsp_file']
    raw_data = lh5.load_nda(dsp_list, config['rawe'], config['input_table'])
    runtime_min = df_group['runtime'].sum()

    # loop over energy estimators of interest
    pf_results = {}
    for et in config['rawe']:

        # load first-guess calibration constant from its table in the DB
        db_table = db_ecal.table(f'peakdet_{et}').all()
        df_cal = pd.DataFrame(db_table)
        lin_cal = df_cal.loc[df_cal.run == str(gb_run[0])]['lincal'].values[0]
        cal_data = raw_data[et] * lin_cal

        # compute expected peak locations and widths (fit to Gaussians)
        fit_results = {}
        for ie, epk in enumerate(epeaks):

            # adjust the window.  resolution goes as roughly sqrt(energy)
            window = np.sqrt(epk) * 0.5
            xlo, xhi = epk - window / 2, epk + window / 2
            nbins = int(window) * 5
            xpb = (xhi - xlo) / nbins
            ibin_bkg = int(nbins * 0.2)

            # get histogram, error, normalize by runtime
            pk_data = cal_data[(cal_data >= xlo) & (cal_data <= xhi)]
            hist, bins, _ = pgh.get_hist(pk_data, range=(xlo, xhi), dx=xpb)
            hist_norm = np.divide(hist, runtime_min * 60)
            hist_var = np.array(
                [np.sqrt(h / (runtime_min * 60)) for h in hist])

            # compute expected peak location and width (simple Gaussian)
            bkg0 = np.mean(hist_norm[:ibin_bkg])
            b, h = bins[1:], hist_norm - bkg0
            imax = np.argmax(h)
            upr_half = b[np.where((b > b[imax]) & (h <= np.amax(h) / 2))][0]
            bot_half = b[np.where((b < b[imax]) & (h <= np.amax(h) / 2))][-1]
            fwhm = upr_half - bot_half
            sig0 = fwhm / 2.355
            amp0 = np.amax(h) * fwhm
            p_init = [amp0, bins[imax], sig0, bkg0]  # a, mu, sigma, bkg
            p_fit, p_cov = pgf.fit_hist(pgf.gauss_bkg,
                                        hist_norm,
                                        bins,
                                        var=hist_var,
                                        guess=p_init)
            p_err = np.sqrt(np.diag(p_cov))

            # diagnostic plot, don't delete
            if config['show_plot']:
                plt.axvline(bins[ibin_bkg], c='m', label='bkg region')
                xfit = np.arange(xlo, xhi, xpb * 0.1)
                plt.plot(xfit,
                         pgf.gauss_bkg(xfit, *p_init),
                         '-',
                         c='orange',
                         label='init')
                plt.plot(xfit,
                         pgf.gauss_bkg(xfit, *p_fit),
                         '-',
                         c='red',
                         label='fit')
                plt.plot(bins[1:], hist_norm, c='b', lw=1.5, ds='steps')
                plt.xlabel('pass-1 energy (kev)', ha='right', x=1)
                plt.legend(fontsize=12)
                plt.show()
                plt.close()

            # goodness of fit
            chisq = []
            for i, h in enumerate(hist_norm):
                model = pgf.gauss_bkg(b[i], *p_fit)
                diff = (model - h)**2 / model
                chisq.append(abs(diff))
            rchisq = sum(np.array(chisq) / len(hist_norm))
            # fwhm_err = p_err[1] * 2.355 * e_peak / e_fit

            # collect interesting results for this row
            fit_results[ie] = {
                'epk': epk,
                'mu': p_fit[1],
                'fwhm': p_fit[2] * 2.355,
                'sig': p_fit[2],
                'amp': p_fit[0],
                'bkg': p_fit[3],
                'rchisq': rchisq,
                'mu_raw': p_fit[1] / lin_cal,  # <-- this is in terms of raw E
                'mu_unc': p_err[1] / lin_cal
            }

        # ----------------------------------------------------------------------
        # compute energy calibration by matrix inversion (thanks Tim and Jason!)

        view_cols = ['epk', 'mu', 'fwhm', 'bkg', 'rchisq', 'mu_raw']
        df_fits = pd.DataFrame(fit_results).T
        print(df_fits[view_cols])

        true_peaks = df_fits['epk']
        raw_peaks, raw_error = df_fits['mu_raw'], df_fits['mu_unc']

        error = raw_error / raw_peaks * true_peaks
        cov = np.diag(error**2)
        weights = np.diag(1 / error**2)

        degree = config['pol_order']
        raw_peaks_matrix = np.zeros((len(raw_peaks), degree + 1))
        for i, pk in enumerate(raw_peaks):
            temp_degree = degree
            row = np.array([])
            while temp_degree >= 0:
                row = np.append(row, pk**temp_degree)
                temp_degree -= 1
            raw_peaks_matrix[i] += row
        print(raw_peaks_matrix)

        # perform matrix inversion
        xTWX = np.dot(np.dot(raw_peaks_matrix.T, weights), raw_peaks_matrix)
        xTWY = np.dot(np.dot(raw_peaks_matrix.T, weights), true_peaks)
        if np.linalg.det(xTWX) == 0:
            print("singular matrix, determinant is 0, can't get cal constants")
            exit()
        xTWX_inv = np.linalg.inv(xTWX)

        # get polynomial coefficients and error
        cal_pars = np.dot(xTWX_inv, xTWY)
        cal_errs = np.sqrt(np.diag(xTWX_inv))
        n = len(cal_pars)
        print(f'Fit:', ' '.join([f'p{i}:{cal_pars[i]:.4e}' for i in range(n)]))
        print(f'Unc:', ' '.join([f'p{i}:{cal_errs[i]:.4e}' for i in range(n)]))

        # ----------------------------------------------------------------------
        # repeat the peak fit with the calibrated energy (affects widths)

        # compute calibrated energy
        pol = np.poly1d(cal_pars)  # handy numpy polynomial object
        cal_data = pol(raw_data[et])

        fit_results = {}
        for ie, epk in enumerate(epeaks):

            # adjust the window.  resolution goes as roughly sqrt(energy)
            window = np.sqrt(epk) * 0.5
            xlo, xhi = epk - window / 2, epk + window / 2
            nbins = int(window) * 5
            xpb = (xhi - xlo) / nbins
            ibin_bkg = int(nbins * 0.2)

            # get histogram, error, normalize by runtime
            pk_data = cal_data[(cal_data >= xlo) & (cal_data <= xhi)]
            hist, bins, _ = pgh.get_hist(pk_data, range=(xlo, xhi), dx=xpb)
            hist_norm = np.divide(hist, runtime_min * 60)
            hist_var = np.array(
                [np.sqrt(h / (runtime_min * 60)) for h in hist])

            # compute expected peak location and width (simple Gaussian)
            bkg0 = np.mean(hist_norm[:ibin_bkg])
            b, h = bins[1:], hist_norm - bkg0
            imax = np.argmax(h)
            upr_half = b[np.where((b > b[imax]) & (h <= np.amax(h) / 2))][0]
            bot_half = b[np.where((b < b[imax]) & (h <= np.amax(h) / 2))][-1]
            fwhm = upr_half - bot_half
            sig0 = fwhm / 2.355
            amp0 = np.amax(h) * fwhm
            p_init = [amp0, bins[imax], sig0, bkg0]  # a, mu, sigma, bkg
            p_fit, p_cov = pgf.fit_hist(pgf.gauss_bkg,
                                        hist_norm,
                                        bins,
                                        var=hist_var,
                                        guess=p_init)
            p_err = np.sqrt(np.diag(p_cov))

            # save results
            fit_results[ie] = {
                'epk': epk,
                'mu': p_fit[1],
                'fwhm': p_fit[2] * 2.355,
                'sig': p_fit[2],
                'amp': p_fit[0],
                'bkg': p_fit[3],
            }

        # consolidate results again
        view_cols = ['epk', 'mu', 'fwhm', 'residual']
        df_fits = pd.DataFrame(fit_results).T

        # compute the difference between lit and measured values
        cal_peaks = pol(raw_peaks)
        df_fits['residual'] = true_peaks - cal_peaks
        print(df_fits[view_cols])

        # fit fwhm vs. energy
        # FWHM(E) = sqrt(A_noise^2 + A_fano^2 * E + A_qcol^2 E^2)
        # Ref: Eq. 3 of https://arxiv.org/abs/1902.02299
        # TODO: fix error handling
        def sqrt_fwhm(x, a_n, a_f, a_c):
            return np.sqrt(a_n**2 + a_f**2 * x + a_c**2 * x**2)

        p_guess = [0.3, 0.05, 0.001]
        p_fit, p_cov = curve_fit(
            sqrt_fwhm, df_fits['mu'], df_fits['fwhm'],
            p0=p_guess)  #, sigma = np.sqrt(h), absolute_sigma=True)
        p_err = np.sqrt(np.diag(p_cov))

        if config['show_plot']:

            # show a split figure with calibrated spectrum + used peaks on top,
            # and calib.function and resolution vs. energy on bottom
            fig, (p0, p1) = plt.subplots(2, 1, figsize=(8, 8), sharex=True)
            # gridspec_kw={'height_ratios':[2, 1]}))

            # get histogram (cts / keV / d)
            xlo, xhi, xpb = config['cal_range']
            hist, bins, _ = pgh.get_hist(cal_data, range=(xlo, xhi), dx=xpb)
            hist_norm = np.divide(hist, runtime_min * 60 * xpb)

            # show peaks
            cmap = plt.cm.get_cmap('brg', len(df_fits) + 1)
            for i, row in df_fits.iterrows():

                # get a pretty label for the isotope
                lbl = config['pks'][str(row['epk'])]
                iso = ''.join(r for r in re.findall('[0-9]+', lbl))
                ele = ''.join(r for r in re.findall('[a-z]', lbl, re.I))
                pk_lbl = r'$^{%s}$%s' % (iso, ele)

                pk_diff = row['epk'] - row['mu']
                p0.axvline(row['epk'],
                           ls='--',
                           c=cmap(i),
                           lw=1,
                           label=f"{pk_lbl} : {row['epk']} + {pk_diff:.3f}")

            p0.semilogy(bins[1:], hist_norm, ds='steps', c='b', lw=1)

            p0.set_ylabel('cts / s / keV', ha='right', y=1)
            p0.legend(loc=3, fontsize=11)

            # TODO: add fwhm errorbar
            x_fit = np.arange(xlo, xhi, xpb)
            y_init = sqrt_fwhm(x_fit, *p_guess)
            p1.plot(x_fit, y_init, '-', lw=1, c='orange', label='guess')

            y_fit = sqrt_fwhm(x_fit, *p_fit)
            a_n, a_f, a_c = p_fit
            fit_label = r'$\sqrt{(%.2f)^2 + (%.3f)^2 E + (%.4f)^2  E^2}$' % (
                a_n, a_f, a_c)
            p1.plot(x_fit, y_fit, '-r', lw=1, label=f'fit: {fit_label}')

            p1.plot(df_fits['mu'], df_fits['fwhm'], '.b')

            p1.set_xlabel('Energy (keV)', ha='right', x=1)
            p1.set_ylabel('FWHM (keV)', ha='right', y=1)
            p1.legend(fontsize=11)

            if config['batch_mode']:
                plt.savefig('./plots/peakdet_test.png')
            else:
                plt.show()

        # the order of the polynomial should be in the table name
        pf_results[f'{et}_Anoise'] = p_fit[0]
        pf_results[f'{et}_Afano'] = p_fit[1]
        pf_results[f'{et}_Aqcol'] = p_fit[2]
        for i in range(len(cal_pars)):
            pf_results[f'{et}_cal{i}'] = cal_pars[i]
        for i in range(len(cal_pars)):
            pf_results[f'{et}_unc{i}'] = cal_errs[i]

    return pd.Series(pf_results)
Esempio n. 3
0
    def analyze_pulser_run(df_row):
        """
        loop over each row of dfp and save the superpulse
        """
        epk, rt, vp, cyc = df_row[['E_keV', 'runtime', 'V_pulser', 'cycle']]
        rt *= 60  # sec
        if epk == 0: return []  # skip the bkg run

        # load pulser energies
        f_dsp = dg.lh5_dir + '/' + df_row.dsp_path + '/' + df_row.dsp_file
        pdata = lh5.load_nda([f_dsp], ['energy'], dsp_name)['energy'] * ecal

        # auto-narrow the window around the max pulser peak in two steps
        elo, ehi, epb = epk - 50, epk + 50, 0.5
        pdata_all = pdata[(pdata > elo) & (pdata < ehi)]
        hp, bp, _ = pgh.get_hist(pdata_all, range=(elo, ehi), dx=epb)
        pctr = bp[np.argmax(hp)]

        plo, phi, ppb = pctr - e_window, pctr + e_window, 0.1
        pdata_pk = pdata[(pdata > plo) & (pdata < phi)]
        hp, bp, _ = pgh.get_hist(pdata_pk, range=(plo, phi), dx=ppb)
        hp_rt = np.divide(hp, rt)
        hp_var = np.array([np.sqrt(h / (rt)) for h in hp])

        # fit a gaussian to get 1 sigma e-values
        ibin_bkg = 50
        bkg0 = np.mean(hp_rt[:ibin_bkg])
        b, h = bp[1:], hp_rt
        imax = np.argmax(h)
        upr_half = b[np.where((b > b[imax]) & (h <= np.amax(h) / 2))][0]
        bot_half = b[np.where((b < b[imax]) & (h <= np.amax(h) / 2))][-1]
        fwhm = upr_half - bot_half
        sig0 = fwhm / 2.355
        amp0 = np.amax(hp_rt) * fwhm
        p_init = [amp0, bp[imax], sig0, bkg0]
        p_fit, p_cov = pgf.fit_hist(pgf.gauss_bkg,
                                    hp_rt,
                                    bp,
                                    var=hp_var,
                                    guess=p_init)
        amp, mu, sigma, bkg = p_fit

        # select events within 1 sigma of the maximum
        # and pull the waveforms from the raw file to make a superpulse.
        idx = np.where((pdata >= mu - sigma) & (pdata <= mu + sigma))
        print(
            f'Pulser at {epk} keV, {len(idx[0])} events.  Limiting to {nwfs}.')
        if len(idx[0]) > nwfs:
            idx = idx[0][:nwfs]

        # grab the 2d numpy array of pulser wfs
        n_rows = idx[-1] + 1  # read up to this event and stop
        f_raw = dg.lh5_dir + '/' + df_row.raw_path + '/' + df_row.raw_file
        tb_wfs, n_wfs = sto.read_object(raw_name, f_raw, n_rows=n_rows)
        pwfs = tb_wfs['values'].nda[idx, :]
        # print(idx, len(idx), pwfs.shape, '\n', pwfs)

        # data cleaning step: remove events with outlier baselines
        bl_means = pwfs[:, :500].mean(axis=1)
        bl_mode = mode(bl_means.astype(int))[0][0]
        bl_ctr = np.subtract(bl_means, bl_mode)
        idx_dc = np.where(np.abs(bl_ctr) < bl_thresh)
        pwfs = pwfs[idx_dc[0], :]
        bl_means = bl_means[idx_dc]
        # print(pwfs.shape, bl_means.shape)

        # baseline subtract (trp when leading (not trailing) dim is the same)
        wfs = (pwfs.transpose() - bl_means).transpose()

        # time-align all wfs at their 50% timepoint (tricky!).
        # adapted from pygama/sandbox/old_dsp/[calculators,transforms].py
        # an alternate approach would be to use ProcessingChain here
        wf_maxes = np.amax(wfs, axis=1)
        timepoints = np.argmax(wfs >= wf_maxes[:, None] * tp_align, axis=1)
        wf_idxs = np.zeros([wfs.shape[0], n_pre + n_post], dtype=int)
        row_idxs = np.zeros_like(wf_idxs)
        for i, tp in enumerate(timepoints):
            wf_idxs[i, :] = np.arange(tp - n_pre, tp + n_post)
            row_idxs[i, :] = i
        wfs = wfs[row_idxs, wf_idxs]

        # take the average to get the superpulse
        superpulse = np.mean(wfs, axis=0)

        # normalize all wfs to the superpulse maximum
        wfmax, tmax = np.amax(superpulse), np.argmax(superpulse)
        superpulse = np.divide(superpulse, wfmax)
        wfs = np.divide(wfs, wfmax)

        # -- plot results --
        if show_plots:
            fig, (p0, p1) = plt.subplots(2, figsize=(7, 8))

            # plot fit result (top), and waveforms + superpulse (bottom)
            xfit = np.arange(plo, phi, ppb * 0.1)
            p0.plot(xfit,
                    pgf.gauss_bkg(xfit, *p_init),
                    '-',
                    c='orange',
                    label='init')
            p0.plot(xfit,
                    pgf.gauss_bkg(xfit, *p_fit),
                    '-',
                    c='red',
                    label='fit')

            # plot 1 sigma window
            p0.axvspan(mu - sigma,
                       mu + sigma,
                       color='m',
                       alpha=0.2,
                       label='1 sigma')

            # plot data
            p0.plot(bp[1:],
                    hp_rt,
                    ds='steps',
                    c='k',
                    lw=1,
                    label=f'{vp:.2f} V')
            p0.set_xlabel(f'onboard energy (keV, c={ecal:.2e})',
                          ha='right',
                          x=1)
            p0.set_ylabel('cts / s', ha='right', y=1)
            p0.legend(fontsize=10)

            # plot individ. wfs
            ts = np.arange(0, len(wfs[0, :]))
            for iwf in range(wfs.shape[0]):
                p1.plot(ts, wfs[iwf, :], '-k', lw=2, alpha=0.5)
            p1.plot(np.nan, np.nan, '-k', label=f'wfs, {epk:.0f} keV')

            # plot superpulse
            p1.plot(ts,
                    superpulse,
                    '-r',
                    lw=2,
                    label=f'superpulse, {vp:.2f} V')

            p1.set_xlabel('time (10 ns)', ha='right', x=1)
            p1.set_ylabel('amplitude', ha='right', y=1)
            p1.legend(fontsize=10)
            # plt.show()
            plt.savefig(f'./plots/superpulse_cyc{cyc}.png', dpi=150)
            plt.cla()

        # save the superpulse to our output file
        return superpulse