Beispiel #1
0
    def runtime_cycle(df_row):
        """
        Estimate the runtime of one cycle from its Ge raw file.

        The raw file path is built from `dg.lh5_dir`, `df_row.raw_path` and
        `df_row.raw_file`, with the `{sysn}` placeholder filled in as 'geds'.
        The last 'timestamp' entry of (at most) the first three groups listed
        in the file is read, and the largest one is taken as the runtime.

        Returns a pd.Series with:
          - 'runtime': largest final timestamp divided by 60
          - 'rt_std' : standard deviation of the final timestamps
            (NOTE(review): not divided by 60, so its unit differs from
            'runtime' -- confirm this is intended)
        Both are 0 when the file is missing or no timestamps can be read.
        """
        no_data = {'runtime': 0, 'rt_std': 0}

        # load raw file path (with {these} in it); always look for Ge
        f_raw = f'{dg.lh5_dir}/{df_row.raw_path}/{df_row.raw_file}'
        f_raw = f_raw.format_map({'sysn': 'geds'})
        if not os.path.exists(f_raw):
            return pd.Series(no_data)

        # for PGT, compare the first three channels (for redundancy)
        rts = []
        for ge in sto.ls(f_raw)[:3]:
            ts = lh5.load_nda([f_raw], ['timestamp'], ge + '/raw/')['timestamp']
            # a channel without any entries has no "last timestamp"
            if len(ts) == 0:
                continue
            rts.append(ts[-1])

        # nothing readable: treat it like a missing file instead of letting
        # max() fail on an empty list
        if not rts:
            return pd.Series(no_data)

        # take largest value & compute uncertainty
        runtime = max(rts) / 60
        rt_std = np.std(np.array([rts]))

        return pd.Series({'runtime': runtime, 'rt_std': rt_std})
Beispiel #2
0
def show_cal_spectrum(dg):
    """
    Apply the stored peakfit calibration to the dsp files and plot the
    calibrated 'trapEmax' spectrum, normalized to counts per second.

    Reads from `dg`:
      - dg.config['lh5_dir'] (environment variables are expanded)
      - dg.fileDB columns 'dsp_path', 'dsp_file', 'runtime', 'startTime', 'run'

    Calibration constants 'cal0', 'cal1', 'cal2' are taken from the first row
    of the 'peakfit_trapEmax' table in 'ecalDB.json' whose 'run' matches the
    (single) run in dg.fileDB.  The plot is written to
    './plots/CalSpectrum.png'.

    Terminates the program (SystemExit) if dg.fileDB spans more than one run.
    """
    # `exit()` is injected by the `site` module for interactive use and is not
    # guaranteed to exist in every interpreter setup; sys.exit() raises the
    # same SystemExit.  Imported locally so the block is self-contained.
    import sys

    # get file list and load energy data (numpy array)
    lh5_dir = os.path.expandvars(dg.config['lh5_dir'])
    dsp_list = lh5_dir + dg.fileDB['dsp_path'] + '/' + dg.fileDB['dsp_file']
    edata = lh5.load_nda(dsp_list, ['trapEmax'],
                         'ORSIS3302DecoderForEnergy/dsp')
    rt_min = dg.fileDB['runtime'].sum()
    u_start = dg.fileDB.iloc[0]['startTime']
    t_start = pd.to_datetime(u_start, unit='s')
    print('Found energy data:', [(et, len(ev)) for et, ev in edata.items()])
    print(f'Runtime (min): {rt_min:.2f}')

    # load calibration from peakfit into an in-memory TinyDB
    cal_db = db.TinyDB(storage=MemoryStorage)
    with open('ecalDB.json') as f:
        raw_db = json.load(f)
        cal_db.storage.write(raw_db)
    runs = dg.fileDB.run.unique()
    if len(runs) > 1:
        print("sorry, I can't do combined runs yet")
        sys.exit()
    run = runs[0]
    tb = cal_db.table("peakfit_trapEmax").all()
    df_cal = pd.DataFrame(tb)
    df_cal['run'] = df_cal['run'].astype(int)
    df_run = df_cal.loc[df_cal.run == run]
    cal_pars = df_run.iloc[0][['cal0', 'cal1', 'cal2']]

    # compute calibrated energy.
    # NOTE(review): np.poly1d treats its first coefficient as the highest
    # power, so 'cal0' multiplies x**2 here -- confirm this matches the order
    # in which the table was written.
    pol = np.poly1d(cal_pars)
    cal_data = pol(edata['trapEmax'])

    # histogram range/binning: pick one region, keep the others commented out
    # elo, ehi, epb, etype = 0, 3000, 1, 'trapEmax_cal'  # gamma region
    elo, ehi, epb, etype = 2500, 8000, 10, 'trapEmax_cal'  # overflow region
    # elo, ehi, epb, etype = 0, 250, 1, 'trapEmax_cal' # low-e region

    hist, bins, _ = pgh.get_hist(cal_data, range=(elo, ehi), dx=epb)

    # normalize by runtime (minutes -> seconds)
    hist_rt = np.divide(hist, rt_min * 60)

    # invisible dummy curve, only used to put the start time into the legend
    plt.plot(np.nan, np.nan, '-w', lw=1, label=f'start: {t_start}')

    plt.plot(bins[1:],
             hist_rt,
             ds='steps',
             c='b',
             lw=1,
             label=f'{etype}, {rt_min:.2f} mins')

    plt.xlabel(etype, ha='right', x=1)
    plt.ylabel('cts / sec', ha='right', y=1)
    plt.legend(loc=1, fontsize=12)
    plt.tight_layout()
    plt.savefig('./plots/CalSpectrum.png')
Beispiel #3
0
def check_raw_spectrum(dg, config, db_ecal):
    """
    $ ./energy_cal.py -q 'query' --raw

    Show (or, in batch mode, save) the uncalibrated spectrum of every energy
    estimator listed in config['rawe'], normalized to counts per second, so
    the user can choose the initial values for the ecal config file.
    """
    # build the list of dsp files and read all requested estimators at once
    file_db = dg.fileDB
    dsp_list = config['lh5_dir'] + file_db['dsp_path'] + '/' + file_db['dsp_file']
    raw_data = lh5.load_nda(dsp_list,
                            config['rawe'],
                            config['input_table'],
                            verbose=False)
    runtime_min = dg.fileDB['runtime'].sum()
    runtime_sec = runtime_min * 60

    print('\nShowing raw spectra ...')
    for etype in config['rawe']:
        # histogram limits and bin width for this estimator
        raw_range = config['init_vals'][etype]["raw_range"]
        xlo, xhi, xpb = raw_range

        energies = raw_data[etype]

        # list the columns available in the input table of the first file
        tb_in = db_ecal.table('_file_info').all()[0]['input_table']
        first_file = dsp_list.iloc[0]
        with h5py.File(first_file, 'r') as hf:
            columns = list(hf[f'{tb_in}'].keys())
            print("LH5 columns:", columns)

        # histogram, dropping the first bin edge (not needed with ds='steps')
        hist, edges, var = pgh.get_hist(energies, range=(xlo, xhi), dx=xpb)
        bins = edges[1:]

        # counts -> counts per second
        hist_rt = np.divide(hist, runtime_sec)

        instructions = (
            '\nPlease determine the following parameters for ecal config file:\n'
            "  - 'raw_range': Optimal binning, and hi/lo raw energy limits\n"
            "  - 'peakdet_thresh': ~1/2 the height of a target peak\n"
            "  - 'lowe_cut' energy threshold for peak detection")
        print(instructions)

        summary = f'\nRaw E: {etype}, {len(energies)} cts, runtime: {runtime_min:.2f} min'
        print(summary)

        plt.semilogy(bins, hist_rt, ds='steps', c='b', lw=1, label=etype)
        plt.xlabel(etype, ha='right', x=1)
        plt.ylabel(f'cts/sec, {xpb}/bin', ha='right', y=1)

        if not config['batch_mode']:
            plt.show()
        else:
            plt.savefig('./plots/energy_cal/cal_spec_test.png')
        plt.close()
Beispiel #4
0
def show_raw_spectrum(dg):
    """
    Plot a zoomed, uncalibrated 'trapEmax' spectrum in counts per second and
    save it to './plots/normScan/e_zoom.png'.

    - intended for reading off calibration constants for onboard energy and
      'trapE' energy
    - TODO: fit each expected peak and get resolution vs energy
    """
    # histogram settings: low edge, high edge, bin width, estimator name
    elo, ehi, epb, etype = 6000, 8000, 10, 'trapEmax'

    # file list -> energy arrays (dict of numpy arrays keyed by estimator)
    file_db = dg.fileDB
    dsp_list = dg.lh5_dir + file_db['dsp_path'] + '/' + file_db['dsp_file']
    edata = lh5.load_nda(dsp_list, ['trapEmax'],
                         'ORSIS3302DecoderForEnergy/dsp')

    # total runtime in minutes, and start time of the first file
    rt_min = dg.fileDB['runtime'].sum()
    first_row = dg.fileDB.iloc[0]
    t_start = pd.to_datetime(first_row['startTime'], unit='s')

    found = [(et, len(ev)) for et, ev in edata.items()]
    print('Found energy data:', found)
    print(f'Runtime (min): {rt_min:.2f}')

    hist, bins, _ = pgh.get_hist(edata[etype], range=(elo, ehi), dx=epb)

    # counts -> counts per second
    runtime_sec = rt_min * 60
    hist_rt = np.divide(hist, runtime_sec)

    # invisible dummy curve: only there to show the start time in the legend
    plt.plot(np.nan, np.nan, '-w', lw=1, label=t_start)

    spectrum_label = f'{etype}, {rt_min:.2f} mins'
    plt.semilogy(bins[1:], hist_rt, ds='steps', c='b', lw=1,
                 label=spectrum_label)

    plt.xlabel(etype, ha='right', x=1)
    plt.ylabel('cts / sec', ha='right', y=1)
    plt.legend()
    plt.tight_layout()
    plt.savefig('./plots/normScan/e_zoom.png', dpi=200)
Beispiel #5
0
def peakfit_group(df_group, config, db_ecal):
    """
    Fit the calibration peaks of one group of files and derive a polynomial
    energy calibration plus a FWHM-vs-energy resolution curve.

    For each energy estimator `et` in config['rawe']:
      1. scale the raw energies by the 'lincal' constant found in the
         `peakdet_{et}` table of `db_ecal` for this group's run,
      2. fit every expected/test peak with `pgf.gauss_bkg` in a window around
         its literature energy,
      3. solve a weighted least-squares polynomial of order
         config['pol_order'] that maps the fitted raw peak positions to the
         literature energies,
      4. repeat the peak fits on the re-calibrated energies and fit the
         resulting widths with FWHM(E) = sqrt(a_n^2 + a_f^2 E + a_c^2 E^2).

    Inputs used here:
      - df_group columns: 'run', 'dsp_path', 'dsp_file', 'runtime'
      - config keys: 'expected_peaks', 'test_peaks', 'lh5_dir', 'rawe',
        'input_table', 'pol_order', 'show_plot', and when plotting also
        'cal_range', 'pks', 'batch_mode'
      - db_ecal: object providing `.table(name).all()`

    Returns a pd.Series with, per estimator, the keys '{et}_Anoise',
    '{et}_Afano', '{et}_Aqcol', '{et}_cal{i}' and '{et}_unc{i}'.

    Calls exit() if the group contains more than one run or if the
    least-squares matrix is singular.
    """
    # get list of peaks to look for
    epeaks = config['expected_peaks'] + config['test_peaks']
    epeaks = np.array(sorted(epeaks))

    # right now a lookup by 'run' is hardcoded.
    # in principle the lookup should stay general using the gb_cols,
    # but it's kind of hard to see right now how to write the right db queries
    gb_run = df_group['run'].unique()
    if len(gb_run) > 1:
        print("Multi-run (or other) groupbys aren't supported yet, sorry")
        exit()

    # load data
    lh5_dir = os.path.expandvars(config['lh5_dir'])
    dsp_list = lh5_dir + df_group['dsp_path'] + '/' + df_group['dsp_file']
    raw_data = lh5.load_nda(dsp_list, config['rawe'], config['input_table'])
    runtime_min = df_group['runtime'].sum()

    # loop over energy estimators of interest
    pf_results = {}
    for et in config['rawe']:

        # load first-guess calibration constant from its table in the DB
        # (the table's 'run' column is compared against the run as a string;
        # the first matching row is used)
        db_table = db_ecal.table(f'peakdet_{et}').all()
        df_cal = pd.DataFrame(db_table)
        lin_cal = df_cal.loc[df_cal.run == str(gb_run[0])]['lincal'].values[0]
        cal_data = raw_data[et] * lin_cal

        # compute expected peak locations and widths (fit to Gaussians)
        fit_results = {}
        for ie, epk in enumerate(epeaks):

            # adjust the window.  resolution goes as roughly sqrt(energy)
            # NOTE(review): int(window) is 0 for epk < 4, which makes nbins 0
            # and the xpb division below fail -- confirm peaks are never
            # that low.
            window = np.sqrt(epk) * 0.5
            xlo, xhi = epk - window / 2, epk + window / 2
            nbins = int(window) * 5
            xpb = (xhi - xlo) / nbins
            # the first 20% of the bins are used as the background sample
            ibin_bkg = int(nbins * 0.2)

            # get histogram, error, normalize by runtime
            # NOTE(review): despite its name, hist_var holds
            # sqrt(counts / runtime_sec) per bin and is passed as `var=`
            # below -- check what pgf.fit_hist expects.
            pk_data = cal_data[(cal_data >= xlo) & (cal_data <= xhi)]
            hist, bins, _ = pgh.get_hist(pk_data, range=(xlo, xhi), dx=xpb)
            hist_norm = np.divide(hist, runtime_min * 60)
            hist_var = np.array(
                [np.sqrt(h / (runtime_min * 60)) for h in hist])

            # compute expected peak location and width (simple Gaussian)
            # bkg0: mean rate in the background bins; h: background-subtracted
            bkg0 = np.mean(hist_norm[:ibin_bkg])
            b, h = bins[1:], hist_norm - bkg0
            imax = np.argmax(h)
            # nearest bins on either side of the maximum that are at or below
            # half of the maximum
            upr_half = b[np.where((b > b[imax]) & (h <= np.amax(h) / 2))][0]
            bot_half = b[np.where((b < b[imax]) & (h <= np.amax(h) / 2))][-1]
            fwhm = upr_half - bot_half
            sig0 = fwhm / 2.355
            amp0 = np.amax(h) * fwhm
            # NOTE(review): the mean guess uses bins[imax] while imax indexes
            # b = bins[1:], so the guess sits one bin edge lower than b[imax].
            p_init = [amp0, bins[imax], sig0, bkg0]  # a, mu, sigma, bkg
            p_fit, p_cov = pgf.fit_hist(pgf.gauss_bkg,
                                        hist_norm,
                                        bins,
                                        var=hist_var,
                                        guess=p_init)
            p_err = np.sqrt(np.diag(p_cov))

            # diagnostic plot, don't delete
            if config['show_plot']:
                plt.axvline(bins[ibin_bkg], c='m', label='bkg region')
                xfit = np.arange(xlo, xhi, xpb * 0.1)
                plt.plot(xfit,
                         pgf.gauss_bkg(xfit, *p_init),
                         '-',
                         c='orange',
                         label='init')
                plt.plot(xfit,
                         pgf.gauss_bkg(xfit, *p_fit),
                         '-',
                         c='red',
                         label='fit')
                plt.plot(bins[1:], hist_norm, c='b', lw=1.5, ds='steps')
                plt.xlabel('pass-1 energy (kev)', ha='right', x=1)
                plt.legend(fontsize=12)
                plt.show()
                plt.close()

            # goodness of fit: mean over bins of |(model - data)^2 / model|
            chisq = []
            for i, h in enumerate(hist_norm):
                model = pgf.gauss_bkg(b[i], *p_fit)
                diff = (model - h)**2 / model
                chisq.append(abs(diff))
            rchisq = sum(np.array(chisq) / len(hist_norm))
            # fwhm_err = p_err[1] * 2.355 * e_peak / e_fit

            # collect interesting results for this row
            fit_results[ie] = {
                'epk': epk,
                'mu': p_fit[1],
                'fwhm': p_fit[2] * 2.355,
                'sig': p_fit[2],
                'amp': p_fit[0],
                'bkg': p_fit[3],
                'rchisq': rchisq,
                'mu_raw': p_fit[1] / lin_cal,  # <-- this is in terms of raw E
                'mu_unc': p_err[1] / lin_cal
            }

        # ----------------------------------------------------------------------
        # compute energy calibration by matrix inversion (thanks Tim and Jason!)

        view_cols = ['epk', 'mu', 'fwhm', 'bkg', 'rchisq', 'mu_raw']
        df_fits = pd.DataFrame(fit_results).T
        print(df_fits[view_cols])

        true_peaks = df_fits['epk']
        raw_peaks, raw_error = df_fits['mu_raw'], df_fits['mu_unc']

        # relative uncertainty of the raw position, scaled to the true energy;
        # the weights are 1/error^2 (`cov` is not used further below)
        error = raw_error / raw_peaks * true_peaks
        cov = np.diag(error**2)
        weights = np.diag(1 / error**2)

        # design matrix: one row per peak, columns pk**degree ... pk**0,
        # i.e. highest power first (the order np.poly1d expects)
        degree = config['pol_order']
        raw_peaks_matrix = np.zeros((len(raw_peaks), degree + 1))
        for i, pk in enumerate(raw_peaks):
            temp_degree = degree
            row = np.array([])
            while temp_degree >= 0:
                row = np.append(row, pk**temp_degree)
                temp_degree -= 1
            raw_peaks_matrix[i] += row
        print(raw_peaks_matrix)

        # perform matrix inversion
        # NOTE(review): the singularity test is an exact float comparison
        # with 0, so a nearly singular matrix still passes.
        xTWX = np.dot(np.dot(raw_peaks_matrix.T, weights), raw_peaks_matrix)
        xTWY = np.dot(np.dot(raw_peaks_matrix.T, weights), true_peaks)
        if np.linalg.det(xTWX) == 0:
            print("singular matrix, determinant is 0, can't get cal constants")
            exit()
        xTWX_inv = np.linalg.inv(xTWX)

        # get polynomial coefficients and error
        cal_pars = np.dot(xTWX_inv, xTWY)
        cal_errs = np.sqrt(np.diag(xTWX_inv))
        n = len(cal_pars)
        print(f'Fit:', ' '.join([f'p{i}:{cal_pars[i]:.4e}' for i in range(n)]))
        print(f'Unc:', ' '.join([f'p{i}:{cal_errs[i]:.4e}' for i in range(n)]))

        # ----------------------------------------------------------------------
        # repeat the peak fit with the calibrated energy (affects widths)

        # compute calibrated energy
        pol = np.poly1d(cal_pars)  # handy numpy polynomial object
        cal_data = pol(raw_data[et])

        # same window / initial-guess / fit procedure as the first pass, but
        # without the diagnostic plot and goodness-of-fit calculation
        fit_results = {}
        for ie, epk in enumerate(epeaks):

            # adjust the window.  resolution goes as roughly sqrt(energy)
            window = np.sqrt(epk) * 0.5
            xlo, xhi = epk - window / 2, epk + window / 2
            nbins = int(window) * 5
            xpb = (xhi - xlo) / nbins
            ibin_bkg = int(nbins * 0.2)

            # get histogram, error, normalize by runtime
            pk_data = cal_data[(cal_data >= xlo) & (cal_data <= xhi)]
            hist, bins, _ = pgh.get_hist(pk_data, range=(xlo, xhi), dx=xpb)
            hist_norm = np.divide(hist, runtime_min * 60)
            hist_var = np.array(
                [np.sqrt(h / (runtime_min * 60)) for h in hist])

            # compute expected peak location and width (simple Gaussian)
            bkg0 = np.mean(hist_norm[:ibin_bkg])
            b, h = bins[1:], hist_norm - bkg0
            imax = np.argmax(h)
            upr_half = b[np.where((b > b[imax]) & (h <= np.amax(h) / 2))][0]
            bot_half = b[np.where((b < b[imax]) & (h <= np.amax(h) / 2))][-1]
            fwhm = upr_half - bot_half
            sig0 = fwhm / 2.355
            amp0 = np.amax(h) * fwhm
            p_init = [amp0, bins[imax], sig0, bkg0]  # a, mu, sigma, bkg
            p_fit, p_cov = pgf.fit_hist(pgf.gauss_bkg,
                                        hist_norm,
                                        bins,
                                        var=hist_var,
                                        guess=p_init)
            p_err = np.sqrt(np.diag(p_cov))

            # save results
            fit_results[ie] = {
                'epk': epk,
                'mu': p_fit[1],
                'fwhm': p_fit[2] * 2.355,
                'sig': p_fit[2],
                'amp': p_fit[0],
                'bkg': p_fit[3],
            }

        # consolidate results again
        view_cols = ['epk', 'mu', 'fwhm', 'residual']
        df_fits = pd.DataFrame(fit_results).T

        # compute the difference between lit and measured values
        # (uses the raw peak positions from the first pass, pushed through
        # the new polynomial -- not the second-pass 'mu' values)
        cal_peaks = pol(raw_peaks)
        df_fits['residual'] = true_peaks - cal_peaks
        print(df_fits[view_cols])

        # fit fwhm vs. energy
        # FWHM(E) = sqrt(A_noise^2 + A_fano^2 * E + A_qcol^2 E^2)
        # Ref: Eq. 3 of https://arxiv.org/abs/1902.02299
        # TODO: fix error handling
        def sqrt_fwhm(x, a_n, a_f, a_c):
            return np.sqrt(a_n**2 + a_f**2 * x + a_c**2 * x**2)

        # unweighted fit of the second-pass widths; from here on p_fit/p_err
        # refer to the resolution curve, not to a single peak
        p_guess = [0.3, 0.05, 0.001]
        p_fit, p_cov = curve_fit(
            sqrt_fwhm, df_fits['mu'], df_fits['fwhm'],
            p0=p_guess)  #, sigma = np.sqrt(h), absolute_sigma=True)
        p_err = np.sqrt(np.diag(p_cov))

        if config['show_plot']:

            # show a split figure with calibrated spectrum + used peaks on top,
            # and calib.function and resolution vs. energy on bottom
            fig, (p0, p1) = plt.subplots(2, 1, figsize=(8, 8), sharex=True)
            # gridspec_kw={'height_ratios':[2, 1]}))

            # get histogram (cts / keV / d)
            xlo, xhi, xpb = config['cal_range']
            hist, bins, _ = pgh.get_hist(cal_data, range=(xlo, xhi), dx=xpb)
            hist_norm = np.divide(hist, runtime_min * 60 * xpb)

            # show peaks
            cmap = plt.cm.get_cmap('brg', len(df_fits) + 1)
            for i, row in df_fits.iterrows():

                # get a pretty label for the isotope: digits of the config
                # label become the superscript, letters the element symbol
                lbl = config['pks'][str(row['epk'])]
                iso = ''.join(r for r in re.findall('[0-9]+', lbl))
                ele = ''.join(r for r in re.findall('[a-z]', lbl, re.I))
                pk_lbl = r'$^{%s}$%s' % (iso, ele)

                pk_diff = row['epk'] - row['mu']
                p0.axvline(row['epk'],
                           ls='--',
                           c=cmap(i),
                           lw=1,
                           label=f"{pk_lbl} : {row['epk']} + {pk_diff:.3f}")

            p0.semilogy(bins[1:], hist_norm, ds='steps', c='b', lw=1)

            p0.set_ylabel('cts / s / keV', ha='right', y=1)
            p0.legend(loc=3, fontsize=11)

            # TODO: add fwhm errorbar
            x_fit = np.arange(xlo, xhi, xpb)
            y_init = sqrt_fwhm(x_fit, *p_guess)
            p1.plot(x_fit, y_init, '-', lw=1, c='orange', label='guess')

            y_fit = sqrt_fwhm(x_fit, *p_fit)
            a_n, a_f, a_c = p_fit
            fit_label = r'$\sqrt{(%.2f)^2 + (%.3f)^2 E + (%.4f)^2  E^2}$' % (
                a_n, a_f, a_c)
            p1.plot(x_fit, y_fit, '-r', lw=1, label=f'fit: {fit_label}')

            p1.plot(df_fits['mu'], df_fits['fwhm'], '.b')

            p1.set_xlabel('Energy (keV)', ha='right', x=1)
            p1.set_ylabel('FWHM (keV)', ha='right', y=1)
            p1.legend(fontsize=11)

            if config['batch_mode']:
                plt.savefig('./plots/peakdet_test.png')
            else:
                plt.show()

        # the order of the polynomial should be in the table name
        # ('{et}_cal{i}' follows the order of cal_pars, i.e. index 0 is the
        # highest-power coefficient)
        pf_results[f'{et}_Anoise'] = p_fit[0]
        pf_results[f'{et}_Afano'] = p_fit[1]
        pf_results[f'{et}_Aqcol'] = p_fit[2]
        for i in range(len(cal_pars)):
            pf_results[f'{et}_cal{i}'] = cal_pars[i]
        for i in range(len(cal_pars)):
            pf_results[f'{et}_unc{i}'] = cal_errs[i]

    return pd.Series(pf_results)
Beispiel #6
0
def peakdet_group(df_group, config):
    """
    Access all files in this group, load energy histograms, and find the
    "first guess" linear calibration constant for each energy estimator.

    Inputs used here:
      - df_group columns: 'dsp_path', 'dsp_file', 'runtime'
      - config keys: 'lh5_dir', 'rawe', 'input_table', 'init_vals'
        (per estimator: 'raw_range', 'peakdet_thresh', 'lowe_cut'),
        'expected_peaks', 'test_peaks', 'match_mode', 'raw_ene_tol',
        'show_plot', and when plotting also 'mp_tol', 'batch_mode'

    Returns a pd.Series with, per estimator `et`:
      - '{et}_lincal': the linear calibration constant from match_peaks
      - '{et}_lcpass': str() of the success flag from match_peaks
    """
    # get file list and load energy data
    lh5_dir = os.path.expandvars(config['lh5_dir'])
    dsp_list = lh5_dir + df_group['dsp_path'] + '/' + df_group['dsp_file']

    edata = lh5.load_nda(dsp_list, config['rawe'], config['input_table'])
    print('Found energy data:', [(et, len(ev)) for et, ev in edata.items()])

    runtime_min = df_group['runtime'].sum()
    print(f'Runtime (min): {runtime_min:.2f}')

    # loop over energy estimators of interest
    pd_results = {}
    for et in config['rawe']:

        # get histogram and normalize by runtime (minutes -> seconds)
        xlo, xhi, xpb = config['init_vals'][et]['raw_range']
        hist, bins, _ = pgh.get_hist(edata[et], range=(xlo, xhi), dx=xpb)
        hist_norm = np.divide(hist, runtime_min * 60)

        # run peakdet on the bin centers above the low-energy cut
        pd_thresh = config['init_vals'][et]['peakdet_thresh']
        lowe_cut = config['init_vals'][et]['lowe_cut']
        ctr_bins = (bins[:-1] + bins[1:]) / 2.
        idx = np.where(ctr_bins > lowe_cut)

        maxes, mins = pgc.peakdet(hist_norm[idx], pd_thresh, ctr_bins[idx])
        if len(maxes) == 0:
            print('warning, no maxima!  adjust peakdet threshold')
        # maxes: x (energy) in [:,0], y (counts) in [:,1]

        # run peak matching
        exp_pks = config['expected_peaks']
        tst_pks = config['test_peaks']
        mode = config['match_mode']
        etol = config['raw_ene_tol']
        lin_cal, mp_success = match_peaks(maxes, exp_pks, tst_pks, mode, etol)

        if config['show_plot']:

            # plot uncalibrated and calibrated energy spectrum, w/ maxima
            fig, (p0, p1) = plt.subplots(2, 1, figsize=(8, 8))

            idx = np.where(bins[1:] > lowe_cut)
            # index of each detected maximum in the bin-center array
            imaxes = [
                np.where(np.isclose(ctr_bins, x[0]))[0][0] for x in maxes
            ]
            imaxes = np.asarray(imaxes)

            # energy, uncalibrated
            p0.plot(bins[imaxes], hist_norm[imaxes], '.m')
            p0.plot(bins[idx],
                    hist_norm[idx],
                    ds='steps',
                    c='b',
                    lw=1,
                    label=et)
            p0.set_ylabel(f'cts/s, {xpb}/bin', ha='right', y=1)
            p0.set_xlabel(et, ha='right', x=1)

            # energy, with rough calibration
            bins_cal = bins[1:] * lin_cal
            p1.plot(bins_cal,
                    hist_norm,
                    ds='steps',
                    c='b',
                    lw=1,
                    label=f'E = {lin_cal:.3f}*{et}')

            # compute best-guess location of all peaks, assuming rough calibration
            cal_maxes = lin_cal * maxes[:, 0]
            all_pks = np.concatenate((exp_pks, tst_pks))
            raw_guesses = []
            for pk in all_pks:
                imatch = np.isclose(cal_maxes, pk, atol=config['mp_tol'])
                if imatch.any():
                    # keep [literature energy, first matching raw maximum]
                    raw_guesses.append([pk, maxes[:, 0][imatch][0]])
            rg = np.asarray(raw_guesses)
            rg = rg[rg[:, 0].argsort()]  # sort by energy

            cmap = plt.cm.get_cmap('jet', len(rg))
            for i, epk in enumerate(rg):
                idx_nearest = (np.abs(bins_cal - epk[0])).argmin()
                cts_nearest = hist_norm[idx_nearest]
                # marker only in the format string: the color comes from the
                # colormap, so a color letter here would just be overridden
                p1.plot(epk[0],
                        cts_nearest,
                        '.',
                        c=cmap(i),
                        label=f'{epk[0]:.1f} keV')

            p1.set_xlabel(f'{et}, pass-1 cal', ha='right', x=1)
            p1.set_ylabel(f'cts/s, {xpb} kev/bin', ha='right', y=1)
            p1.legend(fontsize=10)

            if config['batch_mode']:
                # f-string so each estimator gets its own output file
                plt.savefig(f'./plots/peakdet_cal_{et}.pdf')
            else:
                plt.show()

        pd_results[f'{et}_lincal'] = lin_cal
        pd_results[f'{et}_lcpass'] = str(mp_success)

    return pd.Series(pd_results)
Beispiel #7
0
def peakfit(df_group, config, db_ecal):
    """
    Example:
    $ ./energy_cal.py -q 'run==117' -pf [-pi 002 : use peakinput] [-p : show plot]

    Fit the calibration peaks of one group of files, starting from the
    first-pass constants stored in `db_ecal` (table `peakinp_{et}` when
    'input_id' is present in config, otherwise `peakdet_{et}`).

    Inputs used here:
      - df_group columns: 'run', 'cycle', 'dsp_path', 'dsp_file', 'runtime'
      - config keys: 'lh5_dir', 'rawe', 'input_table', 'fit_func',
        'expected_peaks', 'test_peaks', 'pol', 'init_vals', 'batch_mode',
        optionally 'input_id'

    NOTE(review): as written, the function calls exit() unconditionally
    after printing the "pass 2" polynomial for the first estimator, so the
    rest of the loop body and the final return are never reached.  The
    unreachable part also refers to `cpar`, `cerr` and `df_fits`, none of
    which is assigned anywhere in this function.
    """
    # choose the mode of peakdet to look up constants from
    if 'input_id' in config.keys():
        pol = config['pol'][0]
        print('  Using 1st-pass constants from peakdet_input')
        input_peaks = True
    else:
        print('  Using 1st-pass constants from peakdet_auto')
        input_peaks = False
        pol = 1  # and p0==0 always

    # run number and the first/last cycle of the group identify the DB row
    run = int(df_group.run.iloc[0])
    cyclo, cychi = df_group.cycle.iloc[0], df_group.cycle.iloc[-1]

    gb_run = df_group['run'].unique()
    if len(gb_run) > 1:
        print("Multi-run queries aren't supported yet, sorry!")
        exit()

    # load data and compute runtime
    dsp_list = config['lh5_dir'] + df_group['dsp_path'] + '/' + df_group[
        'dsp_file']
    raw_data = lh5.load_nda(dsp_list,
                            config['rawe'],
                            config['input_table'],
                            verbose=False)
    runtime_min = df_group['runtime'].sum()
    print(f'  Runtime: {runtime_min:.1f} min.  Calibrating:',
          [f'{et}:{len(ev)} events' for et, ev in raw_data.items()])
    print(f'  Fitting to:', config['fit_func'])

    # get list of peaks to look for
    epeaks = config['expected_peaks'] + config['test_peaks']
    epeaks = np.array(sorted(epeaks))

    # loop over energy estimators of interest
    pf_results = {}
    for et in config['rawe']:

        # load first-guess calibration constants from tables in the ecalDB
        # convention for p_i : p0  +  p1 * x  +  p2 * x**2  +  ...
        tb_name = f'peakinp_{et}' if input_peaks else f'peakdet_{et}'
        db_table = db_ecal.table(tb_name).all()
        df_cal = pd.DataFrame(db_table)
        if len(df_cal) == 0:
            print("Error, couldn't load cal constants for table:", tb_name)
            print("Try running: ./energy_cal.py -q '[query]' -s", tb_name)
            exit()

        # exactly one row must match this run and cycle range
        que = f'run=={run} and cyclo=={cyclo} and cychi=={cychi}'
        p1cal = df_cal.query(que)
        if len(p1cal) != 1:
            print(
                f"Can't load a unique set of cal constants!\n  Full cal DF, '{tb_name}':"
            )
            print(df_cal)
            print('Result of query:', que)
            print(p1cal)
            exit()
        # columns 'pol{p}' are read from the highest order down to 'pol0'
        cal_pars_init = [p1cal[f'pol{p}'].iloc[0]
                         for p in range(pol, -1, -1)]  # p2, p1, p0

        # NOTE(review): this overwrites the constant term read from the DB
        # with a hard-coded 45; it is marked for deletion.
        cal_pars_init[-1] = 45  # deleteme

        # NOTE: polyfit reverses the coefficients, putting highest order first
        cp = [f'p{i} {cp:.4e} ' for i, cp in enumerate(cal_pars_init[::-1])]
        print(f'  First pass inputs:', ' '.join(cp))

        # 1. use the first-guess constants to compute the expected mu_raw locations.
        # 2. run the peak fit on the raw peaks, compute new constants
        # 3. run the peak fit on the calibrated peaks, compute final constants

        f1 = fit_peaks(epeaks,
                       cal_pars_init,
                       raw_data[et],
                       runtime_min,
                       ff_name=config['fit_func'],
                       show_plot=False,
                       batch=config['batch_mode'])
        df1 = pd.DataFrame(f1).T

        # # xv - uncal, yval - calib.
        # polynomial of order config['pol'][0] mapping raw peak positions to
        # literature energies
        pfit, pcov = np.polyfit(df1['mu_raw'],
                                df1['epk'],
                                config['pol'][0],
                                cov=True)
        # perr = np.sqrt(np.diag(pcov))
        print("pass 1", pfit)
        # # print(perr)

        # second pass: the raw peak positions from pass 1 are passed as the
        # peak list together with the coefficients [0, 1, 0]
        # (presumably an identity calibration, so the fit runs in raw-energy
        # units -- confirm against fit_peaks)
        f2 = fit_peaks(df1['mu_raw'], [0, 1, 0],
                       raw_data[et],
                       runtime_min,
                       range=config['init_vals'][et]['raw_range'],
                       ff_name=config['fit_func'],
                       show_plot=True,
                       batch=config['batch_mode'])

        df2 = pd.DataFrame(f2).T

        pfit, pcov = np.polyfit(df2['mu'],
                                df2['epk'],
                                config['pol'][0],
                                cov=True)
        print("pass 2", pfit)

        # NOTE(review): unconditional exit -- everything below this line in
        # the loop body is currently unreachable.
        exit()

        # compute the difference between lit and measured values
        pfunc = np.poly1d(cpar)
        cal_data = pfunc(raw_data[et])
        cal_peaks = pfunc(df_fits['mu_raw'])
        df_fits['residual'] = df_fits['epk'] - df_fits['mu']
        res_uncertainty = df_fits['mu_err']

        cp = [f'p{i} {cp:.4e} ' for i, cp in enumerate(cpar[::-1])]
        print(f'  Peakfit outputs:', ' '.join(cp))
        print(df_fits)
        exit()

        # TODO: save this output to a SEPARATE output file (don't muck up pf_results,
        # which is intended to be just for the constants p0, p1, p2 ... etc.
        # print(df_fits)

        # fit fwhm vs. energy
        # FWHM(E) = sqrt(A_noise^2 + A_fano^2 * E + A_qcol^2 E^2)
        # Ref: Eq. 3 of https://arxiv.org/abs/1902.02299
        # TODO: fix error handling
        def sqrt_fwhm(x, a_n, a_f, a_c):
            return np.sqrt(a_n**2 + a_f**2 * x + a_c**2 * x**2)

        p_guess = [0.3, 0.05, 0.001]
        p_fit, p_cov = curve_fit(
            sqrt_fwhm, df_fits['mu'], df_fits['fwhm'],
            p0=p_guess)  #, sigma = np.sqrt(h), absolute_sigma=True)
        p_err = np.sqrt(np.diag(p_cov))

        # show a split figure with calibrated spectrum + used peaks on top,
        # and calib.function and resolution vs. energy on bottom
        if config['show_plot']:

            fig = plt.figure(figsize=(8, 8))
            p0 = plt.subplot(2, 1, 1)  # calibrated spectrum
            p1 = plt.subplot(2, 2, 3)  # resolution vs energy
            p2 = plt.subplot(2, 2, 4)  # fit_mu vs energy

            # 0. show calibrated spectrum with gamma lines
            # get histogram (cts / keV / d)
            xlo, xhi, xpb = config['cal_range']
            hist, bins, _ = pgh.get_hist(cal_data, range=(xlo, xhi), dx=xpb)
            hist_norm = np.divide(hist, runtime_min * 60 * xpb)

            # show peaks
            cmap = plt.cm.get_cmap('brg', len(df_fits) + 1)
            for i, row in df_fits.iterrows():

                # get a pretty label for the isotope: digits of the config
                # label become the superscript, letters the element symbol
                lbl = config['pks'][str(row['epk'])]
                iso = ''.join(r for r in re.findall('[0-9]+', lbl))
                ele = ''.join(r for r in re.findall('[a-z]', lbl, re.I))
                pk_lbl = r'$^{%s}$%s' % (iso, ele)

                pk_diff = row['epk'] - row['mu']
                p0.axvline(row['epk'],
                           ls='--',
                           c=cmap(i),
                           lw=1,
                           label=f"{pk_lbl} : {row['epk']} + {pk_diff:.3f}")

            p0.semilogy(bins[1:], hist_norm, ds='steps', c='b', lw=1)
            p0.set_ylim(1e-4)
            p0.set_xlabel('Energy (keV)', ha='right', x=1)
            p0.set_ylabel('cts / s / keV', ha='right', y=1)
            p0.legend(loc=3, fontsize=11)

            # 1. resolution vs. energy

            # TODO: add fwhm errorbar
            x_fit = np.arange(xlo, xhi, xpb)
            y_init = sqrt_fwhm(x_fit, *p_guess)
            # p1.plot(x_fit, y_init, '-', lw=1, c='orange', label='guess')

            y_fit = sqrt_fwhm(x_fit, *p_fit)
            a_n, a_f, a_c = p_fit
            fit_label = r'$\sqrt{(%.2f)^2 + (%.3f)^2 E + (%.4f)^2  E^2}$' % (
                a_n, a_f, a_c)
            p1.plot(x_fit, y_fit, '-r', lw=1, label=f'fit: {fit_label}')

            p1.errorbar(
                df_fits['mu'],
                df_fits['fwhm'],
                yerr=df_fits.fwhm_err,
                marker='.',
                mfc='b',
                ls='none',
            )

            p1.set_xlabel('Energy (keV)', ha='right', x=1)
            p1.set_ylabel('FWHM (keV)', ha='right', y=1)
            p1.legend(fontsize=11)

            # 2. fit_mu vs. energy
            p2.errorbar(df_fits.epk,
                        df_fits.epk - df_fits.mu,
                        yerr=df_fits.sig,
                        marker='.',
                        mfc='b',
                        ls='none',
                        label=r'$E_{true}$ - $E_{fit}$')
            p2.set_xlabel('Energy (keV)', ha='right', x=1)
            p2.set_ylabel('Residual (keV)', ha='right', y=1)
            p2.legend(fontsize=13)

            if config['batch_mode']:
                plt.savefig(
                    f'./plots/energy_cal/peakfit_{et}_run{run}_clo{cyclo}_chi{cychi}.pdf'
                )
            else:
                plt.show()
            plt.close('all')

        # fill in the peakfit results and return

        # cycle range
        pf_results[f'{et}_cyclo'] = cyclo
        pf_results[f'{et}_cychi'] = cychi

        # energy calibration constants
        for i, p in enumerate(cpar[::-1]):  # remember to flip the coeffs!
            pf_results[f'{et}_cal{i}'] = p

        # uncertainties in cal constants
        for i, pe in enumerate(cerr[::-1]):
            pf_results[f'{et}_unc{i}'] = pe

        # resolution curve parameters
        pf_results[f'{et}_Anoise'] = p_fit[0]
        pf_results[f'{et}_Afano'] = p_fit[1]
        pf_results[f'{et}_Aqcol'] = p_fit[2]
        pf_results[f'{et}_runtime'] = runtime_min

    return pd.Series(pf_results)
Beispiel #8
0
def peakdet_input(df_group, config):
    """
    $ ./energy_cal.py -q 'whatever' -pi [input_id] [-p]
    Instead of using the automatic peakdet algorithm, compute the first-guess
    calibration constants from an input file of known peak locations.

    Parameters
    ----------
    df_group : pandas DataFrame
        Rows of the file DB to calibrate together.  The columns 'dsp_path',
        'dsp_file', 'runtime', 'run' and 'cycle' are read.
    config : dict
        Settings.  Keys read here: 'lh5_dir', 'rawe', 'input_table',
        'init_vals', 'input_id', 'input_peaks', 'pol', 'show_plot' and
        'batch_mode'.

    Returns
    -------
    pandas Series
        For each energy estimator `et` in config['rawe']: '{et}_calpass',
        '{et}_runtime', '{et}_cyclo', '{et}_cychi' and the polynomial
        coefficients '{et}_pol{i}' in ascending order (p0, p1, ...).
    """
    # load data and compute runtime
    dsp_list = config['lh5_dir'] + df_group['dsp_path'] + '/' + df_group[
        'dsp_file']
    edata = lh5.load_nda(dsp_list,
                         config['rawe'],
                         config['input_table'],
                         verbose=False)
    runtime_min = df_group['runtime'].sum()
    run = int(df_group.run.iloc[0])
    cyclo, cychi = df_group.cycle.iloc[0], df_group.cycle.iloc[-1]
    print(f'  Runtime: {runtime_min:.1f} min.  Calibrating:',
          [f'{et}:{len(ev)} events' for et, ev in edata.items()])

    # the input peak file is the same for every estimator, so read it once
    inp_id = config['input_id']  # string id, like 002
    with open(config['input_peaks']) as f:
        pk_inputs = json.load(f)

    # loop over energy estimators of interest
    pd_results = {}
    for et in config['rawe']:

        # get histogram and normalize by the runtime in seconds
        xlo, xhi, xpb = config['init_vals'][et]['raw_range']
        hist, bins, _ = pgh.get_hist(edata[et], range=(xlo, xhi), dx=xpb)
        hist_norm = np.divide(hist, runtime_min * 60)

        # each entry of the input table is read as (true energy, raw value)
        pk_list = pk_inputs[inp_id][et]
        yv = [pk[0] for pk in pk_list.values()]  # true peaks (keV)
        xv_input = [pk[1] for pk in pk_list.values()]  # raw peaks (uncalib.)

        # To make the input_peaks method more robust, add a step to refine
        # the input peak guess that can catch small changes in gain.
        # For each peak, select the maximum bin within 3% of the input
        # raw energy value.  It's hard to make this window larger if you're
        # using calibration peaks very close together (like 583 and 609).
        #
        # `bins` has one more entry than `hist_norm` (it is plotted below as
        # bins[1:]), so only search bins[:-1]: every index found is then a
        # valid index into the histogram.
        left_edges = bins[:-1]
        xv = []
        for rpk in xv_input:
            winlo, winhi = rpk * (1 - 0.03), rpk * (1 + 0.03)
            in_win = np.flatnonzero((left_edges >= winlo)
                                    & (left_edges <= winhi))
            if in_win.size == 0:
                # nothing to refine against: keep the user's guess
                print(f'  Warning: no histogram bins within 3% of input '
                      f'peak {rpk}, using the input value as-is')
                xv.append(rpk)
                continue
            ipk = in_win[np.argmax(hist_norm[in_win])]
            xv.append(bins[ipk])

        # run polyfit (pass-1 fit is simple)
        pol = config['pol'][0]
        pfit = np.polyfit(xv, yv, pol)  # p2, p1, p0

        # save results for this energy estimator
        pd_results[f'{et}_calpass'] = True
        pd_results[f'{et}_runtime'] = runtime_min
        pd_results[f'{et}_cyclo'] = cyclo
        pd_results[f'{et}_cychi'] = cychi
        for i, p in enumerate(np.flip(pfit)):  # p0, p1, p2
            pd_results[f'{et}_pol{i}'] = p

        if config['show_plot']:

            # plot uncalibrated and calibrated energy spectrum, w/ maxima
            fig, (p0, p1) = plt.subplots(2, 1, figsize=(8, 8))

            # 1. show spectrum and input peaks
            p0.semilogy(bins[1:], hist_norm, 'b', ds='steps', lw=1)

            # invisible entry, only there to put the run info in the legend
            p0.plot(np.nan,
                    np.nan,
                    '-w',
                    label=f'Run {run}, cyc {cyclo}--{cychi}')

            cmap = plt.cm.get_cmap('jet', len(pk_list))
            for i, (rpk, epk) in enumerate(zip(xv, yv)):
                # clip so a peak at/after the last edge still maps to a bin
                idx = min(np.abs(bins - rpk).argmin(), len(hist_norm) - 1)
                p0.plot(rpk,
                        hist_norm[idx],
                        'v',
                        ms=10,
                        c=cmap(i),
                        label=f'{epk} : {rpk:.0f}')

            p0.set_xlabel(f'{et} (uncal)', ha='right', x=1)
            # hist_norm is divided by runtime_min * 60, i.e. it is per second
            p0.set_ylabel(f'Counts / s / {xpb:.1f} keV', ha='right', y=1)
            p0.legend(fontsize=10)
            p0.set_ylim(1e-4)

            # 2: show the calibration curve fit result
            p1.plot(np.nan,
                    np.nan,
                    '-w',
                    label=f'Run {run}, cyc {cyclo}--{cychi}')

            p1.plot(xv, yv, '.k')

            polfunc = np.poly1d(pfit)  # handy numpy polynomial function
            yfit = polfunc(xv)
            pol_label = '  '.join(
                [f'p{i} : {ene:.2e}' for i, ene in enumerate(pfit[::-1])])
            p1.plot(xv, yfit, '-r', lw=2, label=pol_label)

            p1.set_xlabel(f'{et} (uncal)', ha='right', x=1)
            p1.set_ylabel('Energy (keV)', ha='right', y=1)
            p1.legend(fontsize=10)

            if config['batch_mode']:
                plt.savefig(
                    f'./plots/energy_cal/peakinput_{et}_run{run}_clo{cyclo}_chi{cychi}.pdf'
                )
            else:
                plt.show()
            plt.close(fig)

    return pd.Series(pd_results)
Beispiel #9
0
def peakfit(df_group, config, db_ecal):
    """
    Example:
    $ ./energy_cal.py -q 'run==117' -pf [-pi 002 : use peakinput] [-p : show plot]

    Second-pass energy calibration.  For each energy estimator, look up the
    first-pass constants in `db_ecal`, fit every expected peak, solve a
    weighted least-squares problem for the calibration polynomial, refit the
    peaks with the new constants, and fit the FWHM-vs-energy curve.

    Parameters
    ----------
    df_group : pandas DataFrame
        Rows of the file DB to calibrate together (a single run).  The
        columns 'dsp_path', 'dsp_file', 'runtime', 'run' and 'cycle' are read.
    config : dict
        Settings.  Keys read here: 'pol', 'lh5_dir', 'rawe', 'input_table',
        'fit_func', 'expected_peaks', 'test_peaks', 'verbose', 'batch_mode',
        'show_plot', and (when plotting) 'cal_range' and 'pks'.  The presence
        of 'input_id' selects the 'peakinp_*' tables over 'peakdet_*'.
    db_ecal : database object
        Must provide `.table(name).all()` returning the first-pass records.

    Returns
    -------
    pandas Series
        For each estimator `et`: '{et}_cyclo', '{et}_cychi', '{et}_cal{i}'
        and '{et}_unc{i}' (ascending order p0, p1, ...), '{et}_Anoise',
        '{et}_Afano', '{et}_Aqcol' and '{et}_runtime'.

    Calls exit() for multi-run input, when a unique set of first-pass
    constants can't be found, or when the normal matrix is singular.
    """
    # choose the mode of peakdet to look up constants from
    if 'input_id' in config:
        pol = config['pol'][0]
        print('  Using 1st-pass constants from peakdet_input')
        input_peaks = True
    else:
        print('  Using 1st-pass constants from peakdet_auto')
        input_peaks = False
        pol = 1  # and p0==0 always

    run = int(df_group.run.iloc[0])
    cyclo, cychi = df_group.cycle.iloc[0], df_group.cycle.iloc[-1]

    gb_run = df_group['run'].unique()
    if len(gb_run) > 1:
        print("Multi-run queries aren't supported yet, sorry!")
        exit()

    # load data and compute runtime
    dsp_list = config['lh5_dir'] + df_group['dsp_path'] + '/' + df_group[
        'dsp_file']
    raw_data = lh5.load_nda(dsp_list,
                            config['rawe'],
                            config['input_table'],
                            verbose=False)
    runtime_min = df_group['runtime'].sum()
    print(f'  Runtime: {runtime_min:.1f} min.  Calibrating:',
          [f'{et}:{len(ev)} events' for et, ev in raw_data.items()])
    print('  Fitting to:', config['fit_func'])

    # get list of peaks to look for
    epeaks = config['expected_peaks'] + config['test_peaks']
    epeaks = np.array(sorted(epeaks))

    # resolution model, fit to fwhm vs. energy for every estimator
    # FWHM(E) = sqrt(A_noise^2 + A_fano^2 * E + A_qcol^2 E^2)
    # Ref: Eq. 3 of https://arxiv.org/abs/1902.02299
    def sqrt_fwhm(x, a_n, a_f, a_c):
        return np.sqrt(a_n**2 + a_f**2 * x + a_c**2 * x**2)

    p_guess = [0.3, 0.05, 0.001]

    # loop over energy estimators of interest
    pf_results = {}
    for et in config['rawe']:

        # load first-guess calibration constants from tables in the ecalDB
        # convention for p_i : p0  +  p1 * x  +  p2 * x**2  +  ...
        tb_name = f'peakinp_{et}' if input_peaks else f'peakdet_{et}'
        db_table = db_ecal.table(tb_name).all()
        df_cal = pd.DataFrame(db_table)
        que = f'run=={run} and cyclo=={cyclo} and cychi=={cychi}'
        p1cal = df_cal.query(que)
        if len(p1cal) != 1:
            print(
                f"Can't load a unique set of cal constants!\n  Full cal DF, '{tb_name}':"
            )
            print(df_cal)
            print('Result of query:', que)
            print(p1cal)
            exit()
        cal_pars_init = [p1cal[f'pol{p}'].iloc[0] for p in range(pol, -1, -1)]

        # NOTE: polyfit reverses the coefficients, putting highest order first
        init_strs = [
            f'p{i} {par:.4e} ' for i, par in enumerate(cal_pars_init[::-1])
        ]
        print('  First pass inputs:', ' '.join(init_strs))

        # loop over each peak
        fit_results = fit_peaks(epeaks, cal_pars_init, raw_data[et],
                                runtime_min, config['fit_func'],
                                config['verbose'], config['batch_mode'])

        df_fits = pd.DataFrame(fit_results).T
        # print(df_fits)

        # ----------------------------------------------------------------------
        # compute energy calibration by matrix inversion (thanks Tim and Jason!)

        true_peaks = df_fits['epk']
        raw_peaks, raw_error = df_fits['mu_raw'], df_fits['mu_unc']

        # scale the raw-position uncertainty by true/raw before weighting
        error = raw_error / raw_peaks * true_peaks
        weights = np.diag(1 / error**2)

        # design matrix with columns of descending degree (x^deg, ..., x, 1),
        # which is exactly what np.vander builds by default
        degree = config['pol'][0]
        raw_peaks_matrix = np.vander(np.asarray(raw_peaks, dtype=float),
                                     degree + 1)
        # print(raw_peaks_matrix)

        # perform matrix inversion
        xTWX = np.dot(np.dot(raw_peaks_matrix.T, weights), raw_peaks_matrix)
        xTWY = np.dot(np.dot(raw_peaks_matrix.T, weights), true_peaks)
        if np.linalg.det(xTWX) == 0:
            print("singular matrix, determinant is 0, can't get cal constants")
            exit()
        xTWX_inv = np.linalg.inv(xTWX)

        # get polynomial coefficients and error (highest order first)
        cal_pars_best = np.dot(xTWX_inv, xTWY)
        cal_errs = np.sqrt(np.diag(xTWX_inv))

        best_strs = [
            f'p{i} {par:.4e} ' for i, par in enumerate(cal_pars_best[::-1])
        ]
        print('  Peakfit results:  ', ' '.join(best_strs))

        # ----------------------------------------------------------------------

        # repeat the peak fit with the new 'best' energy (can affect width
        # especially if the peaks are displaced from the guessed locations)
        fit_results = fit_peaks(epeaks, cal_pars_best, raw_data[et],
                                runtime_min, config['fit_func'],
                                config['verbose'], config['batch_mode'])

        df_fits = pd.DataFrame(fit_results).T

        # compute the difference between lit and measured values.
        # NOTE: true_peaks / raw_peaks still come from the FIRST-pass fit, so
        # the residual is that of the calibration fit itself, not of the refit
        pfunc = np.poly1d(cal_pars_best)
        cal_peaks = pfunc(raw_peaks)
        df_fits['residual'] = true_peaks - cal_peaks
        print(df_fits)

        # fit fwhm vs. energy
        # TODO: fix error handling
        p_fit, _ = curve_fit(
            sqrt_fwhm, df_fits['mu'], df_fits['fwhm'],
            p0=p_guess)  #, sigma = np.sqrt(h), absolute_sigma=True)

        # show a split figure with calibrated spectrum + used peaks on top,
        # and calib.function and resolution vs. energy on bottom
        if config['show_plot']:

            plt.figure(figsize=(8, 8))
            p0 = plt.subplot(2, 1, 1)  # calibrated spectrum
            p1 = plt.subplot(2, 2, 3)  # resolution vs energy
            p2 = plt.subplot(2, 2, 4)  # fit_mu vs energy

            # 0. show calibrated spectrum with gamma lines
            # the full calibrated data set is only needed for this plot
            cal_data = pfunc(raw_data[et])

            # get histogram, normalized by runtime (sec) and bin width
            xlo, xhi, xpb = config['cal_range']
            hist, bins, _ = pgh.get_hist(cal_data, range=(xlo, xhi), dx=xpb)
            hist_norm = np.divide(hist, runtime_min * 60 * xpb)

            # show peaks
            cmap = plt.cm.get_cmap('brg', len(df_fits) + 1)
            for i, row in df_fits.iterrows():

                # get a pretty label for the isotope
                lbl = config['pks'][str(row['epk'])]
                iso = ''.join(r for r in re.findall('[0-9]+', lbl))
                ele = ''.join(r for r in re.findall('[a-z]', lbl, re.I))
                pk_lbl = r'$^{%s}$%s' % (iso, ele)

                pk_diff = row['epk'] - row['mu']
                p0.axvline(row['epk'],
                           ls='--',
                           c=cmap(i),
                           lw=1,
                           label=f"{pk_lbl} : {row['epk']} + {pk_diff:.3f}")

            p0.semilogy(bins[1:], hist_norm, ds='steps', c='b', lw=1)
            p0.set_ylim(1e-4)
            p0.set_xlabel('Energy (keV)', ha='right', x=1)
            p0.set_ylabel('cts / s / keV', ha='right', y=1)
            p0.legend(loc=3, fontsize=11)

            # 1. resolution vs. energy

            # TODO: add fwhm errorbar
            x_fit = np.arange(xlo, xhi, xpb)
            y_fit = sqrt_fwhm(x_fit, *p_fit)
            a_n, a_f, a_c = p_fit
            fit_label = r'$\sqrt{(%.2f)^2 + (%.3f)^2 E + (%.4f)^2  E^2}$' % (
                a_n, a_f, a_c)
            p1.plot(x_fit, y_fit, '-r', lw=1, label=f'fit: {fit_label}')

            p1.plot(df_fits['mu'], df_fits['fwhm'], '.b')

            p1.set_xlabel('Energy (keV)', ha='right', x=1)
            p1.set_ylabel('FWHM (keV)', ha='right', y=1)
            p1.legend(fontsize=11)

            # 2. fit_mu vs. energy
            p2.plot(df_fits.epk,
                    df_fits.epk - df_fits.mu,
                    '.b',
                    label=r'$E_{true}$ - $E_{fit}$')
            p2.set_xlabel('Energy (keV)', ha='right', x=1)
            p2.set_ylabel('Residual (keV)', ha='right', y=1)
            p2.legend(fontsize=13)

            if config['batch_mode']:
                plt.savefig(
                    f'./plots/energy_cal/peakfit_{et}_run{run}_clo{cyclo}_chi{cychi}.pdf'
                )
            else:
                plt.show()
            plt.close('all')

        # fill in the peakfit results and return

        # cycle range
        pf_results[f'{et}_cyclo'] = cyclo
        pf_results[f'{et}_cychi'] = cychi

        # energy calibration constants
        for i, p in enumerate(
                cal_pars_best[::-1]):  # remember to flip the coeffs!
            pf_results[f'{et}_cal{i}'] = p

        # uncertainties in cal constants
        for i, pe in enumerate(cal_errs[::-1]):
            pf_results[f'{et}_unc{i}'] = pe

        # resolution curve parameters
        pf_results[f'{et}_Anoise'] = p_fit[0]
        pf_results[f'{et}_Afano'] = p_fit[1]
        pf_results[f'{et}_Aqcol'] = p_fit[2]
        pf_results[f'{et}_runtime'] = runtime_min

    return pd.Series(pf_results)
Beispiel #10
0
    def analyze_pulser_run(df_row):
        """
        Build the superpulse (average waveform) for one pulser run.

        Called on each row of dfp.  Besides `df_row`, this reads names from
        the enclosing scope: dg, dsp_name, raw_name, sto, ecal, e_window,
        nwfs, bl_thresh and show_plots.

        Steps: locate the pulser peak in the calibrated onboard energy, fit
        it with a gaussian + background, take up to `nwfs` events within one
        sigma of the peak, drop waveforms with outlier baselines, baseline
        subtract, average, and normalize to the superpulse maximum.

        Returns the normalized superpulse (1d array), or an empty list for
        the background run (E_keV == 0) or when no event falls in the
        1-sigma window.
        """
        epk, rt, vp, cyc = df_row[['E_keV', 'runtime', 'V_pulser', 'cycle']]
        rt *= 60  # sec
        if epk == 0: return []  # skip the bkg run

        # load pulser energies
        f_dsp = dg.lh5_dir + '/' + df_row.dsp_path + '/' + df_row.dsp_file
        pdata = lh5.load_nda([f_dsp], ['energy'], dsp_name)['energy'] * ecal

        # auto-narrow the window around the max pulser peak in two steps
        elo, ehi, epb = epk - 50, epk + 50, 0.5
        pdata_all = pdata[(pdata > elo) & (pdata < ehi)]
        hp, bp, _ = pgh.get_hist(pdata_all, range=(elo, ehi), dx=epb)
        pctr = bp[np.argmax(hp)]

        plo, phi, ppb = pctr - e_window, pctr + e_window, 0.1
        pdata_pk = pdata[(pdata > plo) & (pdata < phi)]
        hp, bp, bpvars = pgh.get_hist(pdata_pk, range=(plo, phi), dx=ppb)
        hp_rt = np.divide(hp, rt)  # rate histogram, used for plotting
        hp_var = np.sqrt(np.divide(hp, rt))

        # fit a gaussian to get 1 sigma e-values
        # 14 July 2021 Joule changed p_init to use outputs gauss_mode_with_max() b/c fit wasn't
        # working with previous initial guess.  The old hand-made guess
        # (half-max search for the width) was unused and could itself fail
        # when no half-max crossing exists, so it has been removed.
        pars, _ = pgf.gauss_mode_width_max(hp, bp, bpvars, n_bins=50)
        p_init = [pars[2], pars[0], pars[1], 1]
        # NOTE(review): the fit runs on the raw counts `hp`, while the plot
        # below overlays the fit curves on the rate histogram `hp_rt` --
        # confirm this is intended.
        p_fit, _ = pgf.fit_hist(pgf.gauss_bkg,
                                hp,
                                bp,
                                var=hp_var,
                                guess=p_init)
        _, mu, sigma, _ = p_fit

        # select events within 1 sigma of the maximum
        # and pull the waveforms from the raw file to make a superpulse.
        # Take the index array out of the np.where tuple right away, so the
        # code below sees the same type whether or not we truncate to nwfs.
        idx = np.where((pdata >= mu - sigma) & (pdata <= mu + sigma))[0]
        print(
            f'Pulser at {epk} keV, {len(idx)} events.  Limiting to {nwfs}.')
        if len(idx) == 0:
            print(f'No events within 1 sigma of the pulser peak, skipping.')
            return []
        idx = idx[:nwfs]

        # grab the 2d numpy array of pulser wfs
        n_rows = int(idx[-1]) + 1  # read up to this event and stop
        f_raw = dg.lh5_dir + '/' + df_row.raw_path + '/' + df_row.raw_file
        tb_wfs, _ = sto.read_object(raw_name, f_raw, n_rows=n_rows)
        pwfs = tb_wfs['values'].nda[idx, :]
        # print(idx, len(idx), pwfs.shape, '\n', pwfs)

        # data cleaning step: remove events with outlier baselines
        # (baseline = mean of the first 500 samples)
        bl_means = pwfs[:, :500].mean(axis=1)
        # `mode` is presumably scipy.stats.mode: depending on the SciPy
        # version its result is a length-1 array or a scalar, atleast_1d
        # accepts both.
        bl_mode = np.atleast_1d(mode(bl_means.astype(int))[0])[0]
        bl_ctr = np.subtract(bl_means, bl_mode)
        idx_dc = np.where(np.abs(bl_ctr) < bl_thresh)
        pwfs = pwfs[idx_dc[0], :]
        bl_means = bl_means[idx_dc]
        print(pwfs.shape, bl_means.shape)

        # baseline subtract (trp when leading (not trailing) dim is the same)
        wfs = (pwfs.transpose() - bl_means).transpose()

        # !!!!15 July 2021: Joule commented this out because somehow it makes superpulses 150 instead of 8192 samples!!!!

        # time-align all wfs at their 50% timepoint (tricky!).
        # adapted from pygama/sandbox/old_dsp/[calculators,transforms].py
        # an alternate approach would be to use ProcessingChain here
        # wf_maxes = np.amax(wfs, axis=1)
        # timepoints = np.argmax(wfs >= wf_maxes[:, None]*tp_align, axis=1)
        # wf_idxs = np.zeros([wfs.shape[0], n_pre + n_post], dtype=int)
        # row_idxs = np.zeros_like(wf_idxs)
        # for i, tp in enumerate(timepoints):
        # wf_idxs[i, :] = np.arange(tp - n_pre, tp + n_post)
        # row_idxs[i, :] = i
        # wfs = wfs[row_idxs, wf_idxs]
        # print(f'len wfs: {len(wfs[1])}')

        # take the average to get the superpulse
        superpulse = np.mean(wfs, axis=0)

        # normalize all wfs to the superpulse maximum
        wfmax = np.amax(superpulse)
        superpulse = np.divide(superpulse, wfmax)
        wfs = np.divide(wfs, wfmax)

        # -- plot results --
        if show_plots:
            fig, (p0, p1) = plt.subplots(2, figsize=(7, 8))

            # plot fit result (top), and waveforms + superpulse (bottom)
            xfit = np.arange(plo, phi, ppb * 0.1)
            p0.plot(xfit,
                    pgf.gauss_bkg(xfit, *p_init),
                    '-',
                    c='orange',
                    label='init')
            p0.plot(xfit,
                    pgf.gauss_bkg(xfit, *p_fit),
                    '-',
                    c='red',
                    label='fit')

            # plot 1 sigma window
            p0.axvspan(mu - sigma,
                       mu + sigma,
                       color='m',
                       alpha=0.2,
                       label='1 sigma')

            # plot data
            p0.plot(bp[1:],
                    hp_rt,
                    ds='steps',
                    c='k',
                    lw=1,
                    label=f'{vp:.2f} V')
            p0.set_xlabel(f'onboard energy (keV, c={ecal:.2e})',
                          ha='right',
                          x=1)
            p0.set_ylabel('cts / s', ha='right', y=1)
            p0.legend(fontsize=10)

            # plot individ. wfs
            ts = np.arange(0, len(wfs[0, :]))
            for iwf in range(wfs.shape[0]):
                p1.plot(ts, wfs[iwf, :], '-k', lw=2, alpha=0.5)
            p1.plot(np.nan, np.nan, '-k', label=f'wfs, {epk:.0f} keV')

            # plot superpulse
            p1.plot(ts,
                    superpulse,
                    '-r',
                    lw=2,
                    label=f'superpulse, {vp:.2f} V')

            p1.set_xlabel('time (10 ns)', ha='right', x=1)
            p1.set_ylabel('amplitude', ha='right', y=1)
            p1.legend(fontsize=10)
            # plt.show()
            plt.savefig(f'./plots/superpulse_cyc{cyc}.png', dpi=150)
            # a new figure is made on every call: close it so figures don't
            # pile up over a long list of pulser runs
            plt.close(fig)

        # save the superpulse to our output file
        print(f'length of superpulse: {len(superpulse)}')
        return superpulse
Beispiel #11
0
def show_spectra(dfp, dg):
    """
    plot events from each pulser peak on top of a background spectrum run,
    to show where in the spectrum we sampled from.
    let's use the E_keV column to find the pulser peaks.
    need to figure out the proper calibration constant (use onboard energy)
    so load the bkg run and figure out the calibration constant.
    that's the parameter we need for get_superpulses.

    Parameters
    ----------
    dfp : pandas DataFrame
        One row per run; the first row is taken as the background run.  The
        columns 'dsp_path', 'dsp_file', 'runtime', 'E_keV' and 'V_pulser'
        are read.
    dg : object
        Only its `lh5_dir` attribute is used, as the data directory.

    Saves the figure to ./plots/transferfn_peaks.pdf and shows it.
    Returns nothing.
    """
    # set to True to histogram the raw bkg energies and exit, which is how
    # the calibration constant below can be worked out
    run_diagnostic = False

    f_dsp = dg.lh5_dir + '/' + dfp.dsp_path + '/' + dfp.dsp_file
    f_bkg = f_dsp.iloc[0]  # bkg run is 0 by dfn
    print('Background run:', f_bkg)

    # dataframe method - pulls all values from table
    # sto = lh5.Store()
    # tb_data, n_rows = sto.read_object('ORSIS3302DecoderForEnergy/dsp', f_bkg)
    # df_data = tb_data.get_dataframe()

    # load_nda method - just grab onboard energy
    tb_name = 'ORSIS3302DecoderForEnergy/dsp'
    edata = lh5.load_nda([f_bkg], ['energy'], tb_name)['energy']

    # use this flag to figure out the calibration of the 1460 line
    if run_diagnostic:
        elo, ehi, epb = 0, 1e7, 10000
        hist, bins, _ = pgh.get_hist(edata, range=(elo, ehi), dx=epb)
        plt.semilogy(bins[1:], hist, ds='steps', c='b', lw=1)
        plt.show()
        exit()

    ecal = 1460.8 / 2.005e6  # works for pulser dataset 2 (dec 2020)

    elo, ehi, epb = 0, 5000, 10
    hist, bins, _ = pgh.get_hist(edata * ecal, range=(elo, ehi), dx=epb)
    runtime = dfp.iloc[0].runtime * 60  # sec
    hist_rt = np.divide(hist, runtime)
    # `runtime` is in seconds here, convert back for the printout in minutes
    print(f'bkg runtime: {runtime / 60:.2f} min')

    cmap = plt.cm.get_cmap('jet', len(dfp))
    # color by row position (not index label), so the colormap is sampled
    # correctly even if dfp doesn't carry a 0..N-1 index
    for i, (_, df_row) in enumerate(dfp.iterrows()):

        epk, rt, vp = df_row[['E_keV', 'runtime', 'V_pulser']]
        rt *= 60  # sec
        if epk == 0: continue  # skip the bkg run

        # draw the expected peak location based on our input table
        plt.axvline(epk, lw=1, alpha=0.5)

        # load pulser data
        f_dsp = dg.lh5_dir + '/' + df_row.dsp_path + '/' + df_row.dsp_file
        pdata = lh5.load_nda([f_dsp], ['energy'], tb_name)['energy'] * ecal

        # take a wide window around where we expect the pulser peak
        pdata = pdata[(pdata > epk - 50) & (pdata < epk + 50)]
        hp, bp, _ = pgh.get_hist(pdata, range=(elo, ehi), dx=epb)
        hp_rt = np.divide(hp, rt)
        plt.semilogy(bp[1:],
                     hp_rt,
                     ds='steps',
                     lw=1,
                     c=cmap(i),
                     label=f'{vp:.2f} V')

    plt.semilogy(bins[1:], hist_rt, ds='steps', c='k', lw=1, label='bkg data')

    plt.xlabel(f'onboard energy (keV, c={ecal:.2e})', ha='right', x=1)
    plt.ylabel('cts / s', ha='right', y=1)
    plt.legend(fontsize=10)
    plt.savefig('./plots/transferfn_peaks.pdf')
    plt.show()
    plt.clf()