def runAutoThresh(ds=None):
    """ ./job-panda.py [-q] [-ds dsNum] -thresh
    Generates auto-thresh jobs. Default is to do it for all datasets.
    """
    bkg = dsi.BkgInfo()
    dsList = [ds] if ds is not None else [0, 1, 2, 3, 4, "5A", "5B", "5C", 6]
    for ds in dsList:
        dsNum = ds if isinstance(ds, int) else 5
        dub = "-d" if dsNum < 6 else ""
        for sub in bkg.getRanges(ds):
            if dsNum == 5 and sub >= 113: dub = ""
            subr = bkg.GetSubRanges(ds, sub)
            if len(subr) > 0:
                # sub-ranges exist: submit one job per (runLo, runHi) pair
                for runLo, runHi in subr:
                    job = "./auto-thresh %d %d -s %d %d %s -o %s" % (dsNum, sub, runLo, runHi, dub, dsi.threshDir)
                    if useJobQueue:
                        sh("%s >& ./logs/thresh-ds%d-%d-%d-%d.txt" % (job, dsNum, sub, runLo, runHi))
                    else:
                        sh("%s '%s'" % (jobStr, job))
            else:
                # no sub-ranges: one job covers the whole bkgIdx
                job = "./auto-thresh %d %d %s -o %s" % (dsNum, sub, dub, dsi.threshDir)
                if useJobQueue:
                    sh("%s >& ./logs/thresh-ds%d-%d.txt" % (job, dsNum, sub))
                else:
                    sh("%s '%s'" % (jobStr, job))

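# A dry-run sketch of the dispatch above: print the auto-thresh job strings for
# one dataset instead of submitting them. It reuses only calls already used in
# runAutoThresh (dsi.BkgInfo, getRanges, GetSubRanges); the default dataset
# "5A" is just an illustrative choice.
def previewAutoThresh(ds="5A"):
    bkg = dsi.BkgInfo()
    dsNum = ds if isinstance(ds, int) else 5
    dub = "-d" if dsNum < 6 else ""
    for sub in bkg.getRanges(ds):
        if dsNum == 5 and sub >= 113: dub = ""
        subr = bkg.GetSubRanges(ds, sub)
        if len(subr) > 0:
            for runLo, runHi in subr:
                print("./auto-thresh %d %d -s %d %d %s -o %s" % (dsNum, sub, runLo, runHi, dub, dsi.threshDir))
        else:
            print("./auto-thresh %d %d %s -o %s" % (dsNum, sub, dub, dsi.threshDir))
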
def pandifySkim(dsNum, subNum=None, runNum=None, calList=[]):
    """ ./job-panda.py -pandify (-ds dsNum) (-sub dsNum subNum) (-run dsNum subNum) [-cal]
    Run ROOTtoPandas jobs.
    """
    # bg
    if not calList:
        bkg = dsi.BkgInfo()
        dsMap = bkg.dsMap()
        # -ds
        if subNum == None and runNum == None:
            for i in range(dsMap[dsNum] + 1):
                sh("""%s 'python3 ./sandbox/ROOTtoPandas.py -ws %d %d -p -d %s %s'""" % (jobStr, dsNum, i, dsi.waveDir, dsi.pandaDir))
        # -sub
        elif runNum == None:
            sh("""%s 'python3 ./sandbox/ROOTtoPandas.py -ws %d %d -p -d %s %s'""" % (jobStr, dsNum, subNum, dsi.waveDir, dsi.pandaDir))
        # -run
        elif subNum == None:
            sh("""%s 'python3 ./sandbox/ROOTtoPandas.py -f %d %d -p -d %s %s'""" % (jobStr, dsNum, runNum, dsi.waveDir, dsi.pandaDir))
    # cal
    else:
        for i in calList:
            sh("""%s 'python3 ./sandbox/ROOTtoPandas.py -f %d %d -p -d %s %s'""" % (jobStr, dsNum, i, dsi.calWaveDir, dsi.pandaDir))

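# Hedged usage sketch: the four dispatch modes of pandifySkim, mirroring its
# docstring. The sub-range and run numbers are placeholders, not real data
# ranges; calling this for real submits batch jobs.
def pandifyExamples():
    pandifySkim(1)                  # -ds : every sub-range in DS1
    pandifySkim(1, subNum=5)        # -sub: one sub-range (placeholder)
    pandifySkim(1, runNum=10000)    # -run: one run (placeholder number)
    pandifySkim(5, calList=[10000]) # -cal: calibration runs (placeholder)
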
def getThreshDB():
    """ ./chan-sel.py -getThreshDB
    Just an example of getting all threshold values (accounting for sub-bIdx's) from the DB.
    """
    calDB = db.TinyDB("%s/calDB-v2.json" % dsi.latSWDir)
    pars = db.Query()
    bkg = dsi.BkgInfo()

    # loop over datasets
    # for ds in [0,1,2,3,4,5,6]:
    for ds in [6]:
        dsNum = ds if isinstance(ds, int) else 5
        goodChans = det.getGoodChanList(dsNum)

        for bkgIdx in bkg.getRanges(ds):

            # ==== loop over sub-ranges (when TF was run) ====
            rFirst, rLast = bkg.getRanges(ds)[bkgIdx][0], bkg.getRanges(ds)[bkgIdx][-1]
            subRanges = bkg.GetSubRanges(ds, bkgIdx)
            if len(subRanges) == 0: subRanges.append((rFirst, rLast))

            for subIdx, (runLo, runHi) in enumerate(subRanges):
                key = "thresh_ds%d_bkg%d_sub%d" % (dsNum, bkgIdx, subIdx)
                thD = dsi.getDBRecord(key, False, calDB, pars)
                print(key)
                for ch in thD:
                    print(ch, ":", thD[ch])
                print("")

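# Minimal single-record sketch, using the same DB objects and the
# dsi.getDBRecord call from getThreshDB above: fetch one threshold dict
# by (dsNum, bkgIdx, subIdx) instead of looping over everything.
def getThreshRecord(dsNum, bkgIdx, subIdx):
    calDB = db.TinyDB("%s/calDB-v2.json" % dsi.latSWDir)
    pars = db.Query()
    key = "thresh_ds%d_bkg%d_sub%d" % (dsNum, bkgIdx, subIdx)
    return dsi.getDBRecord(key, False, calDB, pars)
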
def runSkimmer(dsNum, subNum=None, runNum=None, calList=[]):
    """ ./job-panda.py [-q] -skim (-ds dsNum) (-sub dsNum subNum) (-run dsNum subNum) [-cal]
    Submit skim_mjd_data jobs.
    """
    bkg = dsi.BkgInfo()

    # bkg
    if not calList:
        dub = "-d" if dsNum < 6 else ""
        dsMap = bkg.dsMap()
        # -ds
        if subNum == None and runNum == None:
            for i in range(dsMap[dsNum] + 1):
                if dsNum == 5 and i >= 113: dub = ""
                job = "./skim_mjd_data %d %d -l -g %s -t 0.7 %s" % (dsNum, i, dub, dsi.skimDir)
                if useJobQueue:
                    sh("%s >& ./logs/skim-ds%d-%d.txt" % (job, dsNum, i))
                else:
                    sh("%s '%s'" % (jobStr, job))
        # -sub
        elif runNum == None:
            if dsNum == 5 and subNum >= 113: dub = ""
            job = "./skim_mjd_data %d %d -l -g %s -t 0.7 %s" % (dsNum, subNum, dub, dsi.skimDir)
            if useJobQueue:
                sh("%s >& ./logs/skim-ds%d-%d.txt" % (job, dsNum, subNum))
            else:
                sh("%s '%s'" % (jobStr, job))
        # -run
        elif subNum == None:
            dub = "-d" if runNum < 23959 or runNum > 6000000 else ""
            job = "./skim_mjd_data -f %d -l -g %s -t 0.7 %s" % (runNum, dub, dsi.skimDir)
            if useJobQueue:
                sh("%s >& ./logs/skim-ds%d-run%d.txt" % (job, bkg.GetDSNum(runNum), runNum))
            else:
                sh("%s '%s'" % (jobStr, job))
    # cal
    else:
        for run in calList:
            dub = "-d" if run < 23959 or run > 6000000 else ""
            job = "./skim_mjd_data -f %d -l -g %s -t 0.7 %s" % (run, dub, dsi.calSkimDir)
            if useJobQueue:
                sh("%s >& ./logs/skim-ds%d-run%d.txt" % (job, bkg.GetDSNum(run), run))
            else:
                sh("%s '%s'" % (jobStr, job))

def runWaveSkim(dsNum, subNum=None, runNum=None, calList=[]):
    """ ./job-panda.py [-q] -wave (-ds dsNum) (-sub dsNum subNum) (-run dsNum subNum) [-cal]
    Submit wave-skim jobs.
    """
    bkg = dsi.BkgInfo()

    # bkg
    if not calList:
        dub = "-d" if dsNum < 6 else ""
        dsMap = bkg.dsMap()
        # -ds
        if subNum == None and runNum == None:
            for i in range(dsMap[dsNum] + 1):
                if dsNum == 5 and i >= 113: dub = ""
                job = "./wave-skim -n %s -r %d %d -p %s %s" % (dub, dsNum, i, dsi.skimDir, dsi.waveDir)
                if useJobQueue:
                    sh("%s >& ./logs/wave-ds%d-%d.txt" % (job, dsNum, i))
                else:
                    sh("%s '%s'" % (jobStr, job))
        # -sub
        elif runNum == None:
            if dsNum == 5 and subNum >= 113: dub = ""
            job = "./wave-skim -n %s -r %d %d -p %s %s" % (dub, dsNum, subNum, dsi.skimDir, dsi.waveDir)
            if useJobQueue:
                sh("%s >& ./logs/wave-ds%d-%d.txt" % (job, dsNum, subNum))
            else:
                sh("%s '%s'" % (jobStr, job))
        # -run
        elif subNum == None:
            dub = "-d" if runNum < 23959 or runNum > 6000000 else ""
            job = "./wave-skim -n %s -f %d %d -p %s %s" % (dub, dsNum, runNum, dsi.skimDir, dsi.waveDir)
            if useJobQueue:
                sh("%s >& ./logs/wave-ds%d-run%d.txt" % (job, dsNum, runNum))
            else:
                sh("%s '%s'" % (jobStr, job))
    # cal
    else:
        for run in calList:
            dub = "-d" if run < 23959 or run > 6000000 else ""
            job = "./wave-skim -n %s -c -f %d %d -p %s %s" % (dub, bkg.GetDSNum(run), run, dsi.calSkimDir, dsi.calWaveDir)
            if useJobQueue:
                sh("%s >& ./logs/wave-ds%d-run%d.txt" % (job, bkg.GetDSNum(run), run))
            else:
                sh("%s '%s'" % (jobStr, job))

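# The "-d" run-number logic above is duplicated between runSkimmer and
# runWaveSkim. A small sketch factoring it out -- the cutoff values are copied
# verbatim from those functions, and their physical meaning (which runs take
# the flag) is assumed from context, not documented here:
def dubFlag(run):
    return "-d" if run < 23959 or run > 6000000 else ""
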
def writeCut(dsNum, subNum=None, runNum=None, calList=[]):
    """ ./job-panda.py -writeCut (-ds dsNum) (-sub dsNum subNum) (-run dsNum subNum) [-cal]
    Assumes the cut used in the FIRST file (even in the whole DS) should be
    applied to ALL files. This should be a relatively safe assumption.
    """
    from ROOT import TFile, TNamed, TObject
    fileList = []
    bkg = dsi.BkgInfo()

    # bg
    if not calList:
        dsMap = bkg.dsMap()
        # -ds
        if subNum == None and runNum == None:
            for i in range(dsMap[dsNum] + 1):
                inPath = "%s/splitSkimDS%d_%d*" % (dsi.splitDir, dsNum, i)
                fileList.extend(sorted(glob.glob(inPath)))
        # -sub
        elif runNum == None:
            inPath = "%s/splitSkimDS%d_%d*" % (dsi.splitDir, dsNum, subNum)
            fileList.extend(sorted(glob.glob(inPath)))
        # -run
        elif subNum == None:
            inPath = "%s/splitSkimDS%d_run%d*" % (dsi.splitDir, dsNum, runNum)
            fileList.extend(sorted(glob.glob(inPath)))
    # cal
    else:
        dsRanges = bkg.dsRanges()
        for run in calList:
            for key in dsRanges:
                if dsRanges[key][0] <= run <= dsRanges[key][1]:
                    dsNum = key
            inPath = "%s/splitSkimDS%d_run%d*" % (dsi.calSplitDir, dsNum, run)
            fileList.extend(sorted(glob.glob(inPath)))

    # pull the cut off the FIRST file and add it to the sub-files
    if len(fileList) <= 1:
        print("Not enough files found! Exiting...")
        exit(1)
    firstFile = TFile(fileList[0])
    theCut = firstFile.Get("theCut").GetTitle()
    print("Applying this cut:\n", theCut)
    for f in fileList:
        print(f)
        subRangeFile = TFile(f, "UPDATE")
        thisCut = TNamed("theCut", theCut)
        thisCut.Write("", TObject.kOverwrite)
        subRangeFile.Close()

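# Hedged verification sketch for writeCut: re-open each split file and confirm
# they all carry the same 'theCut' TNamed. Uses only ROOT calls that writeCut
# itself uses.
def checkCut(fileList):
    from ROOT import TFile
    cuts = set()
    for f in fileList:
        tf = TFile(f)
        cuts.add(tf.Get("theCut").GetTitle())
        tf.Close()
    print("Found %d distinct cut(s) across %d files." % (len(cuts), len(fileList)))
    return len(cuts) == 1
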
def getThreshDB():
    calDB = db.TinyDB("%s/calDB-v2.json" % dsi.latSWDir)
    pars = db.Query()
    bkg = dsi.BkgInfo()

    # loop over datasets
    for ds in [0, 1, 2, 3, 4, 5, 6]:
        dsNum = ds if isinstance(ds, int) else 5
        goodChans = det.getGoodChanList(dsNum)

        for bkgIdx in bkg.getRanges(ds):

            # ==== loop over sub-ranges (when TF was run) ====
            rFirst, rLast = bkg.getRanges(ds)[bkgIdx][0], bkg.getRanges(ds)[bkgIdx][-1]
            subRanges = bkg.GetSubRanges(ds, bkgIdx)
            if len(subRanges) == 0: subRanges.append((rFirst, rLast))

            for subIdx, (runLo, runHi) in enumerate(subRanges):
                key = "thresh_ds%d_bkg%d_sub%d" % (dsNum, bkgIdx, subIdx)
                print(key)

def ds4Check():
    """ For some reason the DS4 auto-thresh jobs didn't finish.
    Is it because the chains are super enormous?
    Result: It seems that GATDataSet is super slow for DS4.
    """
    import time
    from ROOT import GATDataSet, TFile, TTree

    ds, sub = 4, 13
    # ds, sub = 5, 47

    bkg = dsi.BkgInfo()
    for bkgIdx in bkg.getRanges(ds):
        if bkgIdx not in [sub]: continue
        runList = bkg.getRunList(ds, bkgIdx)
        print("DS:", ds, "bkgIdx:", bkgIdx)

        gds = GATDataSet()
        for run in runList:
            # time how long it takes GATDataSet to resolve each run's file paths
            start = time.time()
            gPath = gds.GetPathToRun(run, GATDataSet.kGatified)
            bPath = gds.GetPathToRun(run, GATDataSet.kBuilt)
            print(run, "%.4f" % (time.time() - start))

#!/usr/bin/env python
import sys, os
sys.argv.append("-b")
import tinydb as db
import numpy as np
import waveLibs as wl
import dsi
bkg = dsi.BkgInfo()
cal = dsi.CalInfo()
det = dsi.DetInfo()
from ROOT import TFile, TTree, MGTWaveform

# switches
fLimit = None        # set to None to run over everything
skipDS6Cal = True
verbose = True
testMode = False


def main(argv):
    """ NOTE: don't use globs when getting files. Manually make sure everything is here.
    Can submit these commands as separate batch jobs:
        ./check-files.py -all
        ./check-files.py -c -all
    """
    global checkCal
    checkCal = False
    if checkCal:
        print("Skip DS6 cal?", skipDS6Cal)

def fillThreshDB():
    """ Fill threshold records in to LAT/calDB-v2.json.
    keys: thresh_ds[i]_bkg[j]_sub[k]
    vals: {[chan]: [50 pct mean, sigma, isBad (0 good, 1 bad)]}
    """
    from ROOT import TFile, TTree

    # Do we actually want to write new values?  Or just print stuff out?
    fillDB = True

    calDB = db.TinyDB("%s/calDB-v2.json" % dsi.latSWDir)
    pars = db.Query()
    bkg = dsi.BkgInfo()

    # loop over datasets and bkgIdx
    for ds in [0, 1, 2, 3, 4, "5A", "5B", "5C", 6]:
        dsNum = ds if isinstance(ds, int) else 5
        goodChans = det.getGoodChanList(dsNum)

        for bkgIdx in bkg.getRanges(ds):

            # ==== loop over sub-ranges (when TF was run) ====
            rFirst, rLast = bkg.getRanges(ds)[bkgIdx][0], bkg.getRanges(ds)[bkgIdx][-1]
            subRanges = bkg.GetSubRanges(ds, bkgIdx)
            if len(subRanges) == 0: subRanges.append((rFirst, rLast))

            for subIdx, (runLo, runHi) in enumerate(subRanges):

                # load threshold table
                if len(subRanges) > 1:
                    fname = "%s/threshDS%d_%d_%d_%d.root" % (dsi.threshDir, dsNum, bkgIdx, runLo, runHi)
                else:
                    fname = "%s/threshDS%d_%d.root" % (dsi.threshDir, dsNum, bkgIdx)
                if not os.path.isfile(fname):
                    print("Couldn't find file:", fname)
                    return
                tf = TFile(fname)
                tt = tf.Get("threshTree")
                try:
                    n = tt.GetEntries()
                except AttributeError:
                    print("skipped", fname)
                    continue
                if n != 1:
                    print("Hmm, %d thresh table entries? %s" % (n, fname))
                    return
                tt.GetEntry(0)

                rLo, rHi = tt.runMin, tt.runMax
                key = "thresh_ds%d_bkg%d_sub%d" % (dsNum, bkgIdx, subIdx)
                vals = {}
                print("")
                print(key, runLo, runHi)
                print("chan CPD g threshKeV sig ADC err E=0 err nThr nNoise status note")

                # loop over channels
                nGood, nTot = 0, 0
                for i in range(tt.channelList.size()):

                    # keep only HG channels, exclude pulser monitors
                    chan = tt.channelList.at(i)
                    if chan % 2 != 0 or chan in det.getPMon(dsNum):
                        continue

                    # load results
                    isGood = 1 if chan in goodChans else 0
                    thrKeV, thrSig = tt.threshCal.at(i), tt.sigmaCal.at(i)
                    thrADC = tt.threshADC.at(i)
                    thrADCErr = tt.threshADCErr.at(i)
                    thrStatus = tt.threshFitStatus.at(i)
                    thrEvts = tt.numTrigger.at(i)
                    sigADC = tt.sigmaADC.at(i)
                    sigADCErr = tt.sigmaADCErr.at(i)
                    sigStatus = tt.sigmaFitStatus.at(i)
                    sigEvts = tt.numNoise.at(i)
                    calScale = tt.CalScale.at(i)
                    calOffset = tt.CalOffset.at(i)

                    # error handling
                    isBad = False
                    status = ""
                    if thrStatus == 999999 or sigStatus == 999999 or thrEvts < 10 or sigEvts < 10:
                        status = "Not enough events"
                        isBad = True
                    elif (0 < thrStatus < 999999) or (0 < sigStatus < 999999):
                        status = "auto-fit fail"
                        isBad = True
                    elif int(thrKeV) == 99999 or int(thrSig) == 99999:
                        status = "Fit fail"
                        isBad = True
                    elif int(thrKeV) == 999999:
                        status = "Bad energy calibration"
                        isBad = True
                    elif thrKeV < 0.3:
                        status = "Unphysical threshold"
                        isBad = True
                    elif thrEvts < 100 or sigEvts < 100:
                        status = "Low events"  # this is ok
                    if not isBad: nGood += 1
                    nTot += 1

                    # pretty-print the results table (wider format for fit-fail sentinel values)
                    if int(thrKeV) > 99998:
                        print("%d %s %d %-7.0f s %-7.0f %-4.3f %-6.3f %.2f %-6.2f %-8d %-8d %d %d %d %s" % (chan, det.getChanCPD(dsNum, chan), isGood, thrKeV, thrSig, thrADC, thrADCErr, sigADC, sigADCErr, thrEvts, sigEvts, thrStatus, sigStatus, int(isBad), status))
                    else:
                        print("%d %s %d %-7.3f s %-7.3f %-4.3f %-6.3f %.2f %-6.2f %-8d %-8d %d %d %d %s" % (chan, det.getChanCPD(dsNum, chan), isGood, thrKeV, thrSig, thrADC, thrADCErr, sigADC, sigADCErr, thrEvts, sigEvts, thrStatus, sigStatus, int(isBad), status))

                    # fill the dict vals
                    vals[chan] = [float("%.5f" % thrKeV), float("%.5f" % thrSig), int(isBad)]

                print("good detectors: %d/%d" % (nGood, nTot))

                # fill the DB
                if fillDB:
                    dsi.setDBRecord({"key": key, "vals": vals}, forceUpdate=True, calDB=calDB, pars=pars)

                tf.Close()

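# Minimal read-back sketch for the records fillThreshDB writes. The value
# layout {chan: [50 pct mean (keV), sigma, isBad]} comes from its docstring;
# whether the channel keys come back as ints or strings depends on the DB
# wrapper, so they are printed as-is.
def printGoodThresholds(dsNum, bkgIdx, subIdx, calDB, pars):
    key = "thresh_ds%d_bkg%d_sub%d" % (dsNum, bkgIdx, subIdx)
    thD = dsi.getDBRecord(key, False, calDB, pars)
    for ch in sorted(thD):
        thrKeV, thrSig, isBad = thD[ch]
        if not isBad:
            print(ch, ": %.3f +/- %.3f keV" % (thrKeV, thrSig))
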
def runLAT(dsNum, subNum=None, runNum=None, calList=[]):
    """ ./job-panda.py [-q] -lat (-ds dsNum) (-sub dsNum subNum) (-run dsNum subNum) [-cal]
    Runs LAT on splitSkim output. Does not combine output files back together.
    """
    bkg = dsi.BkgInfo()

    # bg
    if not calList:
        dsMap = bkg.dsMap()
        # -ds
        if subNum == None and runNum == None:
            for subNum in range(dsMap[dsNum] + 1):
                files = dsi.getSplitList("%s/splitSkimDS%d_%d*" % (dsi.splitDir, dsNum, subNum), subNum)
                for idx, inFile in sorted(files.items()):
                    outFile = "%s/latSkimDS%d_%d_%d.root" % (dsi.latDir, dsNum, subNum, idx)
                    job = "./lat.py -b -r %d %d -p %s %s" % (dsNum, subNum, inFile, outFile)
                    # jspl = job.split()  # make SUPER sure stuff is matched
                    # print(jspl[3], jspl[4], jspl[6].split("/")[-1], jspl[7].split("/")[-1])
                    if useJobQueue:
                        sh("%s >& ./logs/lat-ds%d-%d-%d.txt" % (job, dsNum, subNum, idx))
                    else:
                        sh("""%s '%s'""" % (jobStr, job))
        # -sub
        elif runNum == None:
            files = dsi.getSplitList("%s/splitSkimDS%d_%d*" % (dsi.splitDir, dsNum, subNum), subNum)
            for idx, inFile in sorted(files.items()):
                outFile = "%s/latSkimDS%d_%d_%d.root" % (dsi.latDir, dsNum, subNum, idx)
                job = "./lat.py -b -r %d %d -p %s %s" % (dsNum, subNum, inFile, outFile)
                if useJobQueue:
                    sh("%s >& ./logs/lat-ds%d-%d-%d.txt" % (job, dsNum, subNum, idx))
                else:
                    sh("""%s '%s'""" % (jobStr, job))
        # -run
        elif subNum == None:
            files = dsi.getSplitList("%s/splitSkimDS%d_run%d*" % (dsi.splitDir, dsNum, runNum), runNum)
            for idx, inFile in sorted(files.items()):
                outFile = "%s/latSkimDS%d_run%d_%d.root" % (dsi.latDir, dsNum, runNum, idx)
                job = "./lat.py -b -r %d %d -p %s %s" % (dsNum, runNum, inFile, outFile)
                if useJobQueue:
                    sh("%s >& ./logs/lat-ds%d-run%d-%d.txt" % (job, dsNum, runNum, idx))
                else:
                    sh("""%s '%s'""" % (jobStr, job))
    # cal
    else:
        dsRanges = bkg.dsRanges()
        for run in calList:
            for key in dsRanges:
                if dsRanges[key][0] <= run <= dsRanges[key][1]:
                    dsNum = key
            files = dsi.getSplitList("%s/splitSkimDS%d_run%d*" % (dsi.calSplitDir, dsNum, run), run)
            for idx, inFile in sorted(files.items()):
                outFile = "%s/latSkimDS%d_run%d_%d.root" % (dsi.calLatDir, dsNum, run, idx)
                job = "./lat.py -b -f %d %d -p %s %s" % (dsNum, run, inFile, outFile)
                if useJobQueue:
                    sh("%s >& ./logs/lat-ds%d-run%d-%d.txt" % (job, dsNum, run, idx))
                else:
                    sh("""%s '%s'""" % (jobStr, job))

def batchSplit(dsNum, subNum=None, runNum=None, calList=[]):
    """ ./job-panda.py [-q] [-cal] -batchSplit (-ds dsNum) (-sub dsNum subNum) (-run dsNum subNum)
    Submit jobs that call splitTree for each run, splitting files into small chunks.
    NOTE: The data cleaning cut is NOT written into the output files,
    and the function 'writeCut' must be called after these jobs are done.
    """
    bkg = dsi.BkgInfo()

    # bg
    if not calList:
        dsMap = bkg.dsMap()
        # -ds
        if subNum == None and runNum == None:
            for i in range(dsMap[dsNum] + 1):
                inPath = "%s/waveSkimDS%d_%d.root" % (dsi.waveDir, dsNum, i)
                if not os.path.isfile(inPath):
                    print("File", inPath, "not found. Continuing ...")
                    continue
                job = "./job-panda.py -sub %d %d -split" % (dsNum, i)
                if useJobQueue:
                    sh("%s >& ./logs/split-ds%d-%d.txt" % (job, dsNum, i))
                else:
                    sh("""%s '%s'""" % (jobStr, job))
        # -sub
        elif runNum == None:
            inPath = "%s/waveSkimDS%d_%d.root" % (dsi.waveDir, dsNum, subNum)
            if not os.path.isfile(inPath):
                print("File", inPath, "not found.")
                return
            job = "./job-panda.py -sub %d %d -split" % (dsNum, subNum)
            if useJobQueue:
                sh("%s >& ./logs/split-ds%d-%d.txt" % (job, dsNum, subNum))
            else:
                sh("""%s '%s'""" % (jobStr, job))
        # -run
        elif subNum == None:
            inPath = "%s/waveSkimDS%d_run%d.root" % (dsi.waveDir, dsNum, runNum)
            if not os.path.isfile(inPath):
                print("File", inPath, "not found.")
                return
            job = "./job-panda.py -run %d %d -split" % (dsNum, runNum)
            if useJobQueue:
                sh("%s >& ./logs/split-ds%d-run%d.txt" % (job, dsNum, runNum))
            else:
                sh("""%s '%s'""" % (jobStr, job))
    # cal
    else:
        for run in calList:
            dsRanges = bkg.dsRanges()
            for key in dsRanges:
                if dsRanges[key][0] <= run <= dsRanges[key][1]:
                    dsNum = key
            inPath = "%s/waveSkimDS%d_run%d.root" % (dsi.calWaveDir, dsNum, run)
            if not os.path.isfile(inPath):
                print("File", inPath, "not found. Continuing ...")
                continue
            job = "./job-panda.py -run %d %d -split" % (dsNum, run)
            if useJobQueue:
                sh("%s >& ./logs/split-ds%d-run%d.txt" % (job, dsNum, run))
            else:
                sh("""%s '%s'""" % (jobStr, job))

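# Processing order for one background sub-range, per the docstrings above.
# This is a sketch of intent, not a script: each step submits batch jobs, and
# a step's jobs must FINISH before the next step is launched (in particular,
# writeCut must wait for the split jobs, as batchSplit's docstring notes).
#   runWaveSkim(dsNum, subNum)   # skim -> waveSkim file
#   batchSplit(dsNum, subNum)    # split the waveSkim file into chunks
#   writeCut(dsNum, subNum)      # re-attach 'theCut' (split jobs don't carry it)
#   runLAT(dsNum, subNum)        # run LAT on each split chunk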