def runMain(samples, args=None, localobjs=None):
    """Command-line driver for a samples file.

    Each recognised word found in `args` triggers one action, in this order:
    ``help``/``--help``/``-h``, ``test``, ``locality``, ``refresh``,
    ``check_versions``, ``list``/``summary``, ``genXSecAna``, ``checkdecl``.
    Several actions may be requested in one call. The text printed by
    ``help`` describes the user-facing options.

    The code is written so that it runs unchanged on Python 2.7 and Python 3
    (single-argument ``print(...)`` calls, ``dict.items()``, text-mode
    subprocess output).

    Args:
        samples: the full list of components declared in the samples file.
        args: list of command-line words; defaults to ``sys.argv``.
        localobjs: mapping of names to objects (typically ``locals()`` of the
            samples file); only needed for ``checkdecl``.

    Raises:
        RuntimeError: if ``checkdecl`` is requested and `localobjs` is None.
    """
    if args is None:
        args = sys.argv
    # Sub-selection of `samples` driven by `args`; the selection rules live in
    # _filterSamples, which is defined elsewhere in this module.
    selsamples = _filterSamples(samples, args)

    if "help" in args or "--help" in args or "-h" in args:
        print(_RUNMAIN_HELP)

    if "test" in args:
        from CMGTools.RootTools.samples.ComponentCreator import testSamples
        testSamples(selsamples, allowAAA=("--AAA" in args))

    if "locality" in args:
        _checkLocality(selsamples)

    if "refresh" in args:
        _refreshCache(selsamples, args)

    if "check_versions" in args:
        _checkDatasetVersions(selsamples, args)

    if "list" in args or "summary" in args:
        from CMGTools.RootTools.samples.configTools import printSummary, mergeExtensions
        if "--merge-extensions" in args or "--mex" in args:
            # The merged list replaces the selection for the later actions too.
            selsamples = mergeExtensions(selsamples, verbose=True)[0]
        printSummary(selsamples)

    if "genXSecAna" in args:
        _checkGenXSec(selsamples, args)

    if "checkdecl" in args:
        if localobjs is None:
            raise RuntimeError(
                "you have to runMain(samples,localobjs=locals())")
        _checkDeclarations(samples, localobjs)


# Text printed by runMain for "help", "--help" or "-h".
_RUNMAIN_HELP = """
python samplefile.py test [--AAA] [samples] :
      tries accessing the first file of each sample
      option --AAA: allow AAA as fallback

python samplefile.py locality [samples] :
      check the locality of the samples

python samplefile.py refresh [samples] [ --pretend ] [ --suspicious ]:
      forces a refresh of the cache
      option --pretend: print the list of samples to refresh, instead of actually refreshing them
      option --suspicious: selects for refresh the samples that look bogus (zero files, or zero events for official CMS datasets)

python samplefile.py list [samples]:
python samplefile.py summary [samples]:
      two equivalent commands that prints a list of samples, with number of files, events, equivalent luminosity, etc

python samplefile.py genXSecAna [samples] [ --pretend ] [ --verbose ] [ --AAA ]:
      check the cross sections using genXSecAna on one of the files

python samplefile.py checkdecl:
      check that all samples are declared in the samples list
"""

# Source of the analyzer configuration downloaded by the genXSecAna action.
_GENXSECANA_URL = ("https://raw.githubusercontent.com/syuvivida/generator/"
                   "master/cross_section/runJob/ana.py")


def _printInline(text):
    """Write `text` without a trailing newline, as ``print text,`` did.

    One extra space is appended to stand in for the separator that the
    Python 2 print statement would emit before the next printed item. The
    stream is flushed so the message is visible while a long step runs.
    """
    sys.stdout.write(text + " ")
    sys.stdout.flush()


def _checkLocality(selsamples):
    """Report, for each MINIAOD(SIM) component, whether it is at T2_CH_CERN."""
    import re
    from CMGTools.Production.localityChecker import LocalityChecker
    tier2Checker = LocalityChecker("T2_CH_CERN", datasets="/*/*/MINIAOD*")
    for comp in selsamples:
        if not comp.files:
            print('\033[34mE: Empty component: ' + comp.name + '\033[0m')
            continue
        if not hasattr(comp, 'dataset'):
            continue
        if not re.match("/[^/]+/[^/]+/MINIAOD(SIM)?", comp.dataset):
            continue
        firstFile = comp.files[0]
        if "/store/" not in firstFile:
            continue
        # Files under group/user/cmst3 areas are not checked.
        if re.search("/store/(group|user|cmst3)/", firstFile):
            continue
        if not tier2Checker.available(comp.dataset):
            print("\033[1;31mN: Dataset %s (%s) is not available on T2_CH_CERN\033[0m"
                  % (comp.name, comp.dataset))
        else:
            print("Y: Dataset %s (%s) is available on T2_CH_CERN"
                  % (comp.name, comp.dataset))


def _looksSuspicious(comp):
    """Return True if `comp` qualifies for ``refresh --suspicious``.

    A component qualifies when it has no files, or when its first file is
    under /store/mc or /store/data and its ``dataset_entries`` attribute is
    missing or not greater than zero. Components with files stored anywhere
    else never qualify.
    """
    if not comp.files:
        return True
    firstFile = comp.files[0]
    if "/store/mc" in firstFile or "/store/data" in firstFile:
        return not (getattr(comp, 'dataset_entries', -1) > 0)
    return False


def _refreshCache(selsamples, args):
    """Run CacheChecker.checkComp on the selected components.

    With ``--suspicious`` only components accepted by _looksSuspicious are
    considered; with ``--pretend`` the components are listed but not checked.
    """
    from CMGTools.Production.cacheChecker import CacheChecker
    checker = CacheChecker()
    onlySuspicious = "--suspicious" in args
    pretend = "--pretend" in args
    for d in selsamples:
        if onlySuspicious and not _looksSuspicious(d):
            continue
        print("Checking  %s  aka  %s" % (d.name, d.dataset))
        if pretend:
            continue
        checker.checkComp(d, verbose=True)


def _checkDatasetVersions(selsamples, args):
    """Run DatasetVersionChecker.checkComp on the selected components.

    With ``--pretend`` the components are listed but not checked.
    """
    from CMGTools.Production.datasetVersionChecker import DatasetVersionChecker
    checker = DatasetVersionChecker()
    pretend = "--pretend" in args
    for d in selsamples:
        if pretend:
            print("Would check  %s  aka  %s" % (d.name, d.dataset))
        else:
            # Prefix printed on the same line as whatever checkComp reports.
            _printInline("Checking %s  " % d.name)
            checker.checkComp(d, verbose=True)


def _matchGenXSec(xsecAnaOut):
    """Find the cross-section line to use in the analyzer output.

    Returns the regex match for the "After filter: final cross section" line
    (group 1 = value, group 2 = uncertainty), or None if it is absent. If that
    value is exactly zero, the "Before matching" line is returned instead,
    but only when the "After matching" value is negative, the "Before
    matching" value is positive and the two agree in magnitude within 1%;
    otherwise None is returned.
    """
    import re
    m = re.search(r"After filter: final cross section = (\S+) \+- (\S+) pb",
                  xsecAnaOut)
    if not (m and float(m.group(1)) == 0):
        return m
    before = re.search(
        r"Before matching: total cross section = (\S+) \+- (\S+) pb",
        xsecAnaOut)
    after = re.search(
        r"After matching: total cross section = (\S+) \+- (\S+) pb",
        xsecAnaOut)
    if (after and before
            and float(after.group(1)) < 0 and float(before.group(1)) > 0
            and abs(float(after.group(1)) / float(before.group(1)) + 1) < 1e-2):
        print("\033[01;33m [after filter Xsec is zero, using before filter one] \033[00m")
        return before
    return None


def _checkGenXSec(selsamples, args):
    """Compare each component's xSection with the value from genXSecAna.

    Runs ``cmsRun $CMSSW_BASE/src/genXSecAna.py`` on the first file of every
    selected component that has an ``xSection`` attribute and prints the
    measured cross section, the ratio to the declared one and, when present
    in the output, the fraction of negative-weight events. The analyzer
    configuration is downloaded first if it is missing or ``--fetch`` is
    given. Recognised options: ``--pretend``, ``--AAA``, ``--verbose``.
    """
    import re
    import subprocess

    script = os.environ['CMSSW_BASE'] + "/src/genXSecAna.py"
    if "--fetch" in args or not os.path.exists(script):
        print("Retrieving genXSecAna.py")
        # Argument list instead of a shell string: no quoting problems if the
        # path contains spaces or shell metacharacters.
        try:
            subprocess.call(["wget", "-O", script, _GENXSECANA_URL])
        except OSError as err:
            print("\033[01;31m ERROR: could not run wget: %s \033[00m" % err)

    verbose = "--verbose" in args
    # Keep only the /store/... part of a file name; the character class stops
    # at '?' so that a trailing "?query" is really dropped.
    lfnPattern = re.compile(r".*(/store/[^?]*)(?:\?.*)?")

    for d in selsamples:
        if not hasattr(d, 'xSection'):
            print("Skipping %s which has no cross section" % d.name)
            continue
        if "--pretend" in args:
            print("Would check  %s  aka  %s" % (d.name, d.dataset))
            continue
        if "--AAA" in args:
            from CMGTools.Production.changeComponentAccessMode import convertComponent
            convertComponent(d, "root://cms-xrd-global.cern.ch/%s")
        else:  # use LFNs
            d.files = [lfnPattern.sub(r"\1", f) for f in d.files]

        _printInline("Sample %s: XS(sample file) = %g pb, ... "
                     % (d.name, d.xSection))
        if not d.files:
            print("\n\033[01;31m ERROR: no files in sample, so cannot run the analyzer \033[00m")
            continue

        command = ["cmsRun", script, "inputFiles=%s" % d.files[0],
                   "maxEvents=-1"]
        if verbose:
            print("\n ".join(command))
        # universal_newlines=True gives text (not bytes) on Python 3 as well.
        xsecAnaOut = subprocess.check_output(command,
                                             stderr=subprocess.STDOUT,
                                             universal_newlines=True)
        if verbose:
            for line in xsecAnaOut.split("\n"):
                print("\t>> " + line)

        m = _matchGenXSec(xsecAnaOut)
        if not m or float(m.group(1)) <= 0:
            print("\n\033[01;31m ERROR: could not find After filter cross section in the output, or it's zero. \033[00m")
            continue

        xs, xserr = float(m.group(1)), float(m.group(2))
        kfactor = d.xSection / xs
        if abs(xs - d.xSection) < min(3 * xserr, 1e-2 * xs):
            col, stat = '\033[01;36m', "OK"
        elif 0.8 < kfactor < 1.4:
            col, stat = '\033[01;36m', "OK?"
        elif 0.5 < kfactor < 2.0:
            col, stat = '\033[01;33m', "WARNING"
        else:
            col, stat = '\033[01;31m', "ERROR"

        fneg = ""
        m = re.search(
            r"After filter: final fraction of events with negative weights = (\S+) \+- (\S+)",
            xsecAnaOut)
        if m:
            fnegv, fnegerr = float(m.group(1)), float(m.group(2))
            fneg = "  f(negw): %.3f +- %.3f " % (fnegv, fnegerr)
            if getattr(d, 'fracNegWeights', None) is not None:
                if abs(d.fracNegWeights - fnegv) < 0.02:
                    fneg += "(%.3f in sample file, \033[01;36mOK\033[00m)" % d.fracNegWeights
                else:
                    fneg += "(%.3f in sample file, \033[01;33mWARNING\033[00m)" % d.fracNegWeights
        print("XS(genAnalyzer) = %g +/- %g pb : %s kFactor = %g  %s\033[00m%s"
              % (xs, xserr, col, kfactor, stat, fneg))


def _checkDeclarations(samples, localobjs):
    """Check that every Component in `localobjs` is listed in `samples`.

    Prints an error for each Component that is not in `samples` or whose
    ``name`` attribute differs from the variable name it is bound to, then
    prints the number of correctly declared components.
    """
    import PhysicsTools.HeppyCore.framework.config as cfg
    ok = 0
    # Iterate over a snapshot: `localobjs` is usually a live namespace dict.
    for name, obj in list(localobjs.items()):
        if name == "comp":
            continue  # local variable used in loops
        if not isinstance(obj, cfg.Component):
            continue
        if obj not in samples:
            print("\tERROR: component %s is not added to the samples list " % name)
        elif obj.name != name:
            print("\tERROR: component %s has inconsistent name %s " % (name, obj.name))
        else:
            ok += 1
    print("\tINFO: %d correctly declared components" % ok)
else: configureSplittingFromTime( filter(lambda x: 'Double' in x.name or 'MuonEG' in x.name, selectedComponents), 50, 5) configureSplittingFromTime( filter(lambda x: 'Single' in x.name, selectedComponents), 30, 5) #printSummary(selectedComponents) if runFRMC: QCD_Mu5 = [ QCD_Pt20to30_Mu5, QCD_Pt30to50_Mu5, QCD_Pt50to80_Mu5, QCD_Pt80to120_Mu5, QCD_Pt120to170_Mu5, QCD_Pt170to300_Mu5 ] autoAAA(QCDPtEMEnriched + QCDPtbcToE) QCDEm, _ = mergeExtensions( [q for q in QCDPtEMEnriched + QCDPtbcToE if "toInf" not in q.name]) selectedComponents = [QCD_Mu15] + QCD_Mu5 + [ WJetsToLNu_LO, DYJetsToLL_M10to50_LO, DYJetsToLL_M50_LO_ext ] + QCDEm selectedComponents = [ TTJets_DiLepton ] #TTJets_SingleLeptonFromT,TTJets_SingleLeptonFromTbar] selectedComponents = [TBar_tWch_noFullyHad, T_tWch_noFullyHad] TTJets_DiLepton.fineSplitFactor = 2 #selectedComponents = TT_pow cropToLumi(selectedComponents, 1.0) time = 5.0 extra = dict(maxFiles=10) configureSplittingFromTime([WJetsToLNu_LO], 20, time, **extra) configureSplittingFromTime([DYJetsToLL_M10to50_LO], 10, time, **extra) configureSplittingFromTime([DYJetsToLL_M50_LO_ext], 40, time, **extra)
comp.vetoTriggers = vetoTriggers[:] dataSamples.append(comp) vetoTriggers += triggers[:] selectedComponents = mcSamples + dataSamples if getHeppyOption('selectComponents'): if getHeppyOption('selectComponents')=='MC': selectedComponents = mcSamples elif getHeppyOption('selectComponents')=='DATA': selectedComponents = dataSamples else: selectedComponents = byCompName(selectedComponents, getHeppyOption('selectComponents').split(",")) autoAAA(selectedComponents, quiet=not(getHeppyOption("verboseAAA",False)), redirectorAAA="xrootd-cms.infn.it") # must be done before mergeExtensions configureSplittingFromTime(mcSamples,250 if preprocessor else 10,10) configureSplittingFromTime(dataSamples,80 if preprocessor else 10,10) selectedComponents, _ = mergeExtensions(selectedComponents, verbose=True) # create and set preprocessor if requested if getHeppyOption("nanoPreProcessor"): from CMGTools.Production.nanoAODPreprocessor import nanoAODPreprocessor preproc_cfg = {2016: ("mc94X2016","data94X2016"), 2017: ("mc94Xv2","data94Xv2"), 2018: ("mc102X","data102X_ABC","data102X_D")} preproc_cmsswArea = "/afs/cern.ch/work/v/vtavolar/SusySOSSW_2_clean/nanoAOD/clean/CMSSW_10_2_16_UL"#/afs/cern.ch/user/v/vtavolar/work/SusySOSSW_2_clean/nanoAOD/CMSSW_10_2_15" #MODIFY ACCORDINGLY preproc_mc = nanoAODPreprocessor(cfg='%s/src/PhysicsTools/NanoAOD/test/%s_NANO.py'%(preproc_cmsswArea,preproc_cfg[year][0]),cmsswArea=preproc_cmsswArea,keepOutput=True) if year==2018: preproc_data_ABC = nanoAODPreprocessor(cfg='%s/src/PhysicsTools/NanoAOD/test/%s_NANO.py'%(preproc_cmsswArea,preproc_cfg[year][1]),cmsswArea=preproc_cmsswArea,keepOutput=True, injectTriggerFilter=True, injectJSON=True) preproc_data_D = nanoAODPreprocessor(cfg='%s/src/PhysicsTools/NanoAOD/test/%s_NANO.py'%(preproc_cmsswArea,preproc_cfg[year][2]),cmsswArea=preproc_cmsswArea,keepOutput=True, injectTriggerFilter=True, injectJSON=True) for comp in selectedComponents: if comp.isData: comp.preprocessor = preproc_data_D if '2018D' in comp.name 
else preproc_data_ABC
from CMGTools.RootTools.samples.samples_13TeV_DATA2017 import dataSamples_31Mar2018 as allData elif year == 2016: from CMGTools.RootTools.samples.samples_13TeV_RunIISummer16MiniAODv3 import samples as mcSamples_ from CMGTools.RootTools.samples.samples_13TeV_DATA2016 import dataSamples_17Jul2018 as allData else: if year == 2018: from CMGTools.RootTools.samples.samples_13TeV_RunIIAutumn18NanoAODv4 import samples as mcSamples_ from CMGTools.RootTools.samples.samples_13TeV_DATA2018_NanoAOD import dataSamples_1June2019 as allData elif year == 2017: from CMGTools.RootTools.samples.samples_13TeV_RunIIFall17NanoAODv4 import samples as mcSamples_ from CMGTools.RootTools.samples.samples_13TeV_DATA2017_NanoAOD import dataSamples_1June2019 as allData elif year == 2016: from CMGTools.RootTools.samples.samples_13TeV_RunIISummer16NanoAODv4 import samples as mcSamples_ from CMGTools.RootTools.samples.samples_13TeV_DATA2016_NanoAOD import dataSamples_1June2019 as allData autoAAA(mcSamples_+allData, quiet=not(getHeppyOption("verboseAAA",False))) # must be done before mergeExtensions mcSamples_, _ = mergeExtensions(mcSamples_) # Triggers if year == 2018: from CMGTools.RootTools.samples.triggers_13TeV_DATA2018 import all_triggers as triggers elif year == 2017: from CMGTools.RootTools.samples.triggers_13TeV_DATA2017 import all_triggers as triggers triggers["FR_1mu_iso"] = [] # they probably existed but we didn't use them in 2017 elif year == 2016: from CMGTools.RootTools.samples.triggers_13TeV_DATA2016 import all_triggers as triggers triggers["FR_1mu_noiso_smpd"] = [] DatasetsAndTriggers = [] if analysis == "main": mcSamples = byCompName(mcSamples_, [
for pd, triggers in DatasetsAndTriggers: for comp in byCompName(allData, [pd]): comp.triggers = triggers[:] comp.vetoTriggers = vetoTriggers[:] dataSamples.append(comp) vetoTriggers += triggers[:] selectedComponents = mcSamples + dataSamples if getHeppyOption('selectComponents'): selectedComponents = byCompName( selectedComponents, getHeppyOption('selectComponents').split(",")) autoAAA(selectedComponents, quiet=not (getHeppyOption("verboseAAA", False))) configureSplittingFromTime(mcSamples, 250 if preprocessor else 10, 10) configureSplittingFromTime(dataSamples, 80 if preprocessor else 10, 10) selectedComponents, _ = mergeExtensions(selectedComponents) # create and set preprocessor if requested if getHeppyOption("nanoPreProcessor"): from CMGTools.Production.nanoAODPreprocessor import nanoAODPreprocessor preproc_cfg = { 2016: ("mc94X2016", "data94X2016"), 2017: ("mc94Xv2", "data94Xv2"), 2018: ("mc102X", "data102X_ABC", "data102X_D") } preproc_cmsswArea = "/afs/cern.ch/user/v/vtavolar/work/SusySOSSW_2_clean/nanoAOD/CMSSW_10_2_15" #MODIFY ACCORDINGLY preproc_mc = nanoAODPreprocessor( cfg='%s/src/PhysicsTools/NanoAOD/test/%s_NANO.py' % (preproc_cmsswArea, preproc_cfg[year][0]), cmsswArea=preproc_cmsswArea, keepOutput=True)
] # X+G # MISSING: WGToLNuG_amcatnlo_ext,WGToLNuG_amcatnlo_ext2,ZGTo2LG_ext v_jets = [WJetsToLNu_LO, DYJetsToLL_M10to50_LO, DYJetsToLL_M50_LO, WWTo2L2Nu] # V+jets v_jets_more = [W1JetsToLNu_LO, W2JetsToLNu_LO, W3JetsToLNu_LO, W4JetsToLNu_LO ] + DYNJetsToLL + DYJetsToLLM4to50HT tt_1l = [TTSemi_pow, TTJets_SingleLeptonFromT, TTJets_SingleLeptonFromTbar] # TT 1l tt_2l = [TTLep_pow, TTJets_DiLepton] # TT 2l boson = [WZTo3LNu_fxfx] + TriBosons # multi-boson # MISSING: WZTo3LNu_pow samples_slow = sig_ttv + ttv_lo + rares + convs + boson + tt_2l samples_fast = single_t + v_jets + tt_1l + v_jets_more if getHeppyOption("mergeExtensions"): verbose = (getHeppyOption("mergeExtensions") != "quiet") samples_slow = mergeExtensions(samples_slow, verbose=verbose)[0] samples_fast = mergeExtensions(samples_fast, verbose=verbose)[0] cropToLumi(rares, 500) cropToLumi(v_jets_more, 50) configureSplittingFromTime(samples_fast, 50, 3) configureSplittingFromTime(samples_slow, 100, 3) selectedComponents = samples_slow + samples_fast if scaleProdToLumi > 0: # select only a subset of a sample, corresponding to a given luminosity (assuming ~30k events per MiniAOD file, which is ok for central production) target_lumi = scaleProdToLumi # in inverse picobarns for c in selectedComponents: if not c.isMC: continue nfiles = int(min(ceil(target_lumi * c.xSection / 30e3), len(c.files))) #if nfiles < 50: nfiles = min(4*nfiles, len(c.files))