Exemple #1
0
def runMain(samples, args=None, localobjs=None):
    if args == None: args = sys.argv
    selsamples = _filterSamples(samples, args)
    if "help" in args or "--help" in args or "-h" in args:
        print """

python samplefile.py test [--AAA] [samples] :
        tries accessing the first file of each sample
        option -AAA: allow AAA as fallback

python samplefile.py locality [samples] :
        check the locality of the samples

python samplefile.py refresh [samples] [ --pretend ] [ --suspicious ]: 
        forces a refresh of the cache
        option --pretend: print the list of samples to refresh, instead of actually refreshing them
        option --suspicious: selects for refresh the samples that look bogus (zero files, or zero events for official CMS datasets)

python samplefile.py list [samples]:  
python samplefile.py summary [samples]:   
        two equivalent commands that prints a list of samples, with number of files, events, equivalent luminosity, etc

python samplefile.py genXSecAna [samples] [ --pretend ] [ --verbose ] [ --AAA ]:  
        check the cross sections using genXSecAna on one of the files

python samplefile.py checkdecl:  
        check that all samples are declared in the samples list

"""
    if "test" in args:
        from CMGTools.RootTools.samples.ComponentCreator import testSamples
        testSamples(selsamples, allowAAA=("--AAA" in args))
    if "locality" in args:
        import re
        from CMGTools.Production.localityChecker import LocalityChecker
        tier2Checker = LocalityChecker("T2_CH_CERN", datasets="/*/*/MINIAOD*")
        for comp in selsamples:
            if len(comp.files) == 0:
                print '\033[34mE: Empty component: ' + comp.name + '\033[0m'
                continue
            if not hasattr(comp, 'dataset'): continue
            if not re.match("/[^/]+/[^/]+/MINIAOD(SIM)?", comp.dataset):
                continue
            if "/store/" not in comp.files[0]: continue
            if re.search("/store/(group|user|cmst3)/", comp.files[0]): continue
            if not tier2Checker.available(comp.dataset):
                print "\033[1;31mN: Dataset %s (%s) is not available on T2_CH_CERN\033[0m" % (
                    comp.name, comp.dataset)
            else:
                print "Y: Dataset %s (%s) is available on T2_CH_CERN" % (
                    comp.name, comp.dataset)
    if "refresh" in args:
        from CMGTools.Production.cacheChecker import CacheChecker
        checker = CacheChecker()
        for d in selsamples:
            if "--suspicious" in args:
                if len(d.files) > 0:
                    if "/store/mc " in d.files[0] or "/store/data" in d.files[
                            0]:
                        if getattr(d, 'dataset_entries', -1) > 0:
                            continue
                    else:
                        continue
            print "Checking ", d.name, " aka ", d.dataset
            if "--pretend" in args: continue
            checker.checkComp(d, verbose=True)
    if "check_versions" in args:
        from CMGTools.Production.datasetVersionChecker import DatasetVersionChecker
        checker = DatasetVersionChecker()
        for d in selsamples:
            if "--pretend" in args:
                print "Would check ", d.name, " aka ", d.dataset
            else:
                print "Checking", d.name, " ",
                checker.checkComp(d, verbose=True)
    if "list" in args or "summary" in args:
        from CMGTools.RootTools.samples.configTools import printSummary, mergeExtensions
        if "--merge-extensions" in args or "--mex" in args:
            selsamples = mergeExtensions(selsamples, verbose=True)[0]
        printSummary(selsamples)
    if "genXSecAna" in args:
        import subprocess, re
        if "--fetch" in args or not os.path.exists(
                "%s/src/genXSecAna.py" % os.environ['CMSSW_BASE']):
            print "Retrieving genXSecAna.py"
            os.system(
                "wget -O " + os.environ['CMSSW_BASE'] +
                "/src/genXSecAna.py  https://raw.githubusercontent.com/syuvivida/generator/master/cross_section/runJob/ana.py"
            )
        for d in selsamples:
            if not hasattr(d, 'xSection'):
                print "Skipping %s which has no cross section" % d.name
                continue
            if "--pretend" in args:
                print "Would check ", d.name, " aka ", d.dataset
                continue
            if "--AAA" in args:
                from CMGTools.Production.changeComponentAccessMode import convertComponent
                convertComponent(d, "root://cms-xrd-global.cern.ch/%s")
            else:  # use LFNs
                d.files = [
                    re.sub(".*(/store/.*)(?:\\?.*)?", "\\1", f)
                    for f in d.files[:]
                ]
            print "Sample %s: XS(sample file) = %g pb, ... " % (d.name,
                                                                d.xSection),
            if len(d.files) == 0:
                print "\n\033[01;31m ERROR: no files in sample, so cannot run the analyzer \033[00m"
                continue
            if "--verbose" in args:
                print "\n ".join([
                    "cmsRun", os.environ['CMSSW_BASE'] + "/src/genXSecAna.py",
                    "inputFiles=%s" % d.files[0], "maxEvents=-1"
                ])
            xsecAnaOut = subprocess.check_output([
                "cmsRun", os.environ['CMSSW_BASE'] + "/src/genXSecAna.py",
                "inputFiles=%s" % d.files[0], "maxEvents=-1"
            ],
                                                 stderr=subprocess.STDOUT)
            if "--verbose" in args:
                for l in xsecAnaOut.split("\n"):
                    print "\t>> " + l
            m = re.search(
                r"After filter: final cross section = (\S+) \+- (\S+) pb",
                xsecAnaOut)
            if m and float(m.group(1)) == 0:
                m = re.search(
                    r"Before matching: total cross section = (\S+) \+- (\S+) pb",
                    xsecAnaOut)
                m1 = re.search(
                    r"After matching: total cross section = (\S+) \+- (\S+) pb",
                    xsecAnaOut)
                if m1 and m and float(m1.group(1)) < 0 and float(
                        m.group(1)) > 0 and abs(
                            float(m1.group(1)) / float(m.group(1)) + 1) < 1e-2:
                    print "\033[01;33m [after filter Xsec is zero, using before filter one] \033[00m"
                else:
                    m = None
            if not m or float(m.group(1)) <= 0:
                print "\n\033[01;31m ERROR: could not find After filter cross section in the output, or it's zero. \033[00m"
                continue
            xs, xserr = float(m.group(1)), float(m.group(2))
            kfactor = d.xSection / xs
            if abs(xs - d.xSection) < min(3 * xserr, 1e-2 * xs):
                (col, stat) = '\033[01;36m', "OK"
            elif 0.8 < kfactor and kfactor < 1.4:
                (col, stat) = '\033[01;36m', "OK?"
            elif 0.5 < kfactor and kfactor < 2.0:
                (col, stat) = '\033[01;33m', "WARNING"
            else:
                (col, stat) = '\033[01;31m', "ERROR"
            m = re.search(
                r"After filter: final fraction of events with negative weights = (\S+) \+- (\S+)",
                xsecAnaOut)
            if m:
                fnegv, fnegerr = (float(m.group(1)), float(m.group(2)))
                fneg = "  f(negw): %.3f +- %.3f " % (fnegv, fnegerr)
                if getattr(d, 'fracNegWeights', None) != None:
                    if abs(d.fracNegWeights - fnegv) < 0.02:
                        fneg += "(%.3f in sample file, \033[01;36mOK\033[00m)" % d.fracNegWeights
                    else:
                        fneg += "(%.3f in sample file, \033[01;33mWARNING\033[00m)" % d.fracNegWeights
            else:
                fneg = ""
            print "XS(genAnalyzer) = %g +/- %g pb : %s kFactor = %g %s\033[00m%s" % (
                xs, xserr, col, kfactor, stat, fneg)
    if "checkdecl" in args:
        if localobjs == None:
            raise RuntimeError(
                "you have to runMain(samples,localobjs=locals())")
        import PhysicsTools.HeppyCore.framework.config as cfg
        ok = 0
        for name, obj in localobjs.iteritems():
            if name == "comp": continue  # local variable used in loops
            if isinstance(obj, cfg.Component):
                if obj not in samples:
                    print "\tERROR: component %s is not added to the samples list " % name
                elif obj.name != name:
                    print "\tERROR: component %s has inconsistent name %s " % (
                        name, obj.name)
                else:
                    ok += 1
        print "\tINFO: %d correctly declared components" % ok
Exemple #2
0
    else:
        configureSplittingFromTime(
            filter(lambda x: 'Double' in x.name or 'MuonEG' in x.name,
                   selectedComponents), 50, 5)
        configureSplittingFromTime(
            filter(lambda x: 'Single' in x.name, selectedComponents), 30, 5)

#printSummary(selectedComponents)

# Component selection and job splitting applied only when runFRMC is set.
if runFRMC:
    QCD_Mu5 = [
        QCD_Pt20to30_Mu5, QCD_Pt30to50_Mu5, QCD_Pt50to80_Mu5,
        QCD_Pt80to120_Mu5, QCD_Pt120to170_Mu5, QCD_Pt170to300_Mu5
    ]
    autoAAA(QCDPtEMEnriched + QCDPtbcToE)
    # Only components whose name does not contain "toInf" are passed to
    # mergeExtensions; the second element of its return value is discarded.
    QCDEm, _ = mergeExtensions(
        [q for q in QCDPtEMEnriched + QCDPtbcToE if "toInf" not in q.name])
    # NOTE(review): selectedComponents is assigned three times in a row; only
    # the last assignment is in effect when cropToLumi is called below. The
    # earlier ones look like alternatives left in place — confirm.
    selectedComponents = [QCD_Mu15] + QCD_Mu5 + [
        WJetsToLNu_LO, DYJetsToLL_M10to50_LO, DYJetsToLL_M50_LO_ext
    ] + QCDEm
    selectedComponents = [
        TTJets_DiLepton
    ]  #TTJets_SingleLeptonFromT,TTJets_SingleLeptonFromTbar]
    selectedComponents = [TBar_tWch_noFullyHad, T_tWch_noFullyHad]
    # Set on TTJets_DiLepton even though it is not in the final selection above.
    TTJets_DiLepton.fineSplitFactor = 2
    #selectedComponents = TT_pow
    cropToLumi(selectedComponents, 1.0)
    time = 5.0
    extra = dict(maxFiles=10)
    # These components are configured regardless of whether they are part of
    # the final selectedComponents list.
    configureSplittingFromTime([WJetsToLNu_LO], 20, time, **extra)
    configureSplittingFromTime([DYJetsToLL_M10to50_LO], 10, time, **extra)
    configureSplittingFromTime([DYJetsToLL_M50_LO_ext], 40, time, **extra)
        comp.vetoTriggers = vetoTriggers[:]
        dataSamples.append(comp)
    vetoTriggers += triggers[:]

# Default selection: all MC followed by all data components.
selectedComponents = mcSamples + dataSamples
# The 'selectComponents' option narrows the selection: 'MC' keeps only
# mcSamples, 'DATA' keeps only dataSamples, and any other value is split on
# commas and passed to byCompName together with the current selection.
if getHeppyOption('selectComponents'):
    if getHeppyOption('selectComponents')=='MC':
        selectedComponents = mcSamples
    elif getHeppyOption('selectComponents')=='DATA':
        selectedComponents = dataSamples
    else:
        selectedComponents = byCompName(selectedComponents, getHeppyOption('selectComponents').split(","))
autoAAA(selectedComponents, quiet=not(getHeppyOption("verboseAAA",False)), redirectorAAA="xrootd-cms.infn.it") # must be done before mergeExtensions
# Splitting is configured on the full mcSamples / dataSamples lists (not on the
# narrowed selection); the first numeric argument depends on `preprocessor`.
configureSplittingFromTime(mcSamples,250 if preprocessor else 10,10)
configureSplittingFromTime(dataSamples,80 if preprocessor else 10,10)
# Only the first element returned by mergeExtensions is kept.
selectedComponents, _ = mergeExtensions(selectedComponents, verbose=True)

# create and set preprocessor if requested
# NOTE(review): this if/else looks like two different snippets joined together
# (preprocessor set-up followed by per-year sample imports) — confirm against
# the original file before relying on it.
if getHeppyOption("nanoPreProcessor"):
    from CMGTools.Production.nanoAODPreprocessor import nanoAODPreprocessor
    # Per-year config name stems, inserted into ".../test/<stem>_NANO.py" below:
    # index 0 is used for MC, index 1 (and 2, for 2018 only) for data.
    preproc_cfg = {2016: ("mc94X2016","data94X2016"),
                   2017: ("mc94Xv2","data94Xv2"),
                   2018: ("mc102X","data102X_ABC","data102X_D")}
    preproc_cmsswArea = "/afs/cern.ch/work/v/vtavolar/SusySOSSW_2_clean/nanoAOD/clean/CMSSW_10_2_16_UL"#/afs/cern.ch/user/v/vtavolar/work/SusySOSSW_2_clean/nanoAOD/CMSSW_10_2_15" #MODIFY ACCORDINGLY
    preproc_mc = nanoAODPreprocessor(cfg='%s/src/PhysicsTools/NanoAOD/test/%s_NANO.py'%(preproc_cmsswArea,preproc_cfg[year][0]),cmsswArea=preproc_cmsswArea,keepOutput=True)
    if year==2018:
        preproc_data_ABC = nanoAODPreprocessor(cfg='%s/src/PhysicsTools/NanoAOD/test/%s_NANO.py'%(preproc_cmsswArea,preproc_cfg[year][1]),cmsswArea=preproc_cmsswArea,keepOutput=True, injectTriggerFilter=True, injectJSON=True)
        preproc_data_D = nanoAODPreprocessor(cfg='%s/src/PhysicsTools/NanoAOD/test/%s_NANO.py'%(preproc_cmsswArea,preproc_cfg[year][2]),cmsswArea=preproc_cmsswArea,keepOutput=True, injectTriggerFilter=True, injectJSON=True)
        # Data components with '2018D' in their name get the "_D" preprocessor,
        # all other data components the "_ABC" one; non-data components are
        # not touched by this loop.
        for comp in selectedComponents:
            if comp.isData:
                comp.preprocessor = preproc_data_D if '2018D' in comp.name else preproc_data_ABC
        # NOTE(review): this import sits in the year==2018 branch but pulls
        # from the DATA2017 module — confirm this is intended.
        from CMGTools.RootTools.samples.samples_13TeV_DATA2017 import dataSamples_31Mar2018 as allData
    elif year == 2016:
        from CMGTools.RootTools.samples.samples_13TeV_RunIISummer16MiniAODv3 import samples as mcSamples_
        from CMGTools.RootTools.samples.samples_13TeV_DATA2016 import dataSamples_17Jul2018 as allData
    # NOTE(review): within this branch, year 2017 imports neither mcSamples_
    # nor allData, and year 2018 does not import mcSamples_; unless they are
    # defined elsewhere, later uses of those names would fail — confirm.
else:
    # No preprocessor: import the per-year sample lists from the NanoAOD modules.
    if year == 2018:
        from CMGTools.RootTools.samples.samples_13TeV_RunIIAutumn18NanoAODv4 import samples as mcSamples_
        from CMGTools.RootTools.samples.samples_13TeV_DATA2018_NanoAOD import dataSamples_1June2019 as allData
    elif year == 2017:
        from CMGTools.RootTools.samples.samples_13TeV_RunIIFall17NanoAODv4 import samples as mcSamples_
        from CMGTools.RootTools.samples.samples_13TeV_DATA2017_NanoAOD import dataSamples_1June2019 as allData
    elif year == 2016:
        from CMGTools.RootTools.samples.samples_13TeV_RunIISummer16NanoAODv4 import samples as mcSamples_
        from CMGTools.RootTools.samples.samples_13TeV_DATA2016_NanoAOD import dataSamples_1June2019 as allData
autoAAA(mcSamples_+allData, quiet=not(getHeppyOption("verboseAAA",False))) # must be done before mergeExtensions
# Only the first element returned by mergeExtensions is kept.
mcSamples_, _ = mergeExtensions(mcSamples_)

# Triggers
# The year-specific `all_triggers` object is imported under the common name
# `triggers`. For 2017 and 2016 one key is (re)set to an empty list after the
# import; no branch handles any other value of `year`.
if year == 2018:
    from CMGTools.RootTools.samples.triggers_13TeV_DATA2018 import all_triggers as triggers
elif year == 2017:
    from CMGTools.RootTools.samples.triggers_13TeV_DATA2017 import all_triggers as triggers
    triggers["FR_1mu_iso"] = [] # they probably existed but we didn't use them in 2017
elif year == 2016:
    from CMGTools.RootTools.samples.triggers_13TeV_DATA2016 import all_triggers as triggers
    triggers["FR_1mu_noiso_smpd"] = [] 


DatasetsAndTriggers = []
if analysis == "main":
    mcSamples = byCompName(mcSamples_, [
Exemple #5
0
# For every (pd, triggers) entry, configure the matching components of allData
# and append them to dataSamples. Note that this module-level loop rebinds the
# names `pd`, `triggers` and `comp`.
for pd, triggers in DatasetsAndTriggers:
    for comp in byCompName(allData, [pd]):
        comp.triggers = triggers[:]
        # Copy of the triggers accumulated from the entries processed before
        # this one (presumably to avoid selecting the same event from two
        # datasets — confirm).
        comp.vetoTriggers = vetoTriggers[:]
        dataSamples.append(comp)
    # Extended only after the components of this entry have been configured,
    # so an entry never vetoes its own triggers.
    vetoTriggers += triggers[:]

selectedComponents = mcSamples + dataSamples
# The 'selectComponents' option, when set, is split on commas and used with
# byCompName to narrow the selection.
if getHeppyOption('selectComponents'):
    selectedComponents = byCompName(
        selectedComponents,
        getHeppyOption('selectComponents').split(","))
autoAAA(selectedComponents, quiet=not (getHeppyOption("verboseAAA", False)))
# Splitting is configured on the full mcSamples / dataSamples lists; the first
# numeric argument depends on `preprocessor`.
configureSplittingFromTime(mcSamples, 250 if preprocessor else 10, 10)
configureSplittingFromTime(dataSamples, 80 if preprocessor else 10, 10)
# Only the first element returned by mergeExtensions is kept.
selectedComponents, _ = mergeExtensions(selectedComponents)

# create and set preprocessor if requested
if getHeppyOption("nanoPreProcessor"):
    from CMGTools.Production.nanoAODPreprocessor import nanoAODPreprocessor
    # Per-year config name stems; index 0 is the one used for preproc_mc below.
    preproc_cfg = {
        2016: ("mc94X2016", "data94X2016"),
        2017: ("mc94Xv2", "data94Xv2"),
        2018: ("mc102X", "data102X_ABC", "data102X_D")
    }
    # Hard-coded, user-specific area: it is used both to build the cfg path
    # and as the cmsswArea argument.
    preproc_cmsswArea = "/afs/cern.ch/user/v/vtavolar/work/SusySOSSW_2_clean/nanoAOD/CMSSW_10_2_15"  #MODIFY ACCORDINGLY
    preproc_mc = nanoAODPreprocessor(
        cfg='%s/src/PhysicsTools/NanoAOD/test/%s_NANO.py' %
        (preproc_cmsswArea, preproc_cfg[year][0]),
        cmsswArea=preproc_cmsswArea,
        keepOutput=True)
]  # X+G # MISSING: WGToLNuG_amcatnlo_ext,WGToLNuG_amcatnlo_ext2,ZGTo2LG_ext
# Named groups of components; they are combined below into a "slow" and a
# "fast" list, which are given different splitting settings.
v_jets = [WJetsToLNu_LO, DYJetsToLL_M10to50_LO, DYJetsToLL_M50_LO,
          WWTo2L2Nu]  # V+jets
v_jets_more = [W1JetsToLNu_LO, W2JetsToLNu_LO, W3JetsToLNu_LO, W4JetsToLNu_LO
               ] + DYNJetsToLL + DYJetsToLLM4to50HT
tt_1l = [TTSemi_pow, TTJets_SingleLeptonFromT,
         TTJets_SingleLeptonFromTbar]  # TT 1l
tt_2l = [TTLep_pow, TTJets_DiLepton]  # TT 2l
boson = [WZTo3LNu_fxfx] + TriBosons  # multi-boson # MISSING: WZTo3LNu_pow

samples_slow = sig_ttv + ttv_lo + rares + convs + boson + tt_2l
samples_fast = single_t + v_jets + tt_1l + v_jets_more

# Optional merge of extensions; any value of the option other than "quiet"
# turns on verbose output. Only the first element of the result is kept.
if getHeppyOption("mergeExtensions"):
    verbose = (getHeppyOption("mergeExtensions") != "quiet")
    samples_slow = mergeExtensions(samples_slow, verbose=verbose)[0]
    samples_fast = mergeExtensions(samples_fast, verbose=verbose)[0]

# NOTE(review): cropToLumi is called on the original `rares` / `v_jets_more`
# lists after the optional merge above; whether this still affects the
# components in the merged samples_slow / samples_fast lists depends on what
# mergeExtensions returns — confirm.
cropToLumi(rares, 500)
cropToLumi(v_jets_more, 50)
configureSplittingFromTime(samples_fast, 50, 3)
configureSplittingFromTime(samples_slow, 100, 3)

selectedComponents = samples_slow + samples_fast

if scaleProdToLumi > 0:  # select only a subset of a sample, corresponding to a given luminosity (assuming ~30k events per MiniAOD file, which is ok for central production)
    target_lumi = scaleProdToLumi  # in inverse picobarns
    for c in selectedComponents:
        if not c.isMC: continue
        nfiles = int(min(ceil(target_lumi * c.xSection / 30e3), len(c.files)))
        #if nfiles < 50: nfiles = min(4*nfiles, len(c.files))