Example #1
def doit(*x):
    print x
    ll_fn, lumi_fn, check_intlumi_sum, goal, out_fn = x
    check_intlumi_sum *= 1e9 # csv in /ub

    random.seed(8675309)
    
    goal *= check_intlumi_sum

    in_ll = LumiList(ll_fn).getLumis()
    
    intlumis, intlumi_sum = intlumi_from_brilcalc_csv(lumi_fn, False)
    assert abs(intlumi_sum - check_intlumi_sum) < 1e6
       
    tot = 0.
    out_ll = []
    
    while tot < goal:
        i = random.randrange(len(in_ll))
        rl = in_ll.pop(i)
        #if not intlumis.has_key(rl):
        #    continue
        tot += intlumis[rl]
        out_ll.append(rl)
    
    print 'tot = %f, picked %i lumis' % (tot, len(out_ll))
    LumiList(lumis=out_ll).writeJSON(out_fn)
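
The function above chains the three LumiList entry points most of these examples lean on: reading a JSON mask from disk, flattening it to (run, lumi) pairs with getLumis(), and rebuilding a mask from pairs with LumiList(lumis=...). A minimal sketch of that round trip, with hypothetical file names:

from FWCore.PythonUtilities.LumiList import LumiList

ll = LumiList('Cert_example.json')       # read a certification JSON
pairs = ll.getLumis()                    # flat [[run, lumi], ...] pairs
subset = LumiList(lumis=pairs[:100])     # rebuild a mask from explicit pairs
subset.writeJSON('subset.json')          # write it back out as JSON
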
 def getStrangeRuns(self):
   hltInfoByLs = self.analysisOutput()
   strangeRunsLumis = LumiList( lumis = [[int(hltInf[0]),int(hltInf[1])] for hltInf in hltInfoByLs if int(hltInf[2][1]) < 1] )
   if self.jsonOutput:
     strangeRunsLumis.writeJSON(self.jsonOutput+"_strange")
   else:
     return strangeRunsLumis
Example #3
    def __init__(self, name, nanotrees, weight, triggers, jess, jers, mc,
                 injfile, outjfile):
        self.jess = jess
        self.jers = jers
        self.mc = mc
        self.weight = weight
        self.__book__(name)

        # get json file
        myList = LumiList(filename=injfile)
        # initialize output lumilist
        myrunlumi = []
        # open root files
        files = open(nanotrees, "r")
        #		for file in glob.glob(nanotrees + "*root"):
        for file in files.read().splitlines():
            self.Fill(file, triggers, myList, myrunlumi)

        # only do this for data
        if not (self.mc):
            outList = LumiList(lumis=myrunlumi)
            outList.writeJSON(outjfile)

        print "got to the end"
        files.close()
        self.O.cd()
        self.O.Write()
        self.O.Close()
Example #4
def combine_grls(grl1,grl2):
    lumis1 = LumiList(compactList=grl1)
    lumis2 = LumiList(compactList=grl2)
    
    new_lumis = lumis1 & lumis2
#    print new_lumis.compactList
    return new_lumis.compactList
 def createDataDatasets(self):
   self.dataDatasets = {}
   for d in self.datasets:
     dsLumiList = None
     if not os.path.isfile(d['json']):
       oldSsArgv = sys.argv; sys.argv=[] # sys argv fix
       dasC = dasTools.myDasClient();dasC.limit=0
       dsLumiList = dasC.getJsonOfDataset(d["dataset"])
       dsLumiList.writeJSON(d['json'])
       sys.argv = oldSsArgv
     else:
       dsLumiList = LumiList(compactList=json.load(open(d['json'])))
     dsRuns = dsLumiList.getRuns()
     self.dataDatasets[d['label']] = ('{ \n '
       '\t"xSec":None\n'
       '\t,"localFile":None\n'
       '\t,"datasetName":"'+d["dataset"]+'"\n'
       '\t,"label":"Data_'+d['label']+'"\n'
       '\t,"datasetJSON":"'+d['json']+'"\n'
       '\t,"crabConfig":{\n'
         '\t\t"CMSSW":{"lumis_per_job":5\n'
           '\t\t\t,"lumi_mask": os.getenv("CMSSW_BASE") + '+'"/'+d['goldenJson'].lstrip('/')+'"\n'
           '\t\t\t,"total_number_of_lumis" : -1}\n'
         '\t\t}\n'
       '\t,"color":0\n'
       '\t,"runRange":"'+str(dsRuns[0])+"-"+str(dsRuns[-1])+'"\n'
     '\t}\n');
Example #6
def mk_secondary_lumimask(dset):

    dq = das_query("file dataset=%s instance=prod/phys03" % dset,
                   cmd='dasgoclient --dasmaps=./')
    assert 'data' in dq.keys()
    fs = [str(f['file'][0]['name']) for f in dq['data']]
    #fs = fs[:2]
    print('N files:', len(fs))

    lumis = []
    dqs = [
        das_query("lumi file=%s instance=prod/phys03" % f,
                  cmd='dasgoclient --dasmaps=./') for f in fs
    ]
    for dq in dqs:
        for data in dq['data']:
            for lumi in data['lumi'][0]['lumi_section_num']:
                lumis.append([data['lumi'][0]['run_number'], lumi])

    jsonList = LumiList(lumis=lumis)
    #print(jsonList)
    output_file = dset.split('/')[2].split('-')[1].split('_')[0]
    #print(output_file)
    jsonList.writeJSON(output_dir + output_file +
                       '_3photons_imgskim_lumi_list.json')
Example #7
def getLumiList(lumi_mask_name, logger=None):
    """
    Takes a lumi-mask and returns a LumiList object.
    lumi-mask: either an http address or a json file on disk.
    """
    lumi_list = None
    parts = urlparse(lumi_mask_name)
    if parts[0] in ['http', 'https']:
        if logger:
            logger.debug('Downloading lumi-mask from %s' % lumi_mask_name)
        try:
            lumi_list = LumiList(url=lumi_mask_name)
        except Exception as err:
            raise ConfigurationException(
                "CMSSW failed to get lumimask from URL. Please try to download the lumimask yourself and point to it in crabConfig;\n%s"
                % str(err))
    else:
        if logger:
            logger.debug('Reading lumi-mask from %s' % lumi_mask_name)
        try:
            lumi_list = LumiList(filename=lumi_mask_name)
        except IOError as err:
            raise ConfigurationException("Problem loading lumi-mask file; %s" %
                                         str(err))

    return lumi_list
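
A quick sketch of both call modes of getLumiList (paths and URL are hypothetical); the URL form downloads the mask, the plain form reads it from disk:

import logging

mask = getLumiList('Cert_Collisions_JSON.txt')                 # local file
mask = getLumiList('https://example.cern.ch/cert/Cert.json',   # downloaded
                   logger=logging.getLogger('crab'))
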
 def getGoodRuns(self):
   hltInfoByLs = self.analysisOutput()
   goodRunsAndLumis = LumiList(lumis = [[int(hltInf[0]),int(hltInf[1])] for hltInf in hltInfoByLs if int(hltInf[2][1]) == 1] )
   if self.jsonOutput:
     goodRunsAndLumis.writeJSON(self.jsonOutput+"_good")
     return self.jsonOutput+"_good"
   else:
     return goodRunsAndLumis
 def getPrescaledRuns(self):
   hltInfoByLs = self.analysisOutput()
   prescaledRunsAndLumis = LumiList( lumis = [[int(hltInf[0]),int(hltInf[1])] for hltInf in hltInfoByLs if int(hltInf[2][1]) >= 1] )
   if self.jsonOutput:
     prescaledRunsAndLumis.writeJSON(self.jsonOutput+"_prescaled")
     return self.jsonOutput+"_prescaled"
   else:
     return prescaledRunsAndLumis
Example #10
    def getDatasetLumiList(self, name, catalog):
        from FWCore.PythonUtilities.LumiList import LumiList

        dlist = LumiList()
        for fil in catalog[name]["files"]:
            flist = LumiList(runsAndLumis=fil.get("lumis", {}))
            dlist += flist

        return dlist
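
getDatasetLumiList accumulates per-file lumi dictionaries with the += operator. A self-contained sketch with an invented two-file catalog (the layout mirrors what the loop above expects):

from FWCore.PythonUtilities.LumiList import LumiList

catalog = {"MyDataset": {"files": [
    {"lumis": {"273158": [1, 2, 3]}},
    {"lumis": {"273158": [4, 5], "273302": [10, 11]}},
]}}
total = LumiList()
for fil in catalog["MyDataset"]["files"]:
    total += LumiList(runsAndLumis=fil.get("lumis", {}))
# consecutive lumis are compacted into ranges,
# e.g. {'273158': [[1, 5]], '273302': [[10, 11]]}
print total.getCompactList()
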
Example #11
def files_for_json(json_fn, dataset, instance='global'):
    json = LumiList(json_fn)
    files = set()
    for file, run_lumis in file_details_run_lumis(dataset,
                                                  instance).iteritems():
        ll = LumiList(runsAndLumis=run_lumis)
        if json & ll:
            files.add(file)
    return sorted(files)
Example #13
def getRuns(name=None, bfield=None, bunchSpacing=None):
    ll = LumiList()
    for rp in runPeriods:
        if name is None or rp.name == name:
            if bfield is None or rp.bfield == bfield:
                if bunchSpacing is None or rp.bunchSpacing == bunchSpacing:
                    ll += LumiListForRunPeriod(rp)
    return ll.getRuns()
Example #14
def createDataPileUpFile(jsonFile, pileUpReweightingType):
    jsonList = LumiList(jsonFile)
    lumiString = jsonList.getCMSSWString()
    thisHash = hashlib.md5(lumiString)
    #print 'Require pileup file with hash ', thisHash.hexdigest()
    dataPileUpFilename = '/data/DataPileUp_'+thisHash.hexdigest()+'_'+pileUpReweightingType+'.root'
    if not os.path.exists(baseDir+dataPileUpFilename):
        print 'Creating new "', pileUpReweightingType, '" data pileup file for json file "',jsonFile,'"'
        subprocess.call(['pileupCalc.py', '-i', jsonFile, '--inputLumiJSON', '/afs/cern.ch/cms/CAF/CMSCOMM/COMM_DQM/certification/Collisions11/7TeV/PileUp/pileup_JSON_2011_4_2_validation.txt', '--calcMode', pileUpReweightingType, '--maxPileupBin', '50', baseDir+dataPileUpFilename], shell=False)
    return dataPileUpFilename
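
pileupCalc.py's --calcMode accepts 'true' or 'observed', so a call might look like the following (the certification JSON name is hypothetical, and baseDir must be defined by the surrounding module):

dataFile = createDataPileUpFile('Cert_160404-180252_7TeV_JSON.txt', 'true')
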
Example #15
 def mergeLumis(inputdata, lumimask):
     """
     Computes the processed lumis, merges if needed and returns the compacted list.
     """
     mergedlumis = LumiList()
     doublelumis = LumiList()
     for report in inputdata:
         doublelumis = mergedlumis & LumiList(runsAndLumis=report)
         mergedlumis = mergedlumis | LumiList(runsAndLumis=report)
     return mergedlumis.getCompactList(), (LumiList(compactList=lumimask) - mergedlumis).getCompactList(), doublelumis.getCompactList()
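
The report dicts fed to mergeLumis are in the same runsAndLumis form the LumiList constructor takes. A sketch with invented job reports, treating the excerpt as a free function:

inputdata = [
    {"1": [1, 2, 3]},          # job 1: run 1, lumis 1-3
    {"1": [3, 4], "2": [7]},   # job 2: overlaps run 1, lumi 3
]
lumimask = {"1": [[1, 10]], "2": [[1, 10]]}
merged, missing, doubles = mergeLumis(inputdata, lumimask)
# merged  -> everything processed at least once
# missing -> lumimask minus merged
# doubles -> run 1, lumi 3 (processed twice)
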
Example #16
    def __init__(self, cfg_ana, cfg_comp, looperName):
        super(JSONAnalyzer, self).__init__(cfg_ana, cfg_comp, looperName)
        if not cfg_comp.isMC:
            if self.cfg_comp.json is None:
                raise ValueError('component {cname} is not MC, and contains no JSON file. Either remove the JSONAnalyzer for your path or set the "json" attribute of this component'.format(cname=cfg_comp.name))
            self.lumiList = LumiList(os.path.expandvars(self.cfg_comp.json))
        else:
            self.lumiList = None
        

        self.rltInfo = RLTInfo()
Example #18
def crab_fjr_json_to_ll(fn):
    print colors.yellow('this is not fully tested')
    j = crab_fjr_json(fn)
    ll = LumiList()
    for x in j['steps']['cmsRun']['input']['source']:
        x2 = defaultdict(list)
        for k, v in x['runs'].iteritems():
            for l in v.keys():
                x2[int(k)].append(int(l))
        ll += LumiList(runsAndLumis=x2)
    return ll
def load_golden_jsons(golden_json_path):
  if not os.path.isfile(golden_json_path):
    raise RuntimeError("No such file: %s" % golden_json_path)
  lumi_obj = LumiList(golden_json_path)
  lumis = lumi_obj.getLumis()
  runlumi_dict = {}
  for run, lumi in lumis:
    if run not in runlumi_dict:
      runlumi_dict[run] = []
    assert(lumi not in runlumi_dict[run])
    runlumi_dict[run].append(lumi)
  return runlumi_dict
Example #20
 def mergeLumis(inputdata, lumimask):
     """
     Computes the processed lumis, merges if needed and returns the compacted list.
     """
     mergedlumis = LumiList()
     doublelumis = LumiList()
     for report in inputdata:
         doublelumis = mergedlumis & LumiList(runsAndLumis=report)
         mergedlumis = mergedlumis | LumiList(runsAndLumis=report)
         if doublelumis:
             self.logger.info("Warning: double run-lumis processed %s" % doublelumis)
     return mergedlumis.getCompactList(), (LumiList(compactList=lumimask) - mergedlumis).getCompactList()
Example #21
    def getInputRunLumi(self, file):
        import xml.dom.minidom

        dom = xml.dom.minidom.parse(file)
        ll=[]

        for elem in dom.getElementsByTagName("Job"):
            nJob = int(elem.getAttribute("JobID"))
            lumis = elem.getAttribute('Lumis')
            #lumis = '193752:1'
            #lumis = '193752:1-193752:5,193774:1-193774:5,193775:1'
            if lumis:
                tmp=str.split(str(lumis), ",")
                #print "tmp = ", tmp
            else:
                msg = "The summary file inputLumiSummaryOfTask.json about input run and lumi isn't created"
                common.logger.info(msg)
                return
                

            #tmp = [193752:1-193752:5] [193774:1-193774:5]
            for entry in tmp:
                run_lumi=str.split(entry, "-")
                # run_lumi = [193752:1] [193752:5] 
                if len(run_lumi) == 0: pass
                if len(run_lumi) == 1:
                    lumi = str.split(run_lumi[0],":")[1]
                    run = str.split(run_lumi[0],":")[0]
                    ll.append((run,int(lumi)))
    
                if len(run_lumi) == 2:
                    lumi_max = str.split(run_lumi[1],":")[1]
                    lumi_min = str.split(run_lumi[0],":")[1]
                    run = str.split(run_lumi[1],":")[0]
                    for count in range(int(lumi_min),int(lumi_max) + 1): 
                        ll.append((run,count))
        if len(ll):
            lumiList = LumiList(lumis = ll)
            compactList = lumiList.getCompactList()

            totalLumiFilename = self.fjrDirectory + 'inputLumiSummaryOfTask.json'
            totalLumiSummary = open(totalLumiFilename, 'w')
            json.dump(compactList, totalLumiSummary)
            totalLumiSummary.write('\n')
            totalLumiSummary.close()
            msg = "Summary file of input run and lumi to be analize with this task: %s\n" %totalLumiFilename
            common.logger.info(msg)
        else:    
            msg = "The summary file inputLumiSummaryOfTask.json about input run and lumi isn't created"
            common.logger.info(msg)
        return totalLumiFilename 
Example #23
def makeLumiBlocks(in_lumi_file, outdir, chunksize=5):
    ll = LumiList(filename=in_lumi_file)
    lumis = ll.getLumis()
    nblock = 0

    blocks = []
    for lumiblock in chunks(lumis, chunksize):
        nblock += 1
        ll2 = LumiList(lumis=lumiblock)
        fn = outdir + "/block_{0}.json".format(nblock)
        of = open(fn, "w")
        of.write(str(ll2))
        of.close()
        blocks += [fn]
    return blocks
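
makeLumiBlocks relies on a chunks helper that is not shown here; a typical definition, plus a call with hypothetical paths:

def chunks(lst, n):
    # yield successive n-sized slices of lst
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

block_files = makeLumiBlocks('golden.json', '/tmp/blocks', chunksize=10)
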
def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

    args = parse_command_line(argv)

    loglevel = getattr(logging,args.log)
    logging.basicConfig(format='%(asctime)s.%(msecs)03d %(levelname)s %(name)s: %(message)s', level=loglevel, datefmt='%Y-%m-%d %H:%M:%S', stream=sys.stderr)

    allfiles = []
    for f in args.inputFiles:
        for fname in glob.glob(f):
            allfiles += [fname]

    logging.info('Adding {0} files to tchain'.format(len(allfiles)))
    tchain = ROOT.TChain(args.treeName)
    for fname in allfiles:
        tchain.Add(fname)

    nlumis = tchain.GetEntries()
    logging.info('Processing {0} lumis'.format(nlumis))
    allLumis = {}
    total = 0
    for row in tchain:
        total += 1
        if row.run not in allLumis: allLumis[row.run] = set()
        allLumis[row.run].add(row.lumi)

    lumiJson = LumiList(runsAndLumis = allLumis)
    #lumiJson.writeJSON(args.outputFile)
    print lumiJson
Example #25
 def mergeLumis(inputdata):
     """
     Computes the processed lumis, merges if needed and returns the compacted list.
     """
     mergedLumis = set()
     #merge the lumis from single files
     for reports in inputdata.values():
         for report in reports:
             for run, lumis in literal_eval(report['runlumi']).items():
                 if isinstance(run, bytes):
                     run = run.decode(encoding='UTF-8')
                 for lumi in lumis:
                     mergedLumis.add(
                         (run, int(lumi)))  #lumi is str, but need int
     mergedLumis = LumiList(lumis=mergedLumis)
     return mergedLumis.getCompactList()
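
Note that report['runlumi'] is expected to be a string holding a dict literal, hence the literal_eval. A minimal sketch of the input shape (values invented, excerpt treated as a free function):

inputdata = {'job1': [{'runlumi': "{'273158': ['1', '2']}"}]}
print mergeLumis(inputdata)   # e.g. {'273158': [[1, 2]]}
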
Example #26
class applyJSON(Module):
    def __init__(self, json_file):
        if json_file:
            self.lumiList = LumiList(os.path.expandvars(json_file))
        else:
            self.lumiList = None

    def beginJob(self):
        pass

    def endJob(self):
        pass

    def beginFile(self, inputFile, outputFile, inputTree, wrappedOutputTree):
        self.out = wrappedOutputTree
        self.out.branch("jsonPassed", "I")

    def endFile(self, inputFile, outputFile, inputTree, wrappedOutputTree):
        pass

    def analyze(self, event):
        """process event, return True (go to next module) or False (fail, go to next event)"""

        if self.lumiList:
            jsonPassed = self.lumiList.contains(event.run,
                                                event.luminosityBlock)
        else:
            jsonPassed = 1

        self.out.fillBranch("jsonPassed", jsonPassed)
        return True
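
A sketch of wiring applyJSON into the standard nanoAOD-tools post-processor (input file and JSON names are hypothetical):

from PhysicsTools.NanoAODTools.postprocessing.framework.postprocessor import PostProcessor

p = PostProcessor(".", ["nano.root"],
                  modules=[applyJSON("Cert_golden.json")])
p.run()
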
Example #27
def files_for_events(run_events, dataset, instance='global'):
    wanted_run_lumis = []
    for x in run_events: # list of runs, or list of (run, event), or list of (run, lumi, event)
        if type(x) == int:
            wanted_run_lumis.append((x, None))
        elif len(x) == 2:
            wanted_run_lumis.append((x[0], None))
        else:
            wanted_run_lumis.append(x[:2])

    files = set()
    for file, run_lumis in file_details_run_lumis(dataset, instance).iteritems():
        ll = LumiList(runsAndLumis=run_lumis)
        for x in wanted_run_lumis:
            if ll.contains(*x):
                files.add(file)
    return sorted(files)
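
The three element shapes files_for_events accepts, spelled out (dataset name hypothetical; assumes the DAS helper file_details_run_lumis used above is importable):

dataset = '/SingleMuon/Run2016B-v1/AOD'
files_for_events([273158], dataset)                    # whole run
files_for_events([(273158, 123456789)], dataset)       # (run, event)
files_for_events([(273158, 42, 123456789)], dataset)   # (run, lumi, event)
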
Example #28
    def getLumisToSkip(self,dataset):
        catalog = self.readCatalog(True)
        if not dataset in catalog:
            return None

        from FWCore.PythonUtilities.LumiList import LumiList
        
        return LumiList( compactList=catalog[dataset].get('lumisToSkip',{}) )
Example #29
    def __init__(self, cfg_ana, cfg_comp, looperName):
        super(JSONAnalyzer, self).__init__(cfg_ana, cfg_comp, looperName)
        if not cfg_comp.isMC:
            if self.cfg_comp.json is None:
                raise ValueError('component {cname} is not MC, and contains no JSON file. Either remove the JSONAnalyzer for your path or set the "json" attribute of this component'.format(cname=cfg_comp.name))
            self.lumiList = LumiList(os.path.expandvars(self.cfg_comp.json))
        else:
            self.lumiList = None
        
        if hasattr(self.cfg_comp, 'additionaljson'):
            self.additionalLumiList = LumiList(os.path.expandvars(self.cfg_comp.additionaljson))
            self.twojson = True
        else: 
            self.twojson = False
        
        self.useLumiBlocks = self.cfg_ana.useLumiBlocks if (hasattr(self.cfg_ana,'useLumiBlocks')) else False

        self.rltInfo = RLTInfo()
    def __init__(self, cfg_ana, cfg_comp, looperName):
        super(JSONAnalyzer, self).__init__(cfg_ana, cfg_comp, looperName)
        if not cfg_comp.isMC:
            self.lumiList = LumiList(self.cfg_comp.json)
        else:
            self.lumiList = None
        

        self.rltInfo = RLTInfo()
Example #32
 def getDuplicateLumis(lumisDict):
     """
     Get the run-lumis appearing more than once in the input
     dictionary of runs and lumis, which is assumed to have
     the following format:
         {
         '1': [1,2,3,4,6,7,8,9,10],
         '2': [1,4,5,20]
         }
     """
     doubleLumis = set()
     for run, lumis in lumisDict.items():
         seen = set()
         doubleLumis.update(
             set((run, lumi) for lumi in lumis
                 if (run, lumi) in seen or seen.add((run, lumi))))
     doubleLumis = LumiList(lumis=doubleLumis)
     return doubleLumis.getCompactList()
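
A worked call, treating the excerpt as a free function: only the lumi that appears twice survives into the output.

dup = getDuplicateLumis({'1': [1, 2, 2, 3], '2': [5]})
# e.g. {'1': [[2, 2]]}
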
Example #33
    def __init__(self, cfg_ana, cfg_comp, looperName):
        super(JSONAnalyzer, self).__init__(cfg_ana, cfg_comp, looperName)
        if not cfg_comp.isMC:
            if self.cfg_comp.json is None and hasattr(self.cfg_ana,
                                                      "json") == False:
                raise ValueError(
                    'component {cname} is not MC, and contains no JSON file. Either remove the JSONAnalyzer for your path or set the "json" attribute of this component'
                    .format(cname=cfg_comp.name))
            # use json from this analyzer if given, otherwise use the component json
            self.lumiList = LumiList(
                os.path.expandvars(
                    getattr(self.cfg_ana, "json", self.cfg_comp.json)))
        else:
            self.lumiList = None
        self.passAll = getattr(self.cfg_ana, 'passAll', False)
        self.useLumiBlocks = self.cfg_ana.useLumiBlocks if (hasattr(
            self.cfg_ana, 'useLumiBlocks')) else False

        self.rltInfo = RLTInfo()
Example #35
    def getLumiList(self, *args):

        catalog = self.readCatalog(True)
        datasets = []
        output = filter(lambda x: "output=" in x, args)
        args = filter(lambda x: not "output=" in x, args)
        for dataset in catalog.keys():
            for arg in args:
                if dataset == arg or fnmatch(dataset, arg):
                    datasets.append(dataset)
                    break
        if len(output) > 1:
            print "ERROR: you specified the output json more than once:\n"
            print "      %s" % " ".join(output)
            sys.exit(-1)

        if len(output) > 0:
            output = output[0].split("=", 1)[1]  # note: strip() would remove characters from a set, not the "output=" prefix
        else:
            output = None

        from FWCore.PythonUtilities.LumiList import LumiList
        fulist = LumiList()
        for dataset in datasets:
            dlist = LumiList()
            jsonout = dataset.lstrip("/").rstrip("/").replace("/",
                                                              "_") + ".json"
            for fil in catalog[dataset]["files"]:
                flist = LumiList(runsAndLumis=fil.get("lumis", {}))
                ## print flist
                dlist += flist
            if not output:
                with open(jsonout, "w+") as fout:
                    fout.write(json.dumps(dlist.compactList, sort_keys=True))
                    fout.close()
            else:
                fulist += dlist

        if output:
            with open(output, "w+") as fout:
                fout.write(json.dumps(fulist.compactList, sort_keys=True))
                fout.close()
Example #36
    def savejsons(self, processed):
        jsondir = os.path.join(self.__plotdir, 'jsons')
        if not os.path.exists(jsondir):
            os.makedirs(jsondir)

        res = {}
        for label in processed:
            jsondir = os.path.join('jsons', label)
            if not os.path.exists(os.path.join(self.__plotdir, jsondir)):
                os.makedirs(os.path.join(self.__plotdir, jsondir))
            lumis = LumiList(lumis=processed[label])
            lumis.writeJSON(os.path.join(self.__plotdir, jsondir, 'processed.json'))
            res[label] = [(os.path.join(jsondir, 'processed.json'), 'processed')]

            published = os.path.join(self.__workdir, label, 'published.json')
            if os.path.isfile(published):
                shutil.copy(published, os.path.join(self.__plotdir, jsondir))
                res[label] += [(os.path.join(jsondir, 'published.json'), 'published')]

        return res
Example #37
 def mergeDataset(self,dst,merge):
     dst["vetted"]=False
     
     from FWCore.PythonUtilities.LumiList import LumiList
     dstLumisToSkip = LumiList(compactList=dst.get('lumisToSkip',{}))
     mergeLumisToSkip = LumiList(compactList=merge.get('lumisToSkip',{}))
     dstLumisToSkip += mergeLumisToSkip
     dstLumisToSkip = dstLumisToSkip.compactList
     if len(dstLumisToSkip) > 0:
         dst['lumisToSkip'] = dstLumisToSkip
         print "\nWARNING: Merged lumisToSkip list. It is reccomended to run the 'overlap' command to re-geneate the list from scratch."
     
     dstFiles=dst["files"]
     mergeFiles=merge["files"]
     for fil in mergeFiles:
         skip = False
         for dfil in dstFiles:
             if dfil["name"] == fil["name"]:
                 skip = True
         if not skip:
             dstFiles.append( fil )
Example #38
    def getDatasetLumiList(self, name, catalog, check=False):
        from FWCore.PythonUtilities.LumiList import LumiList

        lumisToSkip = catalog[name].get("lumisToSkip", None)
        if lumisToSkip:
            print "Dataset %s has list of lumi sections to skip in catalog" % name
            lumisToSkip = LumiList(compactList=lumisToSkip)
        dlist = LumiList()
        for fil in catalog[name]["files"]:
            flist = LumiList(runsAndLumis=fil.get("lumis", {}))
            if lumisToSkip and not check:
                flist = flist.__sub__(lumisToSkip)
            if check:
                andlist = dlist.__and__(flist)
                ## print andlist,  fil.get("name")
                if len(andlist) != 0:
                    print "Warning: duplicate lumi sections in dataset. %s" % fil.get("name")
                    print andlist, flist
            dlist += flist

        return dlist
Example #39
def customise(process):

    lumiFile = 'Cert_246908-260627_13TeV_PromptReco_Collisions15_25ns_JSON.txt'

    runOnMC = True
    for i in process.source.fileNames:
        if 'Run2015' in i:
            runOnMC = False

    isTTbar = False
    for i in process.source.fileNames:
        if '/TT' in i or '/tt' in i:
            isTTbar = True

    if not runOnMC:
        from FWCore.PythonUtilities.LumiList import LumiList
        lumiList = LumiList(os.environ["CMSSW_BASE"] +
                            '/src/CATTools/CatProducer/prod/LumiMask/' +
                            lumiFile)
        #lumiList = LumiList(os.environ["CMSSW_BASE"]+'/src/CATTools/CommonTools/test/ttbb/'+lumiFile)
        process.source.lumisToProcess = lumiList.getVLuminosityBlockRange()
Example #40
def writejson(l, out_fn):
    is_data = is_data_fn(out_fn)
    if not is_data:
        run = 1
    rll = defaultdict(list)
    for x in l:
        if is_data:
            run, lumi = x[:2]
        else:
            lumi = x[0]
        rll[run].append(lumi)
    LumiList(runsAndLumis=rll).writeJSON(out_fn)
Example #43
 def __init__(self, fn, mask_fn=None):
     self.mask = LumiList(mask_fn) if mask_fn else None
     self.lls = LumiLines.load(fn)
     self.by_run = defaultdict(list)
     self.by_run_ls = {}
     self.fills = defaultdict(lambda: 999999)
     for ll in self.lls:
         if not self.mask or (ll.run, ll.ls) in self.mask:
             self.by_run[ll.run].append(ll)
             self.by_run_ls[(ll.run, ll.ls)] = ll
             self.fills[ll.fill] = min(self.fills[ll.fill], ll.run)
     self.fill_boundaries = sorted(self.fills.values())
     self.by_run = dict(self.by_run)
Example #44
def split_by_lumi(config, dataset_info, task_list):
    if config.has_key('lumi mask'):
        lumi_mask = LumiList(filename=config['lumi mask'])
        dataset_info.total_lumis = 0
        for file in dataset_info.files:
            dataset_info.lumis[file] = lumi_mask.filterLumis(dataset_info.lumis[file])
            dataset_info.total_lumis += len(dataset_info.lumis[file])

    lumis_per_task = config['lumis per task']
    lumis_processed = 0
    task_id = 0
    tasks = []
    files = iter(dataset_info.files)
    file = files.next()
    input_files_this_task = [file]
    task_lumis_remaining = dataset_info.lumis[file]
    while lumis_processed < dataset_info.total_lumis:
        for file in list(input_files_this_task):  # iterate over a copy; the list is modified below
            common_lumis = set(dataset_info.lumis[file]).intersection(set(task_lumis_remaining))
            if len(common_lumis) == 0 or len(dataset_info.lumis[file]) == 0:
                input_files_this_task.remove(file)
        while lumis_per_task <= len(task_lumis_remaining):
            task_lumis = LumiList(lumis=task_lumis_remaining[:lumis_per_task])
            task_lumis_remaining = task_lumis_remaining[lumis_per_task:]
            tasks.append((input_files_this_task, task_lumis.getVLuminosityBlockRange()))
            task_id += 1
            lumis_processed += lumis_per_task
        try:
            file = files.next()
            input_files_this_task.append(file)
            task_lumis_remaining.extend(dataset_info.lumis[file])
        except StopIteration:
            # no more input files; finish with whatever lumis remain
            lumis_per_task = len(task_lumis_remaining)

    with open(task_list, 'w') as json_file:
        json.dump(tasks, json_file)

    return len(tasks)
Example #46
def LumiListForRunPeriod(rp, MIN_LUMIS=0):
    ll = LumiList(filename=rp.json)
    runs = [
        run for run in map(int, ll.getRuns())
        if run >= rp.firstRun and run <= rp.lastRun
    ]

    lumis = ll.getLumis()
    nlumis = defaultdict(int)
    for r, l in lumis:
        nlumis[r] += 1
    select_runs = [run for run in runs if nlumis[run] > MIN_LUMIS]
    ll.selectRuns(select_runs)
    return ll
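
Here rp is assumed to expose .json, .firstRun and .lastRun; a sketch of a stand-in run period (path and run range hypothetical):

class RunPeriod(object):
    def __init__(self, json, firstRun, lastRun):
        self.json, self.firstRun, self.lastRun = json, firstRun, lastRun

rp = RunPeriod('golden.json', 273000, 274000)
ll = LumiListForRunPeriod(rp, MIN_LUMIS=5)   # keep runs with more than 5 lumi sections
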
Example #47
def getLumiListInValidFiles(dataset, dbsurl='phys03'):
    """
    Get the runs/lumis in the valid files of a given dataset.

    dataset: the dataset name as published in DBS
    dbsurl: the DBS URL or DBS prod instance

    Returns a LumiList object.
    """

    from dbs.apis.dbsClient import DbsApi

    dbsurl = DBSURLS['reader'].get(dbsurl, dbsurl)
    dbs3api = DbsApi(url=dbsurl)
    try:
        files = dbs3api.listFileArray(dataset=dataset,
                                      validFileOnly=0,
                                      detail=True)
    except Exception as ex:
        msg = "Got DBS client error requesting details of dataset '%s' on DBS URL '%s': %s" % (
            dataset, dbsurl, ex)
        msg += "\n%s" % (traceback.format_exc())
        raise ClientException(msg)
    if not files:
        msg = "Dataset '%s' not found in DBS URL '%s'." % (dataset, dbsurl)
        raise ClientException(msg)
    validFiles = [f['logical_file_name'] for f in files if f['is_file_valid']]
    blocks = set([f['block_name'] for f in files])
    runLumiPairs = []
    for blockName in blocks:
        fileLumis = dbs3api.listFileLumis(block_name=blockName)
        for f in fileLumis:
            if f['logical_file_name'] in validFiles:
                run = f['run_num']
                lumis = f['lumi_section_num']
                for lumi in lumis:
                    runLumiPairs.append((run, lumi))
    lumiList = LumiList(lumis=runLumiPairs)

    return lumiList
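
Typical use, with a hypothetical user dataset published to the phys03 instance:

ll = getLumiListInValidFiles('/MyPrimary/user-processing-v1/USER', dbsurl='phys03')
ll.writeJSON('processed_valid.json')
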
Example #49
def shortenJson(jsonFile,minRun=0,maxRun=-1,output=None,debug=False):
  from copy import deepcopy
  runList = jsonFile 
  if isinstance(runList,LumiList):
    runList = deepcopy(jsonFile)
  else:
    runList = LumiList (filename = jsonFile)  # Read in first  JSON file
  allRuns = runList.getRuns()
  runsToRemove=[]
  for run in allRuns:
      if  int(run) < minRun:
          runsToRemove.append (run)
      if maxRun > 0 and int(run) > maxRun:
          runsToRemove.append (run)
  if debug:
	print " runsToRemove ",runsToRemove
  runList.removeRuns (runsToRemove)
  if output:
    runList.writeJSON (output)
  else:
    return  runList
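
Both call modes of shortenJson, with hypothetical paths: pass a filename (or an existing LumiList) and either write the result or get it back:

shortenJson('golden.json', minRun=273000, maxRun=274000, output='golden_trimmed.json')
trimmed = shortenJson('golden.json', minRun=273000)   # returns the LumiList instead
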
Example #50
    if options.intLumi:
        handle = Handle ('LumiSummary')
        label  = ('lumiProducer')
    else:
        handle, label = None, None

    runsLumisDict = {}
    lumis = Lumis (args)
    delivered = recorded = 0
    for lum in lumis:
        runList = runsLumisDict.setdefault (lum.aux().run(), [])
        runList.append( lum.aux().id().luminosityBlock() )
        # get the summary and keep track of the totals
        if options.intLumi:
            lum.getByLabel (label, handle)
            summary = handle.product()
            delivered += summary.avgInsDelLumi()
            recorded  += summary.avgInsRecLumi()

    # print out lumi sections in JSON format
    jsonList = LumiList (runsAndLumis = runsLumisDict)
    if options.output:
        jsonList.writeJSON (options.output)
    else:
        print jsonList

    # print out integrated luminosity numbers if requested
    if options.intLumi:
        print "\nNote: These numbers should be considered approximate.  For official numbers, please use lumiCalc.py"
        print "delivered %.1f mb,  recorded %.1f mb" % \
              (delivered, recorded)
Example #51
    parser.add_option ('--output', dest='output', type='string',
                       help='Save output to file OUTPUT')
    # required parameters
    (options, args) = parser.parse_args()
    if len (args) != 1:
        raise RuntimeError, "Must provide exactly one input file"

    if options.min and options.max and options.min > options.max:
        raise RuntimeError, "Minimum value (%d) is greater than maximum value (%d)" % (options.min, options.max)

    commaRE = re.compile (r',')
    runsToRemove = []
    for chunk in options.runs:
        runs = commaRE.split (chunk)
        runsToRemove.extend (runs)

    alphaList = LumiList (filename = args[0]) # Read in first JSON file
    allRuns = alphaList.getRuns()
    for run in allRuns:
        if options.min and int(run) < options.min:
            runsToRemove.append (run)
        if options.max and int(run) > options.max:
            runsToRemove.append (run)

    alphaList.removeRuns (runsToRemove)

    if options.output:
        alphaList.writeJSON (options.output)
    else:
        print alphaList
Example #52
class JSONAnalyzer( Analyzer ):
    '''Applies a json filter and creates an RLTInfo TTree.
    See PhysicsTools.HeppyCore.utils.RLTInfo for more information

    example:
    
    jsonFilter = cfg.Analyzer(
      "JSONAnalyzer",
      )

    The path of the json file to be used is set as a component attribute.

    The process function returns:
      - True if
         - the component is MC or
         - if the run/lumi pair is in the JSON file
         - if the json file was not set for this component
      - False if the component is not MC or embedded (for H->tau tau),
          and the run/lumi pair is not in the JSON file.
    '''

    def __init__(self, cfg_ana, cfg_comp, looperName):
        super(JSONAnalyzer, self).__init__(cfg_ana, cfg_comp, looperName)
        if not cfg_comp.isMC:
            if self.cfg_comp.json is None:
                raise ValueError('component {cname} is not MC, and contains no JSON file. Either remove the JSONAnalyzer for your path or set the "json" attribute of this component'.format(cname=cfg_comp.name))
            self.lumiList = LumiList(os.path.expandvars(self.cfg_comp.json))
        else:
            self.lumiList = None
        

        self.rltInfo = RLTInfo()

    def beginLoop(self, setup):
        super(JSONAnalyzer,self).beginLoop(setup)
        self.counters.addCounter('JSON')
        self.count = self.counters.counter('JSON')
        self.count.register('All Lumis')
        self.count.register('Passed Lumis')

    def process(self, event):
        self.readCollections( event.input )
        evid = event.input.eventAuxiliary().id()
        run = evid.run()
        lumi = evid.luminosityBlock()
        eventId = evid.event()

        event.run = run
        event.lumi = lumi
        event.eventId = eventId

        if self.cfg_comp.isMC:
            return True

        if self.lumiList is None:
            return True

        self.count.inc('All Lumis')
        if self.lumiList.contains(run,lumi):
            self.count.inc('Passed Lumis')
            self.rltInfo.add('dummy', run, lumi)
            return True
        else:
            return False
        

    def write(self, setup):
        super(JSONAnalyzer, self).write(setup)
        self.rltInfo.write( self.dirName )
Example #53
def add_merged_sample(NAME, type, AnaUrl, FWUrl, samples, comment):
    # samples is a list of dicts, each with keys 'process', 'dataset_id', 'sample_id'
    dbstore = DbStore()
    sample = None

    # check that source dataset exist
    # Skip: should exist, the check has been done before calling this function

    # check that there is no existing entry
    update = False
    localpath = ''
    nevents = 0
    checkExisting = dbstore.find(Sample, Sample.name == unicode(NAME))
    if checkExisting.is_empty():
        sample = Sample(unicode(NAME), unicode(localpath), unicode(type), nevents)
    else:
        update = True
        sample = checkExisting.one()
        sample.removeFiles(dbstore)

    # collecting contents
    sample.nevents_processed = 0
    sample.nevents = 0
    sample.normalization = 1
    sample.event_weight_sum = 0
    extras_event_weight_sum = {}
    dataset_nevents = 0
    processed_lumi = LumiList()
    for i, s in enumerate(samples):
        if i == 0:
            sample.source_dataset_id = s['dataset_id']
            sample.source_sample_id = s['sample_id']
        results = dbstore.find(Sample, Sample.sample_id == s['sample_id'])
        # Should exist, the check has been done before calling this function
        sample.nevents_processed += results[0].nevents_processed
        sample.nevents += results[0].nevents
        sample.event_weight_sum += results[0].event_weight_sum
        extra_sumw = results[0].extras_event_weight_sum
        if extra_sumw is not None:
            extra_sumw = json.loads(extra_sumw)
            for key in extra_sumw:
                try:
                    extras_event_weight_sum[key] += extra_sumw[key]
                except KeyError:
                    extras_event_weight_sum[key] = extra_sumw[key]
        tmp_processed_lumi = results[0].processed_lumi
        if tmp_processed_lumi is not None:
            tmp_processed_lumi = json.loads( tmp_processed_lumi )
            processed_lumi = processed_lumi | LumiList(compactList = tmp_processed_lumi)
        # Get info from file table
        results = dbstore.find(File, File.sample_id == s['sample_id'])
        for lfn, pfn, event_weight_sum, file_extras_event_weight_sum, nevents in list(results.values(File.lfn, File.pfn, File.event_weight_sum, File.extras_event_weight_sum, File.nevents)):
            f = File(lfn, pfn, event_weight_sum, file_extras_event_weight_sum, nevents)
            sample.files.add(f)
        # Get info from parent datasets
        results = dbstore.find(Dataset, Dataset.dataset_id == s['dataset_id'])
        dataset_nevents +=  results[0].nevents
    if len(extras_event_weight_sum) > 0:
        sample.extras_event_weight_sum = unicode(json.dumps(extras_event_weight_sum))
    if len(processed_lumi.getCompactList()) > 0:
        sample.processed_lumi = unicode(json.dumps(processed_lumi.getCompactList()))
    sample.code_version = unicode(AnaUrl + ' ' + FWUrl) #NB: limited to 255 characters, but so far so good
    if sample.nevents_processed != dataset_nevents:
        sample.user_comment = unicode("Sample was not fully processed, only " + str(sample.nevents_processed) + "/" + str(dataset_nevents) + " events were processed. " + comment)
    else:
        sample.user_comment = unicode(comment)
    sample.author = unicode(getpwuid(os.stat(os.getcwd()).st_uid).pw_name)

    if not update:
        dbstore.add(sample)
        if sample.luminosity is None:
            sample.luminosity = sample.getLuminosity()

        print sample

        dbstore.commit()
        return

    else:
        sample.luminosity = sample.getLuminosity()
        print("Sample updated")
        print(sample)

        dbstore.commit()
        return

    # rollback
    dbstore.rollback()
Example #54
sample = FWLiteSample.fromDAS( sample_name, args.sample, maxN = maxN, dbFile = dbFile)  
output_directory = os.path.join(skim_ntuple_directory, args.targetDir, sample.name) 

# Run only job number "args.job" from total of "args.nJobs"
if args.nJobs>1:
    n_files_before = len(sample.files)
    sample = sample.split(args.nJobs)[args.job]
    n_files_after  = len(sample.files)
    logger.info( "Running job %i/%i over %i files from a total of %i.", args.job, args.nJobs, n_files_after, n_files_before)

output_filename =  os.path.join(output_directory, sample.name + '.root') 

if 'Run2018' in sample.name:
    from FWCore.PythonUtilities.LumiList import LumiList
    json_path = '$CMSSW_BASE/src/JetMET/diagnosis/python/pu2018/Cert_314472-325175_13TeV_PromptReco_Collisions18_JSON.txt'
    lumiList = LumiList(os.path.expandvars(json_path))
else:
    lumiList = None

if args.maxFiles > 0:
    sample.files = sample.files[:args.maxFiles]

if not os.path.exists( output_directory ): 
    os.makedirs( output_directory )
    logger.info( "Created output directory %s", output_directory )

products = {
     'muon':                                     {'skip':False,'type':'vector<pat::Muon>', 'label': ("slimmedMuons")},
     'vertices':                                 {'skip':False, 'type':'vector<reco::Vertex>', 'label':('offlineSlimmedPrimaryVertices')},
     'met':                                      {'skip':False,'type':'vector<pat::MET>', 'label': ("slimmedMETs")},
}
Example #55
    def fetchDBSInfo(self):
        """
        Contact DBS
        """
        # make assumption that same host won't be used for both
        # this check should catch most deployed servers

        (useDBS2, useDBS3, dbs2_url, dbs3_url) = verify_dbs_url(self)
        # DBS2 is gone
        dbs_url=dbs3_url
        useDBS2 = False
        useDBS3 = True
        verifyDBS23 = False

        common.logger.info("Accessing DBS at: %s" % dbs_url)

        ## check if runs are selected
        runselection = []
        if (self.cfg_params.has_key('CMSSW.runselection')):
            runselection = parseRange2(self.cfg_params['CMSSW.runselection'])
            if len(runselection)>1000000:
                common.logger.info("ERROR: runselection range has more then 1M numbers")
                common.logger.info("ERROR: Too large. runselection is ignored")
                runselection=[]

        ## check if various lumi parameters are set
        self.lumiMask = self.cfg_params.get('CMSSW.lumi_mask',None)
        self.lumiParams = self.cfg_params.get('CMSSW.total_number_of_lumis',None) or \
                          self.cfg_params.get('CMSSW.lumis_per_job',None)

        lumiList = None
        if self.lumiMask:
            lumiList = LumiList(filename=self.lumiMask)
        if runselection:
            runList = LumiList(runs = runselection)

        self.splitByRun = int(self.cfg_params.get('CMSSW.split_by_run', 0))
        self.splitDataByEvent = int(self.cfg_params.get('CMSSW.split_by_event', 0))
        common.logger.log(10-1,"runselection is: %s"%runselection)

        if not self.splitByRun:
            self.splitByLumi = self.lumiMask or self.lumiParams or self.ads

        if self.splitByRun and not runselection:
            msg = "Error: split_by_run must be combined with a runselection"
            raise CrabException(msg)

        ## service API
        if useDBS2 or verifyDBS23:
            args = {}
            args['url']     = dbs2_url
            args['level']   = 'CRITICAL'

        ## check if has been requested to use the parent info
        useparent = int(self.cfg_params.get('CMSSW.use_parent',0))


        defaultName = common.work_space.shareDir()+'AnalyzedBlocks.txt'
        ## check if has been asked for a non default file to store/read analyzed fileBlocks
        #SB no no, we do not want this, it is not even documented !
        #fileBlocks_FileName = os.path.abspath(self.cfg_params.get('CMSSW.fileblocks_file',defaultName))
        if self.cfg_params.get('CMSSW.fileblocks_file') :
            msg = "CMSSW.fileblocks_file option non supported"
            raise CrabException(msg)
        fileBlocks_FileName = os.path.abspath(defaultName)

        if useDBS2 or verifyDBS23:
            #common.logger.info("looking up DBS2 ...")
            import DBSAPI.dbsApi
            import DBSAPI.dbsApiException
            start_time=time.time()
            api2 = DBSAPI.dbsApi.DbsApi(args)
            files2 = self.queryDbs(api2,path=self.datasetPath,runselection=runselection,useParent=useparent)
            elapsed=time.time() - start_time
            common.logger.info("DBS2 lookup took %5.2f sec" % elapsed)
            if useDBS2:
                self.files = files2
        if useDBS3 or verifyDBS23:
            #common.logger.info("looking up DBS3 ...")
            from dbs.apis.dbsClient import DbsApi
            start_time=time.time()
            api3 = DbsApi(dbs3_url)
            files3 = self.queryDbs3(api3,path=self.datasetPath,runselection=runselection,useParent=useparent)
            elapsed=time.time() - start_time
            common.logger.info("DBS3 lookup took %5.2f sec" % elapsed)
            if useDBS3:
                self.files = files3


        # Check to see what the dataset is
        pdsName = self.datasetPath.split("/")[1]
        if useDBS2 :
            primDSs = api2.listPrimaryDatasets(pdsName)
            dataType = primDSs[0]['Type']
        elif useDBS3 :
            dataType=api3.listDataTypes(dataset=self.datasetPath)[0]['data_type']

        common.logger.info("Datatype is %s" % dataType)
        if dataType == 'data' and not \
            (self.splitByRun or self.splitByLumi or self.splitDataByEvent):
            msg = 'Data must be split by lumi or by run. ' \
                  'Please see crab -help for the correct settings'
            raise  CrabException(msg)



        anFileBlocks = []
        if self.skipBlocks: anFileBlocks = readTXTfile(self, fileBlocks_FileName)

        # parse files and fill arrays
        for file in self.files :
            parList  = []
            fileLumis = [] # List of tuples
            # skip already analyzed blocks
            fileblock = file['Block']['Name']
            if fileblock not in anFileBlocks :
                filename = file['LogicalFileName']
                # asked retry the list of parent for the given child
                if useparent==1:
                    parList = [x['LogicalFileName'] for x in file['ParentList']]
                if self.splitByLumi:
                    fileLumis = [ (x['RunNumber'], x['LumiSectionNumber'])
                                 for x in file['LumiList'] ]
                self.parent[filename] = parList
                # For LumiMask, intersection of two lists.
                if self.lumiMask and runselection:
                    self.lumis[filename] = runList.filterLumis(lumiList.filterLumis(fileLumis))
                elif runselection:
                    self.lumis[filename] = runList.filterLumis(fileLumis)
                elif self.lumiMask:
                    self.lumis[filename] = lumiList.filterLumis(fileLumis)
                else:
                    self.lumis[filename] = fileLumis

                if filename.find('.dat') < 0 :
                    events    = file['NumberOfEvents']
                    # Count number of events and lumis per block
                    if fileblock in self.eventsPerBlock.keys() :
                        self.eventsPerBlock[fileblock] += events
                    else :
                        self.eventsPerBlock[fileblock] = events
                    # Number of events per file
                    self.eventsPerFile[filename] = events

                    # List of files per block
                    if fileblock in self.blocksinfo.keys() :
                        self.blocksinfo[fileblock].append(filename)
                    else :
                        self.blocksinfo[fileblock] = [filename]

                    # total number of events
                    self.maxEvents += events
                    self.maxLumis  += len(self.lumis[filename])

        if  self.skipBlocks and len(self.eventsPerBlock.keys()) == 0:
            msg = "No new fileblocks available for dataset: "+str(self.datasetPath)
            raise  CrabException(msg)

        if len(self.eventsPerBlock) <= 0:
            msg="No data for %s in DBS\n Check datasetpath parameter in crab.cfg" % self.datasetPath
            raise  CrabException(msg)
Example #56
def SingleTopStep2():

    options = VarParsing("analysis")
    options.register(
        "subChannel",
        "T_t",
        VarParsing.multiplicity.singleton,
        VarParsing.varType.string,
        "The sample that you are running on",
    )
    options.register(
        "reverseIsoCut",
        False,
        VarParsing.multiplicity.singleton,
        VarParsing.varType.bool,
        "Consider anti-isolated region",
    )
    options.register(
        "doDebug", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "Turn on debugging messages"
    )
    options.register("isMC", True, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "Run on MC")
    options.register(
        "doGenParticlePath",
        True,
        VarParsing.multiplicity.singleton,
        VarParsing.varType.bool,
        "Run the gen particle paths (only works on specific MC)",
    )
    options.register(
        "globalTag", Config.globalTagMC, VarParsing.multiplicity.singleton, VarParsing.varType.string, "Global tag"
    )
    options.register(
        "srcPUDistribution",
        "S10",
        VarParsing.multiplicity.singleton,
        VarParsing.varType.string,
        "Source pile-up distribution",
    )
    options.register(
        "destPUDistribution",
        "data",
        VarParsing.multiplicity.singleton,
        VarParsing.varType.string,
        "destination pile-up distribution",
    )

    options.register(
        "isComphep",
        False,
        VarParsing.multiplicity.singleton,
        VarParsing.varType.bool,
        "Use CompHep-specific processing",
    )

    options.register(
        "isAMCatNLO",
        False,
        VarParsing.multiplicity.singleton,
        VarParsing.varType.bool,
        "Use aMC@NLO-specific processing",
    )

    options.register(
        "isSherpa", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "Use sherpa-specific processing"
    )

    options.register(
        "systematic", "", VarParsing.multiplicity.singleton, VarParsing.varType.string, "Apply Systematic variation"
    )

    options.register(
        "dataRun",
        "RunABCD",
        VarParsing.multiplicity.singleton,
        VarParsing.varType.string,
        "A string Run{A,B,C,D} to specify the data period",
    )

    options.register(
        "doSync", False, VarParsing.multiplicity.singleton, VarParsing.varType.bool, "Synchronization exercise"
    )

    options.parseArguments()

    if options.isMC:
        Config.srcPUDistribution = pileUpDistributions.distributions[options.srcPUDistribution]

    Config.Leptons.reverseIsoCut = options.reverseIsoCut
    Config.subChannel = options.subChannel
    Config.doDebug = options.doDebug
    Config.isMC = options.isMC
    Config.doSkim = options.doSync or not sample_types.is_signal(Config.subChannel)
    Config.isCompHep = options.isComphep or "comphep" in Config.subChannel
    Config.isAMCatNLO = Config.isAMCatNLO or options.isAMCatNLO or "aMCatNLO" in Config.subChannel
    Config.isSherpa = options.isSherpa or "sherpa" in Config.subChannel
    Config.systematic = options.systematic
    Config.doSync = options.doSync

    print "Systematic: ", Config.systematic

    if Config.isMC and not Config.doSync:
        logging.info("Changing jet source from %s to smearedPatJetsWithOwnRef" % Config.Jets.source)
        Config.Jets.source = "smearedPatJetsWithOwnRef"

        if Config.systematic in ["ResUp", "ResDown"]:
            logging.info(
                "Changing jet source from %s to smearedPatJetsWithOwnRef%s" % (Config.Jets.source, Config.systematic)
            )
            Config.Jets.source = "smearedPatJetsWithOwnRef" + Config.systematic
            logging.info(
                "Changing MET source from %s to patType1CorrectedPFMetJet%s" % (Config.metSource, Config.systematic)
            )
            Config.metSource = "patType1CorrectedPFMetJet" + Config.systematic
        elif Config.systematic in ["EnUp", "EnDown"]:
            logging.info(
                "Changing jet source from %s to shiftedPatJetsWithOwnRef%sForCorrMEt"
                % (Config.Jets.source, Config.systematic)
            )
            Config.Jets.source = "shiftedPatJetsWithOwnRef" + Config.systematic + "ForCorrMEt"
            logging.info(
                "Changing MET source from %s to patType1CorrectedPFMetJet%s" % (Config.metSource, Config.systematic)
            )
            Config.metSource = "patType1CorrectedPFMetJet" + Config.systematic
        elif Config.systematic in ["UnclusteredEnUp", "UnclusteredEnDown"]:
            logging.info(
                "Changing MET source from %s to patType1CorrectedPFMet%s" % (Config.metSource, Config.systematic)
            )
            Config.metSource = "patType1CorrectedPFMet" + Config.systematic

    print "Configuration"
    print Config._toStr()

    print Config.Jets._toStr()
    print Config.Muons._toStr()
    print Config.Electrons._toStr()
    print ""

    process = cms.Process("STPOLSEL2")
    eventCounting.countProcessed(process)

    process.load("Configuration.Geometry.GeometryIdeal_cff")
    process.load("Configuration.StandardSequences.FrontierConditions_GlobalTag_cff")
    from Configuration.AlCa.autoCond import autoCond

    process.GlobalTag.globaltag = cms.string(options.globalTag)
    process.load("Configuration.StandardSequences.MagneticField_cff")

    if Config.doDebug:
        process.load("FWCore.MessageLogger.MessageLogger_cfi")
        process.MessageLogger = cms.Service(
            "MessageLogger",
            destinations=cms.untracked.vstring("cout", "debug"),
            debugModules=cms.untracked.vstring("*"),
            cout=cms.untracked.PSet(threshold=cms.untracked.string("INFO")),
            debug=cms.untracked.PSet(threshold=cms.untracked.string("DEBUG")),
        )
        logging.basicConfig(level=logging.DEBUG)
    else:
        process.load("FWCore.MessageLogger.MessageLogger_cfi")
        process.MessageLogger.cerr.FwkReport.reportEvery = 1000
        process.MessageLogger.cerr.threshold = cms.untracked.string("INFO")
        logging.basicConfig(level=logging.DEBUG)

    process.maxEvents = cms.untracked.PSet(input=cms.untracked.int32(options.maxEvents))

    process.options = cms.untracked.PSet(wantSummary=cms.untracked.bool(True))

    import os
    from FWCore.PythonUtilities.LumiList import LumiList

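    # For data, restrict processing to the certified luminosity sections in the
    # golden JSON; for MC the conditional below never evaluates ll1 and an
    # empty VLuminosityBlockRange is used instead.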
    if not Config.isMC:
        ll1 = LumiList(
            os.environ["CMSSW_BASE"] + "/../crabs/lumis/Cert_190456-208686_8TeV_22Jan2013ReReco_Collisions12_JSON.txt"
        )

    process.source = cms.Source(
        "PoolSource",
        fileNames=cms.untracked.vstring(options.inputFiles),
        cacheSize=cms.untracked.uint32(50 * 1024 * 1024),
        lumisToProcess=ll1.getVLuminosityBlockRange() if not Config.isMC else cms.untracked.VLuminosityBlockRange(),
    )

    print options

    # -------------------------------------------------
    # Jets
    # -------------------------------------------------

    from SingleTopPolarization.Analysis.jets_step2_cfi import JetSetup

    JetSetup(process, Config)

    # -------------------------------------------------
    # Leptons
    # -------------------------------------------------

    from SingleTopPolarization.Analysis.muons_step2_cfi import MuonSetup

    MuonSetup(process, Config)

    from SingleTopPolarization.Analysis.electrons_step2_cfi import ElectronSetup

    ElectronSetup(process, Config)

    process.looseVetoMuCount = cms.EDProducer(
        "CollectionSizeProducer<reco::Candidate>", src=cms.InputTag("looseVetoMuons")
    )

    process.looseVetoEleCount = cms.EDProducer(
        "CollectionSizeProducer<reco::Candidate>", src=cms.InputTag("looseVetoElectrons")
    )

    process.decayTreeProducerMu = cms.EDProducer(
        "GenParticleDecayTreeProducer", src=cms.untracked.InputTag("singleIsoMu")
    )
    process.decayTreeProducerEle = cms.EDProducer(
        "GenParticleDecayTreeProducer", src=cms.untracked.InputTag("singleIsoEle")
    )

    # -----------------------------------------------
    # Top reco and cosine calcs
    # -----------------------------------------------

    from SingleTopPolarization.Analysis.top_step2_cfi import TopRecoSetup

    TopRecoSetup(process, Config)

    process.allEventObjects = cms.EDProducer(
        "CandRefCombiner",
        sources=cms.vstring(["goodJets", "goodSignalLeptons", Config.metSource]),
        maxOut=cms.uint32(9999),
        minOut=cms.uint32(0),
        logErrors=cms.bool(False),
    )

    process.hadronicEventObjects = cms.EDProducer(
        "CandRefCombiner",
        sources=cms.vstring(["goodJets"]),
        maxOut=cms.uint32(9999),
        minOut=cms.uint32(0),
        logErrors=cms.bool(False),
    )

    process.allEventObjectsWithNu = cms.EDProducer(
        "CandRefCombiner",
        sources=cms.vstring(["goodJets", "goodSignalLeptons", Config.metSource, "recoNuProducer"]),
        maxOut=cms.uint32(9999),
        minOut=cms.uint32(0),
        logErrors=cms.bool(False),
    )

    process.eventShapeVars = cms.EDProducer("EventShapeVarsProducer", src=cms.InputTag("allEventObjects"))

    process.eventShapeVarsWithNu = cms.EDProducer("EventShapeVarsProducer", src=cms.InputTag("allEventObjectsWithNu"))

    # Vector sum of all reconstructed objects
    process.shat = cms.EDProducer("SimpleCompositeCandProducer", sources=cms.VInputTag(["allEventObjects"]))

    # Hadronic final state
    process.ht = cms.EDProducer("SimpleCompositeCandProducer", sources=cms.VInputTag(["hadronicEventObjects"]))

    process.shatNTupleProducer = cms.EDProducer(
        "CandViewNtpProducer2",
        src=cms.InputTag("shat"),
        lazyParser=cms.untracked.bool(True),
        prefix=cms.untracked.string(""),
        # eventInfo = cms.untracked.bool(True),
        variables=ntupleCollection([["Pt", "pt"], ["Eta", "eta"], ["Phi", "phi"], ["Mass", "mass"]]),
    )

    process.htNTupleProducer = process.shatNTupleProducer.clone(src=cms.InputTag("ht"))

    process.eventShapeSequence = cms.Sequence(
        process.allEventObjects
        * process.hadronicEventObjects
        * process.eventShapeVars
        * process.allEventObjectsWithNu
        * process.eventShapeVarsWithNu
        * process.shat
        * process.ht
        * process.shatNTupleProducer
        * process.htNTupleProducer
    )

    # -----------------------------------------------
    # Treemaking
    # -----------------------------------------------

    process.recoTopNTupleProducer = cms.EDProducer(
        "CandViewNtpProducer2",
        src=cms.InputTag("recoTop"),
        lazyParser=cms.untracked.bool(True),
        prefix=cms.untracked.string(""),
        # eventInfo = cms.untracked.bool(True),
        variables=ntupleCollection([["Pt", "pt"], ["Eta", "eta"], ["Phi", "phi"], ["Mass", "mass"]]),
    )
    process.recoNuNTupleProducer = cms.EDProducer(
        "CandViewNtpProducer2",
        src=cms.InputTag("recoNu"),
        lazyParser=cms.untracked.bool(True),
        prefix=cms.untracked.string(""),
        # eventInfo = cms.untracked.bool(True),
        variables=ntupleCollection(
            [
                ["Pt", "pt"],
                ["Eta", "eta"],
                ["Phi", "phi"],
                ["Px", "p4().Px()"],
                ["Py", "p4().Py()"],
                ["Pz", "p4().Pz()"],
            ]
        ),
    )

    process.recoWNTupleProducer = cms.EDProducer(
        "CandViewNtpProducer2",
        src=cms.InputTag("recoW"),
        lazyParser=cms.untracked.bool(True),
        prefix=cms.untracked.string(""),
        variables=ntupleCollection([["Pt", "pt"], ["Eta", "eta"], ["Phi", "phi"], ["Mass", "mass"]]),
    )

    process.trueNuNTupleProducer = process.recoNuNTupleProducer.clone(
        src=cms.InputTag("genParticleSelector", "trueNeutrino", "STPOLSEL2")
    )
    process.trueWNTupleProducer = process.recoTopNTupleProducer.clone(
        src=cms.InputTag("genParticleSelector", "trueWboson", "STPOLSEL2")
    )
    process.trueTopNTupleProducer = process.recoTopNTupleProducer.clone(
        src=cms.InputTag("genParticleSelector", "trueTop", "STPOLSEL2")
    )

    process.patMETDeltaRProducer = cms.EDProducer(
        "DeltaRProducerMET",
        muonSrc=cms.InputTag("goodSignalMuons"),
        electronSrc=cms.InputTag("goodSignalElectrons"),
        metSrc=cms.InputTag(Config.metSource),
    )

    process.patMETNTupleProducer = cms.EDProducer(
        "CandViewNtpProducer2",
        src=cms.InputTag(Config.metSource),
        lazyParser=cms.untracked.bool(True),
        prefix=cms.untracked.string(""),
        variables=ntupleCollection(
            [
                ["Pt", "pt"],
                ["Eta", "eta"],
                ["Phi", "phi"],
                ["Px", "p4().Px()"],
                ["Py", "p4().Py()"],
                ["Pz", "p4().Pz()"],
            ]
        ),
    )

    process.trueLeptonNTupleProducer = process.recoTopNTupleProducer.clone(
        src=cms.InputTag("genParticleSelector", "trueLepton", "STPOLSEL2")
    )

    process.trueLightJetNTupleProducer = process.recoTopNTupleProducer.clone(
        src=cms.InputTag("genParticleSelector", "trueLightJet", "STPOLSEL2")
    )

    def userfloat(key):
        return "? hasUserFloat('{0}') ? userFloat('{0}') : {1}".format(key, nanval)

    def userint(key):
        return "? hasUserInt('{0}') ? userInt('{0}') : {1}".format(key, nanval)
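
    # nanval is assumed to be a module-level sentinel for missing values; e.g.
    # userfloat("dz") expands to the cut-parser string
    #   "? hasUserFloat('dz') ? userFloat('dz') : <nanval>"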

    process.goodSignalMuonsNTupleProducer = cms.EDProducer(
        "CandViewNtpProducer2",
        src=cms.InputTag("patMETDeltaRProducer", "muons"),
        lazyParser=cms.untracked.bool(True),
        prefix=cms.untracked.string(""),
        # eventInfo = cms.untracked.bool(True),
        variables=ntupleCollection(
            [
                ["Pt", "pt"],
                ["Eta", "eta"],
                ["Phi", "phi"],
                ["relIso", userfloat(Config.Muons.relIsoType)],
                ["Charge", "charge"],
                ["genPdgId", "? genParticlesSize() > 0 ? genParticle(0).pdgId() : {0}".format(nanval)],
                ["motherGenPdgId", "? genParticlesSize() > 0 ? genParticle(0).mother(0).pdgId() : {0}".format(nanval)],
                ["normChi2", "? globalTrack().isNonnull() ? normChi2 : {0}".format(nanval)],
                [
                    "trackhitPatterntrackerLayersWithMeasurement",
                    userfloat("track_hitPattern_trackerLayersWithMeasurement"),
                ],
                [
                    "globalTrackhitPatternnumberOfValidMuonHits",
                    userfloat("globalTrack_hitPattern_numberOfValidMuonHits"),
                ],
                [
                    "innerTrackhitPatternnumberOfValidPixelHits",
                    userfloat("innerTrack_hitPattern_numberOfValidPixelHits"),
                ],
                ["db", "dB"],
                ["dz", userfloat("dz")],
                ["numberOfMatchedStations", "numberOfMatchedStations"],
                [
                    "triggerMatch",
                    "? triggerObjectMatchesByPath('{0}').size()==1 ? triggerObjectMatchByPath('{0}').hasPathLastFilterAccepted() : {1}".format(
                        Config.Muons.triggerPath, nanval
                    ),
                ],
                ["deltaRMET", userfloat("deltaRMET")],
                ["deltaPhiMET", userfloat("deltaPhiMET")],
            ]
        ),
    )

    process.isoMuonsNTP = process.goodSignalMuonsNTupleProducer.clone(src=cms.InputTag("muonsWithIso"))

    process.allMuonsNTP = process.goodSignalMuonsNTupleProducer.clone(src=cms.InputTag("muonsWithIDAll"))

    process.goodSignalElectronsNTupleProducer = cms.EDProducer(
        "CandViewNtpProducer2",
        src=cms.InputTag("patMETDeltaRProducer", "electrons"),
        lazyParser=cms.untracked.bool(True),
        prefix=cms.untracked.string(""),
        # eventInfo = cms.untracked.bool(True),
        variables=ntupleCollection(
            [
                ["Pt", "%s" % Config.Electrons.pt],
                ["Eta", "eta"],
                ["Phi", "phi"],
                ["relIso", userfloat(Config.Electrons.relIsoType)],
                ["mvaID", "electronID('mvaTrigV0')"],
                ["Charge", "charge"],
                ["superClustereta", "superCluster.eta"],
                ["passConversionVeto", "passConversionVeto()"],
                [
                    "gsfTracktrackerExpectedHitsInnernumberOfHits",
                    userint("gsfTrack_trackerExpectedHitsInner_numberOfHits"),
                ],
                [
                    "triggerMatch",
                    "? triggerObjectMatchesByPath('{0}').size()==1 ? triggerObjectMatchByPath('{0}').hasPathLastFilterAccepted() : {1}".format(
                        Config.Electrons.triggerPath, nanval
                    ),
                ],
                ["genPdgId", "? genParticlesSize() > 0 ? genParticle(0).pdgId() : {0}".format(nanval)],
                ["motherGenPdgId", "? genParticlesSize() > 0 ? genParticle(0).mother(0).pdgId() : {0}".format(nanval)],
                ["deltaRMET", userfloat("deltaRMET")],
                ["deltaPhiMET", userfloat("deltaPhiMET")],
            ]
        ),
    )

    process.isoElectronsNTP = process.goodSignalElectronsNTupleProducer.clone(src=cms.InputTag("electronsWithIso"))

    process.allElectronsNTP = process.goodSignalElectronsNTupleProducer.clone(src=cms.InputTag("electronsWithIDAll"))

    process.goodJetsNTupleProducer = cms.EDProducer(
        "CandViewNtpProducer2",
        src=cms.InputTag("goodJets"),
        lazyParser=cms.untracked.bool(True),
        prefix=cms.untracked.string(""),
        eventInfo=cms.untracked.bool(False),
        variables=ntupleCollection(
            [
                ["Pt", "pt"],
                ["Eta", "eta"],
                ["Phi", "phi"],
                ["Mass", "mass"],
                # ["bDiscriminator", "bDiscriminator('%s')" % Config.Jets.bTagDiscriminant],
                ["bDiscriminatorTCHP", "bDiscriminator('%s')" % Config.Jets.BTagDiscriminant.TCHP],
                ["bDiscriminatorCSV", "bDiscriminator('%s')" % Config.Jets.BTagDiscriminant.CSV],
                ["rms", userfloat("rms")],
                ["partonFlavour", "partonFlavour()"],
                ["area", "jetArea()"],
                # These require PFCandidates to be present (huge collection)
                # ["n90", "n90()"],
                # ["n60", "n60()"],
                # ["genJetFlavour", "? genJet()>0 ? (genJet()->pdgId()) : 0"], #FIXME
                ["deltaR", userfloat("deltaR")],
                ["deltaPhi", userfloat("deltaPhi")],
                ["numberOfDaughters", "numberOfDaughters"],
                ["neutralHadronEnergy", "neutralHadronEnergy"],
                ["HFHadronEnergy", "HFHadronEnergy"],
                ["chargedEmEnergyFraction", "chargedEmEnergyFraction"],
                ["neutralEmEnergyFraction", "neutralEmEnergyFraction"],
                ["chargedHadronEnergyFraction", "chargedHadronEnergyFraction"],
                ["chargedMultiplicity", "chargedMultiplicity"],
                ["nParticles", userfloat("nParticles")],
                ["puMva", userfloat("mva")],
                ["nCharged", userfloat("nCharged")],
                ["nNeutral", userfloat("nNeutral")],
                ["deltaRMET", userfloat("deltaRMET")],
                ["deltaPhiMET", userfloat("deltaPhiMET")],
            ]
        ),
    )
    process.lowestBTagJetNTupleProducer = process.goodJetsNTupleProducer.clone(src=cms.InputTag("lowestBTagJet"))
    process.highestBTagJetNTupleProducer = process.goodJetsNTupleProducer.clone(src=cms.InputTag("highestBTagJet"))

    process.treeSequenceNew = cms.Sequence(
        process.patMETNTupleProducer
        * process.recoTopNTupleProducer
        * process.recoNuNTupleProducer
        * process.recoWNTupleProducer
        * process.trueTopNTupleProducer
        * process.trueNuNTupleProducer
        * process.trueWNTupleProducer
        * process.trueLeptonNTupleProducer
        * process.trueLightJetNTupleProducer
        * process.goodJetsNTupleProducer
        * process.lowestBTagJetNTupleProducer
        * process.highestBTagJetNTupleProducer
        * process.goodSignalMuonsNTupleProducer
        * process.goodSignalElectronsNTupleProducer
        * process.isoMuonsNTP
        * process.isoElectronsNTP
    )
    # -----------------------------------------------
    # Flavour analyzer
    # -----------------------------------------------

    Config.doWJetsFlavour = Config.isMC and sample_types.is_wjets(Config.subChannel) and not Config.isSherpa
    if Config.doWJetsFlavour:
        process.flavourAnalyzer = cms.EDProducer(
            "FlavourAnalyzer",
            genParticles=cms.InputTag("genParticles"),
            generator=cms.InputTag("generator"),
            genJets=cms.InputTag("selectedPatJets", "genJets"),
            saveGenJets=cms.bool(False),
            savePDFInfo=cms.bool(True),
        )

    # -----------------------------------------------
    # Paths
    # -----------------------------------------------

    from SingleTopPolarization.Analysis.hlt_step2_cfi import HLTSetup

    HLTSetup(process, Config)

    from SingleTopPolarization.Analysis.leptons_cfg import LeptonSetup

    LeptonSetup(process, Config)

    if Config.isMC:
        WeightSetup(process, Config)

    if Config.isMC and options.doGenParticlePath:
        if Config.isCompHep:
            from SingleTopPolarization.Analysis.partonStudy_comphep_step2_cfi import PartonStudySetup
        elif Config.isAMCatNLO:
            from SingleTopPolarization.Analysis.partonStudy_aMCatNLO_step2_cfi import PartonStudySetup
        else:
            from SingleTopPolarization.Analysis.partonStudy_step2_cfi import PartonStudySetup
        PartonStudySetup(process)
        process.partonPath = cms.Path()

        # NOTE: this path will REJECT events not having a true t-channel lepton
        if sample_types.is_signal(Config.subChannel):
            logging.warning(
                "Using signal-only sequence 'process.partonStudyTrueSequence' on subChannel=%s" % Config.subChannel
            )
            process.partonPath += process.partonStudyTrueSequence

    from SingleTopPolarization.Analysis.muons_step2_cfi import MuonPath

    MuonPath(process, Config)

    from SingleTopPolarization.Analysis.electrons_step2_cfi import ElectronPath

    ElectronPath(process, Config)

    if Config.isMC:
        process.muPath += process.weightSequence
        process.elePath += process.weightSequence

    if Config.isMC and sample_types.is_signal(Config.subChannel):
        process.muPath += process.partonStudyCompareSequence
        process.elePath += process.partonStudyCompareSequence

    process.treePath = cms.Path(process.treeSequenceNew)

    process.eventVarsPath = cms.Path(process.eventShapeSequence)

    # enable embedding the gen-level weight, which is relevant for the Sherpa sample
    if Config.isMC:
        process.genWeightProducer = cms.EDProducer("GenWeightProducer")
        process.eventVarsPath += process.genWeightProducer
        if Config.isAMCatNLO:
            process.lheWeightProducer = cms.EDProducer("LHEWeightProducer")
            process.eventVarsPath += process.lheWeightProducer

    if Config.doWJetsFlavour:
        process.treePath += process.flavourAnalyzer

    if Config.isMC:
        if not Config.isSherpa:
            process.meWeightProducer = cms.EDProducer("MEWeightProducer")
            process.eventVarsPath += process.meWeightProducer

        process.load("SimGeneral.HepPDTESSource.pythiapdt_cfi")
        process.prunedGenParticles = cms.EDProducer(
            "GenParticlePruner",
            src=cms.InputTag("genParticles"),
            select=cms.vstring(
                "drop  *",
                "keep status = 3",  # keeps all particles from the hard matrix element
                "+keep abs(pdgId) = 15 & status = 1",  # keeps intermediate decaying tau
            ),
        )
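        # In GenParticlePruner selections a leading "+" also keeps the matched
        # particles' first mothers ("++" would keep all ancestors).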

        """process.pat2pxlio=cms.EDAnalyzer('EDM2PXLIO',
            SelectEventsFromProcess=cms.vstring("USER"),  
            SelectEventsFromPath = cms.vstring("p0"),
            OutFileName=cms.untracked.string("wjets.pxlio"),
            process=cms.untracked.string("test"),
    
            genCollection = cms.PSet(
                type=cms.string("GenParticle2Pxlio"),
                srcs=cms.VInputTag(cms.InputTag("prunedGenParticles")),
                EventInfo=cms.InputTag('generator')
            ),
            
            genJets = cms.PSet(
                type=cms.string("GenJet2Pxlio"),
                srcs=cms.VInputTag("ak5GenJets","kt4GenJets","kt6GenJets"),
                names=cms.vstring("AK5GenJets","KT4GenJets","KT6GenJets")
            ),
            
            q2weights = cms.PSet(
                type=cms.string("ValueList2Pxlio"),
                srcs=cms.VInputTag(
                    cms.InputTag("extraPartons","nExtraPartons"),
                ),
                names = cms.vstring("nExtraPartons")
            )
            
            
        )"""

        process.extraPartons = cms.EDProducer("ExtraPartonCounter", isTTJets=cms.bool("TTJets" in Config.subChannel))

        process.extraPartonSequence = cms.Sequence(process.prunedGenParticles * process.extraPartons)
        # process.pxlioOut=cms.EndPath(process.out*process.pat2pxlio)

        process.eventVarsPath += process.extraPartonSequence

    # -----------------------------------------------
    # Outpath
    # -----------------------------------------------
    process.out = cms.OutputModule(
        "PoolOutputModule",
        dropMetaData=cms.untracked.string("DROPPED"),
        splitLevel=cms.untracked.int32(99),
        fileName=cms.untracked.string(options.outputFile),
        SelectEvents=cms.untracked.PSet(SelectEvents=cms.vstring(["*"])),
        outputCommands=cms.untracked.vstring(
            "drop *",
            #'keep *',
            "keep edmMergeableCounter_*__*",
            "keep *_generator__*",
            #'keep *_genParticles__*', #hack for powheg PDF sets
            "keep edmTriggerResults_TriggerResults__*",
            "keep *_flavourAnalyzer_*_STPOLSEL2",
            "keep floats_*_*_STPOLSEL2",
            "keep double_*__STPOLSEL2",
            "keep float_*__STPOLSEL2",
            "keep double_*_*_STPOLSEL2",
            "keep float_*_*_STPOLSEL2",
            "keep int_*__STPOLSEL2",
            "keep int_*_*_STPOLSEL2",
            "keep int_*_*_*",
            "keep String_*_*_*",  # the decay trees
            "keep *_pdfInfo1_*_STPOLSEL2",
            "keep *_pdfInfo2_*_STPOLSEL2",
            "keep *_pdfInfo3_*_STPOLSEL2",
            "keep *_pdfInfo4_*_STPOLSEL2",
            "keep *_pdfInfo5_*_STPOLSEL2",
            #'keep *',
            #'keep *_recoTop_*_*',
            #'keep *_goodSignalMuons_*_*',
            #'keep *_goodSignalElectrons_*_*',
            #'keep *_goodJets_*_*',
            #'keep *_bTaggedJets_*_*',
            #'keep *_untaggedJets_*_*',
        ),
    )
    if Config.doDebug:
        process.out.outputCommands.append("keep *")
        process.debugpath = cms.Path(
            process.muAnalyzer * process.eleAnalyzer * process.jetAnalyzer * process.metAnalyzer
        )
    process.outpath = cms.EndPath(process.out)
    # when skimming, keep only events that pass a lepton selection path;
    # otherwise the initial "*" already accepts everything and the appended
    # paths are redundant but harmless
    if Config.doSkim:
        process.out.SelectEvents.SelectEvents = []
    process.out.SelectEvents.SelectEvents.append("elePath")
    process.out.SelectEvents.SelectEvents.append("muPath")

    # -----------------------------------------------
    # Final printout
    # -----------------------------------------------

    if hasattr(process, "out"):
        print "Output patTuples: %s" % process.out.fileName.value()
    print 80 * "-"
    print "Step2 configured"

    return process
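
In a cmsRun driver file this function would presumably be called at module scope so that the returned cms.Process object is picked up; a minimal sketch:

process = SingleTopStep2()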
Example #57
0
def makeJSON(optlist):
    outdir = optlist[0]
    basedir = optlist[1]
    lastUnblindRun = optlist[2]
    name = optlist[3]
    files = optlist[4:]
    s = sampleInfo(name,basedir,files)
    
    #lumi set for this sample
    mergedLumisUnblind = set()
    mergedLumisBlinded = set()

    for f in s.fileList:
        file = TFile.Open(f)
        # note: a failed TFile.Open yields a null PyROOT proxy that compares
        # equal to None without being None, so keep the "==" comparison
        if file == None: continue
        # only keep necessary branches
        t = file.Get("TreeMaker2/PreSelection")
        if t == None: continue
        t.SetBranchStatus("*",0)
        t.SetBranchStatus("RunNum",1)
        t.SetBranchStatus("LumiBlockNum",1)

        #get tree entries
        nentries = t.GetEntries()
        if nentries==0: continue
        t.SetEstimate(nentries)
        t.Draw("RunNum:LumiBlockNum","","goff")
        # copy the Draw buffers into arrays before the next Draw overwrites them
        v1 = t.GetV1()
        v1.SetSize(t.GetSelectedRows())
        a1 = array.array('d', v1)
        v1 = None
        v2 = t.GetV2()
        v2.SetSize(t.GetSelectedRows())
        a2 = array.array('d', v2)
        v2 = None
        
        #loop over tree entries
        for run,ls in izip(a1,a2):
            irun = int(run)
            ils = int(ls)
            if irun <= lastUnblindRun or lastUnblindRun == -1:
                mergedLumisUnblind.add((irun,ils))
            else:
                mergedLumisBlinded.add((irun,ils))

        file.Close()

    ### end loop over files in sample

    #convert the runlumis from list of pairs to dict: [(123,3), (123,4), (123,5), (123,7), (234,6)] => {123 : [3,4,5,7], 234 : [6]}
    mLumisDictUnblind = {}
    mLumisDictBlinded = {}
    for k, v in mergedLumisUnblind:
        mLumisDictUnblind.setdefault(k, []).append(int(v))
    for k, v in mergedLumisBlinded:
        mLumisDictBlinded.setdefault(k, []).append(int(v))

    #make lumi list from dict
    mergedLumiListUnblind = LumiList(runsAndLumis=mLumisDictUnblind)
    mergedLumiListBlinded = LumiList(runsAndLumis=mLumisDictBlinded)
    # an empty LumiList object is still truthy, so test the source dicts instead
    if mLumisDictUnblind:
        outfile = outdir+'/lumiSummary_unblind_'+s.outName+'.json'
        mergedLumiListUnblind.writeJSON(outfile)
        print "wrote "+outfile
    if mLumisDictBlinded:
        outfile = outdir+'/lumiSummary_blinded_'+s.outName+'.json'
        mergedLumiListBlinded.writeJSON(outfile)
        print "wrote "+outfile
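
A minimal invocation sketch, inferring the optlist layout from the indexing above (the directory, run number, and file names are hypothetical):

makeJSON([
    "jsons",             # outdir
    "/store/user/...",   # basedir
    278808,              # lastUnblindRun (-1 keeps everything unblinded)
    "JetHT",             # sample name
    "JetHT_2016B.root",  # remaining entries: input files
])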
Example #58
0
if isMC:
    process.skimEventProducer.SelectedPaths = cms.vstring ("")
    # special paths always saved
    setattr(stepBTree.variables, "std_vector_trigger_special",   cms.string("specialRateTrigger/8") )



# mc
if dataset[0] == "MC":
    stepBTree.variables.baseW = "%.12f" % scalef
# data
else:
    from FWCore.PythonUtilities.LumiList import LumiList
    import os
    if json is not None:
      # create the untracked parameter first, then overwrite it with the
      # run/lumi ranges parsed from the JSON
      lumis = LumiList(filename = os.getenv('CMSSW_BASE')+'/src/LatinoTrees/Misc/Jsons/%s.json'%json)
      process.source.lumisToProcess = cms.untracked.VLuminosityBlockRange()
      process.source.lumisToProcess = lumis.getCMSSWString().split(',')
    stepBTree.variables.baseW = "1"
    stepBTree.variables.trpu = cms.string("1")
    stepBTree.variables.itpu = cms.string("1")
    stepBTree.variables.ootpup1 = cms.string("1")
    stepBTree.variables.ootpum1 = cms.string("1")
    stepBTree.variables.puW = cms.string("1")
    stepBTree.variables.puAW = cms.string("1")
    stepBTree.variables.puBW = cms.string("1")


####################
# run electron id ##
# see twiki:

Example #59
0
        ls = set(l for r,l in lumi_mask)

        if ls == set([-1]):
            is_mc = True
        elif -1 in ls:
            raise ValueError('batch for dataset %s has lumis -1 and others' % dataset)
        else:
            is_mc = False

        if not is_mc:
            job_control = '''
lumi_mask = pick_events.json
total_number_of_lumis = -1
lumis_per_job = 1'''
            ll = LumiList(lumis=lumi_mask)
            ll.writeJSON('pick_events.json')
        else:
            job_control = '''
total_number_of_events = -1
events_per_job = 100000'''

        scheduler = 'condor' if 'condor' in sys.argv else 'glite'
        open('crab.cfg', 'wt').write(crab_cfg % locals())

        pset = open('pick_events.py').read()
        pset += '\nevents_to_process = '
        pset += pformat(events_to_process)
        pset += '\nset_events_to_process(process, events_to_process)\n'
        open('pick_events_crab.py', 'wt').write(pset)
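
The lumi_mask handled above is a list of (run, lumi) pairs, with lumi == -1 as the MC sentinel; a standalone sketch of the data-branch round-trip (the pair values are hypothetical):

from FWCore.PythonUtilities.LumiList import LumiList

pairs = [(273158, 1), (273158, 2), (273302, 45)]
ll = LumiList(lumis=pairs)
ll.writeJSON('pick_events.json')  # adjacent lumis are merged into ranges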