def doit(*x):
    """Randomly pick lumi sections from a lumi list until a target
    integrated luminosity is reached, then write the picks as a JSON mask.

    Positional args (in order):
      ll_fn             input JSON lumi list to draw from
      lumi_fn           brilcalc csv with per-lumi integrated luminosity
      check_intlumi_sum expected total intlumi (converted to /ub below)
      goal              fraction of the total to select
      out_fn            output JSON file name
    """
    print x  # echo raw arguments for the log
    ll_fn, lumi_fn, check_intlumi_sum, goal, out_fn = x
    check_intlumi_sum *= 1e9 # csv in /ub
    random.seed(8675309)  # fixed seed: selection is reproducible
    goal *= check_intlumi_sum  # goal was given as a fraction of the total
    in_ll = LumiList(ll_fn).getLumis()
    intlumis, intlumi_sum = intlumi_from_brilcalc_csv(lumi_fn, False)
    # sanity check: csv total must agree with the expected value to 1/ub*1e6
    assert abs(intlumi_sum - check_intlumi_sum) < 1e6
    tot = 0.
    out_ll = []
    # draw without replacement until the accumulated intlumi reaches goal;
    # NOTE(review): if a drawn lumi is missing from the csv this raises
    # KeyError -- the guard below was disabled on purpose, it seems
    while tot < goal:
        i = random.randrange(len(in_ll))
        rl = in_ll.pop(i)
        #if not intlumis.has_key(rl):
        #    continue
        tot += intlumis[rl]
        out_ll.append(rl)
    print 'tot = %f, picked %i lumis' % (tot, len(out_ll))
    LumiList(lumis=out_ll).writeJSON(out_fn)
def getLumiList(lumi_mask_name, logger=None):
    """
    Takes a lumi-mask and returns a LumiList object.
    lumi-mask: either an http address or a json file on disk.
    Raises ConfigurationException when the mask cannot be loaded.
    """
    scheme = urlparse(lumi_mask_name)[0]
    if scheme in ('http', 'https'):
        if logger:
            logger.debug('Downloading lumi-mask from %s' % lumi_mask_name)
        try:
            return LumiList(url=lumi_mask_name)
        except Exception as err:
            raise ConfigurationException(
                "CMSSW failed to get lumimask from URL. Please try to download the lumimask yourself and point to it in crabConfig;\n%s" % str(err))
    if logger:
        logger.debug('Reading lumi-mask from %s' % lumi_mask_name)
    try:
        return LumiList(filename=lumi_mask_name)
    except IOError as err:
        raise ConfigurationException("Problem loading lumi-mask file; %s" % str(err))
def __init__(self, name, nanotrees, weight, triggers, jess, jers, mc, injfile, outjfile):
    """Process the listed nano trees, filling histograms; for data,
    accumulate the run/lumi sections actually seen and write them out.

    nanotrees: text file listing one input ROOT file per line.
    injfile:   input JSON lumi mask applied while filling.
    outjfile:  output JSON, written only when mc is False.
    """
    self.jess = jess
    self.jers = jers
    self.mc = mc
    self.weight = weight
    self.__book__(name)
    # get json file
    myList = LumiList(filename=injfile)
    # initialize output lumilist
    myrunlumi = []
    # open root files
    files = open(nanotrees, "r")
    # for file in glob.glob(nanotrees + "*root"):
    for file in files.read().splitlines():
        self.Fill(file, triggers, myList, myrunlumi)
    # only do this for data
    if not (self.mc):
        outList = LumiList(lumis=myrunlumi)
        outList.writeJSON(outjfile)
    print "got to the end"
    files.close()
    # flush histograms to the output ROOT file opened by __book__
    self.O.cd()
    self.O.Write()
    self.O.Close()
def combine_grls(grl1, grl2):
    """Return the compact list of the overlap of two compact GRLs.

    NOTE(review): despite the name, this takes the AND (intersection) of
    the two lists, not their union -- confirm this is intended.
    """
    overlap = LumiList(compactList=grl1) & LumiList(compactList=grl2)
    # print overlap.compactList
    return overlap.compactList
def files_for_json(json_fn, dataset, instance='global'):
    """Return, sorted, the files of *dataset* whose lumis overlap the JSON mask."""
    mask = LumiList(json_fn)
    matched = set()
    for fn, run_lumis in file_details_run_lumis(dataset, instance).iteritems():
        if mask & LumiList(runsAndLumis=run_lumis):
            matched.add(fn)
    return sorted(matched)
def getDatasetLumiList(self, name, catalog):
    """Union the per-file lumi lists of dataset *name* from *catalog*."""
    from FWCore.PythonUtilities.LumiList import LumiList
    total = LumiList()
    for entry in catalog[name]["files"]:
        total += LumiList(runsAndLumis=entry.get("lumis", {}))
    return total
def crab_fjr_json_to_ll(fn): print colors.yellow('this is not fully tested') j = crab_fjr_json(fn) ll = LumiList() for x in j['steps']['cmsRun']['input']['source']: x2 = defaultdict(list) for k, v in x['runs'].iteritems(): for l in v.keys(): x2[int(k)].append(int(l)) ll += LumiList(runsAndLumis=x2) return ll
def mergeLumis(inputdata, lumimask):
    """
    Computes the processed lumis, merges if needed and returns the compacted
    list, together with the compacted list of lumis in *lumimask* that were
    not processed.

    inputdata: iterable of runsAndLumis dicts (one per job report).
    lumimask:  compact-list dict of the lumis that were supposed to run.
    """
    import logging
    mergedlumis = LumiList()
    doublelumis = LumiList()
    for report in inputdata:
        reportlumis = LumiList(runsAndLumis=report)
        # accumulate overlaps across ALL reports; the original reassigned
        # doublelumis each iteration, so only the last report's overlap
        # was ever reported
        doublelumis = doublelumis | (mergedlumis & reportlumis)
        mergedlumis = mergedlumis | reportlumis
    if doublelumis:
        # was `self.logger.info(...)` -- a NameError in this free function
        logging.getLogger(__name__).info("Warning: double run-lumis processed %s" % doublelumis)
    return mergedlumis.getCompactList(), (LumiList(compactList=lumimask) - mergedlumis).getCompactList()
def makeLumiBlocks(in_lumi_file, outdir, chunksize=5):
    """Split the lumi sections of *in_lumi_file* into JSON blocks of
    *chunksize* lumis each, written to outdir/block_<n>.json.

    Returns the list of file names written.
    """
    lumis = LumiList(filename=in_lumi_file).getLumis()
    blocks = []
    for nblock, lumiblock in enumerate(chunks(lumis, chunksize), start=1):
        fn = outdir + "/block_{0}.json".format(nblock)
        # context manager guarantees the file is closed even if write fails
        with open(fn, "w") as of:
            of.write(str(LumiList(lumis=lumiblock)))
        blocks.append(fn)
    return blocks
def mk_secondary_lumimask(dset):
    """Query DAS for every file of *dset* (prod/phys03), collect their
    run/lumi sections, and write a lumi-mask JSON into output_dir.
    """
    dq = das_query("file dataset=%s instance=prod/phys03" % dset, cmd='dasgoclient --dasmaps=./')
    # membership test directly on the dict (was `in dq.keys()`)
    assert 'data' in dq
    fs = [str(f['file'][0]['name']) for f in dq['data']]
    #fs = fs[:2]
    print('N files:', len(fs))
    lumis = []
    dqs = [ das_query("lumi file=%s instance=prod/phys03" % f, cmd='dasgoclient --dasmaps=./') for f in fs ]
    for dq in dqs:
        for data in dq['data']:
            for lumi in data['lumi'][0]['lumi_section_num']:
                lumis.append([data['lumi'][0]['run_number'], lumi])
    jsonList = LumiList(lumis=lumis)
    #print(jsonList)
    # output name derived from the second '/' field of the dataset name
    output_file = dset.split('/')[2].split('-')[1].split('_')[0]
    #print(output_file)
    # NOTE(review): output_dir is a module-level global -- confirm defined
    jsonList.writeJSON(output_dir + output_file + '_3photons_imgskim_lumi_list.json')
def main(argv=None): if argv is None: argv = sys.argv[1:] args = parse_command_line(argv) loglevel = getattr(logging,args.log) logging.basicConfig(format='%(asctime)s.%(msecs)03d %(levelname)s %(name)s: %(message)s', level=loglevel, datefmt='%Y-%m-%d %H:%M:%S', stream=sys.stderr) allfiles = [] for f in args.inputFiles: for fname in glob.glob(f): allfiles += [fname] logging.info('Adding {0} files to tchain'.format(len(allfiles))) tchain = ROOT.TChain(args.treeName) for fname in allfiles: tchain.Add(fname) nlumis = tchain.GetEntries() logging.info('Processing {0} lumis'.format(nlumis)) allLumis = {} total = 0 for row in tchain: total += 1 if row.run not in allLumis: allLumis[row.run] = set() allLumis[row.run].add(row.lumi) lumiJson = LumiList(runsAndLumis = allLumis) #lumiJson.writeJSON(args.outputFile) print lumiJson
def getLumisToSkip(self, dataset):
    """Return the dataset's lumisToSkip as a LumiList, or None if the
    dataset is not in the catalog."""
    catalog = self.readCatalog(True)
    if dataset not in catalog:
        return None
    from FWCore.PythonUtilities.LumiList import LumiList
    return LumiList(compactList=catalog[dataset].get('lumisToSkip', {}))
def getRuns(name=None, bfield=None, bunchSpacing=None):
    """Return the runs of all run periods matching the given filters.

    Each of name/bfield/bunchSpacing is either None (no constraint) or a
    value the run period must match exactly.
    """
    ll = LumiList()
    for rp in runPeriods:
        if name is not None and rp.name != name:
            continue
        if bfield is not None and rp.bfield != bfield:
            continue
        if bunchSpacing is not None and rp.bunchSpacing != bunchSpacing:
            continue
        # build the period's lumi list once -- the original called
        # LumiListForRunPeriod(rp) twice and discarded the first result
        ll += LumiListForRunPeriod(rp)
    return ll.getRuns()
def getLumiList(self, *args): catalog = self.readCatalog(True) datasets = [] output = filter(lambda x: "output=" in x, args) args = filter(lambda x: not "output=" in x, args) for dataset in catalog.keys(): for arg in args: if dataset == arg or fnmatch(dataset, arg): datasets.append(dataset) break if len(output) > 1: print "ERROR: you specified the output json more than once:\n" print " %s" % " ".join(output) sys.exit(-1) if len(output) > 0: output = output[0].strip("output=") else: output = None from FWCore.PythonUtilities.LumiList import LumiList fulist = LumiList() for dataset in datasets: dlist = LumiList() jsonout = dataset.lstrip("/").rstrip("/").replace("/", "_") + ".json" for fil in catalog[dataset]["files"]: flist = LumiList(runsAndLumis=fil.get("lumis", {})) ## print flist dlist += flist if not output: with open(jsonout, "w+") as fout: fout.write(json.dumps(dlist.compactList, sort_keys=True)) fout.close() else: fulist += dlist if output: with open(output, "w+") as fout: fout.write(json.dumps(fulist.compactList, sort_keys=True)) fout.close()
def __init__(self, cfg_ana, cfg_comp, looperName):
    """Load the component's JSON lumi mask (data only; None for MC)."""
    super(JSONAnalyzer, self).__init__(cfg_ana, cfg_comp, looperName)
    if cfg_comp.isMC:
        self.lumiList = None
    else:
        if self.cfg_comp.json is None:
            raise ValueError('component {cname} is not MC, and contains no JSON file. Either remove the JSONAnalyzer for your path or set the "json" attribute of this component'.format(cname=cfg_comp.name))
        self.lumiList = LumiList(os.path.expandvars(self.cfg_comp.json))
    self.rltInfo = RLTInfo()
def getDatasetLumiList(self,name,catalog,check=False): from FWCore.PythonUtilities.LumiList import LumiList lumisToSkip = catalog[name].get('lumisToSkip',None) if lumisToSkip: print "Dataset %s has list of lumi sections to skip in catalog" % name lumisToSkip = LumiList(compactList=lumisToSkip) dlist = LumiList() for fil in catalog[name]["files"]: flist = LumiList( runsAndLumis=fil.get("lumis",{}) ) if lumisToSkip and not check: flist = flist.__sub__(lumisToSkip) if check: andlist = dlist.__and__(flist) ## print andlist, fil.get("name") if len(andlist) != 0: print "Warning: duplicate lumi sections in dataset. %s" % fil.get("name") print andlist, flist dlist += flist return dlist
def load_golden_jsons(golden_json_path):
    """Read a golden JSON file and return {run: [lumi, ...]}, asserting
    that no lumi appears twice for the same run."""
    if not os.path.isfile(golden_json_path):
        raise RuntimeError("No such file: %s" % golden_json_path)
    runlumi_dict = {}
    for run, lumi in LumiList(golden_json_path).getLumis():
        seen = runlumi_dict.setdefault(run, [])
        assert(lumi not in seen)
        seen.append(lumi)
    return runlumi_dict
def mergeDataset(self,dst,merge): dst["vetted"]=False from FWCore.PythonUtilities.LumiList import LumiList dstLumisToSkip = LumiList(compactList=dst.get('lumisToSkip',{})) mergeLumisToSkip = LumiList(compactList=merge.get('lumisToSkip',{})) dstLumisToSkip += mergeLumisToSkip dstLumisToSkip = dstLumisToSkip.compactList if len(dstLumisToSkip) > 0: dst['lumisToSkip'] = dstLumisToSkip print "\nWARNING: Merged lumisToSkip list. It is reccomended to run the 'overlap' command to re-geneate the list from scratch." dstFiles=dst["files"] mergeFiles=merge["files"] for fil in mergeFiles: skip = False for dfil in dstFiles: if dfil["name"] == fil["name"]: skip = True if not skip: dstFiles.append( fil )
def writejson(l, out_fn):
    """Write the (run, lumi) entries of *l* as a JSON lumi mask.

    For MC (file name not recognized as data) every entry is assigned to
    run 1 and its first element is taken as the lumi number.
    """
    data = is_data_fn(out_fn)
    by_run = defaultdict(list)
    for entry in l:
        if data:
            run, lumi = entry[:2]
        else:
            run, lumi = 1, entry[0]
        by_run[run].append(lumi)
    LumiList(runsAndLumis=by_run).writeJSON(out_fn)
def __init__(self, cfg_ana, cfg_comp, looperName):
    """Load the component JSON lumi mask (data only) plus an optional
    second mask from the component's `additionaljson` attribute."""
    super(JSONAnalyzer, self).__init__(cfg_ana, cfg_comp, looperName)
    if cfg_comp.isMC:
        self.lumiList = None
    else:
        if self.cfg_comp.json is None:
            raise ValueError(
                'component {cname} is not MC, and contains no JSON file. Either remove the JSONAnalyzer for your path or set the "json" attribute of this component'
                .format(cname=cfg_comp.name))
        self.lumiList = LumiList(os.path.expandvars(self.cfg_comp.json))
    self.twojson = hasattr(self.cfg_comp, 'additionaljson')
    if self.twojson:
        self.additionalLumiList = LumiList(
            os.path.expandvars(self.cfg_comp.additionaljson))
    self.useLumiBlocks = getattr(self.cfg_ana, 'useLumiBlocks', False)
    self.rltInfo = RLTInfo()
def __init__(self, fn, mask_fn=None):
    """Load lumi lines from *fn*, optionally filtered by a JSON mask;
    index them by run and by (run, ls) and track the first run per fill."""
    self.mask = LumiList(mask_fn) if mask_fn else None
    self.lls = LumiLines.load(fn)
    self.by_run = defaultdict(list)
    self.by_run_ls = {}
    # sentinel larger than any real run number so min() picks real runs
    self.fills = defaultdict(lambda: 999999)
    for line in self.lls:
        if self.mask and (line.run, line.ls) not in self.mask:
            continue
        self.by_run[line.run].append(line)
        self.by_run_ls[(line.run, line.ls)] = line
        self.fills[line.fill] = min(self.fills[line.fill], line.run)
    self.fill_boundaries = sorted(self.fills.values())
    self.by_run = dict(self.by_run)
def getInputRunLumi(self, file):
    """Parse a crab job-report XML and write a compact JSON summary of
    the input run/lumi sections to <fjrDirectory>/inputLumiSummaryOfTask.json.

    Returns the summary file name, or None when a Job has no Lumis attribute.
    """
    import xml.dom.minidom
    dom = xml.dom.minidom.parse(file)
    ll=[]
    for elem in dom.getElementsByTagName("Job"):
        nJob = int(elem.getAttribute("JobID"))  # NOTE(review): unused below
        lumis = elem.getAttribute('Lumis')
        #lumis = '193752:1'
        #lumis = '193752:1-193752:5,193774:1-193774:5,193775:1'
        if lumis:
            tmp=str.split(str(lumis), ",")
            #print "tmp = ", tmp
        else:
            msg = "The summary file inputLumiSummaryOfTask.json about input run and lumi isn't created"
            common.logger.info(msg)
            return
        #tmp = [193752:1-193752:5] [193774:1-193774:5]
        for entry in tmp:
            # each entry is either "run:lumi" or a "run:min-run:max" range
            run_lumi=str.split(entry, "-")
            # run_lumi = [193752:1] [193752:5]
            if len(run_lumi) == 0:
                pass
            if len(run_lumi) == 1:
                # single lumi: append (run, lumi)
                lumi = str.split(run_lumi[0],":")[1]
                run = str.split(run_lumi[0],":")[0]
                ll.append((run,int(lumi)))
            if len(run_lumi) == 2:
                # inclusive range: expand every lumi between min and max
                lumi_max = str.split(run_lumi[1],":")[1]
                lumi_min = str.split(run_lumi[0],":")[1]
                run = str.split(run_lumi[1],":")[0]
                for count in range(int(lumi_min),int(lumi_max) + 1):
                    ll.append((run,count))
    if len(ll):
        lumiList = LumiList(lumis = ll)
        compactList = lumiList.getCompactList()
        totalLumiFilename = self.fjrDirectory + 'inputLumiSummaryOfTask.json'
        totalLumiSummary = open(totalLumiFilename, 'w')
        json.dump(compactList, totalLumiSummary)
        totalLumiSummary.write('\n')
        totalLumiSummary.close()
        msg = "Summary file of input run and lumi to be analize with this task: %s\n" %totalLumiFilename
        common.logger.info(msg)
    else:
        msg = "The summary file inputLumiSummaryOfTask.json about input run and lumi isn't created"
        common.logger.info(msg)
    # NOTE(review): when ll is empty, totalLumiFilename is unbound here and
    # this raises NameError -- confirm whether returning None was intended
    return totalLumiFilename
def LumiListForRunPeriod(rp, MIN_LUMIS=0):
    """Load the run period's JSON mask and keep only the runs inside
    [rp.firstRun, rp.lastRun] that have more than MIN_LUMIS lumi sections."""
    ll = LumiList(filename=rp.json)
    lumi_counts = defaultdict(int)
    for run, lumi in ll.getLumis():
        lumi_counts[run] += 1
    keep = [run for run in map(int, ll.getRuns())
            if rp.firstRun <= run <= rp.lastRun and lumi_counts[run] > MIN_LUMIS]
    ll.selectRuns(keep)
    return ll
def mergeLumis(inputdata):
    """
    Computes the processed lumis, merges if needed and returns the compacted list.
    """
    merged = set()
    # merge the lumis from single files
    for reports in inputdata.values():
        for report in reports:
            for run, lumis in literal_eval(report['runlumi']).items():
                if isinstance(run, bytes):
                    run = run.decode(encoding='UTF-8')
                # lumi arrives as str but the set stores (run, int) pairs
                merged.update((run, int(lumi)) for lumi in lumis)
    return LumiList(lumis=merged).getCompactList()
def files_for_events(run_events, dataset, instance='global'):
    """Return, sorted, the files of *dataset* containing the requested events.

    run_events entries may be bare run numbers, (run, event) pairs, or
    (run, lumi, event) triples; without a lumi the whole run matches.
    """
    wanted_run_lumis = []
    for x in run_events:
        # list of runs, or list of (run, event), or list of (run, lumi, event)
        if isinstance(x, int):  # idiomatic type check (was type(x) == int)
            wanted_run_lumis.append((x, None))
        elif len(x) == 2:
            wanted_run_lumis.append((x[0], None))
        else:
            wanted_run_lumis.append(x[:2])
    files = set()
    for file, run_lumis in file_details_run_lumis(dataset, instance).iteritems():
        ll = LumiList(runsAndLumis=run_lumis)
        for x in wanted_run_lumis:
            if ll.contains(*x):
                files.add(file)
    return sorted(files)
def getDuplicateLumis(lumisDict):
    """
    Get the run-lumis appearing more than once in the input dictionary of
    runs and lumis, which is assumed to have the following format:
        { '1': [1,2,3,4,6,7,8,9,10], '2': [1,4,5,20] }
    Returns the duplicates as a compact lumi list.
    """
    doubleLumis = set()
    for run, lumis in lumisDict.items():
        seen = set()
        for lumi in lumis:
            key = (run, lumi)
            # second and later sightings are duplicates; the original used
            # an opaque `in seen or seen.add(...)` side-effect one-liner
            if key in seen:
                doubleLumis.add(key)
            else:
                seen.add(key)
    return LumiList(lumis=doubleLumis).getCompactList()
def __init__(self, cfg_ana, cfg_comp, looperName):
    """Load the JSON lumi mask, preferring the analyzer's own "json"
    attribute over the component's (data only; lumiList is None for MC)."""
    super(JSONAnalyzer, self).__init__(cfg_ana, cfg_comp, looperName)
    if not cfg_comp.isMC:
        # idiomatic boolean test (was `hasattr(...) == False`)
        if self.cfg_comp.json is None and not hasattr(self.cfg_ana, "json"):
            raise ValueError(
                'component {cname} is not MC, and contains no JSON file. Either remove the JSONAnalyzer for your path or set the "json" attribute of this component'
                .format(cname=cfg_comp.name))
        # use json from this analyzer if given, otherwise use the component json
        self.lumiList = LumiList(
            os.path.expandvars(
                getattr(self.cfg_ana, "json", self.cfg_comp.json)))
    else:
        self.lumiList = None
    self.passAll = getattr(self.cfg_ana, 'passAll', False)
    self.useLumiBlocks = getattr(self.cfg_ana, 'useLumiBlocks', False)
    self.rltInfo = RLTInfo()
def customise(process):
    """Data/MC customisation: when any input file looks like Run2015 data,
    attach the certified-lumi mask to the source; also flags ttbar inputs."""
    lumiFile = 'Cert_246908-260627_13TeV_PromptReco_Collisions15_25ns_JSON.txt'
    fileNames = process.source.fileNames
    runOnMC = not any('Run2015' in f for f in fileNames)
    isTTbar = any('/TT' in f or '/tt' in f for f in fileNames)
    if not runOnMC:
        from FWCore.PythonUtilities.LumiList import LumiList
        lumiList = LumiList(os.environ["CMSSW_BASE"] + '/src/CATTools/CatProducer/prod/LumiMask/' + lumiFile)
        #lumiList = LumiList(os.environ["CMSSW_BASE"]+'/src/CATTools/CommonTools/test/ttbb/'+lumiFile)
        process.source.lumisToProcess = lumiList.getVLuminosityBlockRange()
def getLumiListInValidFiles(dataset, dbsurl='phys03'):
    """
    Get the runs/lumis in the valid files of a given dataset.

    dataset: the dataset name as published in DBS
    dbsurl: the DBS URL or DBS prod instance

    Returns a LumiList object.
    Raises ClientException on DBS errors or when the dataset is unknown.
    """
    from dbs.apis.dbsClient import DbsApi
    # map a short instance name (e.g. 'phys03') to its full reader URL;
    # anything not in the map passes through unchanged
    dbsurl = DBSURLS['reader'].get(dbsurl, dbsurl)
    dbs3api = DbsApi(url=dbsurl)
    try:
        files = dbs3api.listFileArray(dataset=dataset, validFileOnly=0, detail=True)
    except Exception as ex:
        msg = "Got DBS client error requesting details of dataset '%s' on DBS URL '%s': %s" % (
            dataset, dbsurl, ex)
        msg += "\n%s" % (traceback.format_exc())
        raise ClientException(msg)
    if not files:
        msg = "Dataset '%s' not found in DBS URL '%s'." % (dataset, dbsurl)
        raise ClientException(msg)
    validFiles = [f['logical_file_name'] for f in files if f['is_file_valid']]
    blocks = set([f['block_name'] for f in files])
    runLumiPairs = []
    # lumis are only listable per block; keep just the lumis belonging to
    # the files flagged valid above
    for blockName in blocks:
        fileLumis = dbs3api.listFileLumis(block_name=blockName)
        for f in fileLumis:
            if f['logical_file_name'] in validFiles:
                run = f['run_num']
                lumis = f['lumi_section_num']
                for lumi in lumis:
                    runLumiPairs.append((run, lumi))
    lumiList = LumiList(lumis=runLumiPairs)
    return lumiList
def fjr2ll(fjr_fn):
    '''Ripped off from fjr2json.py that comes with CMSSW.

    Parse a framework-job-report XML file and return a LumiList of the
    run/lumi sections of its input files. Supports both the regular XML
    layout (Runs.Run elements) and the newer dict-literal string format.
    '''
    runsLumisDict = {}
    obj = xml2obj(filename=fjr_fn)
    if obj.InputFile is None:
        print 'problem with', fjr_fn
    else:
        for inputFile in obj.InputFile:
            if not inputFile.Runs:
                # empty string is the only falsy Runs value expected here
                assert inputFile.Runs == ''
            else:
                try:
                    # Regular XML version, assume only one of these
                    runObjects = inputFile.Runs.Run
                    for run in runObjects:
                        runNumber = int(run.ID)
                        runList = runsLumisDict.setdefault(runNumber, [])
                        for lumiPiece in run.LumiSection:
                            lumi = int(lumiPiece.ID)
                            runList.append(lumi)
                except:
                    # NOTE(review): bare except is a deliberate fallback to
                    # the literal-eval format on ANY failure; confirm whether
                    # narrowing to AttributeError would be safe
                    if isinstance(inputFile.Runs, basestring):
                        runObjects = [inputFile.Runs]
                    else:
                        runObjects = inputFile.Runs
                    for runObject in runObjects:
                        try:
                            # new style: Runs holds a "{run: [lumis]}" literal
                            runs = ast.literal_eval(runObject)
                            for run, lumis in runs.iteritems():
                                runList = runsLumisDict.setdefault(
                                    int(run), [])
                                runList.extend(lumis)
                        except ValueError:
                            # Old style handled above
                            pass
    return LumiList(runsAndLumis=runsLumisDict)