def __init__(self, config, inputPaths, name=None):
    """Set up a tree producer for a group of input files.

    The base name of every input path is split on ``.``: field 0 is taken
    as a flag, field 1 as a task and, when ``name`` is omitted, field 2 of
    the single input path as the producer name.

    config     -- ConfigParser-like object. ``general/outPath`` is read, and
                  for every section named ``treeProcessor:<name>`` the class
                  given by its ``type`` option is looked up in this module
                  and instantiated as ``cls(config, <name>)``.
    inputPaths -- sequence of input file paths.
    name       -- producer name; may only be omitted for exactly one path.

    Raises AssertionError when ``name`` is omitted and ``inputPaths`` does
    not hold exactly one path.
    """
    from errno import EEXIST
    from os import makedirs
    from os.path import split as splitPath
    from sys import modules

    if name is None:
        # The 1-tuple keeps the message formatting intact when inputPaths is
        # itself a tuple (a bare tuple would be unpacked by ``%``).
        assert len(inputPaths) == 1, \
            "can only determine names for singlePaths automatically. Got '%s'" % (inputPaths, )
        name = splitPath(inputPaths[0])[1].split(".")[2]

    self.config = config
    self.name = name
    self.tasks = list(set(splitPath(path)[1].split(".")[1] for path in inputPaths))
    self.flags = list(set(splitPath(path)[1].split(".")[0] for path in inputPaths))
    self.inputPaths = inputPaths
    self.counterSum = None
    self.outPath = config.get("general", "outPath")

    # Create the output directory without the exists()/makedirs() race: a
    # directory created concurrently by another job is not an error.
    try:
        makedirs(self.outPath)
    except OSError as error:
        if error.errno != EEXIST:
            raise

    self.treeProcessors = {}
    thisModule = modules[__name__]
    sectionPrefix = "treeProcessor:"
    for section in self.config.sections():
        if not section.startswith(sectionPrefix):
            continue
        processorName = section.split(sectionPrefix)[1]
        processorType = self.config.get(section, "type")
        # The processor class is resolved by name from this very module.
        processorClass = getattr(thisModule, processorType)
        self.treeProcessors[processorName] = processorClass(self.config, processorName)
def _getIsoTrees(self, section):
    """Return the isolation tree paths configured by ``section``.

    Input files whose dataset field (third dot-separated field of the base
    name) matches the ``Dataset`` regular expression are used together with
    the ``Selection`` option. If no file matches, all input files are used
    together with ``otherSelection`` instead.

    Returns a dict with the single key ``""`` mapping to a list of strings
    ``<path>/<task><selection><treeProducerName>/Trees/Iso``.

    Raises AssertionError when more than one entry of ``self.tasks`` matches
    a file name.
    """
    from re import match
    from os.path import split as splitPath

    treeProducerName = self.config.get(section, "treeProducerName")
    datasetExpression = self.config.get(section, "Dataset")
    datasetSelection = self.config.get(section, "Selection")

    datasetPaths = [
        path for path in self.inputPaths
        if match(datasetExpression, splitPath(path)[1].split(".")[2]) is not None
    ]
    if not datasetPaths:
        # Nothing matched: fall back to every input with the other selection.
        datasetSelection = self.config.get(section, "otherSelection")
        datasetPaths = self.inputPaths

    treePaths = []
    for path in datasetPaths:
        baseName = splitPath(path)[1]
        task = None
        for candidate in self.tasks:
            if ".%s." % candidate in baseName:
                # The '%' was missing here, which turned the intended
                # message into a "'str' object is not callable" TypeError.
                assert task is None, \
                    "unable to disambiguate tasks '%s' matches both '%s' and '%s'" % (
                        path, task, candidate)
                task = candidate
        treePaths.append("%s/%s%s%s/Trees/Iso" % (path, task, datasetSelection, treeProducerName))
    return {"": treePaths}
def getHistoFromDQM(serverUrl, runNr, dataset, histoPath):
    """Fetch a histogram object from the DQM server.

    ``histoPath`` may hold several comma-separated candidate paths. For each
    candidate, the folder part (everything before the last ``/``) is
    requested through ``dqm_get_json`` and the histogram is looked up in the
    answer by its base name.

    Returns the ``rootobj`` entry of the last candidate that was found, or
    ``None`` when none of the candidates is present.

    Changes with respect to the previous implementation:
    * the folder is derived from each candidate instead of from the whole
      comma-joined string (identical for a single path, and the only
      meaningful folder when several candidates are given);
    * the string is split with ``str.split`` so the function no longer
      depends on ``re`` being imported at module level;
    * the debugging dumps and the redundant assert were removed.
    """
    from src.dqmjson import dqm_get_json
    from os.path import split as splitPath

    print("fetching %s %s %s %s" % (serverUrl, runNr, dataset, histoPath))

    result = None
    for path in histoPath.split(","):
        folder, histoName = splitPath(path)
        content = dqm_get_json(serverUrl, runNr, dataset, folder, rootContent=True)
        if histoName in content:
            result = content[histoName]["rootobj"]
    return result
def getHistoFromDQM(serverUrl, runNr, dataset, histoPath):
    """Fetch a histogram object from the DQM server.

    ``histoPath`` may hold several comma-separated candidate paths. Each
    candidate is split into folder and histogram name; the folder is
    requested through ``dqm_get_json`` and the name is looked up in the
    returned mapping.

    Returns the ``rootobj`` entry of the last candidate found, or ``None``
    if no candidate exists on the server.

    Notes on this revision: the folder requested is now the one of the
    individual candidate (before, the folder of the complete comma-joined
    string was requested once per candidate, which only makes sense for a
    single path); ``str.split`` replaces ``re.split`` so no module-level
    ``re`` import is needed; debug printing was reduced to one line.
    """
    from src.dqmjson import dqm_get_json
    from os.path import split as splitPath

    print("fetching %s %s %s %s" % (serverUrl, runNr, dataset, histoPath))

    found = None
    for candidate in histoPath.split(","):
        folder, histoName = splitPath(candidate)
        listing = dqm_get_json(serverUrl, runNr, dataset, folder, rootContent=True)
        if histoName in listing:
            found = listing[histoName]["rootobj"]
    return found
def importFromPath(path, memberName=None):
    """Import a module, or one member of it, from an arbitrary script path.

    The directory of ``path`` is appended to the module search path for the
    duration of the import and removed again afterwards, also when the
    import fails.

    path       -- path of the script to import.
    memberName -- optional name to import from the module.

    Returns the imported member when ``memberName`` is given, otherwise the
    imported module.
    """
    scriptDir, scriptName = splitPath(path)
    moduleName = rootFileName(scriptName)

    # Execute the import statement into an explicit namespace. Reading the
    # result back through locals() depends on exec() being able to write
    # into the function's local scope, which is not guaranteed.
    namespace = {}
    sysPath.append(scriptDir)
    try:
        if memberName is None:
            exec(__IMPORT_TMPLT % (moduleName, ), globals(), namespace)
        else:
            exec(__FROM_IMPORT_TMPLT % (moduleName, memberName), globals(), namespace)
    finally:
        # Always undo the search path change, even if the import raised.
        sysPath.remove(scriptDir)
    return namespace[memberName] if memberName else namespace[moduleName]
def _getDileptonTrees(self, section):
    """Return the dilepton tree paths configured by ``section``.

    For every name listed in the ``objects`` option, the input files whose
    dataset field (third dot-separated field of the base name) matches
    ``<object>Dataset`` are combined with ``<object>Selection``. If no file
    matches, all input files are used with ``otherSelection``.

    The task part of each tree path is the ``general/tasks`` option when
    ``vtxWeighter`` is among ``<object>Processors``; otherwise it is the
    entry of ``self.tasks`` that occurs as ``.<task>.`` in the file name.

    Returns a dict mapping each object name to a list of strings
    ``<path>/<task><selection><treeProducerName>/<object>DileptonTree``.

    Raises AssertionError when more than one entry of ``self.tasks`` matches
    a file name.
    """
    from re import match
    from os.path import split as splitPath

    result = {}
    objectNames = self.config.get(section, "objects").split()
    treeProducerName = self.config.get(section, "treeProducerName")
    for objName in objectNames:
        processors = self.config.get(section, "%sProcessors" % objName).split()
        datasetExpression = self.config.get(section, "%sDataset" % objName)
        datasetSelection = self.config.get(section, "%sSelection" % objName)

        datasetPaths = [
            path for path in self.inputPaths
            if match(datasetExpression, splitPath(path)[1].split(".")[2]) is not None
        ]
        if not datasetPaths:
            # Nothing matched: use every input with the fallback selection.
            datasetSelection = self.config.get(section, "otherSelection")
            datasetPaths = self.inputPaths

        result[objName] = []
        for path in datasetPaths:
            if "vtxWeighter" in processors:
                # Both branches of the former if/else assigned the same
                # value, so the configured task is used unconditionally.
                task = self.config.get("general", "tasks")
            else:
                task = None
                baseName = splitPath(path)[1]
                for candidate in self.tasks:
                    if ".%s." % candidate in baseName:
                        # The '%' was missing, so an ambiguity raised a
                        # TypeError instead of showing this message.
                        assert task is None, \
                            "unable to disambiguate tasks '%s' matches both '%s' and '%s'" % (
                                path, task, candidate)
                        task = candidate
            result[objName].append(
                "%s/%s%s%s/%sDileptonTree" % (path, task, datasetSelection, treeProducerName, objName))
    return result
def getHistoFromDQM(serverUrl, runNr, dataset, histoPath):
    """Fetch a histogram object from the DQM server.

    ``histoPath`` may hold several comma-separated candidate paths. For each
    candidate, the folder part is requested through ``dqm_get_json`` and the
    histogram is looked up in the answer by its base name.

    Returns the ``rootobj`` entry of the last candidate that was found, or
    ``None`` when none of the candidates is present.

    Changes with respect to the previous implementation:
    * the extra request for ``histoPath[0]`` was dropped: it asked the
      server for the first *character* of the path and its answer was
      always overwritten before being used;
    * the string is split with ``str.split`` so the function no longer
      depends on ``re`` being imported at module level;
    * the debugging dumps and the redundant assert were removed.
    """
    from src.dqmjson import dqm_get_json
    from os.path import split as splitPath

    print("fetching %s %s %s %s" % (serverUrl, runNr, dataset, histoPath))

    result = None
    for path in histoPath.split(","):
        folder, histoName = splitPath(path)
        content = dqm_get_json(serverUrl, runNr, dataset, folder, rootContent=True)
        if histoName in content:
            result = content[histoName]["rootobj"]
    return result
def request_and_save(self, whatData:str, Economy:list, Product:list, Partner:list, output_path:str, Period = None, display_progress_rate=True):
    '''
    Crawl the requested data and store it as one workbook per economy.

    @param whatData: Which data to fetch; must be one of SUPPORT_DATA
        (the original documentation lists "import", "export" and "both").
    @param Economy: Economies to query.
    @param Product: Products to query.
    @param Partner: Partners to query.
    @param output_path: Where the crawled data is stored.
    @param Period: Not supported yet; passed through unchanged.
    @param display_progress_rate: If True, progress is reported while crawling.

    Note: when more than one economy is requested, each one is stored in a
    separate file named <output_path stem>_<Economy_name><extension>.

    Raises ValueError if whatData is not supported.
    '''
    from os import makedirs
    from os.path import splitext

    if whatData not in SUPPORT_DATA:
        raise ValueError("[!] The data you request is not supported. ")

    # This block may move to _save_one_sheet to get more flexibility in the
    # future, though resolving the codes once here is more efficient.
    Economy = tuple(self.economyCode.search(i) for i in Economy)
    Product = tuple(self.productCode.search(i) for i in Product)
    Partner = tuple(self.partnerCode.search(i) for i in Partner)

    category = CATEGORY[whatData.lower()]
    folder, file = splitPath(output_path)
    # splitext copes with file names that contain no dot or several dots,
    # where unpacking file.split('.') raised ValueError.
    fileName, ext = splitext(file)

    for economy in Economy:
        wb = Workbook()
        ws = wb.active
        for i, param in enumerate(category):
            if i != 0:
                # The first category uses the workbook's default sheet.
                ws = wb.create_sheet()
            self._save_one_sheet(ws, param, economy, Product, Partner, Period, display_progress_rate=display_progress_rate)

        # An empty folder means "current directory"; nothing to create then.
        if folder and not isdir(folder):
            makedirs(folder, exist_ok=True)

        adjust_output_path = output_path
        if len(Economy) != 1:
            adjust_fileName = "{}_{}".format(fileName, economy.name.strip().replace(' ', '_'))
            adjust_output_path = joinPath(folder, "{}{}".format(adjust_fileName, ext))
        wb.save(adjust_output_path)

        if display_progress_rate:
            print("Finish saving the data of economy:{}. ".format(economy.name.strip()))
def getProducers(config, path):
    """Create the tree producers for all ``*.root`` files below ``path``.

    Files whose base name matches the ``general/MCDatasets`` regular
    expression each get their own ``TreeProducer``. All remaining files are
    handed to one common producer named ``MergedData``, which is only
    created when at least one such file exists.

    Returns the list of producers, the merged one (if any) last.
    """
    from glob import glob
    from re import match
    from os.path import split as splitPath

    mcExpression = config.get("general", "MCDatasets")
    producers = []
    dataPaths = []
    for inputPath in glob("%s/*.root" % path):
        if match(mcExpression, splitPath(inputPath)[1]) is None:
            # Not a simulated dataset: collect for the merged producer.
            dataPaths.append(inputPath)
        else:
            producers.append(TreeProducer(config, [inputPath]))
    if dataPaths:
        producers.append(TreeProducer(config, dataPaths, "MergedData"))
    return producers
def getHistoFromDQM(serverUrl, runNr, dataset, histoPath):
    """Fetch a histogram object from the DQM server.

    ``histoPath`` may hold several comma-separated candidate paths. Each
    candidate is split into folder and histogram name; the folder is
    requested through ``dqm_get_json`` and the name is looked up in the
    returned mapping.

    Returns the ``rootobj`` entry of the last candidate found, or ``None``
    if no candidate exists on the server.

    Notes on this revision: the initial request for ``histoPath[0]`` (the
    first character of the path string) was removed because its answer was
    always replaced before use; ``str.split`` replaces ``re.split`` so no
    module-level ``re`` import is needed; the redundant assert and the
    debugging output were dropped.
    """
    from src.dqmjson import dqm_get_json
    from os.path import split as splitPath

    print("fetching %s %s %s %s" % (serverUrl, runNr, dataset, histoPath))

    found = None
    for candidate in histoPath.split(","):
        folder, histoName = splitPath(candidate)
        listing = dqm_get_json(serverUrl, runNr, dataset, folder, rootContent=True)
        if histoName in listing:
            found = listing[histoName]["rootobj"]
    return found
def _toSrcDestPair(pathPair, destDir=None, basePath=None):
    """Normalise a source/destination description to a ``(src, dest)`` tuple.

    pathPair -- one of
        * a string ``"src<PATH_PAIR_DELIMITER>dest"`` or just ``"src"``,
        * a dict, of which the first key/value pair is used,
        * a two element sequence ``(src, dest)``; a one element sequence
          leaves the destination unset.
      A destination of ``True`` means "same as the (relative) source".
    destDir  -- when given, the destination is made a child of it.
    basePath -- base for relative sources; ``THIS_DIR`` when falsy.

    Returns ``None`` when no source can be extracted, else ``(src, dest)``.

    Raises Exception when the destination is ``True`` but the source path
    is absolute.
    """
    src = dest = None
    if isinstance(pathPair, string_types):
        if PATH_PAIR_DELIMITER in pathPair:
            # string pair representation; the delimiter is present, so the
            # split always yields at least two parts
            parts = pathPair.split(PATH_PAIR_DELIMITER)
            src = parts[0].strip()
            dest = parts[1].strip()
        else:
            # shortcut syntax - only the source is provided
            # (the destination is relative)
            src = pathPair
    elif isinstance(pathPair, dict):
        # if a dictionary is provided, use the first k/v pair;
        # next(iter(...)) works on Python 2 and 3, whereas the former
        # ``.next()`` call failed silently on Python 3
        try:
            src, dest = next(iter(iteritems(pathPair)))
        except Exception:
            pass  # e.g. empty dict: leave src unset, handled below
    else:
        # a two element tuple (or list) is the expected src/dest format;
        # stay lenient about anything shorter or not indexable
        try:
            src = pathPair[0]
        except Exception:
            pass
        try:
            dest = pathPair[1]
        except Exception:
            pass
    if src is None:
        return None

    src = normpath(src)
    if dest == True:  # True=="same"
        if isabs(src):
            raise Exception("A resource destination cannot be the same "
                            "as an ABSOLUTE source path")
        dest = src
    elif dest is not None:
        dest = normpath(dest)

    relSrcDir = basePath if basePath else THIS_DIR
    srcHead, srcTail = splitPath(src)
    # NOTE: for a relative source path to have a **nested** destination
    # path, the destination MUST be explicitly provided
    if srcHead == "" or not isParentDir(relSrcDir, srcHead):
        srcHead = relSrcDir
    src = absPath(src, basePath)

    if destDir is None:
        # (i.e. PyInstaller Argument)
        if dest is None:
            dest = relpath(srcHead, THIS_DIR)
    else:
        if dest is None:
            dest = srcTail
        if dest.startswith(PATH_DELIM):
            # must remove a leading slash from dest for joinPath to
            # make the dest a child of destDir
            dest = dest[1:]
        dest = absPath(joinPath(destDir, dest), relSrcDir)
    return (src, dest)
def runPy(pyPath, args=[], isElevated=False):
    """Run the script ``pyPath`` through ``run`` using ``PYTHON_PATH``.

    The script's directory becomes the working directory and its file name
    the first argument. ``args`` is appended only when it is a list; any
    other value is ignored. Debug mode is always off.
    """
    scriptDir, scriptName = splitPath(pyPath)
    extraArgs = args if isinstance(args, list) else []
    run(PYTHON_PATH, [scriptName] + extraArgs, scriptDir, isElevated, isDebug=False)
def _run(binPath, args=[], wrkDir=None, isElevated=False, isDebug=False, sharedFilePath=None):
    """Launch the program at ``binPath``.

    binPath        -- path of the program; its directory is the default
                      working directory.
    args           -- list of arguments, a single argument, or None.
    wrkDir         -- working directory; directory of ``binPath`` if None.
    isElevated     -- request elevated privileges (``sudo`` prefix outside
                      Windows, ``__windowsElevated`` on Windows).
    isDebug        -- run through ``Popen`` and relay the output line by
                      line instead of delegating to ``_system``.
    sharedFilePath -- debug mode only: when set, output goes to a
                      ``_WindowsSharedFile`` instead of stdout.

    Returns None in every branch.
    """

    # Echo the working directory and the command line about to be run.
    def __printCmd(elevate, binPath, args, wrkDir):
        cmdList = [elevate, binPath]
        if isinstance(args, list):
            cmdList.extend(args)
        elif args is not None:
            cmdList.append(args)
        print('cd "%s"' % (wrkDir, ))
        print(list2cmdline(cmdList))

    # Write the return code to stdout and flush immediately.
    def __printRetCode(retCode):
        stdout.write("\nReturn code: %d\n" % (retCode, ))
        stdout.flush()

    binDir, fileName = splitPath(binPath)
    if wrkDir is None:
        wrkDir = binDir
    isMacApp = _isMacApp(binPath)

    # Handle elevated sub processes in Windows
    if IS_WINDOWS and isElevated and not __windowsIsElevated():
        __printCmd("", binPath, args, wrkDir)
        retCode = __windowsElevated(binPath, args, wrkDir)
        # always in debug
        if isDebug and retCode is not None:
            __printRetCode(retCode)
        return

    # "Debug" mode
    if isDebug:
        # The environment variable is set for the child and removed again
        # once the child has finished (see delEnv below).
        setEnv(DEBUG_ENV_VAR_NAME, DEBUG_ENV_VAR_VALUE)
        if isMacApp:
            binPath = __INTERNAL_MACOS_APP_BINARY_TMPLT % (normBinaryName(
                fileName, isGui=True), normBinaryName(fileName, isGui=False))
        cmdList = [binPath]
        if isinstance(args, list):
            cmdList.extend(args)
        elif args is not None:
            cmdList.append(args)
        if not IS_WINDOWS and isElevated:
            elevate = "sudo"
            cmdList = [elevate] + cmdList
        else:
            elevate = ""
        __printCmd(elevate, binPath, args, wrkDir)
        sharedFile = (_WindowsSharedFile(isProducer=True,
                                         filePath=sharedFilePath)
                      if sharedFilePath else None)
        # stderr is merged into stdout so a single pipe carries all output.
        p = Popen(cmdList, cwd=wrkDir, shell=False, stdout=PIPE,
                  stderr=STDOUT, bufsize=1)
        # Relay the child's output one line at a time until it has exited.
        # NOTE(review): the loop ends as soon as poll() reports termination;
        # output still waiting in the pipe at that moment does not appear
        # to be read - confirm whether trailing lines can get lost.
        while p.poll() is None:
            line = p.stdout.readline() if PY2 else p.stdout.readline().decode()
            if sharedFile:
                sharedFile.write(line)
            else:
                stdout.write(line)
                stdout.flush()
        if sharedFile:
            sharedFile.write(__SHARED_RET_CODE_TMPLT % (p.returncode, ))
            sharedFile.close()
        else:
            __printRetCode(p.returncode)
        delEnv(DEBUG_ENV_VAR_NAME)
        return

    # All other run conditions...
    if isMacApp:
        # The app is started through the launcher command; the original
        # arguments follow the launcher's own switches.
        newArgs = [
            __LAUNCH_MACOS_APP_NEW_SWITCH, __LAUNCH_MACOS_APP_BLOCK_SWITCH,
            fileName
        ]
        if isinstance(args, list):
            newArgs.extend([_LAUNCH_MACOS_APP_ARGS_SWITCH] + args)
        args = newArgs
        binPath = fileName = _LAUNCH_MACOS_APP_CMD
    if isinstance(args, list):
        args = list2cmdline(args)
    elif args is None:
        args = ""
    elevate = "" if not isElevated or IS_WINDOWS else "sudo"
    if IS_WINDOWS or isMacApp:
        pwdCmd = ""
    else:
        # Prefix with "./" only when running from the program's own directory.
        pwdCmd = "./" if wrkDir == binDir else ""
    cmd = ('%s %s%s %s' % (elevate, pwdCmd, fileName, args)).strip()
    _system(cmd, wrkDir)
def addRun(self, serverUrl, runNr, dataset):
    """Add one run to the trend plot.

    The histogram configured as ``relativePath`` is fetched with
    ``dqm_getSingleHist_json`` (unless the metric result is already cached
    for this run), the configured metric is evaluated on it and the value,
    its errors and the x position are appended to the internal lists.

    serverUrl -- DQM server to query.
    runNr     -- run number to add.
    dataset   -- dataset name passed on to the DQM request.

    Options read from the plot's section: ``relativePath``, ``metric``,
    ``saveHistos``, ``histo1Path``, ``histo2Path``, ``relSystematic``,
    ``absSystematic``, ``xMode`` (also from ``styleDefaults``) and
    ``runOffset``.

    Returns None. A failed download (``-99`` answer) or an error while
    evaluating the metric only prints a warning and skips the run.

    Raises StandardError for an unknown ``xMode``.

    Fixed in this revision: the run number is now appended for the
    ``runNumber``/``runNumberOffset`` modes (the code referred to an
    undefined name ``run``), and the unknown-xMode error no longer fails on
    the misspelled ``self__section``.
    """
    from math import sqrt
    from src.dqmjson import dqm_getSingleHist_json
    from ROOT import TFile, TObject
    import os
    from os.path import split as splitPath

    self.__count = self.__count + 1
    histoPath = self.__config.get(self.__section, "relativePath")
    print("in addRun, histoPath =  %s" % (histoPath, ))

    cacheLocation = (serverUrl, runNr, dataset, histoPath,
                     self.__config.get(self.__section, "metric"))
    print("cachelocation =  %s" % (cacheLocation, ))
    print("dataset =  %s" % (dataset, ))

    # Optionally store the per-run histogram and a running sum in a file.
    if self.__config.has_option(self.__section, "saveHistos"):
        try:
            histo1 = dqm_getSingleHist_json(serverUrl, runNr, dataset,
                                            histoPath, rootContent=True)
            histosFile = self.__config.get(self.__section, "saveHistos")
            if not os.path.exists(histosFile):
                os.makedirs(histosFile)
            # '== None' is kept on purpose for objects that may come from
            # ROOT, where it is not necessarily the same as 'is None'.
            if self.__histoSum == None:
                self.__histoSum = histo1
                self.__FileHisto = TFile.Open(
                    os.path.join("%s/SumAll_%s.root" % (histosFile, self.__title)),
                    "RECREATE")
                self.__histoSum.Write()
            else:
                self.__histoSum.Add(histo1)
                self.__FileHisto.cd()
                self.__histoSum.Write("", TObject.kOverwrite)
            histo1.SetName(str(runNr))
            histo1.Write()
        except StandardError as msg:
            print("WARNING: something went wrong getting the histogram  %s %s" % (runNr, msg))

    try:
        if self.__cache == None or cacheLocation not in self.__cache:
            histo = dqm_getSingleHist_json(serverUrl, runNr, dataset,
                                           histoPath, rootContent=True)
            if self.__config.has_option(self.__section, "histo1Path"):
                h1Path = self.__config.get(self.__section, "histo1Path")
                h1 = dqm_getSingleHist_json(serverUrl, runNr, dataset,
                                            h1Path, rootContent=True)
                self.__metric.setOptionalHisto1(h1)
                print(h1)
            if self.__config.has_option(self.__section, "histo2Path"):
                h2Path = self.__config.get(self.__section, "histo2Path")
                h2 = dqm_getSingleHist_json(serverUrl, runNr, dataset,
                                            h2Path, rootContent=True)
                self.__metric.setOptionalHisto2(h2)
                print(h2)
            print(histo)
            if histo == -99:
                # -99 is treated as "download failed".
                # NOTE(review): unlike the exception path below, the run
                # counter is not decremented here - confirm this is wanted.
                print("WARNING: something went wrong downloading histo= %s"
                      % (splitPath(histoPath)[1], ))
                return
            Entr = histo.GetEntries()
            print("############### GOT HISTO #################")
            y = 0
            yErr = (0.0, 0.0)
            if Entr > self.__threshold:
                (y, yErr) = self.__metric(histo, cacheLocation)
            else:
                # Too few entries: record a zero result in the cache.
                self.__cache[cacheLocation] = ((0., 0.), 0.)
        else:
            # Result is cached: the metric is called without a histogram.
            (y, yErr) = self.__metric(None, cacheLocation)
    except StandardError as msg:
        print("WARNING: something went wrong calculating %s %s" % (self.__metric, msg))
        self.__count = self.__count - 1
        return

    ySysErr = (0., 0.)
    if self.__config.has_option(self.__section, "relSystematic"):
        fraction = self.__config.getfloat(self.__section, "relSystematic")
        ySysErr = (fraction * y, fraction * y)
    # An absolute systematic, when configured, replaces the relative one.
    if self.__config.has_option(self.__section, "absSystematic"):
        component = self.__config.getfloat(self.__section, "absSystematic")
        ySysErr = (component, component)

    # Statistical and systematic errors are combined in quadrature.
    totalErrLow = sqrt(yErr[0]**2 + ySysErr[0]**2)
    totalErrHigh = sqrt(yErr[1]**2 + ySysErr[1]**2)

    self.__y.append(y)
    ##To turn off Errors, set yErr and ySysErr to (0.0, 0.0) above.
    self.__yErrLow.append(yErr[0])
    self.__yErrHigh.append(yErr[1])
    self.__ySysErrLow.append(totalErrLow)
    self.__ySysErrHigh.append(totalErrHigh)
    self.__runs.append(runNr)

    if self.__config.has_option(self.__section, "xMode"):
        xMode = self.__config.get(self.__section, "xMode")
    elif self.__config.has_option("styleDefaults", "xMode"):
        xMode = self.__config.get("styleDefaults", "xMode")
    else:
        xMode = "counted"

    if xMode == "runNumber":
        self.__x.append(runNr)
        print("*** appending run to x")
        self.__xTitle = "Run No."
    elif xMode == "runNumberOffset":
        runOffset = int(self.__config.get(self.__section, "runOffset"))
        self.__x.append(runNr - runOffset)
        self.__xTitle = "Run No. - %s" % runOffset
    elif xMode == "counted":
        self.__x.append(self.__count)
        self.__xTitle = "Nth processed run"
    elif xMode.startswith("runNumberEvery") or xMode.startswith("runNumbers"):
        self.__x.append(self.__count)
        self.__xTitle = "Run No."
    else:
        raise StandardError("Unknown xMode: %s in %s" % (xMode, self.__section))

    self.__addAnnotaion(runNr, self.__x[-1], y, (totalErrLow, totalErrHigh))
def produce(self):
    """Write the processed dilepton trees to the output ROOT file.

    The output file ``<outPath>/<flags>.processed.<name>.root`` is
    recreated. For every config section named ``dileptonTree:<x>``:

    1. the trees returned by ``_getDileptonTrees`` are chained per object,
       files containing ``DoubleMuon`` first, then ``DoubleElectron``, then
       all others (to prefer certain primary datasets);
    2. if ``general/counterSum`` evaluates to true and no counter sum exists
       yet, the ``analysis paths`` counter of the first file is cloned into
       ``<x>Counters`` of the output file;
    3. each configured processor prepares the source tree; a
       ``SimpleSelector`` without an ``<object>Filter`` option first cuts
       the tree down with its expression;
    4. every event is passed through all processors and stored when the
       filter expression evaluates to true on their results.

    Raises AssertionError when a file name matches more than one task.

    NOTE: ``general/counterSum`` and the filter expression are passed to
    ``eval``; the configuration file must therefore be trusted.
    """
    from ROOT import TFile, TChain
    from os.path import split as splitPath

    def counterTask(filePath, processors):
        # Task whose counter directory is read from filePath.
        if "vtxWeighter" in processors:
            return self.config.get("general", "tasks")
        baseName = splitPath(filePath)[1]
        task = None
        for candidate in self.tasks:
            if ".%s." % candidate in baseName:
                # The '%' was missing, so an ambiguity raised TypeError.
                assert task is None, \
                    "unable to disambiguate tasks '%s' matches both '%s' and '%s'" % (
                        filePath, task, candidate)
                task = candidate
        return task

    outFilePath = "%s/%s.%s.%s.root" % (self.outPath, "".join(self.flags),
                                        "processed", self.name)
    outFile = TFile(outFilePath, "recreate")

    for section in self.config.sections():
        if not section.startswith("dileptonTree:"):
            continue
        sectionName = section.split("dileptonTree:")[1]
        treeProducerName = self.config.get(section, "treeProducerName")
        trees = self._getDileptonTrees(section)
        if trees is None:
            continue
        treeName = "DileptonTree"
        subDirName = "%s%s" % (sectionName, treeProducerName)
        counterDirName = "%sCounters" % sectionName

        outDir = None
        srcTree = {}
        for objName in trees:
            srcTree[objName] = TChain("%s%s" % (objName, treeName))
            processors = self.config.get(section, "%sProcessors" % objName).split()

            ### Quick and dirty workaround to prefer certain primary datasets
            ### (double muon > double electron > emu), to be in synch with
            ### the ETH group. A path containing both names is added twice,
            ### exactly as the three separate loops did before.
            treePaths = trees[objName]
            orderedPaths = (
                [p for p in treePaths if "DoubleMuon" in p]
                + [p for p in treePaths if "DoubleElectron" in p]
                + [p for p in treePaths
                   if "DoubleElectron" not in p and "DoubleMuon" not in p])

            for treePath in orderedPaths:
                filePath = "%s.root" % treePath.split(".root")[0]
                inFile = TFile(filePath, "READ")
                try:
                    makeCounterSum = eval(self.config.get("general", "counterSum"))
                    print("Add counter sum: %s" % (makeCounterSum, ))
                    if not self.counterSum and makeCounterSum:
                        if not outFile.GetDirectory(counterDirName):
                            outFile.mkdir(counterDirName)
                        outFile.cd(counterDirName)
                        task = counterTask(filePath, processors)
                        self.counterSum = inFile.Get(
                            "%sCounters/analysis paths" % task).Clone()
                finally:
                    # Close the input file even if the counter lookup fails.
                    inFile.Close()
                srcTree[objName].Add(treePath)
                print("adding %s" % (treePath, ))

            srcTree[objName].SetBranchStatus("*", 1)
            for processorName in processors:
                processor = self.treeProcessors[processorName]
                if processorName == "vtxWeighter":
                    srcTree[objName].SetBranchStatus("weight", 0)
                if (processor.__class__.__name__ == SimpleSelector.__name__
                        and not self.config.has_option(section, "%sFilter" % objName)):
                    print("Requirements met, applying simple selection boosting =)")
                    expression = processor.getExpression(objName)
                    print("Cutting tree down to: '%s'" % (expression, ))
                    srcTree[objName] = srcTree[objName].CopyTree(expression)
                processor.prepareSrc(srcTree[objName], objName, self.treeProcessors)

        for objName in trees:
            processors = self.config.get(section, "%sProcessors" % objName).split()
            filterExpr = " and ".join(processors)
            if self.config.has_option(section, "%sFilter" % objName):
                filterExpr = self.config.get(section, "%sFilter" % objName)

            if not outDir:
                outDir = outFile.mkdir(subDirName)
            outFile.cd(subDirName)
            destTree = srcTree[objName].CloneTree(0)
            destTree.SetAutoSave(5000000000)
            for processorName in processors:
                self.treeProcessors[processorName].prepareDest(destTree, objName)

            # Iterating the tree loads one entry at a time; the processors
            # read the current entry from the source tree.
            for _ in srcTree[objName]:
                processingResults = {}
                for processorName in processors:
                    processingResults[processorName] = \
                        self.treeProcessors[processorName].processEvent(
                            srcTree[objName], objName)
                if filterExpr == "" or eval(filterExpr, processingResults):
                    destTree.Fill()
            outFile.Write()

    outFile.Purge()
    outFile.Close()
def produce(self):
    """Write the processed dilepton and isolation trees to the output file.

    The output file ``<outPath>/<flags>.processed.<name>.root`` is
    recreated. Sections named ``dileptonTree:<x>`` are handled with
    ``_getDileptonTrees``, sections named ``isoTree:<x>`` with
    ``_getIsoTrees``. For each of them:

    1. the trees are chained per object; if ``general/counterSum``
       evaluates to true and no counter sum exists yet, the
       ``analysis paths`` counter of the first file is cloned into the
       output file;
    2. if the filter names a ``SimpleSelector`` and an ``<object>Filter``
       option exists, the output file is written and closed, the chain is
       cut down with the selector's expression, and the output file is
       reopened in update mode;
    3. each configured processor prepares the source tree;
    4. every event is passed through all processors and stored when the
       last processor's ``processEvent`` result is true.

    Raises AssertionError when a file name matches more than one task.

    NOTE: ``general/counterSum`` is passed to ``eval``; the configuration
    file must therefore be trusted.
    """
    from ROOT import TFile, TChain
    from os.path import split as splitPath

    def counterTask(filePath, processors):
        # Task whose counter directory is read from filePath.
        if "vtxWeighter" in processors:
            return self.config.get("general", "tasks")
        baseName = splitPath(filePath)[1]
        task = None
        for candidate in self.tasks:
            if ".%s." % candidate in baseName:
                # The '%' was missing, so an ambiguity raised TypeError.
                assert task is None, \
                    "unable to disambiguate tasks '%s' matches both '%s' and '%s'" % (
                        filePath, task, candidate)
                task = candidate
        return task

    outFilePath = "%s/%s.%s.%s.root" % (self.outPath, "".join(self.flags),
                                        "processed", self.name)
    outFile = TFile(outFilePath, "recreate")

    for section in self.config.sections():
        trees = None
        if section.startswith("dileptonTree:"):
            treeProducerName = self.config.get(section, "treeProducerName")
            trees = self._getDileptonTrees(section)
            treeName = "DileptonTree"
            subDirName = "%s%s" % (section.split("dileptonTree:")[1], treeProducerName)
            if "PFHTHLT" in subDirName:
                subDirName = "%sFinalTrees" % subDirName.split(treeProducerName)[0]
        if section.startswith("isoTree:"):
            treeProducerName = self.config.get(section, "treeProducerName")
            trees = self._getIsoTrees(section)
            treeName = "Iso"
            subDirName = "%s%s" % (section.split("isoTree:")[1], treeProducerName)
        if trees is None:
            continue

        outDir = None
        srcTree = {}
        for objName in trees:
            srcTree[objName] = TChain("%s%s" % (objName, treeName))
            processors = self.config.get(section, "%sProcessors" % objName).split()
            filterName = " and ".join(processors)
            if self.config.has_option(section, "%sFilter" % objName):
                filterName = self.config.get(section, "%sFilter" % objName)

            for treePath in trees[objName]:
                filePath = "%s.root" % treePath.split(".root")[0]
                inFile = TFile(filePath, "READ")
                try:
                    makeCounterSum = eval(self.config.get("general", "counterSum"))
                    print("Add counter sum: %s" % (makeCounterSum, ))
                    if not self.counterSum and makeCounterSum:
                        # NOTE(review): the directory name is derived with
                        # split("dileptonTree:"), which has no second
                        # element for an isoTree section - confirm that a
                        # dileptonTree section always comes first.
                        counterDirName = "%sCounters" % section.split("dileptonTree:")[1]
                        if not outFile.GetDirectory(counterDirName):
                            outFile.mkdir(counterDirName)
                        outFile.cd(counterDirName)
                        task = counterTask(filePath, processors)
                        self.counterSum = inFile.Get(
                            "%sCounters/analysis paths" % task).Clone()
                finally:
                    # Close the input file even if the counter lookup fails.
                    inFile.Close()
                srcTree[objName].Add(treePath)
                print("adding %s" % (treePath, ))

            srcTree[objName].SetBranchStatus("*", 1)

            ### signal code
            if (self.treeProcessors[filterName].__class__.__name__ == SimpleSelector.__name__
                    and self.config.has_option(section, "%sFilter" % objName)):
                expression = self.treeProcessors[filterName].getExpression(objName)
                print("Cutting tree down to: '%s'" % (expression, ))
                # The output file is closed around CopyTree and reopened
                # in update mode afterwards; the order matters.
                outFile.Write()
                outFile.Close()
                srcTree[objName] = srcTree[objName].CopyTree(expression)
                outFile = TFile(outFilePath, "UPDATE")

            for processorName in processors:
                if processorName == "vtxWeighter":
                    srcTree[objName].SetBranchStatus("weight", 0)
                self.treeProcessors[processorName].prepareSrc(
                    srcTree[objName], objName, self.treeProcessors)

        for objName in trees:
            processors = self.config.get(section, "%sProcessors" % objName).split()

            if not outDir:
                outDir = outFile.mkdir(subDirName)
            outFile.cd(subDirName)
            destTree = srcTree[objName].CloneTree(0)
            destTree.SetAutoSave(5000000000)
            for processorName in processors:
                self.treeProcessors[processorName].prepareDest(destTree, objName)

            # Iterating the tree loads one entry at a time; the processors
            # read the current entry from the source tree.
            for _ in srcTree[objName]:
                ### signal code: every processor is run, but only the
                ### result of the last one decides whether the event is kept
                keep = False
                for processorName in processors:
                    keep = self.treeProcessors[processorName].processEvent(
                        srcTree[objName], objName)
                if keep:
                    destTree.Fill()
            outFile.Write()

    outFile.Purge()
    outFile.Close()
def addRun(self, serverUrl, runNr, dataset, tfile): from math import sqrt from ROOT import TH1, TFile, TObject, TBufferFile, TH1F, TProfile, TProfile2D, TH2F import ROOT import os, sys, string from os.path import split as splitPath from src.dqmjson import dqm_get_json_hist self.__count = self.__count + 1 histoPath = self.__config.get(self.__section, "relativePath") cacheLocation = (serverUrl, runNr, dataset, histoPath, self.__config.get(self.__section, "metric")) if self.__config.has_option(self.__section, "saveHistos"): try: if (histoPath[0] == '/'): histoPath = histoPath.replace('/', '', 1) subdet = histoPath.split('/')[0] if tfile == None: histo1 = dqm_get_json_hist(serverUrl, runNr, dataset, splitPath(histoPath)[0], splitPath(histoPath)[1], rootContent=True) else: histo1 = tfile.Get( ('DQMData/Run %d/%s/Run summary/%s') % (runNr, subdet, histoPath.replace('%s/' % (subdet), '', 1))) histosFile = self.__config.get(self.__section, "saveHistos") if not os.path.exists(histosFile): os.makedirs(histosFile) if self.__histoSum == None: self.__histoSum = histo1 self.__FileHisto = TFile.Open( os.path.join("%s/SumAll_%s.root" % (histosFile, self.__title)), "RECREATE") self.__histoSum.Write() else: self.__histoSum.Add(histo1) self.__FileHisto.cd() self.__histoSum.Write("", TObject.kOverwrite) histo1.SetName(str(runNr)) histo1.Write() except StandardError as msg: print "WARNING: something went wrong getting the histogram ", runNr, msg try: if self.__cache == None or cacheLocation not in self.__cache: if (histoPath[0] == '/'): histoPath = histoPath.replace('/', '', 1) subdet = histoPath.split('/')[0] if tfile == None: histo = dqm_get_json_hist(serverUrl, runNr, dataset, splitPath(histoPath)[0], splitPath(histoPath)[1], rootContent=True) else: histo = tfile.Get( ('DQMData/Run %d/%s/Run summary/%s') % (runNr, subdet, histoPath.replace('%s/' % (subdet), '', 1))) if self.__config.has_option(self.__section, "histo1Path"): h1Path = self.__config.get(self.__section, "histo1Path") if 
(h1Path[0] == '/'): h1Path = h1Path.replace('/', '', 1) subdet = h1Path.split('/')[0] if tfile == None: h1 = dqm_get_json_hist(serverUrl, runNr, dataset, splitPath(h1Path)[0], splitPath(h1Path)[1], rootContent=True) else: h1 = tfile.Get( ('DQMData/Run %d/%s/Run summary/%s') % (runNr, subdet, h1Path.replace('%s/' % (subdet), '', 1))) self.__metric.setOptionalHisto1(h1) if self.__config.has_option(self.__section, "histo2Path"): h2Path = self.__config.get(self.__section, "histo2Path") if (h2Path[0] == '/'): h2Path = h1Path.replace('/', '', 1) subdet = h2Path.split('/')[0] if tfile == None: h2 = dqm_get_json_hist(serverUrl, runNr, dataset, splitPath(h2Path)[0], splitPath(h2Path)[1], rootContent=True) else: h2 = tfile.Get( ('DQMData/Run %d/%s/Run summary/%s') % (runNr, subdet, h2Path.replace('%s/' % (subdet), '', 1))) self.__metric.setOptionalHisto2(h2) self.__metric.setRun(runNr) if (histo != None): print "-> Got histogram {0} as {1}".format( splitPath(histoPath)[1], histo) if self.__config.has_option(self.__section, "histo1Path"): print " -> Got auxiliary histogram {0} as {1}".format( splitPath(h1Path)[1], h1) if self.__config.has_option(self.__section, "histo2Path"): print " -> Got auxiliary histogram {0} as {1}".format( splitPath(h2Path)[1], h2) Entr = 0 Entr = histo.GetEntries() #print "############### GOT HISTO #################" y = 0 yErr = (0.0, 0.0) if Entr > self.__threshold: print " -> {0} will be evaluated".format( self.__metricName) (y, yErr) = self.__metric(histo, cacheLocation) else: print " -> Histogram entries are {0} while threshold is {1}. Metric will not be evalueted, results set at 0".format( Entr, self.__threshold) self.__cache[cacheLocation] = ((0., 0.), 0.) 
else: print "WARNING: something went wrong downloading histo=", splitPath( histoPath)[1] return elif cacheLocation in self.__cache: print "-> Got {0} for histogram {1} from cache".format( self.__metricName, splitPath(histoPath)[1]) (y, yErr) = self.__metric(None, cacheLocation) except StandardError as msg: print "WARNING: something went wrong calculating", self.__metric, msg self.__count = self.__count - 1 return ySysErr = (0., 0.) if self.__config.has_option(self.__section, "relSystematic"): fraction = self.__config.getfloat(self.__section, "relSystematic") ySysErr = (fraction * y, fraction * y) if self.__config.has_option(self.__section, "absSystematic"): component = self.__config.getfloat(self.__section, "absSystematic") ySysErr = (component, component) self.__config.get(self.__section, "relativePath") self.__y.append(y) self.__yErrLow.append(yErr[0]) self.__yErrHigh.append(yErr[1]) self.__ySysErrLow.append(sqrt(yErr[0]**2 + ySysErr[0]**2)) self.__ySysErrHigh.append(sqrt(yErr[1]**2 + ySysErr[1]**2)) self.__runs.append(runNr) if self.__config.has_option(self.__section, "xMode"): xMode = self.__config.get(self.__section, "xMode") elif self.__config.has_option("styleDefaults", "xMode"): xMode = self.__config.get("styleDefaults", "xMode") else: xMode = "counted" if xMode == "runNumber": self.__x.append(run) print "*** appending run to x" self.__xTitle = "Run No." elif xMode == "runNumberOffset": runOffset = int(self.__config.get(self.__section, "runOffset")) self.__x.append(run - runOffset) self.__xTitle = "Run No. - %s" % runOffset elif xMode == "counted": self.__x.append(self.__count) self.__xTitle = "Nth processed run" elif xMode.startswith("runNumberEvery") or xMode.startswith( "runNumbers"): self.__x.append(self.__count) self.__xTitle = "Run No." else: raise StandardError, "Unknown xMode: %s in %s" % (xMode, self__section)