def __lumiSelectionSnippet(self, jsonPath=None, firstRun=None, lastRun=None):
    """Build the python snippet that restricts the cff source to the
    requested runs / lumi sections.

    Arguments:
    - `jsonPath`: path to a JSON lumi mask; may also point to a CMSSW
                  "lumisToProcess" cff snippet, which is detected and
                  reused (with run-range filtering applied via regex)
    - `firstRun`/`lastRun`: inclusive run range limits

    Side effect: updates self.__firstusedrun and self.__lastusedrun so
    that e.g. the magnetic-field lookup knows the used run range.
    Returns "" when no selection is requested.
    """
    lumiSecExtend = ""
    if firstRun or lastRun or jsonPath:
        if not jsonPath:
            # No JSON given: select whole runs, each as "run:1-run:max".
            selectedRunList = self.__getRunList()
            if firstRun:
                selectedRunList = [ run for run in selectedRunList \
                    if self.__findInJson(run, "run_number") >= firstRun ]
            if lastRun:
                selectedRunList = [ run for run in selectedRunList \
                    if self.__findInJson(run, "run_number") <= lastRun ]
            lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-" \
                + str( self.__findInJson(run, "run_number") ) + ":max" \
                for run in selectedRunList ]
            # chunked in groups of 255 -- presumably to keep the generated
            # vstring extend() calls a manageable size; TODO confirm
            splitLumiList = list(self.__chunks(lumiList, 255))
        else:
            # Try to parse the file as a real JSON lumi mask first.
            theLumiList = None
            try:
                theLumiList = LumiList(filename=jsonPath)
            except ValueError:
                pass
            if theLumiList is not None:
                # Drop runs outside [firstRun, lastRun] before serializing.
                allRuns = theLumiList.getRuns()
                runsToRemove = []
                for run in allRuns:
                    if firstRun and int(run) < firstRun:
                        runsToRemove.append(run)
                    if lastRun and int(run) > lastRun:
                        runsToRemove.append(run)
                theLumiList.removeRuns(runsToRemove)
                splitLumiList = list(
                    self.__chunks(theLumiList.getCMSSWString().split(','), 255))
                if not (splitLumiList and splitLumiList[0] and splitLumiList[0][0]):
                    splitLumiList = None
            else:
                # Not valid JSON: maybe it is already a CMSSW lumi
                # selection cff snippet; use its contents directly.
                with open(jsonPath) as f:
                    jsoncontents = f.read()
                    if "process.source.lumisToProcess" in jsoncontents:
                        msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet. Trying to use it" % jsonPath
                        if firstRun or lastRun:
                            msg += ( "\n (after applying firstRun and/or lastRun)" )
                        msg += ".\nPlease note that, depending on the format of this file, it may not work as expected."
                        msg += "\nCheck your config file to make sure that it worked properly."
                        print msg

                        runlist = self.__getRunList()
                        if firstRun or lastRun:
                            # Sentinels: getForceRunRangeFunction presumably
                            # raises/updates these while rewriting each lumi
                            # range -- TODO confirm against its definition.
                            self.__firstusedrun = -1
                            self.__lastusedrun = -1
                            jsoncontents = re.sub(
                                r"\d+:(\d+|max)(-\d+:(\d+|max))?",
                                self.getForceRunRangeFunction(firstRun, lastRun),
                                jsoncontents)
                            # Remove the empty-string entries left behind by
                            # ranges that were filtered out entirely.
                            jsoncontents = (jsoncontents.replace(
                                "'',\n", "").replace("''\n", "").replace(
                                '"",\n', '').replace('""\n', ''))
                            # Clamp the used-run bookkeeping to runs that
                            # actually exist in this dataset.
                            self.__firstusedrun = max(
                                self.__firstusedrun,
                                int(self.__findInJson(runlist[0], "run_number")))
                            self.__lastusedrun = min(
                                self.__lastusedrun,
                                int(self.__findInJson(runlist[-1], "run_number")))
                            if self.__lastusedrun < self.__firstusedrun:
                                # Empty selection after filtering.
                                jsoncontents = None
                        else:
                            self.__firstusedrun = int(
                                self.__findInJson(runlist[0], "run_number"))
                            self.__lastusedrun = int(
                                self.__findInJson(runlist[-1], "run_number"))
                        lumiSecExtend = jsoncontents
                        splitLumiList = None
                    else:
                        raise AllInOneError("%s is not a valid json file!" % jsonPath)

        if splitLumiList and splitLumiList[0] and splitLumiList[0][0]:
            # Serialize the chunks into lumiSecs.extend([...]) statements.
            lumiSecStr = [ "',\n'".join( lumis ) \
                for lumis in splitLumiList ]
            lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \
                for lumis in lumiSecStr ]
            lumiSecExtend = "\n".join(lumiSecStr)
            runlist = self.__getRunList()
            # First/last used run: intersection of the selection with the
            # runs present in the dataset.
            self.__firstusedrun = max(
                int(splitLumiList[0][0].split(":")[0]),
                int(self.__findInJson(runlist[0], "run_number")))
            self.__lastusedrun = min(
                int(splitLumiList[-1][-1].split(":")[0]),
                int(self.__findInJson(runlist[-1], "run_number")))
        elif lumiSecExtend:
            # cff-snippet path already filled lumiSecExtend; nothing to do.
            pass
        else:
            msg = "You are trying to run a validation without any runs! Check that:"
            if firstRun or lastRun:
                msg += "\n - firstRun/begin and lastRun/end are correct for this dataset, and there are runs in between containing data"
            if jsonPath:
                msg += "\n - your JSON file is correct for this dataset, and the runs contain data"
            if (firstRun or lastRun) and jsonPath:
                msg += "\n - firstRun/begin and lastRun/end are consistent with your JSON file"
            raise AllInOneError(msg)
    else:
        if self.__inputMagneticField is not None:
            pass #never need self.__firstusedrun or self.__lastusedrun
        else:
            runlist = self.__getRunList()
            self.__firstusedrun = int(
                self.__findInJson(self.__getRunList()[0], "run_number"))
            self.__lastusedrun = int(
                self.__findInJson(self.__getRunList()[-1], "run_number"))

    return lumiSecExtend
def createCrabCfg(self, *args, **kwargs):
    """This validation type never produces a crab config; reaching this is a bug."""
    msg = "Shouldn't be here..."
    raise AllInOneError(msg)
def __init__(self, valName, config):
    """Construct the validation, rejecting options that moved to [plots:offline]."""
    super(PreexistingOfflineValidation, self).__init__(valName, config)
    # Fail fast on any deprecated option that is still set in this section.
    stillSet = [opt for opt in self.deprecateddefaults if self.general[opt]]
    if stillSet:
        option = stillSet[0]
        raise AllInOneError("The '%s' option has been moved to the [plots:offline] section. Please specify it there."%option)
def __getFileInfoList(self, dasLimit, parent=False): if self.__predefined: if parent: extendstring = "secFiles.extend" else: extendstring = "readFiles.extend" with open(self.__fileName) as f: files = [] copy = False for line in f.readlines(): if "]" in line: copy = False if copy: files.append({name: line.translate(None, "', " + '"')}) if extendstring in line and "[" in line and "]" not in line: copy = True return files if self.__fileInfoList and not parent: return self.__fileInfoList if self.__parentFileInfoList and parent: return self.__parentFileInfoList if parent: searchdataset = self.parentDataset() else: searchdataset = self.__name dasQuery_files = ('file dataset=%s | grep file.name, file.nevents, ' 'file.creation_time, ' 'file.modification_time' % (searchdataset)) print "Requesting file information for '%s' from DAS..." % ( searchdataset), data = self.__getData(dasQuery_files, dasLimit) print "Done." data = [self.__findInJson(entry, "file") for entry in data] if len(data) == 0: msg = ("No files are available for the dataset '%s'. This can be " "due to a typo or due to a DAS problem. Please check the " "spelling of the dataset and/or retry to run " "'validateAlignments.py'." % (self.name())) raise AllInOneError(msg) fileInformationList = [] for file in data: fileName = 'unknown' try: fileName = self.__findInJson(file, "name") fileCreationTime = self.__findInJson(file, "creation_time") fileNEvents = self.__findInJson(file, "nevents") except KeyError: print( "DAS query gives bad output for file '%s'. 
Skipping it.\n" "It may work if you try again later.") % fileName fileNEvents = 0 # select only non-empty files if fileNEvents == 0: continue fileDict = { "name": fileName, "creation_time": fileCreationTime, "nevents": fileNEvents } fileInformationList.append(fileDict) fileInformationList.sort( key=lambda info: self.__findInJson(info, "name")) if parent: self.__parentFileInfoList = fileInformationList else: self.__fileInfoList = fileInformationList return fileInformationList
def sysexit(number):
    """Raise AllInOneError when conddb exits with a nonzero status; no-op otherwise."""
    if number == 0:
        return
    raise AllInOneError("conddb exited with status {}".format(number))
def dump_cff(self, outName=None, jsonPath=None, begin=None, end=None,
             firstRun=None, lastRun=None, parent=False):
    """Write this dataset out as a predefined _cff.py in the user's
    Alignment/OfflineValidation package, for faster reuse (after scram b).

    Arguments mirror __createSnippet; `outName` defaults to
    "Dataset" + the dataset name with '/' replaced by '_'.
    Prompts interactively before overwriting an existing file.
    """
    if outName == None:
        outName = "Dataset" + self.__name.replace("/", "_")
    packageName = os.path.join("Alignment", "OfflineValidation")
    if not os.path.exists(os.path.join(self.__cmssw, "src", packageName)):
        # FIX: message typos -- missing space after "dataset" and
        # "relase" -> "release".
        msg = ("You try to store the predefined dataset '%s'.\n"
               "For that you need to check out the package '%s' to your "
               "private release area in\n" % (outName, packageName) + self.__cmssw)
        raise AllInOneError(msg)
    theMap = {
        "process": "",
        "tab": "",
        "nEvents": str(-1),
        "skipEventsString": "",
        "importCms": "import FWCore.ParameterSet.Config as cms\n",
        "header": "#Do not delete or (unless you know what you're doing) change these comments\n"
        "#%(name)s\n"
        "#data type: %(dataType)s\n"
        "#magnetic field: .oO[magneticField]Oo.\n" #put in magnetic field later
        % {
            "name": self.__name, #need to create the snippet before getting the magnetic field
            "dataType": self.__dataType
        } #so that we know the first and last runs
    }
    dataset_cff = self.__createSnippet(jsonPath=jsonPath,
                                       begin=begin,
                                       end=end,
                                       firstRun=firstRun,
                                       lastRun=lastRun,
                                       repMap=theMap,
                                       parent=parent)
    magneticField = self.__magneticField
    if magneticField == "MagneticField":
        magneticField = "%s, %s #%s" % (
            magneticField,
            str(self.__getMagneticFieldForRun()).replace(
                "\n", " ").split("#")[0].strip(),
            "Use MagneticField_cff.py; the number is for determining which track selection to use."
        )
    dataset_cff = dataset_cff.replace(".oO[magneticField]Oo.", magneticField)
    filePath = os.path.join(self.__cmssw, "src", packageName,
                            "python", outName + "_cff.py")
    if os.path.exists(filePath):
        existMsg = "The predefined dataset '%s' already exists.\n" % (outName)
        askString = "Do you want to overwrite it? [y/n]\n"
        inputQuery = existMsg + askString
        # Keep asking until the user answers y or n.
        while True:
            userInput = raw_input(inputQuery).lower()
            if userInput == "y":
                break
            elif userInput == "n":
                return
            else:
                inputQuery = askString
    print(
        "The predefined dataset '%s' will be stored in the file\n" %
        (outName) + filePath + "\nFor future use you have to do 'scram b'.")
    print
    theFile = open(filePath, "w")
    theFile.write(dataset_cff)
    theFile.close()
    return
def __init__(self, datasetName, dasLimit=0, tryPredefinedFirst=True, cmssw=os.environ["CMSSW_BASE"], cmsswrelease=os.environ["CMSSW_RELEASE_BASE"]): self.__name = datasetName self.__origName = datasetName self.__dasLimit = dasLimit self.__fileList = None self.__fileInfoList = None self.__runList = None self.__alreadyStored = False self.__cmssw = cmssw self.__cmsswrelease = cmsswrelease self.__firstusedrun = None self.__lastusedrun = None self.__parentDataset = None self.__parentFileList = None self.__parentFileInfoList = None # check, if dataset name matches CMS dataset naming scheme if re.match(r'/.+/.+/.+', self.__name): self.__official = True fileName = "Dataset" + self.__name.replace("/", "_") + "_cff.py" else: self.__official = False fileName = self.__name + "_cff.py" searchPath1 = os.path.join(self.__cmssw, "python", "Alignment", "OfflineValidation", fileName) searchPath2 = os.path.join(self.__cmssw, "src", "Alignment", "OfflineValidation", "python", fileName) searchPath3 = os.path.join(self.__cmsswrelease, "python", "Alignment", "OfflineValidation", fileName) if self.__official and not tryPredefinedFirst: self.__predefined = False elif os.path.exists(searchPath1): self.__predefined = True self.__filename = searchPath1 elif os.path.exists(searchPath2): msg = ("The predefined dataset '%s' does exist in '%s', but " "you need to run 'scram b' first." % (self.__name, searchPath2)) if self.__official: print msg print "Getting the data from DAS again. To go faster next time, run scram b." else: raise AllInOneError(msg) elif os.path.exists(searchPath3): self.__predefined = True self.__filename = searchPath3 elif self.__official: self.__predefined = False else: msg = ("The predefined dataset '%s' does not exist. Please " "create it first or check for typos." 
% (self.__name)) raise AllInOneError(msg) if self.__predefined and self.__official: self.__name = "Dataset" + self.__name.replace("/", "_") self.__dataType = self.__getDataType() self.__magneticField = self.__getMagneticField()
"datasetDefinition"] = self.dataset.datasetSnippet( jsonPath=self.general["JSON"], nEvents=self.general["maxevents"], firstRun=self.general["firstRun"], lastRun=self.general["lastRun"], begin=self.general["begin"], end=self.general["end"]) except AllInOneError, e: msg = "In section [%s:%s]: " % (valType, self.name) msg += str(e) raise AllInOneError(msg) else: if self.dataset.predefined(): msg = ("For jobmode 'crab' you cannot use predefined datasets " "(in your case: '%s')." % (self.dataset.name())) raise AllInOneError(msg) try: theUpdate = config.getResultingSection( valType + ":" + self.name, demandPars=["parallelJobs"]) except AllInOneError, e: msg = str(e)[:-1] + " when using 'jobmode: crab'." raise AllInOneError(msg) self.general.update(theUpdate) if self.general["begin"] or self.general["end"]: (self.general["begin"], self.general["end"], self.general["firstRun"], self.general["lastRun"]) = self.dataset.convertTimeToRun( firstRun=self.general["firstRun"], lastRun=self.general["lastRun"], begin=self.general["begin"], end=self.general["end"],
def __init__(self, valName, alignment, config, valType,
             addDefaults={}, addMandatories=[]):
    """
    This method adds additional items to the `self.general` dictionary
    which are only needed for validations using datasets.

    Arguments:
    - `valName`: String which identifies individual validation instances
    - `alignment`: `Alignment` instance to validate
    - `config`: `BetterConfigParser` instance which includes the
                configuration of the validations
    - `valType`: String which specifies the type of validation
    - `addDefaults`: Dictionary which contains default values for individual
                     validations in addition to the general default values
    - `addMandatories`: List which contains mandatory parameters for
                        individual validations in addition to the general
                        mandatory parameters (currently there are no general
                        mandatories)
    """
    GenericValidation.__init__(self, valName, alignment, config)
    defaults = {
        "jobmode": self.jobmode,
        "runRange": "",
        "firstRun": "",
        "lastRun": "",
        "begin": "",
        "end": "",
        "JSON": ""
    }
    defaults.update(addDefaults)
    mandatories = []
    mandatories += addMandatories
    theUpdate = config.getResultingSection(valType + ":" + self.name,
                                           defaultDict=defaults,
                                           demandPars=mandatories)
    self.general.update(theUpdate)
    self.jobmode = self.general["jobmode"]
    knownOpts = defaults.keys() + mandatories
    ignoreOpts = []
    # 'parallelJobs' is only a valid option for crab jobs or the parallel
    # offline validation; otherwise it is silently ignored.
    if self.jobmode.split(",")[0] == "crab" \
       or self.__class__.__name__ == "OfflineValidationParallel":
        knownOpts.append("parallelJobs")
    else:
        ignoreOpts.append("parallelJobs")
    config.checkInput(valType + ":" + self.name,
                      knownSimpleOptions=knownOpts,
                      ignoreOptions=ignoreOpts)

    # Datasets are cached globally so several validations share one lookup.
    if self.general["dataset"] not in globalDictionaries.usedDatasets:
        globalDictionaries.usedDatasets[self.general["dataset"]] = Dataset(
            self.general["dataset"])
    self.dataset = globalDictionaries.usedDatasets[self.general["dataset"]]

    if not self.jobmode.split(',')[0] == "crab":
        try:
            self.general[
                "datasetDefinition"] = self.dataset.datasetSnippet(
                    jsonPath=self.general["JSON"],
                    nEvents=self.general["maxevents"],
                    firstRun=self.general["firstRun"],
                    lastRun=self.general["lastRun"],
                    begin=self.general["begin"],
                    end=self.general["end"])
        except AllInOneError, e:
            # Prefix the error with the config section for easier debugging.
            msg = "In section [%s:%s]: " % (valType, self.name)
            msg += str(e)
            raise AllInOneError(msg)
def __init__(self, config, valType, addDefaults={}, addMandatories=[],
             addneedpackages=[]):
    """Base constructor for plotting classes: reads the [plots:<valType>]
    section, locates the cmssw area and required packages, and validates
    the publication status / era / custom title combination.

    Arguments:
    - `config`: `BetterConfigParser` with the validation configuration
    - `valType`: string naming the plots section to read
    - `addDefaults`/`addMandatories`: per-subclass extra options
    - `addneedpackages`: extra CMSSW packages that must be checked out
    """
    import random
    self.type = valType
    self.general = config.getGeneral()
    # Random tag used to make working directories unique.
    self.randomWorkdirPart = "%0i" % random.randint(1, 10e9)
    self.config = config
    defaults = {
        "cmssw": os.environ["CMSSW_BASE"],
        "publicationstatus": "",
        "customtitle": "",
        "customrighttitle": "",
        "era": "NONE",
        "legendheader": "",
        "legendoptions": "all",
    }
    defaults.update(addDefaults)
    mandatories = []
    mandatories += addMandatories
    needpackages = ["Alignment/OfflineValidation"]
    needpackages += addneedpackages
    theUpdate = config.getResultingSection("plots:" + self.type,
                                           defaultDict=defaults,
                                           demandPars=mandatories)
    self.general.update(theUpdate)
    self.cmssw = self.general["cmssw"]
    # Quotes/backslashes in the path would break the shell command below.
    badcharacters = r"\'"
    for character in badcharacters:
        if character in self.cmssw:
            raise AllInOneError(
                "The bad characters " + badcharacters +
                " are not allowed in the cmssw\n"
                "path name. If you really have it in such a ridiculously named location,\n"
                "try making a symbolic link somewhere with a decent name.")
    try:
        os.listdir(self.cmssw)
    except OSError:
        raise AllInOneError("Your cmssw release " + self.cmssw +
                            ' does not exist')
    if self.cmssw == os.environ["CMSSW_BASE"]:
        self.scramarch = os.environ["SCRAM_ARCH"]
        self.cmsswreleasebase = os.environ["CMSSW_RELEASE_BASE"]
    else:
        # A different release area: query its environment via scram.
        command = (
            "cd '" + self.cmssw + "' && eval `scramv1 ru -sh 2> /dev/null`"
            ' && echo "$CMSSW_BASE\n$SCRAM_ARCH\n$CMSSW_RELEASE_BASE"')
        commandoutput = getCommandOutput2(command).split('\n')
        self.cmssw = commandoutput[0]
        self.scramarch = commandoutput[1]
        self.cmsswreleasebase = commandoutput[2]
    # Each needed package must exist in the user area or the release area.
    for package in needpackages:
        for placetolook in self.cmssw, self.cmsswreleasebase:
            pkgpath = os.path.join(placetolook, "src", package)
            if os.path.exists(pkgpath):
                self.general[package] = pkgpath
                break
        else:
            raise AllInOneError(
                "Package {} does not exist in {} or {}!".format(
                    package, self.cmssw, self.cmsswreleasebase))
    self.general["publicationstatus"] = self.general[
        "publicationstatus"].upper()
    self.general["era"] = self.general["era"].upper()
    # Default status: INTERNAL, or CUSTOM when only a custom title is given.
    if not self.general["publicationstatus"] and not self.general[
            "customtitle"]:
        self.general["publicationstatus"] = "INTERNAL"
    if self.general[
            "customtitle"] and not self.general["publicationstatus"]:
        self.general["publicationstatus"] = "CUSTOM"
    # customtitle and publicationstatus/era are mutually constrained.
    if self.general["publicationstatus"] != "CUSTOM" and self.general[
            "customtitle"]:
        raise AllInOneError(
            "If you would like to use a custom title, please leave out the 'publicationstatus' parameter"
        )
    if self.general["publicationstatus"] == "CUSTOM" and not self.general[
            "customtitle"]:
        raise AllInOneError(
            "If you want to use a custom title, you should provide it using 'customtitle' in the [plots:%s] section"
            % valType)
    if self.general["era"] != "NONE" and self.general["customrighttitle"]:
        raise AllInOneError(
            "If you would like to use a custom right title, please leave out the 'era' parameter"
        )
    publicationstatusenum = [
        "INTERNAL", "INTERNAL_SIMULATION", "PRELIMINARY", "PUBLIC",
        "SIMULATION", "UNPUBLISHED", "CUSTOM"
    ]
    eraenum = ["NONE", "CRUZET15", "CRAFT15", "COLL0T15"]
    if self.general["publicationstatus"] not in publicationstatusenum:
        raise AllInOneError("Publication status must be one of " +
                            ", ".join(publicationstatusenum) + "!")
    if self.general["era"] not in eraenum:
        raise AllInOneError("Era must be one of " + ", ".join(eraenum) +
                            "!")
    knownOpts = defaults.keys() + mandatories
    ignoreOpts = []
    config.checkInput("plots:" + self.type,
                      knownSimpleOptions=knownOpts,
                      ignoreOptions=ignoreOpts)
def parsestyle(style):
    """Validate that *style* is integer-like; return it unchanged on success."""
    try:
        int(style)
    except ValueError:
        raise AllInOneError("style has to be an integer!")
    return style
def createCrabCfg(self, path):
    """Geometry comparison cannot run in parallel, so no crab config exists."""
    raise AllInOneError(
        "Parallelization not supported for geometry comparison. Please "
        "choose another 'jobmode'.")
def __init__(self, valName, alignment, config, valType,
             addDefaults = {}, addMandatories=[]):
    """
    This method adds additional items to the `self.general` dictionary
    which are only needed for validations using datasets.

    Arguments:
    - `valName`: String which identifies individual validation instances
    - `alignment`: `Alignment` instance to validate
    - `config`: `BetterConfigParser` instance which includes the
                configuration of the validations
    - `valType`: String which specifies the type of validation
    - `addDefaults`: Dictionary which contains default values for individual
                     validations in addition to the general default values
    - `addMandatories`: List which contains mandatory parameters for
                        individual validations in addition to the general
                        mandatory parameters
    """
    defaults = {"runRange": "",
                "firstRun": "",
                "lastRun": "",
                "begin": "",
                "end": "",
                "JSON": ""
                }
    defaults.update(addDefaults)
    mandatories = [ "dataset", "maxevents" ]
    mandatories += addMandatories
    GenericValidation.__init__(self, valName, alignment, config, valType,
                               defaults, mandatories)

    # if maxevents is not specified, cannot calculate number of events for
    # each parallel job, and therefore running only a single job
    if int( self.general["maxevents"] ) == -1 and self.NJobs > 1:
        msg = ("Maximum number of events (maxevents) not specified: "
               "cannot use parallel jobs.")
        raise AllInOneError(msg)

    # A predefined dataset can only be used when no run/time/JSON
    # selection is requested and the jobmode is not crab.
    tryPredefinedFirst = (not self.jobmode.split( ',' )[0] == "crab" and
                          self.general["JSON"] == "" and
                          self.general["firstRun"] == "" and
                          self.general["lastRun"] == "" and
                          self.general["begin"] == "" and
                          self.general["end"] == "")

    # Datasets are cached per (dataset, cmssw area, tryPredefinedFirst)
    # so several validations share one DAS lookup.
    if self.general["dataset"] not in globalDictionaries.usedDatasets:
        globalDictionaries.usedDatasets[self.general["dataset"]] = {}

    if self.cmssw not in globalDictionaries.usedDatasets[self.general["dataset"]]:
        if globalDictionaries.usedDatasets[self.general["dataset"]] != {}:
            print ("Warning: you use the same dataset '%s' in multiple cmssw releases.\n"
                   "This is allowed, but make sure it's not a mistake") % self.general["dataset"]
        globalDictionaries.usedDatasets[self.general["dataset"]][self.cmssw] = {False: None, True: None}

    if globalDictionaries.usedDatasets[self.general["dataset"]][self.cmssw][tryPredefinedFirst] is None:
        dataset = Dataset(
            self.general["dataset"], tryPredefinedFirst = tryPredefinedFirst,
            cmssw = self.cmssw, cmsswrelease = self.cmsswreleasebase )
        globalDictionaries.usedDatasets[self.general["dataset"]][self.cmssw][tryPredefinedFirst] = dataset
        if tryPredefinedFirst and not dataset.predefined():  #No point finding the data twice in that case
            globalDictionaries.usedDatasets[self.general["dataset"]][self.cmssw][False] = dataset

    self.dataset = globalDictionaries.usedDatasets[self.general["dataset"]][self.cmssw][tryPredefinedFirst]
    self.general["magneticField"] = self.dataset.magneticField()
    self.general["defaultMagneticField"] = "38T"
    if self.general["magneticField"] == "unknown":
        print "Could not get the magnetic field for this dataset."
        print "Using the default: ", self.general["defaultMagneticField"]
        self.general["magneticField"] = '.oO[defaultMagneticField]Oo.'

    if not self.jobmode.split( ',' )[0] == "crab":
        try:
            self.general["datasetDefinition"] = self.dataset.datasetSnippet(
                jsonPath = self.general["JSON"],
                firstRun = self.general["firstRun"],
                lastRun = self.general["lastRun"],
                begin = self.general["begin"],
                end = self.general["end"],
                parent = self.needParentFiles )
        except AllInOneError, e:
            # Prefix the error with the config section for easier debugging.
            msg = "In section [%s:%s]: "%(valType, self.name)
            msg += str(e)
            raise AllInOneError(msg)
def __init__(self, valName, alignment, config, valType,
             addDefaults = {}, addMandatories=[]):
    """Base constructor for all validations: reads the [<valType>:<valName>]
    section, resolves the cmssw area (via scram if it is not the current
    one), and validates the parallel-job count.

    Arguments:
    - `valName`: String which identifies individual validation instances
    - `alignment`: `Alignment` instance to validate
    - `config`: `BetterConfigParser` with the validation configuration
    - `valType`: String which specifies the type of validation
    - `addDefaults`/`addMandatories`: per-subclass extra options
    """
    import random
    self.name = valName
    self.alignmentToValidate = alignment
    self.general = config.getGeneral()
    # Random tag used to make working directories unique.
    self.randomWorkdirPart = "%0i"%random.randint(1,10e9)
    self.configFiles = []
    self.filesToCompare = {}
    self.config = config

    defaults = {"jobmode":      self.general["jobmode"],
                "cmssw":        os.environ['CMSSW_BASE'],
                "parallelJobs": "1"
                }
    defaults.update(addDefaults)
    mandatories = []
    mandatories += addMandatories
    theUpdate = config.getResultingSection(valType+":"+self.name,
                                           defaultDict = defaults,
                                           demandPars = mandatories)
    self.general.update(theUpdate)
    self.jobmode = self.general["jobmode"]
    self.NJobs = int(self.general["parallelJobs"])

    # limit maximum number of parallel jobs to 40
    # (each output file is approximately 20MB)
    maximumNumberJobs = 40
    if self.NJobs > maximumNumberJobs:
        msg = ("Maximum allowed number of parallel jobs "
               +str(maximumNumberJobs)+" exceeded!!!")
        raise AllInOneError(msg)

    self.cmssw = self.general["cmssw"]
    # Quotes/backslashes in the path would break the shell command below.
    badcharacters = r"\'"
    for character in badcharacters:
        if character in self.cmssw:
            raise AllInOneError("The bad characters " + badcharacters + " are not allowed in the cmssw\n"
                                "path name. If you really have it in such a ridiculously named location,\n"
                                "try making a symbolic link somewhere with a decent name.")
    try:
        os.listdir(self.cmssw)
    except OSError:
        raise AllInOneError("Your cmssw release " + self.cmssw + ' does not exist')

    if self.cmssw == os.environ["CMSSW_BASE"]:
        self.scramarch = os.environ["SCRAM_ARCH"]
        self.cmsswreleasebase = os.environ["CMSSW_RELEASE_BASE"]
    else:
        # A different release area: query its environment via scram.
        self.scramarch = None
        self.cmsswreleasebase = None
        command = ("cd '" + self.cmssw + "' && eval `scramv1 ru -sh 2> /dev/null`"
                   ' && echo "$SCRAM_ARCH\n$CMSSW_RELEASE_BASE"')
        commandoutput = getCommandOutput2(command).split('\n')
        self.scramarch = commandoutput[0]
        self.cmsswreleasebase = commandoutput[1]

    self.AutoAlternates = True
    if config.has_option("alternateTemplates","AutoAlternates"):
        try:
            # json.loads turns the lower-cased "true"/"false" into a bool.
            self.AutoAlternates = json.loads(config.get("alternateTemplates","AutoAlternates").lower())
        except ValueError:
            raise AllInOneError("AutoAlternates needs to be true or false, not %s" % config.get("alternateTemplates","AutoAlternates"))

    knownOpts = defaults.keys()+mandatories
    ignoreOpts = []
    config.checkInput(valType+":"+self.name,
                      knownSimpleOptions = knownOpts,
                      ignoreOptions = ignoreOpts)
def __getMagneticFieldForRun(self, run=-1, tolerance=0.5):
    """For MC, this returns the same as the previous function.
       For data, it gets the magnetic field from the runs.  This is important for
       deciding which template to use for offlinevalidation

    Returns a float in Tesla on success, or a string starting with
    "unknown " that explains why the field could not be determined.
    """
    if self.__dataType == "mc" and self.__magneticField == "MagneticField":
        return 3.8  #For 3.8T MC the default MagneticField is used
    if self.__inputMagneticField is not None:
        return self.__inputMagneticField
    if "T" in self.__magneticField:
        # Parse the field strength out of names like "38T_PostLS1".
        Bfield = self.__magneticField.split("T")[0].replace(
            "MagneticField_", "")
        try:
            return float(
                Bfield) / 10.0  #e.g. 38T and 38T_PostLS1 both return 3.8
        except ValueError:
            pass
    if self.__predefined:
        # Predefined datasets carry the field in a "#magnetic field:" header.
        with open(self.__filename) as f:
            Bfield = None
            for line in f.readlines():
                if line.startswith("#magnetic field: ") and "," in line:
                    if Bfield is not None:
                        raise AllInOneError(
                            self.__filename +
                            " has multiple 'magnetic field' lines.")
                    # Field value is the second comma-separated entry,
                    # before any trailing "#" comment.
                    return float(
                        line.replace(
                            "#magnetic field: ",
                            "").split(",")[1].split("#")[0].strip())

    if run > 0:
        dasQuery = ('run=%s instance=%s detail=true' %
                    (run, self.__dasinstance))  #for data
        data = self.__getData(dasQuery)
        try:
            return self.__findInJson(data, ["run", "bfield"])
        except KeyError:
            return "unknown Can't get the magnetic field for run %s from DAS" % run

    #run < 0 - find B field for the first and last runs, and make sure they're compatible
    # (to within tolerance)
    #NOT FOOLPROOF! The magnetic field might go up and then down, or vice versa
    if self.__firstusedrun is None or self.__lastusedrun is None:
        return "unknown Can't get the exact magnetic field for the dataset until data has been retrieved from DAS."
    firstrunB = self.__getMagneticFieldForRun(self.__firstusedrun)
    lastrunB = self.__getMagneticFieldForRun(self.__lastusedrun)
    try:
        if abs(firstrunB - lastrunB) <= tolerance:
            return .5 * (firstrunB + lastrunB)
        print firstrunB, lastrunB, tolerance
        return (
            "unknown The beginning and end of your run range for %s\n"
            "have different magnetic fields (%s, %s)!\n"
            "Try limiting the run range using firstRun, lastRun, begin, end, or JSON,\n"
            "or increasing the tolerance (in dataset.py) from %s.") % (
                self.__name, firstrunB, lastrunB, tolerance)
    except TypeError:
        # One (or both) of the recursive calls returned an "unknown ..."
        # string instead of a float; propagate whichever is the string.
        try:
            if "unknown" in firstrunB:
                return firstrunB
            else:
                return lastrunB
        except TypeError:
            return lastrunB
def appendToMerge(self, *args, **kwargs):
    """Merging is not applicable to this validation type; reaching this is a bug."""
    msg = "Shouldn't be here..."
    raise AllInOneError(msg)
def datasetSnippet(self, jsonPath=None, begin=None, end=None,
                   firstRun=None, lastRun=None, crab=False, parent=False):
    """Return the python snippet defining the cms.Source for this dataset.

    For predefined datasets this is a process.load() of the stored cff;
    otherwise the snippet is generated via __createSnippet.

    Arguments:
    - `jsonPath`: JSON lumi mask (official datasets only)
    - `begin`/`end`, `firstRun`/`lastRun`: time/run range selection
    - `crab`: generate the dummy source used for crab jobs
    - `parent`: also include the parent (secondary) files
    """
    # Normalize empty strings to None for the range arguments.
    if not firstRun:
        firstRun = None
    if not lastRun:
        lastRun = None
    if not begin:
        begin = None
    if not end:
        end = None
    if self.__predefined and (jsonPath or begin or end or firstRun
                              or lastRun):
        msg = (
            "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun' "
            "only work for official datasets, not predefined _cff.py files"
        )
        raise AllInOneError(msg)
    if self.__predefined and parent:
        with open(self.__filename) as f:
            if "secFiles.extend" not in f.read():
                msg = (
                    "The predefined dataset '%s' does not contain secondary files, "
                    "which your validation requires!") % self.__name
                if self.__official:
                    # Fall back to fetching from DAS under the original name.
                    self.__name = self.__origName
                    self.__predefined = False
                    print msg
                    print(
                        "Retreiving the files from DAS. You will be asked if you want "
                        "to overwrite the old dataset.\n"
                        "It will still be compatible with validations that don't need secondary files."
                    )
                else:
                    raise AllInOneError(msg)

    if self.__predefined:
        # NOTE(review): whitespace inside these generated-code literals may
        # have been mangled in this copy -- verify against version control.
        snippet = (
            "process.load(\"Alignment.OfflineValidation.%s_cff\")\n"
            "process.maxEvents = cms.untracked.PSet(\n"
            " input = cms.untracked.int32(.oO[nEvents]Oo. / .oO[parallelJobs]Oo.)\n"
            ")\n"
            "process.source.skipEvents=cms.untracked.uint32(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.)"
            % (self.__name))
        if not parent:
            with open(self.__filename) as f:
                if "secFiles.extend" in f.read():
                    snippet += "\nprocess.source.secondaryFileNames = cms.untracked.vstring()"
        return snippet
    theMap = {
        "process": "process.",
        "tab": " " * len("process."),
        "nEvents": ".oO[nEvents]Oo. / .oO[parallelJobs]Oo.",
        "skipEventsString":
        "process.source.skipEvents=cms.untracked.uint32(.oO[nIndex]Oo.*.oO[nEvents]Oo./.oO[parallelJobs]Oo.)\n",
        "importCms": "",
        "header": ""
    }
    datasetSnippet = self.__createSnippet(jsonPath=jsonPath,
                                          begin=begin,
                                          end=end,
                                          firstRun=firstRun,
                                          lastRun=lastRun,
                                          repMap=theMap,
                                          crab=crab,
                                          parent=parent)
    # NOTE(review): begin/end/firstRun/lastRun were normalized to None
    # above, so the == "" comparisons below can only match jsonPath; this
    # branch looks unreachable for the others -- confirm intended behavior.
    if jsonPath == "" and begin == "" and end == "" and firstRun == "" and lastRun == "":
        try:
            self.dump_cff(parent=parent)
        except AllInOneError as e:
            # Best effort only: failure to store the cff is not fatal.
            print "Can't store the dataset as a cff:"
            print e
            print "This may be inconvenient in the future, but will not cause a problem for this validation."
    return datasetSnippet
def __init__(self, *args, **kwargs):
    """Unsupported: there is no preexisting variant of the Z->mumu validation."""
    msg = "Preexisting Z->mumu validation not implemented"
    raise AllInOneError(msg)
def __createSnippet(self, jsonPath=None, begin=None, end=None, firstRun=None, lastRun=None, repMap=None, crab=False, parent=False): if firstRun: firstRun = int(firstRun) if lastRun: lastRun = int(lastRun) if (begin and firstRun) or (end and lastRun): msg = ( "The Usage of " + "'begin' & 'firstRun' " * int(bool(begin and firstRun)) + "and " * int(bool( (begin and firstRun) and (end and lastRun))) + "'end' & 'lastRun' " * int(bool(end and lastRun)) + "is ambigous.") raise AllInOneError(msg) if begin or end: (firstRun, lastRun) = self.convertTimeToRun(begin=begin, end=end, firstRun=firstRun, lastRun=lastRun) if (firstRun and lastRun) and (firstRun > lastRun): msg = ("The lower time/runrange limit ('begin'/'firstRun') " "chosen is greater than the upper time/runrange limit " "('end'/'lastRun').") raise AllInOneError(msg) if self.predefined() and (jsonPath or begin or end or firstRun or lastRun): msg = ( "The parameters 'JSON', 'begin', 'end', 'firstRun', and 'lastRun'" "only work for official datasets, not predefined _cff.py files" ) raise AllInOneError(msg) goodLumiSecStr = "" lumiStr = "" lumiSecExtend = "" if firstRun or lastRun or jsonPath: goodLumiSecStr = ("lumiSecs = cms.untracked." 
"VLuminosityBlockRange()\n") lumiStr = " lumisToProcess = lumiSecs,\n" if not jsonPath: selectedRunList = self.__getRunList() if firstRun: selectedRunList = [ run for run in selectedRunList \ if self.__findInJson(run, "run_number") >= firstRun ] if lastRun: selectedRunList = [ run for run in selectedRunList \ if self.__findInJson(run, "run_number") <= lastRun ] lumiList = [ str( self.__findInJson(run, "run_number") ) + ":1-" \ + str( self.__findInJson(run, "run_number") ) + ":max" \ for run in selectedRunList ] splitLumiList = list(self.__chunks(lumiList, 255)) else: theLumiList = None try: theLumiList = LumiList(filename=jsonPath) except ValueError: pass if theLumiList is not None: allRuns = theLumiList.getRuns() runsToRemove = [] for run in allRuns: if firstRun and int(run) < firstRun: runsToRemove.append(run) if lastRun and int(run) > lastRun: runsToRemove.append(run) theLumiList.removeRuns(runsToRemove) splitLumiList = list( self.__chunks(theLumiList.getCMSSWString().split(','), 255)) else: with open(jsonPath) as f: jsoncontents = f.read() if "process.source.lumisToProcess" in jsoncontents: msg = "%s is not a json file, but it seems to be a CMSSW lumi selection cff snippet. Trying to use it" % jsonPath if firstRun or lastRun: msg += ( "\n (after applying firstRun and/or lastRun)" ) msg += ".\nPlease note that, depending on the format of this file, it may not work as expected." msg += "\nCheck your config file to make sure that it worked properly." 
print msg self.__firstUsedRun = -1 self.__lastUsedRun = -1 if firstRun or lastRun: jsoncontents = re.sub( "\d+:(\d+|max)-\d+:(\d+|max)", self.getForceRunRangeFunction( firstRun, lastRun), jsoncontents) lumiSecExtend = jsoncontents splitLumiList = [[""]] if not len(splitLumiList[0][0]) == 0: lumiSecStr = [ "',\n'".join( lumis ) \ for lumis in splitLumiList ] lumiSecStr = [ "lumiSecs.extend( [\n'" + lumis + "'\n] )" \ for lumis in lumiSecStr ] lumiSecExtend = "\n".join(lumiSecStr) self.__firstusedrun = splitLumiList[0][0].split(":")[0] self.__lastusedrun = splitLumiList[-1][-1].split(":")[0] else: self.__firstusedrun = self.__findInJson(self.__getRunList()[0], "run_number") self.__lastusedrun = self.__findInJson(self.__getRunList()[-1], "run_number") if crab: files = "" else: splitFileList = list(self.__chunks(self.fileList(), 255)) fileStr = ["',\n'".join(files) for files in splitFileList] fileStr = [ "readFiles.extend( [\n'" + files + "'\n] )" \ for files in fileStr ] files = "\n".join(fileStr) if parent: splitParentFileList = list( self.__chunks(self.fileList(parent=True), 255)) parentFileStr = [ "',\n'".join(parentFiles) for parentFiles in splitParentFileList ] parentFileStr = [ "secFiles.extend( [\n'" + parentFiles + "'\n] )" \ for parentFiles in parentFileStr ] parentFiles = "\n".join(parentFileStr) files += "\n\n" + parentFiles theMap = repMap theMap["files"] = files theMap["json"] = jsonPath theMap["lumiStr"] = lumiStr theMap["goodLumiSecStr"] = goodLumiSecStr % (theMap) theMap["lumiSecExtend"] = lumiSecExtend if crab: dataset_snippet = self.__dummy_source_template % (theMap) else: dataset_snippet = self.__source_template % (theMap) return dataset_snippet
def __init__(self, *args, **kwargs):
    """Disabled constructor: this feature is not available.

    Accepts (and ignores) any arguments so that every attempted
    instantiation fails with the same explanatory error.
    """
    # Fail loudly no matter how the caller tried to construct this object.
    message = "Preexisting geometry comparison not implemented"
    raise AllInOneError(message)
def __getMagneticField(self):
    # Determine the magnetic field setting for this dataset.  Returns the name
    # of a MagneticField_<name>_cff.py fragment available in this CMSSW
    # release, "AutoFromDBCurrent" for real data, or "unknown" as a fallback.
    Bfieldlocation = os.path.join(self.__cmsswrelease, "python", "Configuration", "StandardSequences")
    # Build the list of field names shipped with this release from the
    # MagneticField_<name>_cff.py files (the generic MagneticField_cff.py is excluded).
    Bfieldlist = [ f.replace("MagneticField_",'').replace("_cff.py",'') \
                       for f in os.listdir(Bfieldlocation) \
                           if f.startswith("MagneticField_") and f.endswith("_cff.py") and f != "MagneticField_cff.py" ]
    Bfieldlist.sort( key=lambda Bfield: -len(Bfield) ) #Put it in order of decreasing length, so that searching in the name gives the longer match
    if self.__predefined:
        # Predefined dataset: the snippet file carries the metadata as
        # "#data type: ..." and "#magnetic field: ..." comment lines; parse those.
        with open(self.__filename) as f:
            datatype = None
            Bfield = None
            for line in f.readlines():
                if line.startswith("#data type: "):
                    if datatype is not None:
                        raise AllInOneError(self.__filename + " has multiple 'data type' lines.")
                    datatype = line.replace("#data type: ", "").replace("\n", "")
                if line.startswith("#magnetic field: "):
                    if Bfield is not None:
                        raise AllInOneError(self.__filename + " has multiple 'magnetic field' lines.")
                    Bfield = line.replace("#magnetic field: ", "").replace("\n", "")
            if Bfield is not None:
                # Only the first comma-separated token is the field name.
                Bfield = Bfield.split(",")[0]
                if Bfield in Bfieldlist or Bfield == "unknown":
                    return Bfield
                else:
                    # The recorded field does not exist in this release; warn and fall back.
                    print "Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield
                    print "Using Bfield='unknown' - this will revert to the default"
                    return "unknown"
            elif datatype == "data":
                return "AutoFromDBCurrent" #this should be in the "#magnetic field" line, but for safety in case it got messed up
            else:
                return "unknown"
    if self.__dataType == "data":
        return "AutoFromDBCurrent"
    dasQuery_B = ('dataset dataset=%s' % (self.__name) ) #try to find the magnetic field from DAS
    data = self.__getData( dasQuery_B ) #it seems to be there for the newer (7X) MC samples, except cosmics
    try:
        Bfield = self.__findInJson(data, ["dataset", "mcm", "sequences", "magField"])
        if Bfield in Bfieldlist:
            return Bfield
        elif Bfield == "":
            # Empty field entry in DAS: fall through to the name-based guess below.
            pass
        else:
            print "Your dataset has magnetic field '%s', which does not exist in your CMSSW version!" % Bfield
            print "Using Bfield='unknown' - this will revert to the default magnetic field"
            return "unknown"
    except KeyError:
        # DAS record has no mcm/sequences/magField entry; try the dataset name instead.
        pass
    # Last resort: look for a known field name embedded in the dataset name.
    for possibleB in Bfieldlist:
        if possibleB in self.__name.replace( "TkAlCosmics0T", "" ): #for some reason all cosmics dataset names contain this string
            return possibleB
    return "unknown"
def createScript(self, *args, **kwargs):
    """Guard method: this object never creates a script.

    Any call, with whatever arguments, is a logic error and raises.
    """
    raise AllInOneError("Shouldn't be here...")
def convertTimeToRun(self, begin=None, end=None, firstRun=None, lastRun=None, shortTuple=True):
    """Translate 'begin'/'end' dates into run numbers via DAS.

    Arguments:
    - `begin`, `end`: date strings; each is resolved to the first run at or
      after `begin` / the last run before `end` by querying DAS in growing
      windows (up to ~2 months).
    - `firstRun`, `lastRun`: run numbers; mutually exclusive with the
      corresponding date argument.
    - `shortTuple`: if True return (firstRun, lastRun), otherwise
      (begin, end, firstRun, lastRun) with resolved dates cleared to None.

    Raises AllInOneError on ambiguous arguments or when no run can be found
    near the requested dates.
    """
    if (begin and firstRun) or (end and lastRun):
        # Fix: "ambigous" typo in the user-facing message corrected.
        msg = (
            "The Usage of "
            + "'begin' & 'firstRun' " * int(bool(begin and firstRun))
            + "and " * int(bool((begin and firstRun) and (end and lastRun)))
            + "'end' & 'lastRun' " * int(bool(end and lastRun))
            + "is ambiguous.")
        raise AllInOneError(msg)

    if begin or end:
        # Sorted list of all run numbers in the dataset, used for bisecting.
        runList = [ self.__findInJson(run, "run_number") for run in self.__getRunList() ]

    if begin:
        lastdate = begin
        for delta in [1, 5, 10, 20, 30]: #try searching for about 2 months after begin
            firstdate = lastdate
            lastdate = self.__dateString(self.__datetime(firstdate) + datetime.timedelta(delta))
            dasQuery_begin = "run date between[%s,%s]" % (firstdate, lastdate)
            begindata = self.__getData(dasQuery_begin)
            if len(begindata) > 0:
                begindata.sort(key=lambda run: self.__findInJson(run, ["run", "run_number"]))
                try:
                    # First dataset run at or after the earliest run in the window.
                    runIndex = self.__find_ge(runList, self.__findInJson(begindata[0], ["run", "run_number"]))
                except ValueError:
                    msg = ("Your 'begin' is after the creation time of the last "
                           "run in the dataset\n'%s'" % (self.__name))
                    raise AllInOneError(msg)
                firstRun = runList[runIndex]
                begin = None  # mark as resolved
                break

    if begin:
        # Fix: the two adjacent string literals previously concatenated with no
        # separator ("...'begin'.Try using..."); a newline now separates them.
        raise AllInOneError("No runs within a reasonable time interval after your 'begin'.\n"
                            "Try using a 'begin' that has runs soon after it (within 2 months at most)")

    if end:
        firstdate = end
        for delta in [1, 5, 10, 20, 30]: #try searching for about 2 months before end
            lastdate = firstdate
            firstdate = self.__dateString(self.__datetime(lastdate) - datetime.timedelta(delta))
            dasQuery_end = "run date between[%s,%s]" % (firstdate, lastdate)
            enddata = self.__getData(dasQuery_end)
            if len(enddata) > 0:
                enddata.sort(key=lambda run: self.__findInJson(run, ["run", "run_number"]))
                try:
                    # Last dataset run strictly before the latest run in the window.
                    runIndex = self.__find_lt(runList, self.__findInJson(enddata[-1], ["run", "run_number"]))
                except ValueError:
                    msg = ("Your 'end' is before the creation time of the first "
                           "run in the dataset\n'%s'" % (self.__name))
                    raise AllInOneError(msg)
                lastRun = runList[runIndex]
                end = None  # mark as resolved
                break

    if end:
        # Fix: same missing-separator defect as the 'begin' message above.
        raise AllInOneError("No runs within a reasonable time interval before your 'end'.\n"
                            "Try using an 'end' that has runs soon before it (within 2 months at most)")

    if shortTuple:
        return firstRun, lastRun
    else:
        return begin, end, firstRun, lastRun
def __init__(self, valName, alignment, config, valType, addDefaults={}, addMandatories=[], addneedpackages=[]): """ This method adds additional items to the `self.general` dictionary which are only needed for validations using datasets. Arguments: - `valName`: String which identifies individual validation instances - `alignment`: `Alignment` instance to validate - `config`: `BetterConfigParser` instance which includes the configuration of the validations - `valType`: String which specifies the type of validation - `addDefaults`: Dictionary which contains default values for individual validations in addition to the general default values - `addMandatories`: List which contains mandatory parameters for individual validations in addition to the general mandatory parameters """ defaults = { "runRange": "", "firstRun": "", "lastRun": "", "begin": "", "end": "", "JSON": "" } defaults.update(addDefaults) mandatories = ["dataset", "maxevents"] mandatories += addMandatories needpackages = addneedpackages GenericValidation.__init__(self, valName, alignment, config, valType, defaults, mandatories, needpackages) # if maxevents is not specified, cannot calculate number of events for # each parallel job, and therefore running only a single job if int(self.general["maxevents"]) == -1 and self.NJobs > 1: msg = ("Maximum number of events (maxevents) not specified: " "cannot use parallel jobs.") raise AllInOneError(msg) tryPredefinedFirst = (not self.jobmode.split(',')[0] == "crab" and self.general["JSON"] == "" and self.general["firstRun"] == "" and self.general["lastRun"] == "" and self.general["begin"] == "" and self.general["end"] == "") if self.general["dataset"] not in globalDictionaries.usedDatasets: globalDictionaries.usedDatasets[self.general["dataset"]] = {} if self.cmssw not in globalDictionaries.usedDatasets[ self.general["dataset"]]: if globalDictionaries.usedDatasets[self.general["dataset"]] != {}: print( "Warning: you use the same dataset '%s' in multiple cmssw 
releases.\n" "This is allowed, but make sure it's not a mistake" ) % self.general["dataset"] globalDictionaries.usedDatasets[self.general["dataset"]][ self.cmssw] = { False: None, True: None } if globalDictionaries.usedDatasets[self.general["dataset"]][ self.cmssw][tryPredefinedFirst] is None: dataset = Dataset(self.general["dataset"], tryPredefinedFirst=tryPredefinedFirst, cmssw=self.cmssw, cmsswrelease=self.cmsswreleasebase) globalDictionaries.usedDatasets[self.general["dataset"]][ self.cmssw][tryPredefinedFirst] = dataset if tryPredefinedFirst and not dataset.predefined( ): #No point finding the data twice in that case globalDictionaries.usedDatasets[self.general["dataset"]][ self.cmssw][False] = dataset self.dataset = globalDictionaries.usedDatasets[ self.general["dataset"]][self.cmssw][tryPredefinedFirst] self.general["magneticField"] = self.dataset.magneticField() self.general["defaultMagneticField"] = "MagneticField" if self.general["magneticField"] == "unknown": print "Could not get the magnetic field for this dataset." print "Using the default: ", self.general["defaultMagneticField"] self.general["magneticField"] = '.oO[defaultMagneticField]Oo.' if not self.jobmode.split(',')[0] == "crab": try: self.general[ "datasetDefinition"] = self.dataset.datasetSnippet( jsonPath=self.general["JSON"], firstRun=self.general["firstRun"], lastRun=self.general["lastRun"], begin=self.general["begin"], end=self.general["end"], parent=self.needParentFiles) except AllInOneError as e: msg = "In section [%s:%s]: " % (valType, self.name) msg += str(e) raise AllInOneError(msg) else: if self.dataset.predefined(): msg = ("For jobmode 'crab' you cannot use predefined datasets " "(in your case: '%s')." % (self.dataset.name())) raise AllInOneError(msg) try: theUpdate = config.getResultingSection( valType + ":" + self.name, demandPars=["parallelJobs"]) except AllInOneError as e: msg = str(e)[:-1] + " when using 'jobmode: crab'." 
raise AllInOneError(msg) self.general.update(theUpdate) if self.general["begin"] or self.general["end"]: (self.general["begin"], self.general["end"], self.general["firstRun"], self.general["lastRun"]) = self.dataset.convertTimeToRun( firstRun=self.general["firstRun"], lastRun=self.general["lastRun"], begin=self.general["begin"], end=self.general["end"], shortTuple=False) if self.general["begin"] == None: self.general["begin"] = "" if self.general["end"] == None: self.general["end"] = "" self.general["firstRun"] = str(self.general["firstRun"]) self.general["lastRun"] = str(self.general["lastRun"]) if ( not self.general["firstRun"] ) and \ ( self.general["end"] or self.general["lastRun"] ): self.general["firstRun"] = str( self.dataset.runList()[0]["run_number"]) if ( not self.general["lastRun"] ) and \ ( self.general["begin"] or self.general["firstRun"] ): self.general["lastRun"] = str( self.dataset.runList()[-1]["run_number"]) if self.general["firstRun"] and self.general["lastRun"]: if int(self.general["firstRun"]) > int( self.general["lastRun"]): msg = ( "The lower time/runrange limit ('begin'/'firstRun') " "chosen is greater than the upper time/runrange limit " "('end'/'lastRun').") raise AllInOneError(msg) self.general["runRange"] = (self.general["firstRun"] + '-' + self.general["lastRun"]) try: self.general[ "datasetDefinition"] = self.dataset.datasetSnippet( jsonPath=self.general["JSON"], firstRun=self.general["firstRun"], lastRun=self.general["lastRun"], begin=self.general["begin"], end=self.general["end"], crab=True) except AllInOneError as e: msg = "In section [%s:%s]: " % (valType, self.name) msg += str(e) raise AllInOneError(msg)