def _validate(self):
    """Validate self.file against its external DTD, if one is declared.

    Returns True (and logs) when the XML file declares no external DTD.
    Raises RTTInputError when no version of the DTD can be fetched, or
    when the file fails DTD validation.
    """
    # get system DTD path from XML file (if it exists)
    xmlFileDTD = self._dom.doctype.systemId
    if not xmlFileDTD:
        logger.info(
            'No external DTD detected in XML file. Considering %s as valid.'
            % os.path.basename(self.file))
        return True

    # candidate DTD locations: the one declared in the file, plus any
    # registered alternatives keyed by the DTD's basename
    dtds = [xmlFileDTD]
    dtds.extend(
        Validator.__alternativeDTDs.get(os.path.basename(xmlFileDTD), []))

    dtdContent = self.grabDTD(dtds)
    if not dtdContent:
        m = 'Invalid XML File: Unable to access any version of the DTD %s. Considering XML file [%s] as invalid.' % (
            os.path.basename(xmlFileDTD), os.path.basename(self.file))
        raise RTTInputError(m)

    # now load this string dtd and wire up a validating xmlproc parser
    dtd = xmldtd.load_dtd_string(dtdContent)
    parser = xmlproc.XMLProcessor()
    parser.set_application(xmlval.ValidatingApp(dtd, parser))
    parser.dtd = dtd
    parser.ent = dtd

    try:
        parser.parse_resource(self.file)
    except Exception:
        # fix: 'except Exception, e' is Python-2-only syntax and the bound
        # exception was unused; also add the previously missing newline
        # between the two sentences of the error message
        m = 'Invalid XML File: %s is __NOT__ valid against the DTD.\n' % self.file
        m += 'Cause: Either this is not a valid DTD, or the XML file is genuinely invalid against the DTD'
        raise RTTInputError(m)
def validate(self):
    """Public validation entry point.

    Raises RTTInputError if the earlier parse failed, or if _validate
    raises for any reason (the original exception and a traceback are
    folded into the RTTInputError message).
    """
    if not self.parseOK:
        raise RTTInputError('User xml file failed parse')
    try:
        self._validate()
    except Exception as e:  # fix: py2-only 'except Exception, e' syntax
        msg = 'Exception of type %s raised while validating xml file\nException:\n%s\nTraceback:\n%s' % (
            e.__class__.__name__, str(e), exc2string2())
        raise RTTInputError(msg)
def validateInputParams(logger, RTTSrcDir, RTTLibDir, topLevelRTTconfFileLocation):
    """Check that topLevelRTTconfFileLocation is an absolute path to an
    existing file; log and raise RTTInputError otherwise.

    RTTSrcDir and RTTLibDir are accepted for interface compatibility but
    are not checked here.
    """
    #----------------------------------------------
    # check for the ok-ness of the passed parameter
    #----------------------------------------------
    if not os.path.isabs(topLevelRTTconfFileLocation):
        m = 'topLevelRTTconfFileLocation %s is not an absolute path; it must be. Exiting...' % topLevelRTTconfFileLocation
        logger.critical(m)
        raise RTTInputError(m)

    if not os.path.exists(topLevelRTTconfFileLocation):
        # bug fix: this branch previously reused the 'is not an absolute
        # path' message; report the actual problem (path does not exist)
        m = 'topLevelRTTconfFileLocation %s does not exist; it must. Exiting...' % topLevelRTTconfFileLocation
        logger.critical(m)
        raise RTTInputError(m)
def getPhysicalDataset(self, logicalDataSetName):
    """Map a logical dataset name to its physical dataset via the catalog.

    Raises RTTInputError when the name is unknown to the catalog, or maps
    to an empty/None physical name.
    """
    # fix: dict.has_key() is deprecated (removed in Python 3); use 'in'
    if logicalDataSetName not in self.dataSetCatalog:
        msg = 'logical data set name not known to catalog %s' % logicalDataSetName
        raise RTTInputError(msg)

    resDataSetName = self.dataSetCatalog.get(logicalDataSetName)
    if not resDataSetName:
        msg = 'The logical data set name:\n' + str(logicalDataSetName) + '\n maps to no physical data set in the catalog!'
        raise RTTInputError(msg)
    return resDataSetName
def check(sizeEls):
    """Return the file-size threshold held in the first element of sizeEls
    as a float, or '' when sizeEls is empty.

    Raises RTTInputError when the element's text is not a number, or is
    not strictly positive. (Same contract as the helper nested inside
    handleSize -- kept in step with it.)
    """
    if not sizeEls:
        return ''
    size = sizeEls[0]
    try:
        size = float(Evaluate('text()', size))
    except Exception:  # fix: bare 'except:' also swallowed SystemExit/KeyboardInterrupt
        m = '%s: illegal value for a size in bytes.' % size
        raise RTTInputError(m)
    if not size > 0.0:
        m = 'Must provide a file size to match > 0.0 bytes! (you gave %s)' % str(
            size)
        raise RTTInputError(m)
    return size
def __init__(self, configFile):
    """Load the top level configuration file, build the legals helper, and
    validate the configuration; raises RTTInputError when it is illegal.
    """
    self.config = rttConfiguration(configFile)
    self.legals = LegalsFactory(logger).create(
        self.config['releaseType'], self.config['branch'],
        HardCodedFactory(logger).create(self.config['releaseType'],
                                        self.config['runType']))

    # make a copy of the branch as self.branch will be overwritten
    # when handling numbered releases
    self.originalBranch = self.config['branch']
    self.isNightly = False

    # the user may request that the release is to be discovered
    # make a copy of the release here. This copy will be overwritten
    # if the release is discovered
    self.release = self.config['release']
    self.findNightly = False

    self.valid = True
    self.invalidText = ''
    self.isConfigFileLegal()
    if not self.valid:
        # fix: py2-only 'print x' statements; the single-argument
        # parenthesised form prints identically on Python 2 and 3
        print('\n\n*******************************************')
        print(' ILLEGAL TOP LEVEL CONFIGURATION FILE!')
        print('*******************************************\n\n')
        logger.critical(self.invalidText)
        raise RTTInputError(self.invalidText)

    self.processConfigData()
def readTopLevelConfigFile(confFilePath, logger):
    """Construct an RTTConfiguration from confFilePath and return it.

    Raises RTTInputError when construction fails or the resulting
    configuration is invalid. (Fixes: previously returned None on
    success instead of the configuration -- now consistent with the
    complete version of this function elsewhere in the project; also
    py2-only 'except Exception, e' syntax.)
    """
    try:
        rttConfig = RTTConfiguration(confFilePath)
    except Exception as e:
        m = 'Exception thrown while reading top level RTT Config file: %s:\nTraceBack:\n %s' % (str(e), exc2string2())
        logger.critical(m)
        raise RTTInputError(m)

    if rttConfig.configIsValid():
        msg = 'Top level configuration %s is valid ' % str(rttConfig)
        logger.debug(msg)
        return rttConfig

    msg = 'Top level configuration %s is invalid: %s' % (str(rttConfig), rttConfig.text())
    logger.critical(msg)
    raise RTTInputError(msg)
def ensureUniqueTrendID(self):
    """Raise RTTInputError if any CoolDozer trendId attribute value is
    declared by more than one athena or jobTransform job in this XML file.
    """
    # new step: no XML file can have non-unique CoolDozer trend Id values.
    docEl = self._dom.documentElement
    ath = [thing for thing in
           [el.getAttribute('trendId')
            for el in Evaluate('rtt/jobList/athena', docEl)] if thing]
    trf = [thing for thing in
           [el.getAttribute('trendId')
            for el in Evaluate('rtt/jobList/jobTransform', docEl)] if thing]

    # group identical trendIds; plain loops replace the original
    # side-effect list comprehensions
    dup = {}
    for t in ath:
        dup.setdefault(t, []).append(t)
    for t in trf:
        dup.setdefault(t, []).append(t)

    dups = [(k, v) for k, v in dup.items() if len(v) > 1]
    if dups:
        # fix: corrected 'InavlidXMLFile' typo in the error message
        m = 'InvalidXMLFile: \n%s is __NOT__ valid XML!\n' % os.path.basename(
            self.file)
        m += 'It declares non-unique trendId attributes.\n'
        for k, v in dups:
            m += '%s: declared by %d jobs\n' % (k, len(v))
        raise RTTInputError(m)
def handleSize(sizeEls):
    """Extract optional <gt>/<lt> byte thresholds from the first <size>
    element in sizeEls.

    Returns {} when sizeEls is empty, otherwise a dict with 'gt' and/or
    'lt' keys. Raises RTTInputError for non-numeric or non-positive
    values, or when gt/lt contradict each other.
    """
    def check(els):
        # return the float in the first element, '' if no element given
        if not els:
            return ''
        size = els[0]
        try:
            size = float(Evaluate('text()', size))
        except Exception:  # fix: bare 'except:' caught SystemExit/KeyboardInterrupt too
            m = '%s: illegal value for a size in bytes.' % size
            raise RTTInputError(m)
        if not size > 0.0:
            m = 'Must provide a file size to match > 0.0 bytes! (you gave %s)' % str(
                size)
            raise RTTInputError(m)
        return size

    if not sizeEls:
        return {}

    size_el = sizeEls[0]
    size_gt = check(Evaluate('gt', size_el))
    size_lt = check(Evaluate('lt', size_el))

    if (size_gt and size_lt) and (size_gt <= size_lt):
        m = 'Cannot simultaneously match files larger than X bytes _and_ less than Y bytes where X > Y.'
        raise RTTInputError(m)

    # fix: local 'dict' shadowed the builtin; early returns replace the chain.
    # NOTE: when both thresholds are absent the original returned
    # {'lt': '', 'gt': ''}; that behavior is preserved.
    if size_lt and not size_gt:
        return {'lt': size_lt}
    if size_gt and not size_lt:
        return {'gt': size_gt}
    return {'lt': size_lt, 'gt': size_gt}
def getDQ2PhysicalFiles(self, dsName, dq2files):
    """Map each DQ2 logical file (joined onto dsName) to its physical file
    via the catalog; return the list of physical files.

    Raises RTTInputError when any logical name is missing from the
    catalog or maps to an empty physical name.
    """
    physFiles = []
    # fix: loop variable 'file' shadowed the builtin; has_key() is deprecated
    for fname in dq2files:
        lfn = os.path.join(dsName, fname)
        if lfn not in self.dataSetCatalog:
            msg = 'logical data set name not known to catalog %s' % lfn
            raise RTTInputError(msg)

        physF = self.dataSetCatalog.get(lfn)
        if not physF:
            msg = 'The logical data set name:\n' + str(lfn)
            msg += '\n maps to no physical data set in the catalog!'
            raise RTTInputError(msg)
        physFiles.append(physF)

    return physFiles
def checkCriticalPath(self, path):
    """Return str(path) when the path exists; otherwise log an error and
    raise RTTInputError -- a missing critical path aborts the run."""
    if path.exists():
        return str(path)
    m = 'Critical path: %s does not exist. Aborting run ' % str(path)
    self.logger.error(m)
    raise RTTInputError(m)
def __init__(self, pathToXMLfile):
    """Parse and normalize the XML file at pathToXMLfile into self._dom.

    Any parse failure is wrapped in RTTInputError.
    """
    self.file = pathToXMLfile
    try:
        self._dom = parse(self.file)
        self._dom.normalize()
    except Exception as e:  # fix: py2-only 'except Exception, e' syntax
        m = 'Exception raised while parsing user input file:\n%s' % str(e)
        raise RTTInputError(m)
def create(self, type_):
    """Instantiate the IO utils implementation for the given type.

    Raises RTTInputError for an unknown type (likely from the top level
    cfg file); any other construction failure becomes RTTCodingError.
    """
    # fix: local name 'dict' shadowed the builtin
    utils = {'afs': AFSIOUtils, 'castor': CastorIOUtils}
    try:
        return utils[type_](self.logger)
    except KeyError:
        m = 'Unknown IO utils type %s, should be one of %s\n' % (
            type_, utils.keys())
        m += 'This illegal type was likely input through your top level cfg file.'
        raise RTTInputError(m)
    except Exception as e:  # fix: py2-only 'except Exception, e' syntax
        raise RTTCodingError(str(e))
def create(self, type_, base_path, narchives):
    """Instantiate the archive path selector for the given archive <type>.

    Raises RTTInputError for an unknown type.
    """
    # fix: local name 'dict' shadowed the builtin
    selectors = {
        'afs': AFSArchivePathSelector,
        'castor': CastorArchivePathSelector
    }
    try:
        return selectors[type_](self.logger, base_path, narchives)
    except KeyError:
        m = 'Unknown archive <type> %s, must be one of %s' % (type_,
                                                              selectors.keys())
        raise RTTInputError(m)
def findJobOptionsSearchPath(cmds, paths, logger): logger.debug('Calculating JOBOPTIONSSEARCHPATH') # cmds.extend(['source setup.sh']) cmds.extend(['echo $JOBOPTSEARCHPATH', 'echo $PATH']) nlines = -1 # do not limit the number of lines shellCom = ShellCommand(cmds, logger, nlines) reply = shellCom.getReply() #m = 'Reply on setting up CMT to find JO searchpath: %s' % str(reply) #logger.debug(m) searchPath = '' # the env variable path will be used e.g. to locate pre athena scripts PATH = '' if len(reply) > 1: searchPath = reply[-2] PATH = reply[-1] # These paths are critical to all Athena jobs. As these are most # of the RTT jobs, give up if the search path is not set. else: m = """Could not obtain env vars JOBOPTSEARCHPATH and PATH, cmds sent to seek env variables: %s Reply: %s abandoning run """ % (str(cmds), str(reply)) logger.critical(m) raise RTTInputError(m) # logger.debug('Found JOBOPTSEARCHPATH: %s' % searchPath) # logger.debug('Found PATH: %s' % PATH) def jobOpSearchPathSepChar(): if paths.isNightly: return ':' if int(paths.release.split('.')[0]) < 13: return ',' return ':' searchPaths = { 'jobOptionsSearchPath': { 'path': searchPath, 'separater': jobOpSearchPathSepChar() }, 'PATH': { 'path': PATH, 'separater': ':' } } return searchPaths
def validateCPCvalue(tagName, tagValues):
    """Check every value in tagValues against the legal set for the CPC
    tag tagName; raise RTTInputError on the first illegal value."""
    legalValues = {
        'displayClass': ['OfflineValidation', 'OnlineValidation',
                         'Development'],
        'displayProcess': ['EvGen', 'Simul', 'Digi', 'Reco', 'Core'],
        'displayComponent': ['Athena-Core', 'Det-InDet', 'Det-Muon',
                             'Det-LAr', 'Det-TileCal', 'Det-Combined',
                             'Trig-Electron', 'Trig-Photon', 'Trig-Tau',
                             'Trig-ETMiss', 'Trig-Bphys', 'Trig-Btag',
                             'Trig-Cosmics', 'Trig-Common', 'Trig-Other',
                             'Perf-Egamma', 'Perf-CombMuon',
                             'Perf-JetEtmiss', 'Perf-Tau', 'Perf-TrkVtx',
                             'Perf-Btag', 'Phys-Top', 'Phys-Higgs',
                             'Phys-Susy', 'Phys-StandardModel',
                             'Phys-Exotics', 'Phys-B', 'Phys-HeavyIon']
    }
    for value in tagValues:
        if value in legalValues[tagName]:
            continue
        raise RTTInputError(
            "Invalid CPC tag name/value: %s, %s\n" % (tagName, value)
            + "Legal values are: %s" % str(legalValues[tagName]))
def getDatasetRepeat(self, datasetTag):
    """Return the integer content of the <datasetRepeat> child of
    datasetTag, defaulting to 1 when the tag is absent.

    Raises RTTInputError (after logging) when the content is not a
    positive integer -- the message always promised this, but the code
    previously accepted 0 and negative values.
    """
    datasetRepeat = 1  # default
    repeatTags = datasetTag.getElementsByTagName('datasetRepeat')
    if not repeatTags:
        return datasetRepeat

    datasetRepeat = getText(repeatTags[0].childNodes)
    try:
        datasetRepeat = int(datasetRepeat)
        if datasetRepeat < 1:
            # enforce the documented 'non-zero positive integer' contract
            raise ValueError
    except (TypeError, ValueError):  # fix: bare 'except:' was too broad
        msg = 'The tag <datasetRepeat> must take a non-zero positive integer as content. Instead, found: ' + str(datasetRepeat) + '\n'
        msg += 'This Athena job will now be rejected.\n'
        logger.error(msg)
        raise RTTInputError(msg)

    return datasetRepeat
def lastmodtime(self, fpath, asSecsSinceEpoch=False):
    """Return the last modified time of fpath, either (default) as
    a string YY/MM/DD@HH:MM, or as seconds since the Unix epoch.

    Parses the textual listing returned by self.path_info(fpath),
    assumed to be an 'ls -l'-style line (presumably from a remote
    storage listing -- TODO confirm against path_info's implementation).
    Raises RTTInputError when fpath is not a file.
    """
    if not self.isfile(fpath):
        raise RTTInputError('%s: not a file' % fpath)
    info = self.path_info(fpath)
    # expected token layout of an ls -l line:
    # perms links owner group size month day year-or-time name
    toks = info[0].strip().split()
    fmonth = toks[5]
    fday = toks[6]
    if len(toks[7].split(':')) == 1:
        # year, not time
        fyear = toks[7]
        ftime = '?'
    else:
        fyear = '?'
        ftime = toks[7]

    if asSecsSinceEpoch:
        from datetime import datetime
        # ls gives either a year or a time, never both; fill in the
        # missing piece: current year when a time was shown, midnight
        # when only a year was shown
        if fyear == '?':
            fyear = time.localtime()[0]
        if ftime == '?':
            ftime = '00:00'
        # translate month abbreviation to its number
        fmonth = {
            'Jan': 1,
            'Feb': 2,
            'Mar': 3,
            'Apr': 4,
            'May': 5,
            'Jun': 6,
            'Jul': 7,
            'Aug': 8,
            'Sep': 9,
            'Oct': 10,
            'Nov': 11,
            'Dec': 12
        }[fmonth]
        dt = datetime(int(fyear), int(fmonth), int(fday),
                      int(ftime.split(':')[0]), int(ftime.split(':')[1]))
        return time.mktime(dt.timetuple())

    return '%s/%s/%s@%s' % (fyear, fmonth, fday, ftime)
def __init__(self, logger, base_path, narchives):
    """Prepare narchives numbered archive directories under base_path.

    Existing directories become immediately available; paths that exist
    but are not directories are fatal (RTTInputError); missing ones are
    created via self.makeDirs and added to the available list on success.
    """
    self.logger = logger
    self.io = self.createUtils()

    # partition the expected archive paths <base_path>/0 ... <base_path>/N-1
    # into those already present and those still missing
    missing = []
    present = []
    for index in range(int(narchives)):
        candidate = os.path.join(base_path, str(index))
        if self.io.exists(candidate):
            present.append(candidate)
        else:
            missing.append(candidate)

    notDirs = [p for p in present if not self.io.isdir(p)]
    if notDirs:
        message = ''
        for p in notDirs:
            message += "%s: exists but is not a directory. Cannot archive there.\n" % p
        raise RTTInputError(message)

    self.base_path = base_path
    self.available = present
    self.narchives = narchives

    if self.makeDirs(missing):
        self.available.extend(missing)
def createDirectoryDictionary(argBag, withoutReleaseName=False):
    """Sets up a dictionary containing the various paths
    concerned with results.
    e.g. <resBase>/12.4.0/kit/x86_64_slc4_gcc34_opt
         <resBase>/rel_0/bug/kit/x86_64_slc4_gcc34_opt

    Returns a dict with 'base' (the bare base path) and 'packages'
    (base extended with release/branch/runType/cmtConfig/project
    segment). Raises RTTInputError when neither topProject nor
    otherProject maps to a known path segment.
    """
    # fix: local name 'dict' shadowed the builtin
    dirs = {}
    path = RTTpath(argBag.base)
    dirs['base'] = str(path)

    if not withoutReleaseName:
        path.postpend(argBag.release)  # 12.4.0, rel_0
        # NOTE(review): isNightly branch segment is taken to belong with
        # the release name -- confirm against callers using withoutReleaseName
        if argBag.isNightly:
            path.postpend(argBag.originalBranch)  # dev or bugfix

    path.postpend(argBag.runType)    # kit or build
    path.postpend(argBag.cmtConfig)  # target CMT config

    segs = getSegs()
    if not (argBag.topProject in segs or argBag.otherProject in segs):
        msg = 'Unknown top/otherProject to path mapping \ntopProject:%s \notherProject:%s\nmapping:%s' % (
            str(argBag.topProject), str(argBag.otherProject), str(segs))
        raise RTTInputError(msg)

    if argBag.topProject in segs:
        pathSegment = segs[argBag.topProject]
        if argBag.otherProject in segs:
            pathSegment += '_' + segs[argBag.otherProject]
    else:
        pathSegment = segs[argBag.otherProject]

    path.postpend(pathSegment)
    dirs['packages'] = str(path)
    return dirs
def setupRunDir(self):
    """ copy job options to a subdirectory to <rundir>/<package>/<options> """

    class FullPathJOMaker:
        # resolves a <package>/<options>.py fragment to a full path via
        # the job options search path
        def __init__(self, searchPath, logger):
            self.locator = FileLocator(searchPath, logger)

        def __call__(self, jo):
            return self.locator.locateFile(jo)

    fpJOMaker = FullPathJOMaker(
        self.paths.searchPaths['jobOptionsSearchPath'], self.logger)

    missingJobOptions = [jo for jo in self.jobOptions if not fpJOMaker(jo)]
    if missingJobOptions:
        msg = 'The following job option files could not be found in %s\n' % self.paths.searchPaths[
            'jobOptionsSearchPath']
        for jo in missingJobOptions:
            msg += '%s\n' % jo
        raise RTTInputError(msg)

    def makeDirAndCopy(dir_, jo, logger):
        # jo of the form <package>/<options>.py
        localPackageDir = os.path.join(dir_, os.path.dirname(jo))
        mkdirIfNew(localPackageDir)
        fpJO = fpJOMaker(jo)
        logger.debug('JobOption Copy: \n%s\n to %s' % (fpJO, localPackageDir))
        shutil.copy(fpJO, localPackageDir)

    # fix: plain loop replaces a side-effect list comprehension
    for jo in self.jobOptions:
        makeDirAndCopy(self.runPath, jo, self.logger)

    def addDataSets(dir_, datasets, jo):
        # calculate the string to add to the end of the joboptions to
        # specify the data sets
        def getdataset():
            for datasetType in datasets.keys():
                for ds in datasets[datasetType]:
                    yield ds

        def clean(text):
            # convert first '+=' to '='
            return text.replace('+=', '=', 1)

        bottomJOText = ''
        athenaCommonFlagsText = 'from AthenaCommon.AthenaCommonFlags import athenaCommonFlags\n'
        for ds in getdataset():
            dataset = '["%s"]\n' % ds.physical.replace(
                '/castor', '%s/castor' % ds.castorProtocol())
            athenaCommonFlagsText += 'athenaCommonFlags.FilesInput += %s' % dataset
            bottomJOText += '%s += %s\n' % (ds.whichAthenaList(), dataset)

        bottomJOText = clean(bottomJOText)
        athenaCommonFlagsText = clean(athenaCommonFlagsText)

        if not bottomJOText:
            return  # no datasets

        # append the dataset string to the end of the job options
        # fix: use context managers so the file handles always close
        with open(os.path.join(dir_, jo)) as file_:
            contents = file_.read()

        if self.athenaCommonFlags:
            contents_ = '# The following athenaCommonFlags text was pre-pended by the RTT.\n\n'
            contents_ += athenaCommonFlagsText
            contents_ += '# End of RTT pre-pending.\n'
            contents = contents_ + contents

        contents += '\n\n'
        contents += '# Everything below this line was appended by the RTT.'
        contents += '\n\n'
        contents += bottomJOText

        # Write out the new contents
        with open(os.path.join(dir_, jo), 'w') as file_:
            file_.write(contents)

    # only add datasets for athena, not athenaMT/PT (they add data via
    # command line)
    if self.hasData() and self.isDefaultExecutable():
        for jo in self.jobOptions:
            addDataSets(self.runPath, self.datasets, jo)
# Copyright (C) 2002-2017 CERN for the benefit of the ATLAS collaboration

from exc2string2 import exc2string2
from RTTConfigParser import RTTConfiguration
from RTTSException import RTTInputError


def readTopLevelConfigFile(confFilePath, logger):
    """Construct an RTTConfiguration from confFilePath, validate it and
    return it.

    Raises RTTInputError (after logging) when construction fails or the
    configuration is invalid.
    """
    try:
        rttConfig = RTTConfiguration(confFilePath)
    except Exception as e:  # fix: py2-only 'except Exception, e' syntax
        m = 'Exception thrown while reading top level RTT Config file: %s:\nTraceBack:\n %s' % (str(e), exc2string2())
        logger.critical(m)
        raise RTTInputError(m)

    if rttConfig.configIsValid():
        msg = 'Top level configuration %s is valid ' % str(rttConfig)
        logger.debug(msg)
        return rttConfig

    msg = 'Top level configuration %s is invalid: %s' % (str(rttConfig), rttConfig.text())
    logger.critical(msg)
    raise RTTInputError(msg)
def lastmodtime(self, fpath, asSecsSinceEpoch=False):
    """Return the last modification time of fpath: seconds since the
    epoch when asSecsSinceEpoch is True, else a 'yy/mm/dd@HH:MM' string.

    Raises RTTInputError when fpath is not a regular file.
    """
    if not self.isfile(fpath):
        raise RTTInputError('%s: not a file' % fpath)
    mtime = os.lstat(fpath)[ST_MTIME]
    localStamp = time.localtime(mtime)
    if asSecsSinceEpoch:
        return time.mktime(localStamp)
    return time.strftime('%y/%m/%d@%H:%M', localStamp)
def filesize(self, fpath):
    """Return the size of fpath in bytes; raise RTTInputError when fpath
    is not a regular file."""
    if self.isfile(fpath):
        return os.path.getsize(fpath)
    raise RTTInputError('%s: not a file' % fpath)
def handleArchive(topElement, config):
    """Parse all <archive> elements under topElement and store the
    resulting Archiver objects in config['archivers'].

    Raises RTTInputError for a non-integer or <1 <nvers>, an unknown
    archive <type>, or a non-existent <base> destination.
    """
    # Example:
    #   <archive>
    #      <type>castor</type>
    #      <base>/castor/cern.ch/user/j/joebloggs/archive_area</base>
    #      <nvers>7</nvers>
    #      <condition>
    #         <size><gt>100000</gt></size>
    #         <name><is>*.root</is><isnot>root.*</isnot></name>
    #      </condition>
    #   </archive>
    #
    from createDirectoryDictionary import findSeg

    archives = Evaluate('archive', topElement)
    archiveList = []
    for archive in archives:
        numb = Evaluate('nvers/text()', archive)
        type_ = Evaluate('type/text()', archive)
        dest = Evaluate('base/text()', archive)

        try:
            numb = int(numb)
        except (TypeError, ValueError):  # fix: bare 'except:' was too broad
            msg = 'Please give an integer for the number of required archives. You gave: <nvers>%s</nvers>' % numb
            raise RTTInputError(msg)

        if numb < 1:
            msg = 'Require an archive to have >=1 versions (<nvers>), you gave %d' % numb
            raise RTTInputError(msg)

        utils = IOUtilsFactory(logger).create(type_)
        # complicated statement because we may need to check on Castor
        if not utils.exists(dest):
            # fix: corrected the misspelled user-facing message ('inexistant')
            msg = 'Archive <dest> %s: does not exist' % dest
            raise RTTInputError(msg)

        conditions = Evaluate('condition', archive)
        condsList = []
        for c in conditions:
            cList = []
            sizeDict = handleSize(Evaluate('size', c))
            nameDict = handleName(Evaluate('name', c))
            parentdirDict = handleParentDir(Evaluate('parentdir', c))
            # fix: 'if d.keys():' replaced by the idiomatic truthiness test
            if sizeDict:
                cList.append(ArchivingCondition.Size(sizeDict))
            if nameDict:
                cList.append(ArchivingCondition.Name(nameDict))
            if parentdirDict:
                cList.append(ArchivingCondition.ParentDir(parentdirDict))
            # Now finally append the list of conditions to the master list
            condsList.append(ArchivingCondition(cList))

        selector = ArchivePathSelectorFactory(logger).create(type_, dest, numb)

        prefix = config['release']
        if prefix.startswith('rel_'):
            prefix = 'nightly'
        run_frag = os.path.join(prefix, config['branch'], 'build',
                                config['cmtConfig'],
                                findSeg(config['topProject']))

        # Select which archive we should use
        # Archives have path like <base_archive_path>/<branch>/<archiveIndex>/<run_frag>/
        # where, for example:
        #    branch=15.2.X.Y,
        #    archiveIndex=N (where 0<=N<size of archive (7 days for ex.))
        #    run_frag=nightly/dev/build/i686-slc4-gcc34-opt/offline
        # RTT when it chooses an archive, dumps an empty file called time_stamp below
        # <run_frag> dir. Next run, by comparing time_stamps, the RTT can select the
        # oldest archive to re-use.
        archivepath = selector.select_archive(run_frag)

        archiveList.append(
            ArchiverFactory(logger).create(
                type_,
                archivepath.split(run_frag)[0].strip(), run_frag, condsList))

    config['archivers'] = archiveList
def filesize(self, fpath):
    """Return the size in bytes of fpath, read from column 4 of the
    listing line supplied by self.path_info; raise RTTInputError when
    fpath is not a regular file."""
    if not self.isfile(fpath):
        raise RTTInputError('%s: not a file' % fpath)
    listingLine = self.path_info(fpath)[0]
    sizeToken = listingLine.strip().split()[4]
    return int(sizeToken)