def _configureSCRAMSettings(self, config):
    scramProject = config.getList('scram project', [])
    if len(scramProject):
        self.projectArea = config.getPath('project area', '')
        if len(self.projectArea):
            raise ConfigError('Cannot specify both SCRAM project and project area')
        if len(scramProject) != 2:
            raise ConfigError('SCRAM project needs exactly 2 arguments: PROJECT VERSION')
    else:
        self.projectArea = config.getPath('project area')

    if len(self.projectArea):
        self.pattern = config.getList('area files', ['-.*', '-config', 'bin', 'lib', 'python', 'module',
            '*/data', '*.xml', '*.sql', '*.db', '*.cf[if]', '*.py', '-*/.git', '-*/.svn', '-*/CVS', '-*/work.*'])
        if os.path.exists(self.projectArea):
            utils.vprint('Project area found in: %s' % self.projectArea, -1)
        else:
            raise ConfigError('Specified config area %r does not exist!' % self.projectArea)

        scramPath = os.path.join(self.projectArea, '.SCRAM')
        # try to open it
        try:
            fp = open(os.path.join(scramPath, 'Environment'), 'r')
            self.scramEnv = utils.DictFormat().parse(fp, keyParser={None: str})
        except Exception:
            raise ConfigError('Project area file %s/.SCRAM/Environment cannot be parsed!' % self.projectArea)

        for key in ['SCRAM_PROJECTNAME', 'SCRAM_PROJECTVERSION']:
            if key not in self.scramEnv:
                raise ConfigError('Installed program in project area not recognized.')

        default_archs = lfilter(lambda x: os.path.isdir(os.path.join(scramPath, x)) and not x.startswith('.'),
            os.listdir(scramPath)) + [noDefault]
        default_arch = default_archs[0]
        self.scramArch = config.get('scram arch', default_arch)
        try:
            fp = open(os.path.join(scramPath, self.scramArch, 'Environment'), 'r')
            self.scramEnv.update(utils.DictFormat().parse(fp, keyParser={None: str}))
        except Exception:
            raise ConfigError('Project area file .SCRAM/%s/Environment cannot be parsed!' % self.scramArch)
    else:
        self.scramEnv = {
            'SCRAM_PROJECTNAME': scramProject[0],
            'SCRAM_PROJECTVERSION': scramProject[1]
        }
        self.scramArch = config.get('scram arch')

    self.scramVersion = config.get('scram version', 'scramv1')
    if self.scramEnv['SCRAM_PROJECTNAME'] != 'CMSSW':
        raise ConfigError('Project area contains no CMSSW project')
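# Hedged sketch: the parser above only assumes that .SCRAM/Environment is a
# key=value file providing at least SCRAM_PROJECTNAME and SCRAM_PROJECTVERSION.
# The version string below is an invented placeholder; parsing such a file with
# plain stdlib code (instead of utils.DictFormat) would look roughly like this:
example_scram_env = 'SCRAM_PROJECTNAME=CMSSW\nSCRAM_PROJECTVERSION=CMSSW_10_2_13\n'
scram_env = dict(line.split('=', 1) for line in example_scram_env.splitlines() if '=' in line)
assert scram_env['SCRAM_PROJECTNAME'] == 'CMSSW'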
def _parseStatusX(self, lines):
    adder = lambda a, b: utils.QM('=====' not in b and b != '\n', a + b, a)
    remap = {'destination': 'dest', 'status reason': 'reason',
        'status info for the job': 'id', 'current status': 'status',
        'submitted': 'timestamp', 'reached': 'timestamp', 'exit code': 'gridexit'}
    # Argument order follows the other accumulate() call sites: (iterable, empty, doEmit, ...)
    for section in utils.accumulate(lines, '', lambda x, buf: ('=' * 70) in x, opAdd=adder):
        data = utils.DictFormat(':').parse(str.join('', section),
            keyParser={None: lambda k: remap.get(k, str)})
        data = utils.filterDict(data, vF=lambda v: v)
        if data:
            try:
                if 'failed' in data['status']:
                    data['status'] = 'failed'
                else:
                    data['status'] = data['status'].split()[0].lower()
            except Exception:
                pass
            try:
                data['timestamp'] = int(time.mktime(parsedate(data['timestamp'])))
            except Exception:
                pass
            yield data
def commit(self, jobNum, jobObj):
    jobData = str.join('', utils.DictFormat(escapeString=True).format(jobObj.getAll()))
    tar = zipfile.ZipFile(self._dbFile, 'a', zipfile.ZIP_DEFLATED)
    try:
        tar.writestr('J%06d_T%06d' % (jobNum, self._serial), jobData)
    finally:
        tar.close()
    self._serial += 1
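# Hedged sketch of the commit/read round trip used by this ZIP-backed job
# database: each commit appends a member named J<jobnum>_T<serial>, so later
# transactions for the same job win when the archive is read back (compare
# readJobs below). Stdlib only; 'demo.zip' and the payloads are illustrative.
import zipfile

with zipfile.ZipFile('demo.zip', 'a', zipfile.ZIP_DEFLATED) as db:
    db.writestr('J%06d_T%06d' % (42, 0), 'status = "INIT"\n')
    db.writestr('J%06d_T%06d' % (42, 1), 'status = "SUBMITTED"\n')

with zipfile.ZipFile('demo.zip', 'r') as db:
    latest = {}  # keep only the highest transaction id per job
    for name in db.namelist():
        (jobNum, tid) = [int(s[1:]) for s in name.split('_', 1)]
        if tid >= latest.get(jobNum, (-1, None))[0]:
            latest[jobNum] = (tid, db.read(name))
    print(latest[42][1])  # payload of transaction T000001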
def _getSandboxFiles(self, task, monitor, smList):
    # Prepare all input files
    depList = set(ichain(imap(lambda x: x.getDependencies(), [task] + smList)))
    depPaths = lmap(lambda pkg: utils.pathShare('', pkg=pkg), os.listdir(utils.pathPKG()))
    depFiles = lmap(lambda dep: utils.resolvePath('env.%s.sh' % dep, depPaths), depList)
    taskEnv = utils.mergeDicts(imap(lambda x: x.getTaskConfig(), [monitor, task] + smList))
    taskEnv.update({'GC_DEPFILES': str.join(' ', depList),
        'GC_USERNAME': self._token.getUsername(), 'GC_WMS_NAME': self.wmsName})
    taskConfig = sorted(utils.DictFormat(escapeString=True).format(taskEnv, format='export %s%s%s\n'))
    varMappingDict = dict(izip(monitor.getTaskConfig().keys(), monitor.getTaskConfig().keys()))
    varMappingDict.update(task.getVarMapping())
    varMapping = sorted(utils.DictFormat(delimeter=' ').format(varMappingDict, format='%s%s%s\n'))

    # Resolve wildcards in task input files
    def getTaskFiles():
        for f in task.getSBInFiles():
            matched = glob.glob(f.pathAbs)
            if matched != []:
                for match in matched:
                    yield match
            else:
                yield f.pathAbs

    return lchain([monitor.getFiles(), depFiles, getTaskFiles(),
        [VirtualFile('_config.sh', taskConfig), VirtualFile('_varmap.dat', varMapping)]])
def _parse_scram_file(self, fn):
    try:
        fp = open(fn, 'r')
        try:
            return utils.DictFormat().parse(fp, keyParser={None: str})
        finally:
            fp.close()
    except Exception:
        raise ConfigError('Project area file %s cannot be parsed!' % fn)
def getEntries(self, path, metadata, events, seList, objStore):
    jobInfoPath = os.path.join(path, 'job.info')
    try:
        jobInfo = utils.DictFormat('=').parse(open(jobInfoPath))
        if jobInfo.get('exitcode') == 0:
            objStore['JOBINFO'] = jobInfo
            yield (path, metadata, events, seList, objStore)
    except Exception:
        pass
def getEntries(self, path, metadata, events, seList, objStore):
    jobInfoPath = os.path.join(path, 'job.info')
    try:
        jobInfo = utils.DictFormat('=').parse(open(jobInfoPath))
        if jobInfo.get('exitcode') == 0:
            objStore['JOBINFO'] = jobInfo
            yield (path, metadata, events, seList, objStore)
    except Exception:
        self._log.log(logging.INFO2, 'Unable to parse job info file %r', jobInfoPath)
def __init__(self, config, jobLimit=-1, jobSelector=None):
    JobDB.__init__(self, config, jobLimit, jobSelector)
    self._dbPath = config.getWorkPath('jobs')
    self._fmt = utils.DictFormat(escapeString=True)
    try:
        self._jobMap = self._readJobs(self._jobLimit)
    except Exception:
        raise JobError('Unable to read stored job information!')
    if self._jobLimit < 0 and len(self._jobMap) > 0:
        self._jobLimit = max(self._jobMap) + 1
def _writeJobConfig(self, cfgPath, jobNum, task, extras):
    try:
        jobEnv = utils.mergeDicts([task.getJobConfig(jobNum), extras])
        jobEnv['GC_ARGS'] = task.getJobArguments(jobNum).strip()
        content = utils.DictFormat(escapeString=True).format(jobEnv, format='export %s%s%s\n')
        utils.safeWrite(open(cfgPath, 'w'), content)
    except Exception:
        raise BackendError('Could not write job config data to %s.' % cfgPath)
def parseStatus(self, status):
    for section in utils.accumulate(status, '', lambda x, buf: x == '\n'):
        try:
            lines = section.replace('\n\t', '').split('\n')
            jobinfo = utils.DictFormat(' = ').parse(lines[1:])
            jobinfo['id'] = lines[0].split(':')[1].split('.')[0].strip()
            jobinfo['status'] = jobinfo.get('job_state')
            jobinfo['dest'] = 'N/A'
            if 'exec_host' in jobinfo:
                jobinfo['dest'] = '%s/%s' % (
                    jobinfo.get('exec_host').split('/')[0] + '.' + jobinfo.get('server', ''),
                    jobinfo.get('queue'))
        except Exception:
            raise BackendError('Error reading job info:\n%s' % section)
        yield jobinfo
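# Hedged sketch of the qstat -f record shape this parser expects: a
# 'Job Id: ...' header line followed by '<key> = <value>' attribute lines,
# where long values wrap onto tab-indented continuation lines (rejoined by the
# '\n\t' replace above). Host, queue and id are invented for illustration.
section = ('Job Id: 123456.pbs-server.example.org\n'
    'job_state = R\n'
    'queue = short\n'
    'exec_host = node07/3+node07\n\t/4\n')
lines = section.replace('\n\t', '').split('\n')
info = dict(line.split(' = ', 1) for line in lines[1:] if ' = ' in line)
job_id = lines[0].split(':')[1].split('.')[0].strip()
assert (job_id, info['job_state'], info['exec_host']) == ('123456', 'R', 'node07/3+node07/4')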
def _parse(self, proc):
    for section in utils.accumulate(proc.stdout.iter(self._timeout), '', lambda x, buf: x == '\n'):
        try:
            lines = section.replace('\n\t', '').split('\n')
            job_info = utils.DictFormat(' = ').parse(lines[1:])
            job_info[CheckInfo.WMSID] = lines[0].split(':')[1].split('.')[0].strip()
            job_info[CheckInfo.RAW_STATUS] = job_info.pop('job_state')
            job_info[CheckInfo.QUEUE] = job_info.pop('queue', None)
            if 'exec_host' in job_info:
                job_info[CheckInfo.WN] = job_info.pop('exec_host').split('/')[0] + '.' + job_info.get('server', '')
        except Exception:
            raise BackendError('Error reading job info:\n%s' % section)
        yield job_info
def parseJobInfo(fn):
    log = logging.getLogger('wms')
    if not os.path.exists(fn):
        return log.warning('%r does not exist.', fn)
    try:
        info_content = open(fn, 'r').read()
    except Exception:
        return log.exception('Unable to read %r!', fn)
    if not info_content:
        return log.warning('%r is empty!', fn)
    try:
        data = utils.DictFormat().parse(info_content, keyParser={None: str})
        return (data['JOBID'], data['EXITCODE'], data)
    except Exception:
        return log.warning('Unable to parse %r!', fn)
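# Hedged sketch: parseJobInfo() only relies on job.info being a key=value file
# with at least JOBID and EXITCODE entries; everything else is passed through
# in the data dictionary. The content below is an invented minimal example.
import io

example_job_info = io.StringIO('JOBID=42\nEXITCODE=0\nTIMESTAMP=1234567890\n')
data = dict(line.strip().split('=', 1) for line in example_job_info)
assert (data['JOBID'], data['EXITCODE']) == ('42', '0')  # DictFormat would also type-convert values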
def _parseProxy(self, cached=True):
    # Return cached results if requested
    if cached and self._cache:
        return self._cache
    # Call voms-proxy-info and parse results
    args = ['--all']
    if self._proxyPath:
        args.extend(['--file', self._proxyPath])
    proc = LocalProcess(self._infoExec, *args)
    (retCode, stdout, stderr) = proc.finish(timeout=10)
    if (retCode != 0) and not self._ignoreWarning:
        msg = ('voms-proxy-info output:\n%s\n%s\n' % (stdout, stderr)).replace('\n\n', '\n')
        msg += 'If job submission is still possible, you can set [access] ignore warnings = True\n'
        raise AccessTokenError(msg + 'voms-proxy-info failed with return code %d' % retCode)
    self._cache = utils.DictFormat(':').parse(stdout)
    return self._cache
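# Hedged sketch of the colon-delimited voms-proxy-info output that
# DictFormat(':') is pointed at; subject, issuer and timings are invented.
# Splitting on the first ':' keeps values like 'timeleft : 11:59:58' intact.
example_output = ('subject   : /DC=org/DC=example/CN=Jane Doe\n'
    'issuer    : /DC=org/DC=example/CN=Some CA\n'
    'timeleft  : 11:59:58\n')
proxy_info = dict((k.strip(), v.strip()) for (k, v) in
    (line.split(':', 1) for line in example_output.splitlines()))
assert proxy_info['timeleft'] == '11:59:58'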
def readJobs(self, jobLimit):
    jobMap = {}
    maxJobs = 0
    if os.path.exists(self._dbFile):
        try:
            tar = zipfile.ZipFile(self._dbFile, 'r', zipfile.ZIP_DEFLATED)
        except Exception:  # Try to recover job archive
            utils.eprint('=' * 40 + '\nStarting recovery of broken job database')
            utils.eprint(' => Answer "y" if asked "Is this a single-disk archive?"!\n' + '=' * 40)
            os.system('zip -FF %s --out %s.tmp 2> /dev/null' % (self._dbFile, self._dbFile))
            os.rename(self._dbFile, self._dbFile + '.broken')
            os.rename(self._dbFile + '.tmp', self._dbFile)
            tar = zipfile.ZipFile(self._dbFile, 'r', zipfile.ZIP_DEFLATED)
            utils.removeFiles([self._dbFile + '.broken'])
            brokenList = []
            for idx, fnTarInfo in enumerate(tar.namelist()):
                (jobNum, tid) = tuple(imap(lambda s: int(s[1:]), fnTarInfo.split('_', 1)))
                try:
                    fp = tar.open(fnTarInfo)
                    try:
                        fp.read()
                    finally:
                        fp.close()
                except Exception:
                    brokenList.append(fnTarInfo)  # mark unreadable entries for removal
            for broken in brokenList:
                os.system('zip %s -d %s' % (self._dbFile, broken))
            utils.eprint('Recover completed!')
        activity = utils.ActivityLog('Reading job transactions ...')
        maxJobs = len(tar.namelist())
        tMap = {}
        for idx, fnTarInfo in enumerate(tar.namelist()):
            (jobNum, tid) = tuple(imap(lambda s: int(s[1:]), fnTarInfo.split('_', 1)))
            if tid < tMap.get(jobNum, 0):
                continue
            data = utils.DictFormat(escapeString=True).parse(tar.open(fnTarInfo).read())
            jobMap[jobNum] = Job.loadData(fnTarInfo, data)
            tMap[jobNum] = tid
            if idx % 100 == 0:
                activity.finish()
                activity = utils.ActivityLog('Reading job transactions ... %d [%d%%]' % (idx, (100.0 * idx) / maxJobs))
    self._serial = maxJobs
    return jobMap
def makeJDL(self, jobNum, module):
    cfgPath = os.path.join(self._jobPath, 'job_%d.var' % jobNum)
    sbIn = lmap(lambda d_s_t: d_s_t[1], self._getSandboxFilesIn(module))
    sbOut = lmap(lambda d_s_t: d_s_t[2], self._getSandboxFilesOut(module))
    wcList = lfilter(lambda x: '*' in x, sbOut)
    if len(wcList):
        self._writeJobConfig(cfgPath, jobNum, module, {'GC_WC': str.join(' ', wcList)})
        sandboxOutJDL = lfilter(lambda x: x not in wcList, sbOut) + ['GC_WC.tar.gz']
    else:
        self._writeJobConfig(cfgPath, jobNum, module, {})
        sandboxOutJDL = sbOut

    # Warn about too large sandboxes
    sbSizes = lmap(os.path.getsize, sbIn)
    if sbSizes and (self._warnSBSize > 0) and (sum(sbSizes) > self._warnSBSize * 1024 * 1024):
        if not utils.getUserBool('Sandbox is very large (%d bytes) and can cause issues with the WMS! Do you want to continue?' % sum(sbSizes), False):
            sys.exit(os.EX_OK)
        self._warnSBSize = 0

    reqs = self.brokerSite.brokerAdd(module.getRequirements(jobNum), WMS.SITES)
    formatStrList = lambda strList: '{ %s }' % str.join(', ', imap(lambda x: '"%s"' % x, strList))
    contents = {
        'Executable': '"gc-run.sh"',
        'Arguments': '"%d"' % jobNum,
        'StdOutput': '"gc.stdout"',
        'StdError': '"gc.stderr"',
        'InputSandbox': formatStrList(sbIn + [cfgPath]),
        'OutputSandbox': formatStrList(sandboxOutJDL),
        'Requirements': self._formatRequirements(reqs),
        'VirtualOrganisation': '"%s"' % self.vo,
        'Rank': '-other.GlueCEStateEstimatedResponseTime',
        'RetryCount': 2
    }
    cpus = dict(reqs).get(WMS.CPUS, 1)
    if cpus > 1:
        contents['CpuNumber'] = cpus
    return utils.DictFormat(' = ').format(contents, format='%s%s%s;\n')
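# Hedged sketch of the JDL text that the '%s%s%s;\n' format with the ' = '
# delimiter produces from the contents dictionary; only a few illustrative
# keys are shown, and the ordering of a real dict may differ.
contents = {'Executable': '"gc-run.sh"', 'Arguments': '"7"', 'RetryCount': 2}
jdl = str.join('', ['%s = %s;\n' % (k, v) for (k, v) in sorted(contents.items())])
print(jdl)
# Arguments = "7";
# Executable = "gc-run.sh";
# RetryCount = 2;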
def __init__(self, path):
    activity = Activity('Reading dataset partition file')
    self._lock = GCLock()
    self._fmt = utils.DictFormat()
    self._tar = tarfile.open(path, 'r:')
    (self._cacheKey, self._cacheTar) = (None, None)

    metadata = self._fmt.parse(self._tar.extractfile('Metadata').readlines(), keyParser={None: str})
    self.maxJobs = metadata.pop('MaxJobs')
    self.classname = metadata.pop('ClassName')
    self.metadata = {'dataset': dict(ifilter(lambda k_v: not k_v[0].startswith('['), metadata.items()))}
    for (k, v) in ifilter(lambda k_v: k_v[0].startswith('['), metadata.items()):
        self.metadata.setdefault('dataset %s' % k.split(']')[0].lstrip('['), {})[k.split(']')[1].strip()] = v
    activity.finish()

    self._parserMap = {
        None: str,
        DataSplitter.NEntries: int,
        DataSplitter.Skipped: int,
        DataSplitter.DatasetID: int,
        DataSplitter.Invalid: parseBool,
        DataSplitter.Locations: lambda x: parseList(x, ','),
        DataSplitter.MetadataHeader: parseJSON,
        DataSplitter.Metadata: lambda x: parseJSON(x.strip("'"))
    }
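# Hedged sketch of the 'Metadata' member this reader expects inside the
# partition tarfile: '=' separated entries with MaxJobs and ClassName, plus
# optional '[section] key' entries that are regrouped into per-section dicts.
# All names and values below are invented placeholders.
example_metadata = ['MaxJobs = 123\n', 'ClassName = FileBoundarySplitter\n', '[dataset0] id = 42\n']
meta = {}
for line in example_metadata:
    (k, v) = line.split('=', 1)
    meta[k.strip()] = v.strip()
grouped = {'dataset': {}}
for (k, v) in meta.items():
    if k.startswith('['):  # regroup '[section] key' entries, as in the loop above
        grouped.setdefault('dataset %s' % k.split(']')[0].lstrip('['), {})[k.split(']')[1].strip()] = v
    else:
        grouped['dataset'][k] = v
assert grouped['dataset dataset0']['id'] == '42'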
def _writeJobConfig(self, cfgPath, jobNum, task, extras):
    try:
        jobEnv = utils.mergeDicts([task.getJobConfig(jobNum), extras])
        jobEnv['GC_ARGS'] = task.getJobArguments(jobNum).strip()
        # write $FILE_NAMES into file in case [wms] file names environment = False
        # This can help avoiding too large environments
        # TODO: send fileNames.job_%d.txt together with the sandbox
        if ("FILE_NAMES" in jobEnv) and (not self.fileNamesEnvironment):
            fileNamesList = os.path.join(
                self.config.getPath('sandbox path', self.config.getWorkPath('sandbox')),
                "fileNames.job_%d.txt" % jobNum)
            with open(fileNamesList, "w") as fileNamesListFile:
                fileNamesListFile.write("\n".join(shlex.split(jobEnv.pop("FILE_NAMES"))))
            jobEnv["FILE_NAMES"] = fileNamesList
        content = utils.DictFormat(escapeString=True).format(jobEnv, format='export %s%s%s\n')
        utils.safeWrite(open(cfgPath, 'w'), content)
    except Exception:
        raise BackendError('Could not write job config data to %s.' % cfgPath)
def __init__(self):
    self._fmt = utils.DictFormat()  # use a single instance to save time
def __init__(self):
    self._esc_dict = {'\\': r'\\', '\"': r'\"', '\n': r'\n'}
    self._fmt = utils.DictFormat(' = ')
def commit(self, jobNum, jobObj):
    fp = SafeFile(os.path.join(self._dbPath, 'job_%d.txt' % jobNum), 'w')
    fp.writelines(utils.DictFormat(escapeString=True).format(jobObj.getAll()))
    fp.close()
def load(cls, name):
    try:
        data = utils.DictFormat(escapeString=True).parse(open(name))
    except Exception:
        raise JobError('Invalid format in %s' % name)
    return Job.loadData(name, data)
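# Hedged sketch of the escaped 'key = "value"' lines that commit() writes and
# load() parses back: with escapeString=True, values are quoted and backslash,
# quote and newline characters are escaped (compare the _esc_dict above). The
# writer/parser pair below is a stdlib approximation of that round trip; a
# real parser must also handle overlapping escapes (e.g. a literal backslash
# followed by 'n'), which this naive replace-based unescape does not.
def format_line(key, value):
    for (char, esc) in [('\\', r'\\'), ('"', r'\"'), ('\n', r'\n')]:
        value = value.replace(char, esc)
    return '%s = "%s"\n' % (key, value)

def parse_line(line):
    (key, value) = line.split(' = ', 1)
    value = value.strip()[1:-1]  # drop surrounding quotes
    for (esc, char) in [(r'\n', '\n'), (r'\"', '"'), (r'\\', '\\')]:
        value = value.replace(esc, char)
    return (key, value)

line = format_line('status', 'FAILED\nretry')
assert parse_line(line) == ('status', 'FAILED\nretry')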