def _checkJobList(self, wms, jobList):
    if self._defect_tries:
        nDefect = len(self._defect_counter)  # Waiting list gets larger in case reported == []
        waitList = self._sample(self._defect_counter,
            nDefect - max(1, int(nDefect / 2 ** self._defect_raster)))
        jobList = lfilter(lambda x: x not in waitList, jobList)

    (change, timeoutList, reported) = JobManager._checkJobList(self, wms, jobList)
    for jobNum in reported:
        self._defect_counter.pop(jobNum, None)

    if self._defect_tries and (change is not None):
        self._defect_raster = utils.QM(reported, 1, self._defect_raster + 1)  # make 'raster' iteratively smaller
        for jobNum in ifilter(lambda x: x not in reported, jobList):
            self._defect_counter[jobNum] = self._defect_counter.get(jobNum, 0) + 1
        kickList = lfilter(lambda jobNum: self._defect_counter[jobNum] >= self._defect_tries,
            self._defect_counter)
        for jobNum in set(kickList + utils.QM((len(reported) == 0) and (len(jobList) == 1), jobList, [])):
            timeoutList.append(jobNum)
            self._defect_counter.pop(jobNum)
    return (change, timeoutList, reported)
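# Back-off sketch (assuming _sample(seq, n) draws n entries): with 8 jobs in
# _defect_counter and _defect_raster == 1, 8 - max(1, 8 // 2) == 4 jobs are
# parked on the waiting list and only the other 4 are queried. Every round
# without any reported status increments the raster, so 2, then 1 job(s)
# remain in the query set until they report or reach _defect_tries.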
def __init__(self, psource, posStart=None, posEnd=None):
    ForwardingParameterSource.__init__(self, psource)
    self._posStart = utils.QM(posStart is None, 0, posStart)
    self._posEndUser = posEnd
    self._posEnd = utils.QM(self._posEndUser is None,
        self._psource.getMaxParameters() - 1, self._posEndUser)
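# Slicing sketch (illustrative reading, not a test): constructing this
# forwarding source with posStart=10, posEnd=19 exposes parameter points
# 10..19 of the wrapped source as points 0..9; omitted bounds default to
# the full range [0, psource.getMaxParameters() - 1].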
def parseTerm(term):
    cmpValue = utils.QM(term[0] == '~', False, True)
    term = term.lstrip('~')
    selectorType = utils.QM(term[0].isdigit(), 'id', 'state')
    if ':' in term:
        selectorType = term.split(':', 1)[0]
    selector = JobSelector.createInstance(selectorType, term.split(':', 1)[-1], **kwargs)
    return lambda jobNum, jobObj: selector.__call__(jobNum, jobObj) == cmpValue
def _getDestination(self, config):
    dest = config.get('remote Dest', '@')
    user = config.get('remote User', '')
    splitDest = lmap(str.strip, dest.split('@'))
    if len(splitDest) == 1:
        return utils.QM(user, user, None), splitDest[0], None
    elif len(splitDest) == 2:
        return utils.QM(user, user, None), splitDest[0], splitDest[1]
    else:
        self._log.warning('Could not parse configuration setting "remote Dest"!')
        self._log.warning('Expected: [<sched>|<sched>@|<sched>@<collector>]')
        self._log.warning('Found: %s', dest)
        raise BackendError('Could not parse submit destination')
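# Parsing sketch (hostnames are illustrative): 'remote Dest = schedd01'
# yields (user or None, 'schedd01', None), while
# 'remote Dest = schedd01@central.example.org' yields
# (user or None, 'schedd01', 'central.example.org'); more than one '@'
# triggers the BackendError above.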
def generateDatasetName(self, key, data):
    if self._discovery:
        return GCProvider.generateDatasetName(self, key, data)
    if 'CMSSW_DATATIER' not in data:
        raise DatasetError('Incompatible data tiers in dataset: %s' % data)
    getPathComponents = lambda path: utils.QM(path, tuple(path.strip('/').split('/')), ())
    userPath = getPathComponents(self.nameDS)

    (primary, processed, tier) = (None, None, None)
    # In case of a child dataset, use the parent infos to construct new path
    for parent in data.get('PARENT_PATH', []):
        if len(userPath) == 3:
            (primary, processed, tier) = userPath
        else:
            try:
                (primary, processed, tier) = getPathComponents(parent)
            except Exception:
                pass
    if (primary is None) and (len(userPath) > 0):
        primary = userPath[0]
        userPath = userPath[1:]

    if len(userPath) == 2:
        (processed, tier) = userPath
    elif len(userPath) == 1:
        (processed, tier) = (userPath[0], data['CMSSW_DATATIER'])
    elif len(userPath) == 0:
        (processed, tier) = ('Dataset_%s' % key, data['CMSSW_DATATIER'])

    rawDS = '/%s/%s/%s' % (primary, processed, tier)
    if None in (primary, processed, tier):
        raise DatasetError('Invalid dataset name supplied: %r\nresulting in %s' % (self.nameDS, rawDS))
    return utils.replaceDict(rawDS, data)
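# Naming sketch (dataset names are illustrative placeholders): with
# nameDS = '/MyPrimary/MyProcessed/USER' all three components are fixed by
# the user path -> '/MyPrimary/MyProcessed/USER'; with nameDS = '/MyPrimary'
# and data['CMSSW_DATATIER'] = 'USER' the result falls back to
# '/MyPrimary/Dataset_<key>/USER'.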
def _parseStatusX(self, lines):
    adder = lambda a, b: utils.QM('=====' not in b and b != '\n', a + b, a)
    remap = {'destination': 'dest', 'status reason': 'reason',
        'status info for the job': 'id', 'current status': 'status',
        'submitted': 'timestamp', 'reached': 'timestamp', 'exit code': 'gridexit'}
    for section in utils.accumulate(lines, lambda x, buf: ('=' * 70) in x, '', opAdd=adder):
        data = utils.DictFormat(':').parse(str.join('', section),
            keyParser={None: lambda k: remap.get(k, str)})
        data = utils.filterDict(data, vF=lambda v: v)
        if data:
            try:
                if 'failed' in data['status']:
                    data['status'] = 'failed'
                else:
                    data['status'] = data['status'].split()[0].lower()
            except Exception:
                pass
            try:
                data['timestamp'] = int(time.mktime(parsedate(data['timestamp'])))
            except Exception:
                pass
            yield data
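# Input sketch (assumed glite-style status output; exact layout may differ):
# sections are delimited by rows of 70 '=' characters and contain lines like
#   Status info for the Job : https://lb.example.org:9000/abc123
#   Current Status:     Done (Success)
#   Exit code:          0
# which the remap table above turns into roughly
# {'id': 'https://...', 'status': 'done', 'gridexit': 0}.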
def _resyncPartition(self, modSI, jobNum, oldBlock, newBlock, filesMissing, filesMatched, doExpandOutside):
    if newBlock:  # copy new location information
        modSI[DataSplitter.Locations] = newBlock.get(DataProvider.Locations)

    # Determine old size infos and get started
    def search_url(url):
        return fast_search(oldBlock[DataProvider.FileList], itemgetter(DataProvider.URL), url)
    sizeInfo = lmap(lambda url: search_url(url)[DataProvider.NEntries], modSI[DataSplitter.FileList])
    metaIdxLookup = self._resyncGetMatchingMetadata(oldBlock, newBlock)

    extended = utils.QM(doExpandOutside, [], None)
    old_entries = modSI[DataSplitter.NEntries]
    (procMode, newMetadata) = self._resyncFiles(modSI, jobNum, sizeInfo,
        filesMissing, filesMatched, newBlock, metaIdxLookup, extended)
    # Disable invalid / invalidated partitions
    if (len(modSI[DataSplitter.FileList]) == 0) or (old_entries * modSI[DataSplitter.NEntries] <= 0):
        procMode = ResyncMode.disable
    if procMode == ResyncMode.disable:
        modSI[DataSplitter.Invalid] = True
        return (ResyncMode.disable, [])  # Discard extensions

    # Update metadata
    if DataSplitter.Metadata in modSI:
        modSI.pop(DataSplitter.MetadataHeader)
        modSI.pop(DataSplitter.Metadata)
    if newMetadata:
        modSI[DataSplitter.MetadataHeader] = newBlock.get(DataProvider.Metadata)
        modSI[DataSplitter.Metadata] = newMetadata
    return (procMode, extended or [])
def getEntries(self, path, metadata, events, seList, objStore):
    datacachePath = os.path.join(objStore.get('GC_WORKDIR', ''), 'datacache.dat')
    source = utils.QM((self._source == '') and os.path.exists(datacachePath),
        datacachePath, self._source)
    if source and (source not in self._lfnMap):
        pSource = DataProvider.createInstance('ListProvider', createConfig(), source)
        for (n, fl) in imap(lambda b: (b[DataProvider.Dataset], b[DataProvider.FileList]), pSource.getBlocks()):
            self._lfnMap.setdefault(source, {}).update(
                dict(imap(lambda fi: (self.lfnTrans(fi[DataProvider.URL]), n), fl)))
    pList = set()
    for key in ifilter(lambda k: k in metadata, self._parentKeys):
        pList.update(imap(lambda pPath: self._lfnMap.get(source, {}).get(self.lfnTrans(pPath)),
            metadata[key]))
    metadata['PARENT_PATH'] = lfilter(identity, pList)
    yield (path, metadata, events, seList, objStore)
def _parseFile(self, iterator):
    block = None
    for idx, line in enumerate(iterator):
        try:
            # Found start of block:
            line = line.strip()
            if line.startswith(';'):
                continue
            elif line.startswith('['):
                if block:
                    yield self._finishBlock(block)
                block = self._createBlock(line)
            elif line != '':
                tmp = lmap(str.strip, utils.QM('[' in line,
                    line.split(' = ', 1), rsplit(line, '=', 1)))
                if len(tmp) != 2:
                    raise DatasetError('Malformed entry in dataset file:\n%s' % line)
                key, value = tmp
                handlerInfo = self._handleEntry.get(key.lower(), None)
                if handlerInfo:
                    (prop, parser, msg) = handlerInfo
                    block[prop] = try_apply(value, parser, msg)
                else:
                    block[DataProvider.FileList].append(self._parseEntry(block, key, value))
        except Exception:
            raise DatasetError('Unable to parse %s:%d\n\t%s' % (repr(self._filename), idx, repr(line)))
    if block:
        yield self._finishBlock(block)
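# File format sketch (illustrative entries; keys depend on _handleEntry):
# lines starting with ';' are comments, '[...]' headers open a new block,
# and the remaining 'key = value' lines are either block properties or
# file entries mapping a URL to its number of entries:
#   [/PRIVATE/MyDataset#block0]
#   nickname = mydata
#   /store/file0.root = 1000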
def _resyncChangedFile(self, procMode, idx, modSI, jobNum, sizeInfo, newBlock,
        extended, oldFI, newFI, newMetadata, metaIdxLookup):
    if DataProvider.Metadata in newFI:
        procMode = self._resyncChangedFileMetadata(oldFI, newFI, metaIdxLookup, newMetadata, procMode)
    if oldFI[DataProvider.NEntries] == newFI[DataProvider.NEntries]:
        return (procMode, idx + 1)  # go to next file
    oldEvts = modSI[DataSplitter.NEntries]
    oldSkip = modSI.get(DataSplitter.Skipped)

    if self._resyncChangedFileEntries(idx, modSI, jobNum, sizeInfo, oldFI, newFI, newBlock, extended):
        idx += 1  # True => file index should be increased

    mode = utils.QM(oldFI[DataProvider.NEntries] < newFI[DataProvider.NEntries],
        self._mode_expanded, self._mode_shrunken)
    if mode == ResyncMode.changed:
        mode = ResyncMode.ignore
        if (oldEvts != modSI[DataSplitter.NEntries]) or (oldSkip != modSI.get(DataSplitter.Skipped)):
            mode = ResyncMode.complete
    procMode = min(procMode, mode)
    return (procMode, idx)  # go to next file
def retrieve(self, wms):
    change = False
    jobList = self._sample(self.jobDB.getJobs(ClassSelector(JobClass.DONE)),
        utils.QM(self._chunks_enabled, self._chunks_retrieve, -1))

    for (jobNum, retCode, data, outputdir) in wms.retrieveJobs(self._wmsArgs(jobList)):
        jobObj = self.jobDB.get(jobNum)
        if jobObj is None:
            continue

        if retCode == 0:
            state = Job.SUCCESS
        elif retCode == 107:  # set ABORTED instead of FAILED for errorcode 107
            state = Job.ABORTED
        else:
            state = Job.FAILED

        if state == Job.SUCCESS:
            if not self._outputProcessor.process(outputdir):
                retCode = 108
                state = Job.FAILED

        if state != jobObj.state:
            change = True
            jobObj.set('retcode', retCode)
            jobObj.set('runtime', data.get('TIME', -1))
            self._update(jobObj, jobNum, state)
            self._eventhandler.onJobOutput(wms, jobObj, jobNum, retCode)

        if utils.abort():
            return False

    return change
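# Exit code mapping used above (restating the source): 0 -> SUCCESS,
# 107 -> ABORTED, anything else -> FAILED; a nominally successful job whose
# output directory fails post-processing is downgraded to FAILED with
# retcode 108.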
def _submitJob(self, jobNum, module):
    activity = utils.ActivityLog('submitting jobs')

    try:
        sandbox = tempfile.mkdtemp('', '%s.%04d.' % (module.taskID, jobNum), self.sandPath)
    except Exception:
        # refer to the base path here - 'sandbox' is unassigned if mkdtemp failed
        raise BackendError('Unable to create sandbox directory in "%s"!' % self.sandPath)
    sbPrefix = sandbox.replace(self.sandPath, '').lstrip('/')

    def translateTarget(d, s, t):
        return (d, s, os.path.join(sbPrefix, t))
    self.smSBIn.doTransfer(ismap(translateTarget, self._getSandboxFilesIn(module)))

    self._writeJobConfig(os.path.join(sandbox, '_jobconfig.sh'), jobNum, module, {
        'GC_SANDBOX': sandbox, 'GC_SCRATCH_SEARCH': str.join(' ', self.scratchPath)})

    reqs = self.brokerSite.brokerAdd(module.getRequirements(jobNum), WMS.SITES)
    reqs = dict(self.brokerQueue.brokerAdd(reqs, WMS.QUEUES))
    if (self.memory > 0) and (reqs.get(WMS.MEMORY, 0) < self.memory):
        reqs[WMS.MEMORY] = self.memory  # local jobs need higher (more realistic) memory requirements

    (stdout, stderr) = (os.path.join(sandbox, 'gc.stdout'), os.path.join(sandbox, 'gc.stderr'))
    jobName = module.getDescription(jobNum).jobName
    proc = utils.LoggedProcess(self.submitExec, '%s %s "%s" %s' % (self.submitOpts,
        self.getSubmitArguments(jobNum, jobName, reqs, sandbox, stdout, stderr),
        utils.pathShare('gc-local.sh'), self.getJobArguments(jobNum, sandbox)))
    retCode = proc.wait()
    wmsIdText = proc.getOutput().strip().strip('\n')
    try:
        wmsId = self.parseSubmitOutput(wmsIdText)
    except Exception:
        wmsId = None

    del activity

    if retCode != 0:
        self._log.warning('%s failed:', self.submitExec)
    elif wmsId is None:
        self._log.warning('%s did not yield job id:\n%s', self.submitExec, wmsIdText)
    if wmsId:
        wmsId = self._createId(wmsId)
        open(os.path.join(sandbox, wmsId), 'w').close()  # touch marker file named after the id
    else:
        proc.logError(self.errorLog)
    return (jobNum, utils.QM(wmsId, wmsId, None), {'sandbox': sandbox})
def check(self, wms):
    jobList = self._sample(self.jobDB.getJobs(ClassSelector(JobClass.PROCESSING)),
        utils.QM(self._chunks_enabled, self._chunks_check, -1))

    # Check jobs in the joblist and return changes, timeouts and successfully reported jobs
    (change, timeoutList, reported) = self._checkJobList(wms, jobList)
    unreported = len(jobList) - len(reported)
    if unreported > 0:
        self._log_user_time.critical('%d job(s) did not report their status!', unreported)
    if change is None:  # neither True nor False => abort
        return False

    # Cancel jobs which took too long
    if len(timeoutList):
        change = True
        self._log_user.warning('Timeout for the following jobs:')
        self.cancel(wms, timeoutList, interactive=False, showJobs=True)

    # Process task interventions
    self._processIntervention(wms, self._task.getIntervention())

    # Quit when all jobs are finished
    if self.jobDB.getJobsN(ClassSelector(JobClass.ENDSTATE)) == len(self.jobDB):
        self._logDisabledJobs()
        self._eventhandler.onTaskFinish(len(self.jobDB))
        if self._task.canFinish():
            self._log_user_time.info('Task successfully completed. Quitting grid-control!')
            utils.abort(True)

    return change
def getChoice(self, option, choices, default=noDefault,
        obj2str=str.__str__, str2obj=str, def2obj=None, **kwargs):
    default_str = self._getDefaultStr(default, def2obj, obj2str)
    capDefault = lambda value: utils.QM(value == default_str, value.upper(), value.lower())
    choices_str = str.join('/', imap(capDefault, imap(obj2str, choices)))
    if (default != noDefault) and (default not in choices):
        raise APIError('Invalid default choice "%s" [%s]!' % (default, choices_str))
    if 'interactive_msg' in kwargs:
        kwargs['interactive_msg'] += (' [%s]' % choices_str)

    def checked_str2obj(value):
        obj = str2obj(value)
        if obj not in choices:
            raise ConfigError('Invalid choice "%s" [%s]!' % (value, choices_str))
        return obj
    return self._getInternal('choice', obj2str, checked_str2obj, def2obj,
        option, default, interactive_msg_append_default=False, **kwargs)
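# Usage sketch (hypothetical option name): getChoice('report style',
# ['basic', 'color'], default='basic') renders the choice list as
# 'BASIC/color' - capDefault upper-cases only the default - and
# checked_str2obj rejects any configured value outside the list.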
def __init__(self, arg, **kwargs):
    idList = imap(lambda x: x.split('-'), arg.split(','))
    try:
        parse = lambda x: utils.QM(x != '', int, str)
        self._ranges = lmap(lambda x: (parse(x[0])(x[0]), parse(x[-1])(x[-1])), idList)
    except Exception:
        raise UserError('Job identifiers must be integers or ranges.')
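# Selector sketch: the argument '2,5-8,10-' becomes
# [(2, 2), (5, 8), (10, '')] - single ids collapse into one-element ranges,
# and an empty bound from a trailing '-' stays '' to mark an open end.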
def getTaskConfig(self):
    data = SCRAMTask.getTaskConfig(self)
    data.update(dict(self.searchLoc))
    data['GZIP_OUT'] = utils.QM(self.gzipOut, 'yes', 'no')
    data['SE_RUNTIME'] = utils.QM(self._projectAreaTarballSE, 'yes', 'no')
    data['HAS_RUNTIME'] = utils.QM(self._projectArea, 'yes', 'no')
    data['CMSSW_EXEC'] = 'cmsRun'
    data['CMSSW_CONFIG'] = str.join(' ', imap(os.path.basename, self.configFiles))
    data['CMSSW_OLD_RELEASETOP'] = self._oldReleaseTop
    if self.prolog.isActive():
        data['CMSSW_PROLOG_EXEC'] = self.prolog.getCommand()
        data['CMSSW_PROLOG_SB_IN_FILES'] = str.join(' ', imap(lambda x: x.pathRel, self.prolog.getSBInFiles()))
        data['CMSSW_PROLOG_ARGS'] = self.prolog.getArguments()
    if self.epilog.isActive():
        data['CMSSW_EPILOG_EXEC'] = self.epilog.getCommand()
        data['CMSSW_EPILOG_SB_IN_FILES'] = str.join(' ', imap(lambda x: x.pathRel, self.epilog.getSBInFiles()))
        data['CMSSW_EPILOG_ARGS'] = self.epilog.getArguments()
    return data
def _getConfigFiles(self, config):
    cfgDefault = utils.QM(self.prolog.isActive() or self.epilog.isActive(), [], noDefault)
    for cfgFile in config.getPaths('config file', cfgDefault, mustExist=False):
        if not os.path.exists(cfgFile):
            raise ConfigError('Config file %r not found.' % cfgFile)
        yield cfgFile
def _initPoolInterfaces(self, config):
    # check submission type
    self.remoteType = config.getEnum('remote Type', PoolType, PoolType.LOCAL)
    self.debugOut("Selected pool type: %s" % PoolType.enum2str(self.remoteType))

    # get remote destination features
    user, sched, collector = self._getDestination(config)
    nice_user = user or "<local default>"
    nice_sched = sched or "<local default>"
    nice_collector = collector or "<local default>"
    self.debugOut("Destination:\n")
    self.debugOut("\tuser:%s @ sched:%s via collector:%s" % (nice_user, nice_sched, nice_collector))

    # prepare commands appropriate for pool type
    if self.remoteType == PoolType.LOCAL or self.remoteType == PoolType.SPOOL:
        self.user = user
        self.Pool = ProcessHandler.createInstance("LocalProcessHandler")
        # local and remote use condor tools installed locally - get them
        self.submitExec = utils.resolveInstallPath('condor_submit')
        self.historyExec = utils.resolveInstallPath('condor_history')  # completed/failed jobs are stored outside the queue
        self.cancelExec = utils.resolveInstallPath('condor_rm')
        self.transferExec = utils.resolveInstallPath('condor_transfer_data')  # submission might spool to another schedd and need to fetch output
        self.configValExec = utils.resolveInstallPath('condor_config_val')  # querying pool settings lets the service adjust to them
        if self.remoteType == PoolType.SPOOL:
            # remote requires adding instructions for accessing remote pool
            self.submitExec += " %s %s" % (utils.QM(sched, "-remote %s" % sched, ""),
                utils.QM(collector, "-pool %s" % collector, ""))
            self.historyExec = "false"  # disabled for this type
            self.cancelExec += " %s %s" % (utils.QM(sched, "-name %s" % sched, ""),
                utils.QM(collector, "-pool %s" % collector, ""))
            self.transferExec += " %s %s" % (utils.QM(sched, "-name %s" % sched, ""),
                utils.QM(collector, "-pool %s" % collector, ""))
    else:
        # ssh type instructions are passed to the remote host via regular ssh/gsissh
        host = "%s%s" % (utils.QM(user, "%s@" % user, ""), sched)
        if self.remoteType == PoolType.SSH:
            self.Pool = ProcessHandler.createInstance("SSHProcessHandler",
                remoteHost=host, sshLink=config.getWorkPath(".ssh", self._name + host))
        else:
            self.Pool = ProcessHandler.createInstance("GSISSHProcessHandler",
                remoteHost=host, sshLink=config.getWorkPath(".gsissh", self._name + host))
        # ssh type instructions rely on commands being available on remote pool
        self.submitExec = 'condor_submit'
        self.historyExec = 'condor_history'
        self.cancelExec = 'condor_rm'
        self.transferExec = "false"  # disabled for this type
        self.configValExec = 'condor_config_val'
        # test availability of commands
        testProcess = self.Pool.LoggedExecute("condor_version")
        self.debugOut("*** Testing remote connectivity:\n%s" % testProcess.cmd)
        if testProcess.wait() != 0:
            testProcess.logError(self.errorLog)
            raise BackendError("Failed to access remote Condor tools! The pool you are submitting to is very likely not configured properly.")
        # get initial workdir on remote pool
        remote_workdir = config.get("remote workdir", '')
        if remote_workdir:
            uName = self.Pool.LoggedExecute("whoami").getOutput().strip()
            self.poolWorkDir = os.path.join(remote_workdir, uName)
            pwdProcess = self.Pool.LoggedExecute("mkdir -p %s" % self.poolWorkDir)
        else:
            pwdProcess = self.Pool.LoggedExecute("pwd")
            self.poolWorkDir = pwdProcess.getOutput().strip()
        if pwdProcess.wait() != 0:
            self._log.critical("Code: %d\nOutput Message: %s\nError Message: %s",
                pwdProcess.wait(), pwdProcess.getOutput(), pwdProcess.getError())
            raise BackendError("Failed to determine, create or verify base work directory on remote host")
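# Command sketch: for PoolType.SPOOL with sched='schedd01' and
# collector='collector.example.org' (illustrative names), submitExec grows to
#   condor_submit -remote schedd01 -pool collector.example.org
# while cancelExec/transferExec get the analogous '-name'/'-pool' flags.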
def getTaskConfig(self):
    data = DataTask.getTaskConfig(self)
    data.update(dict(self.searchLoc))
    data['CMSSW_OLD_RELEASETOP'] = self.scramEnv.get('RELEASETOP', None)
    data['SCRAM_ARCH'] = self.scramArch
    data['SCRAM_VERSION'] = self.scramVersion
    data['SCRAM_PROJECTVERSION'] = self.scramEnv['SCRAM_PROJECTVERSION']
    data['GZIP_OUT'] = utils.QM(self.gzipOut, 'yes', 'no')
    data['SE_RUNTIME'] = utils.QM(self._projectAreaTarballSE, 'yes', 'no')
    data['HAS_RUNTIME'] = utils.QM(len(self.projectArea), 'yes', 'no')
    data['CMSSW_CONFIG'] = str.join(' ', imap(os.path.basename, self.configFiles))
    if self.prolog.isActive():
        data['CMSSW_PROLOG_EXEC'] = self.prolog.getCommand()
        data['CMSSW_PROLOG_SB_IN_FILES'] = str.join(' ', imap(lambda x: x.pathRel, self.prolog.getSBInFiles()))
        data['CMSSW_PROLOG_ARGS'] = self.prolog.getArguments()
    if self.epilog.isActive():
        data['CMSSW_EPILOG_EXEC'] = self.epilog.getCommand()
        data['CMSSW_EPILOG_SB_IN_FILES'] = str.join(' ', imap(lambda x: x.pathRel, self.epilog.getSBInFiles()))
        data['CMSSW_EPILOG_ARGS'] = self.epilog.getArguments()
    return data
def combineSources(self, PSourceClass, args):
    repeat = reduce(lambda a, b: a * b, ifilter(lambda expr: isinstance(expr, int), args), 1)
    args = lfilter(lambda expr: not isinstance(expr, int), args)
    if len(args) > 1:
        result = PSourceClass(*args)
    elif len(args) > 0:
        result = args[0]
    else:
        return utils.QM(repeat > 1, [repeat], [])
    if repeat > 1:
        return [RepeatParameterSource(result, repeat)]
    return [result]
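# Combination sketch: integer factors in args multiply into one repeat count,
# e.g. [2, srcA, 3] -> [RepeatParameterSource(srcA, 6)] (srcA being some
# parameter source); with only integers left over, the bare count is returned
# as [repeat] so the caller can still apply it.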
def resync(self):
    (result_redo, result_disable, result_sizeChange) = ParameterSource.resync(self)
    (psource_redo, psource_disable, _) = self._psource.resync()  # size change is irrelevant if outside of range
    for pNum in psource_redo:
        if (pNum >= self._posStart) and (pNum <= self._posEnd):
            result_redo.add(pNum - self._posStart)
    for pNum in psource_disable:
        if (pNum >= self._posStart) and (pNum <= self._posEnd):
            result_disable.add(pNum - self._posStart)
    oldPosEnd = self._posEnd
    self._posEnd = utils.QM(self._posEndUser is None,
        self._psource.getMaxParameters() - 1, self._posEndUser)
    return (result_redo, result_disable, result_sizeChange or (oldPosEnd != self._posEnd))
def _getSubmissionJobs(self, maxsample):
    # Get list of submittable jobs
    readyList = self.jobDB.getJobs(ClassSelector(JobClass.READY))
    retryOK = readyList
    defaultJob = Job()
    if self._job_retries >= 0:
        retryOK = lfilter(lambda x: self.jobDB.get(x, defaultJob).attempt - 1 < self._job_retries, readyList)
    modOK = lfilter(self._task.canSubmit, readyList)
    jobList = set.intersection(set(retryOK), set(modOK))

    if self._showBlocker and readyList and not jobList:  # No submission but ready jobs
        err = []
        err += utils.QM((len(retryOK) > 0) and (len(modOK) == 0), [], ['have hit their maximum number of retries'])
        err += utils.QM((len(retryOK) == 0) and (len(modOK) > 0), [], ['are vetoed by the task module'])
        self._log_user_time.warning('All remaining jobs %s!',
            str.join(utils.QM(retryOK or modOK, ' or ', ' and '), err))
    self._showBlocker = not (len(readyList) > 0 and len(jobList) == 0)

    # Determine number of jobs to submit
    submit = len(jobList)
    if self._njobs_inqueue > 0:
        submit = min(submit, self._njobs_inqueue - self.jobDB.getJobsN(ClassSelector(JobClass.ATWMS)))
    if self._njobs_inflight > 0:
        submit = min(submit, self._njobs_inflight - self.jobDB.getJobsN(ClassSelector(JobClass.PROCESSING)))
    if self._chunks_enabled and (maxsample > 0):
        submit = min(submit, maxsample)
    submit = max(submit, 0)

    if self._do_shuffle:
        return self._sample(jobList, submit)
    return sorted(jobList)[:submit]
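# Message sketch: with no retryable jobs left but no task veto, the blocker
# warning reads 'All remaining jobs have hit their maximum number of retries!';
# if both filters block every job, the two reasons are joined with ' and '.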
def parseTerm(term):
    negate = (term[0] == '~')
    term = term.lstrip('~')
    selectorType = utils.QM(term[0].isdigit(), 'id', 'state')
    if ':' in term:
        selectorType = term.split(':', 1)[0]
    selector = JobSelector.createInstance(selectorType, term.split(':', 1)[-1], **kwargs)
    if negate:
        return lambda jobNum, jobObj: not selector.__call__(jobNum, jobObj)
    return selector.__call__
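# Term sketch: '5-8' starts with a digit -> 'id' selector; 'RUNNING' ->
# 'state' selector; an explicit prefix such as 'site:...' (assuming such a
# selector type is registered) names the type directly; a leading '~'
# negates the result, e.g. '~state:FAILED'.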
def frange(start, end=None, num=None, steps=None, format='%g'):
    if (end is None) and (num is None):
        raise ConfigError('frange: No exit condition!')
    if (end is not None) and (num is not None) and (steps is not None):
        raise ConfigError('frange: Overdetermined parameters!')
    if (end is not None) and (num is not None) and (steps is None):
        steps = float(end - start) / (num - 1)  # float division also under python2
        num -= 1
    if (end is not None) and (num is None):
        steps = steps or 1
        num = int(1 + (end - start) / steps)
    # lmap instead of imap - an iterator cannot be concatenated with the [end] list
    result = lmap(lambda i: start + (steps or 1) * i, irange(num)) + utils.QM(end, [end], [])
    return lmap(lambda x: format % x, result)
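# Value sketch: frange(0, end=1, num=5) -> ['0', '0.25', '0.5', '0.75', '1']
# (steps derived from num, end appended explicitly); frange(1, num=3, steps=2)
# -> ['1', '3', '5'] (no end given, so nothing is appended).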
def __init__(self, config, name, optDefault, optPrefix, varPrefix):
    StorageManager.__init__(self, config, name, optDefault, optPrefix, varPrefix)
    normSEPath = lambda x: utils.QM(x[0] == '/', 'dir:///%s' % x.lstrip('/'), x)
    self.defPaths = config.getList('%s path' % optDefault, [],
        onValid=validNoVar(config), parseItem=normSEPath)
    self.smPaths = config.getList('%s path' % optPrefix, self.defPaths,
        onValid=validNoVar(config), parseItem=normSEPath)
    self.smFiles = config.getList('%s files' % optPrefix, [])
    self.smPattern = config.get('%s pattern' % optPrefix, '@X@')
    self.smTimeout = config.getTime('%s timeout' % optPrefix, 2 * 60 * 60)
    self.smForce = config.getBool('%s force' % optPrefix, True)
def _submitJob(self, jobNum, module):
    fd, jdl = tempfile.mkstemp('.jdl')
    try:
        jdlData = self.makeJDL(jobNum, module)
        utils.safeWrite(os.fdopen(fd, 'w'), jdlData)
    except Exception:
        utils.removeFiles([jdl])
        raise BackendError('Could not write jdl data to %s.' % jdl)

    try:
        submitArgs = []
        for key_value in utils.filterDict(self._submitParams, vF=lambda v: v).items():
            submitArgs.extend(key_value)
        submitArgs.append(jdl)

        activity = Activity('submitting job %d' % jobNum)
        proc = LocalProcess(self._submitExec, '--nomsg', '--noint', '--logfile', '/dev/stderr', *submitArgs)

        gcID = None
        for line in ifilter(lambda x: x.startswith('http'), imap(str.strip, proc.stdout.iter(timeout=60))):
            gcID = line
        retCode = proc.status(timeout=0, terminate=True)

        activity.finish()

        if (retCode != 0) or (gcID is None):
            if self.explainError(proc, retCode):
                pass
            else:
                self._log.log_process(proc, files={'jdl': SafeFile(jdl).read()})
    finally:
        utils.removeFiles([jdl])
    return (jobNum, utils.QM(gcID, self._createId(gcID), None), {'jdl': str.join('', jdlData)})
def __init__(self, config, name):
    PBSGECommon.__init__(self, config, name)
    self._nodesExec = utils.resolveInstallPath('pbsnodes')
    self._server = config.get('server', '', onChange=None)
    self._fqid = lambda wmsId: utils.QM(self._server, '%s.%s' % (wmsId, self._server), wmsId)
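# Id sketch: with 'server = pbs-master.example.org' (hypothetical host), the
# raw id '1234' becomes '1234.pbs-master.example.org'; without a configured
# server, _fqid passes the id through unchanged.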
def se_copy(src, dst, force=True, tmp=''):
    cmd = 'print_and_eval "url_copy_single%s"' % utils.QM(force, '_force', '')
    return se_runcmd(cmd, {'GC_KEEPTMP': tmp}, src, dst)
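# Usage sketch (paths are illustrative): se_copy('/tmp/report.txt',
# 'dir:///data/out') runs the gc-run.lib helper url_copy_single_force via
# bash, with both arguments normalized to file:// URLs by se_runcmd below;
# force=False presumably selects the non-overwriting url_copy_single variant.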
import os

from grid_control import utils
from grid_control.config import ConfigError, validNoVar
from grid_control.gc_plugin import NamedPlugin
from grid_control.utils.activity import Activity
from grid_control.utils.process_base import LocalProcess
from hpfwk import NestedException
from python_compat import imap, set


class StorageError(NestedException):
    pass


# All functions use url_* functions from gc-run.lib (just like the job did...)
ensurePrefix = lambda fn: utils.QM('://' in fn, fn,
    'file:////%s' % os.path.abspath(fn).lstrip('/'))


def se_runcmd(cmd, varDict, *urls):
    runLib = utils.pathShare('gc-run.lib')
    args = str.join(' ', imap(lambda x: '"%s"' % ensurePrefix(x).replace('dir://', 'file://'), urls))
    varString = str.join(' ', imap(lambda x: 'export %s="%s";' % (x, varDict[x]), varDict))
    return LocalProcess('/bin/bash', '-c', '. %s || exit 99; %s %s %s' % (runLib, varString, cmd, args))
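# Prefix sketch: ensurePrefix('/tmp/a.txt') -> 'file:////tmp/a.txt' (absolute
# path with its leading slashes folded into the four-slash file URL form used
# by gc-run.lib); anything already carrying '://' passes through untouched.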
def getSBOutFiles(self):
    if not self.configFiles:
        return SCRAMTask.getSBOutFiles(self)
    return SCRAMTask.getSBOutFiles(self) + utils.QM(self.gzipOut, ['cmssw.log.gz'], []) + ['cmssw.dbs.tar.gz']