def _submitJob(self, jobNum, module):
	"""Submit a single job to the local batch system.

	Creates a per-job sandbox directory, stages the input sandbox files,
	writes the job configuration script and invokes the submission
	executable.

	Returns a tuple (jobNum, gcID or None, {'sandbox': sandboxPath});
	gcID is None when submission failed or no job id could be parsed.
	Raises BackendError when the sandbox directory cannot be created.
	"""
	activity = Activity('submitting job %d' % jobNum)
	try:
		sandbox = tempfile.mkdtemp('', '%s.%04d.' % (module.taskID, jobNum),
			self._sandbox_helper.get_path())
	except Exception:
		# BUGFIX: 'sandbox' is unbound if mkdtemp raises - reporting it here
		# used to trigger a NameError that masked the real failure; report
		# the sandbox base path instead.
		raise BackendError('Unable to create sandbox directory in "%s"!' %
			self._sandbox_helper.get_path())
	sbPrefix = sandbox.replace(self._sandbox_helper.get_path(), '').lstrip('/')

	def translateTarget(d, s, t):
		# Redirect transfer targets into the job's sandbox subdirectory
		return (d, s, os.path.join(sbPrefix, t))
	self.smSBIn.doTransfer(ismap(translateTarget, self._getSandboxFilesIn(module)))

	self._writeJobConfig(os.path.join(sandbox, '_jobconfig.sh'), jobNum, module, {
		'GC_SANDBOX': sandbox, 'GC_SCRATCH_SEARCH': str.join(' ', self.scratchPath)})
	reqs = self.brokerSite.brokerAdd(module.getRequirements(jobNum), WMS.SITES)
	reqs = dict(self.brokerQueue.brokerAdd(reqs, WMS.QUEUES))
	if (self.memory > 0) and (reqs.get(WMS.MEMORY, 0) < self.memory):
		reqs[WMS.MEMORY] = self.memory  # local jobs need higher (more realistic) memory requirements

	(stdout, stderr) = (os.path.join(sandbox, 'gc.stdout'), os.path.join(sandbox, 'gc.stderr'))
	jobName = module.getDescription(jobNum).jobName
	proc = LoggedProcess(self.submitExec, '%s %s "%s" %s' % (self.submitOpts,
		self.getSubmitArguments(jobNum, jobName, reqs, sandbox, stdout, stderr),
		utils.pathShare('gc-local.sh'), self.getJobArguments(jobNum, sandbox)))
	retCode = proc.wait()
	gcIDText = proc.getOutput().strip().strip('\n')
	try:
		gcID = self.parseSubmitOutput(gcIDText)
	except Exception:
		gcID = None

	activity.finish()

	if retCode != 0:
		self._log.warning('%s failed:', self.submitExec)
	elif gcID is None:
		self._log.warning('%s did not yield job id:\n%s', self.submitExec, gcIDText)
	if gcID:
		gcID = self._createId(gcID)
		# Touch a marker file named after the id inside the sandbox.
		# BUGFIX: close the handle instead of leaking it.
		open(os.path.join(sandbox, gcID), 'w').close()
	else:
		proc.logError(self.errorLog)
	# 'gcID or None' is the equivalent of the former utils.QM(gcID, gcID, None)
	return (jobNum, gcID or None, {'sandbox': sandbox})
def _getJobsOutput(self, allIds):
	"""Retrieve the output sandboxes of the given jobs in chunks.

	Generator: yields (jobNum, outputDir) for each retrieved job,
	(None, recoveredDir) for directories found during error recovery,
	and (jobNum, None) for jobs whose output could not be fetched.
	Raises BackendError when the temporary download path cannot be created.
	"""
	if len(allIds) == 0:
		return  # BUGFIX: 'raise StopIteration' in a generator is a RuntimeError since Python 3.7 (PEP 479)
	basePath = os.path.join(self._outputPath, 'tmp')
	try:
		if len(allIds) == 1:  # For single jobs create single subdir
			basePath = os.path.join(basePath, md5(allIds[0][0]).hexdigest())
		utils.ensureDirExists(basePath)
	except Exception:
		raise BackendError('Temporary path "%s" could not be created.' % basePath, BackendError)

	activity = Activity('retrieving %d job outputs' % len(allIds))
	for ids in imap(lambda x: allIds[x:x + self._nJobsPerChunk],
			irange(0, len(allIds), self._nJobsPerChunk)):
		jobNumMap = dict(ids)
		jobs = ' '.join(self._getRawIDs(ids))
		# NOTE(review): tempfile.mktemp is race-prone; kept because the path
		# is handed to an external tool which creates the file itself.
		log = tempfile.mktemp('.log')
		proc = LoggedProcess(self._outputExec,
			'--noint --logfile "%s" --dir "%s" %s' % (log, basePath, jobs))

		# yield output dirs
		todo = list(jobNumMap.values())  # BUGFIX: dict views have no .remove() on Python 3
		done = []
		currentJobNum = None
		for line in imap(str.strip, proc.iter()):
			match = re.match(self._outputRegex, line)
			if match:
				groups = match.groupdict()  # avoid rebuilding the dict three times
				currentJobNum = jobNumMap.get(self._createId(groups['rawId']))
				todo.remove(currentJobNum)
				done.append(groups['rawId'])
				outputDir = groups['outputDir']
				if os.path.exists(outputDir):
					if 'GC_WC.tar.gz' in os.listdir(outputDir):
						# Unpack wildcard output tarball in place, then drop it
						wildcardTar = os.path.join(outputDir, 'GC_WC.tar.gz')
						try:
							tarfile.TarFile.open(wildcardTar, 'r:gz').extractall(outputDir)
							os.unlink(wildcardTar)
						except Exception:
							self._log.error('Can\'t unpack output files contained in %s', wildcardTar)
				yield (currentJobNum, outputDir)
				currentJobNum = None

		retCode = proc.wait()
		if retCode != 0:
			if 'Keyboard interrupt raised by user' in proc.getError():
				utils.removeFiles([log, basePath])
				return  # BUGFIX: was 'raise StopIteration' (PEP 479)
			else:
				proc.logError(self.errorLog, log=log)
			self._log.error('Trying to recover from error ...')
			# Yield whatever landed in the download area without a known job number
			for dirName in os.listdir(basePath):
				yield (None, os.path.join(basePath, dirName))
	activity.finish()

	# return unretrievable jobs
	# NOTE(review): 'todo', 'done' and 'log' carry over from the LAST chunk
	# only - earlier chunks' unretrieved jobs are not reported here and their
	# raw ids are not purged below; preserved as-is to avoid changing
	# caller-visible behavior, but worth confirming upstream.
	for jobNum in todo:
		yield (jobNum, None)

	purgeLog = tempfile.mktemp('.log')
	purgeProc = LoggedProcess(utils.resolveInstallPath('glite-ce-job-purge'),
		'--noint --logfile "%s" %s' % (purgeLog, str.join(' ', done)))
	retCode = purgeProc.wait()
	if (retCode != 0) and not self.explainError(purgeProc, retCode):
		proc.logError(self.errorLog, log=purgeLog, jobs=done)
	utils.removeFiles([log, purgeLog, basePath])
def _getJobsOutput(self, allIds):
	"""Retrieve the output sandboxes of the given jobs in chunks.

	Generator: yields (jobNum, outputDir) for each retrieved job,
	(None, recoveredDir) for directories found during error recovery,
	and (jobNum, None) for jobs whose output could not be fetched.
	Raises BackendError when the temporary download path cannot be created.
	"""
	if len(allIds) == 0:
		return  # BUGFIX: 'raise StopIteration' in a generator is a RuntimeError since Python 3.7 (PEP 479)
	basePath = os.path.join(self._outputPath, 'tmp')
	try:
		if len(allIds) == 1:  # For single jobs create single subdir
			basePath = os.path.join(basePath, md5(allIds[0][0]).hexdigest())
		utils.ensureDirExists(basePath)
	except Exception:
		raise BackendError(
			'Temporary path "%s" could not be created.' % basePath, BackendError)

	activity = Activity('retrieving %d job outputs' % len(allIds))
	for ids in imap(lambda x: allIds[x:x + self._nJobsPerChunk],
			irange(0, len(allIds), self._nJobsPerChunk)):
		jobNumMap = dict(ids)
		jobs = ' '.join(self._getRawIDs(ids))
		# NOTE(review): tempfile.mktemp is race-prone; kept because the path
		# is handed to an external tool which creates the file itself.
		log = tempfile.mktemp('.log')
		proc = LoggedProcess(
			self._outputExec,
			'--noint --logfile "%s" --dir "%s" %s' % (log, basePath, jobs))

		# yield output dirs
		todo = list(jobNumMap.values())  # BUGFIX: dict views have no .remove() on Python 3
		done = []
		currentJobNum = None
		for line in imap(str.strip, proc.iter()):
			match = re.match(self._outputRegex, line)
			if match:
				groups = match.groupdict()  # avoid rebuilding the dict three times
				currentJobNum = jobNumMap.get(self._createId(groups['rawId']))
				todo.remove(currentJobNum)
				done.append(groups['rawId'])
				outputDir = groups['outputDir']
				if os.path.exists(outputDir):
					if 'GC_WC.tar.gz' in os.listdir(outputDir):
						# Unpack wildcard output tarball in place, then drop it
						wildcardTar = os.path.join(outputDir, 'GC_WC.tar.gz')
						try:
							tarfile.TarFile.open(
								wildcardTar, 'r:gz').extractall(outputDir)
							os.unlink(wildcardTar)
						except Exception:
							self._log.error(
								'Can\'t unpack output files contained in %s', wildcardTar)
				yield (currentJobNum, outputDir)
				currentJobNum = None

		retCode = proc.wait()
		if retCode != 0:
			if 'Keyboard interrupt raised by user' in proc.getError():
				utils.removeFiles([log, basePath])
				return  # BUGFIX: was 'raise StopIteration' (PEP 479)
			else:
				proc.logError(self.errorLog, log=log)
			self._log.error('Trying to recover from error ...')
			# Yield whatever landed in the download area without a known job number
			for dirName in os.listdir(basePath):
				yield (None, os.path.join(basePath, dirName))
	activity.finish()

	# return unretrievable jobs
	# NOTE(review): 'todo', 'done' and 'log' carry over from the LAST chunk
	# only - earlier chunks' unretrieved jobs are not reported here and their
	# raw ids are not purged below; preserved as-is to avoid changing
	# caller-visible behavior, but worth confirming upstream.
	for jobNum in todo:
		yield (jobNum, None)

	purgeLog = tempfile.mktemp('.log')
	purgeProc = LoggedProcess(
		utils.resolveInstallPath('glite-ce-job-purge'),
		'--noint --logfile "%s" %s' % (purgeLog, str.join(' ', done)))
	retCode = purgeProc.wait()
	if (retCode != 0) and not self.explainError(purgeProc, retCode):
		proc.logError(self.errorLog, log=purgeLog, jobs=done)
	utils.removeFiles([log, purgeLog, basePath])