Example #1
	def cancelJobs(self, ids):
		if not len(ids):
			return  # bare return ends the generator; raising StopIteration breaks under PEP 479 (Python 3.7+)

		activity = utils.ActivityLog('cancelling jobs')
		proc = utils.LoggedProcess(self.cancelExec, self.getCancelArguments(self._getRawIDs(ids)))
		if proc.wait() != 0:
			for line in proc.getError().splitlines():
				if not self.unknownID() in line:
					utils.eprint(line.strip())
		del activity

		activity = utils.ActivityLog('waiting for jobs to finish')
		time.sleep(5)
		for wmsId, jobNum in ids:
			path = self._getSandbox(wmsId)
			if path is None:
				utils.eprint('Sandbox for job %d with wmsId "%s" could not be found' % (jobNum, wmsId))
				continue
			try:
				shutil.rmtree(path)
			except Exception:
				raise BackendError('Sandbox for job %d with wmsId "%s" could not be deleted' % (jobNum, wmsId))
			yield (jobNum, wmsId)
		del activity
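
cancelJobs is a generator: nothing runs until the caller iterates over the yielded (jobNum, wmsId) pairs. A minimal consumption sketch, assuming a hypothetical backend instance wms and made-up ids that are not part of the example above:

# Hypothetical input: the example iterates ids as (wmsId, jobNum) pairs.
ids = [('WMSID.local.1001', 0), ('WMSID.local.1002', 1)]
for jobNum, wmsId in wms.cancelJobs(ids):
	print('Cancelled job %d (%s)' % (jobNum, wmsId))
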
Example #2
	def checkJobs(self, ids):
		if not len(ids):
			return

		activity = utils.ActivityLog('checking job status')
		proc = utils.LoggedProcess(self.statusExec, self.getCheckArguments(self._getRawIDs(ids)))

		tmp = {}
		for data in self.parseStatus(proc.iter()):
			wmsId = self._createId(data['id'])
			tmp[wmsId] = (wmsId, self.parseJobState(data['status']), data)

		for wmsId, jobNum in ids:
			if wmsId not in tmp:
				yield (jobNum, wmsId, Job.DONE, {})
			else:
				yield tuple([jobNum] + list(tmp[wmsId]))

		retCode = proc.wait()
		del activity

		if retCode != 0:
			for line in proc.getError().splitlines():
				if not self.unknownID() in line:
					utils.eprint(line)
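
The membership test over tmp at the end of checkJobs can also be written with dict.get and a default tuple; a behaviour-preserving sketch of that final loop (not the project's code):

		for wmsId, jobNum in ids:
			# Unknown ids fall back to (wmsId, Job.DONE, {}), exactly like the branch above.
			yield (jobNum,) + tuple(tmp.get(wmsId, (wmsId, Job.DONE, {})))
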
Example #3
	def checkJobs(self, ids):
		if len(ids) == 0:
			return

		jobNumMap = dict(ids)
		jobs = ' '.join(self._getRawIDs(ids))
		log = tempfile.mktemp('.log')

		activity = utils.ActivityLog('checking job status')
		proc = utils.LoggedProcess(self._statusExec, '--level 0 --logfile "%s" %s' % (log, jobs))
		for jobOutput in proc.getOutput().split('******')[1:]:
			data = {}
			for statusRegexLevel0 in self._statusRegexLevel0:
				match = re.match(statusRegexLevel0, jobOutput.replace('\n', ' '))
				if match:
					data = match.groupdict()
					break
			data['id'] = self._createId(data['rawId'])
			yield (jobNumMap.get(data['id']), data['id'], self._statusMap[data.get('status', 'DONE-FAILED')], data)
		
		retCode = proc.wait()
		del activity

		if retCode != 0:
			if not self.explainError(proc, retCode):
				proc.logError(self.errorLog, log = log, jobs = jobs)
		
		utils.removeFiles([log])
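
tempfile.mktemp only returns a file name and is deprecated in the standard library, because another process can claim that name before it is used. Where the surrounding utilities only need a path, tempfile.mkstemp avoids the race; a small sketch (independent of grid-control):

import os
import tempfile

# Create the log file atomically, then keep only its path for the external command.
fd, log = tempfile.mkstemp(suffix='.log')
os.close(fd)
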
Example #4
    def _submitJob(self, jobNum, module):
        activity = utils.ActivityLog('submitting jobs')

        try:
            sandbox = tempfile.mkdtemp('',
                                       '%s.%04d.' % (module.taskID, jobNum),
                                       self.sandPath)
        except Exception:
            # 'sandbox' is undefined when mkdtemp itself fails, so report the base path instead
            raise BackendError('Unable to create sandbox directory in "%s"!' % self.sandPath)
        sbPrefix = sandbox.replace(self.sandPath, '').lstrip('/')

        def translateTarget(d, s, t):
            return (d, s, os.path.join(sbPrefix, t))

        self.smSBIn.doTransfer(
            ismap(translateTarget, self._getSandboxFilesIn(module)))

        self._writeJobConfig(
            os.path.join(sandbox, '_jobconfig.sh'), jobNum, module, {
                'GC_SANDBOX': sandbox,
                'GC_SCRATCH_SEARCH': str.join(' ', self.scratchPath)
            })
        reqs = self.brokerSite.brokerAdd(module.getRequirements(jobNum),
                                         WMS.SITES)
        reqs = dict(self.brokerQueue.brokerAdd(reqs, WMS.QUEUES))
        if (self.memory > 0) and (reqs.get(WMS.MEMORY, 0) < self.memory):
            # local jobs need higher (more realistic) memory requirements
            reqs[WMS.MEMORY] = self.memory

        (stdout, stderr) = (os.path.join(sandbox, 'gc.stdout'),
                            os.path.join(sandbox, 'gc.stderr'))
        jobName = module.getDescription(jobNum).jobName
        submitArgs = self.getSubmitArguments(jobNum, jobName, reqs, sandbox, stdout, stderr)
        proc = utils.LoggedProcess(self.submitExec, '%s %s "%s" %s' % (
            self.submitOpts, submitArgs,
            utils.pathShare('gc-local.sh'), self.getJobArguments(jobNum, sandbox)))
        retCode = proc.wait()
        wmsIdText = proc.getOutput().strip().strip('\n')
        try:
            wmsId = self.parseSubmitOutput(wmsIdText)
        except Exception:
            wmsId = None

        del activity

        if retCode != 0:
            self._log.warning('%s failed:', self.submitExec)
        elif wmsId is None:
            self._log.warning('%s did not yield job id:\n%s', self.submitExec,
                              wmsIdText)
        if wmsId:
            wmsId = self._createId(wmsId)
            open(os.path.join(sandbox, wmsId), 'w').close()  # create an empty marker file named after the wms id
        else:
            proc.logError(self.errorLog)
        return (jobNum, utils.QM(wmsId, wmsId, None), {'sandbox': sandbox})
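
parseSubmitOutput is backend specific and not shown here. Purely as an illustration (hypothetical output format, not grid-control's implementation), a regex based parser for a scheduler that prints a line such as 'Your job 12345 ("name") has been submitted' might look like this:

import re

def parseSubmitOutput(output):
    # Hypothetical format: 'Your job 12345 ("name") has been submitted'
    match = re.search(r'job\s+(\d+)', output)
    if match is None:
        raise ValueError('Unable to parse job id from %r' % output)
    return match.group(1)
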
Example #5
	def cancelJobs(self, allIds):
		if len(allIds) == 0:
			return

		waitFlag = False
		for ids in imap(lambda x: allIds[x:x+self._nJobsPerChunk], irange(0, len(allIds), self._nJobsPerChunk)):
			# Cancel jobs in chunks of self._nJobsPerChunk ids - with 5 seconds between chunks
			if waitFlag and not utils.wait(5):
				break
			waitFlag = True

			jobNumMap = dict(ids)
			jobs = ' '.join(self._getRawIDs(ids))
			log = tempfile.mktemp('.log')

			activity = utils.ActivityLog('cancelling jobs')
			proc = utils.LoggedProcess(self._cancelExec, '--noint --logfile "%s" %s' % (log, jobs))
			retCode = proc.wait()
			del activity

			# select cancelled jobs
			for rawId in self._getRawIDs(ids):
				deletedWMSId = self._createId(rawId)
				yield (jobNumMap.get(deletedWMSId), deletedWMSId)

			if retCode != 0:
				if not self.explainError(proc, retCode):
					proc.logError(self.errorLog, log = log)
		
			purgeLog = tempfile.mktemp('.log')
			purgeProc = utils.LoggedProcess(self._purgeExec, '--noint --logfile "%s" %s' % (purgeLog, jobs))
			retCode = purgeProc.wait()
			if retCode != 0:
				if not self.explainError(purgeProc, retCode):
					purgeProc.logError(self.errorLog, log = purgeLog, jobs = jobs)
			
			utils.removeFiles([log, purgeLog])
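
The imap/irange expression above walks allIds in chunks of self._nJobsPerChunk, with a 5 second pause between chunks. The chunking itself, written as a small stand-alone helper (a sketch, not part of grid-control):

def chunks(seq, size):
	# Yield consecutive slices of at most `size` elements.
	for start in range(0, len(seq), size):
		yield seq[start:start + size]

# list(chunks([1, 2, 3, 4, 5], 2)) == [[1, 2], [3, 4], [5]]
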
Example #6
	def _getJobsOutput(self, allIds):
		if len(allIds) == 0:
			return

		basePath = os.path.join(self._outputPath, 'tmp')
		try:
			if len(allIds) == 1:
				# For single jobs create single subdir
				basePath = os.path.join(basePath, md5(allIds[0][0].encode('utf-8')).hexdigest())
			utils.ensureDirExists(basePath)
		except Exception:
			raise BackendError('Temporary path "%s" could not be created.' % basePath)
		
		activity = utils.ActivityLog('retrieving job outputs')
		for ids in imap(lambda x: allIds[x:x+self._nJobsPerChunk], irange(0, len(allIds), self._nJobsPerChunk)):
			jobNumMap = dict(ids)
			jobs = ' '.join(self._getRawIDs(ids))
			log = tempfile.mktemp('.log')

			#print self._outputExec, '--noint --logfile "%s" --dir "%s" %s' % (log, basePath, jobs)
			#import sys
			#sys.exit(1)
			proc = utils.LoggedProcess(self._outputExec,
				'--noint --logfile "%s" --dir "%s" %s' % (log, basePath, jobs))

			# yield output dirs
			todo = list(jobNumMap.values())  # list() so that .remove() below also works under Python 3
			done = []
			currentJobNum = None
			for line in imap(str.strip, proc.iter()):
				match = re.match(self._outputRegex, line)
				if match:
					currentJobNum = jobNumMap.get(self._createId(match.groupdict()['rawId']))
					todo.remove(currentJobNum)
					done.append(match.groupdict()['rawId'])
					outputDir = match.groupdict()['outputDir']
					if os.path.exists(outputDir):
						if 'GC_WC.tar.gz' in os.listdir(outputDir):
							wildcardTar = os.path.join(outputDir, 'GC_WC.tar.gz')
							try:
								tarfile.TarFile.open(wildcardTar, 'r:gz').extractall(outputDir)
								os.unlink(wildcardTar)
							except Exception:
								utils.eprint("Can't unpack output files contained in %s" % wildcardTar)
					yield (currentJobNum, outputDir)
					currentJobNum = None
			retCode = proc.wait()

			if retCode != 0:
				if 'Keyboard interrupt raised by user' in proc.getError():
					utils.removeFiles([log, basePath])
					return
				else:
					proc.logError(self.errorLog, log = log)
				utils.eprint('Trying to recover from error ...')
				for dirName in os.listdir(basePath):
					yield (None, os.path.join(basePath, dirName))
		del activity

		# return unretrievable jobs (todo, done and log still refer to the last chunk processed above)
		for jobNum in todo:
			yield (jobNum, None)
		
		purgeLog = tempfile.mktemp('.log')
		purgeProc = utils.LoggedProcess(self._purgeExec, '--noint --logfile "%s" %s' % (purgeLog, " ".join(done)))
		retCode = purgeProc.wait()
		if retCode != 0:
			if not self.explainError(purgeProc, retCode):
				purgeProc.logError(self.errorLog, log = purgeLog, jobs = done)
		utils.removeFiles([log, purgeLog, basePath])
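
The purge-and-log-errors block appears in both cancelJobs and _getJobsOutput above. A hypothetical helper (not part of grid-control) that factors it out, using only the calls already shown in these examples, could look like this:

	def _purgeJobs(self, jobs):
		# Hypothetical helper: purge the given raw job ids and log any failure.
		purgeLog = tempfile.mktemp('.log')
		purgeProc = utils.LoggedProcess(self._purgeExec, '--noint --logfile "%s" %s' % (purgeLog, jobs))
		retCode = purgeProc.wait()
		if (retCode != 0) and not self.explainError(purgeProc, retCode):
			purgeProc.logError(self.errorLog, log = purgeLog, jobs = jobs)
		utils.removeFiles([purgeLog])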