def __init__(self, config, name):
	"""Set up the Condor/GlideInWMS backend.

	Wires check/cancel executors into BasicWMS, reads all backend settings
	from `config`, prepares pool access interfaces and the site broker.
	"""
	self._sandbox_helper = SandboxHelper(config)
	BasicWMS.__init__(self, config, name, checkExecutor = CheckJobsMissingState(config, Condor_CheckJobs(config)), cancelExecutor = CancelAndPurgeJobs(config, Condor_CancelJobs(config), LocalPurgeJobs(config, self._sandbox_helper)))
	# special debug out/messages/annotations - may have noticeable effect on storage and performance!
	debugLogFN = config.get('debugLog', '')
	self.debug = False
	if debugLogFN:
		# NOTE(review): file handle doubles as the "debug enabled" flag
		self.debug = open(debugLogFN, 'a')
	######
	# md5() accepts str on py2 but requires bytes on py3 - fall back to the encoded form
	try:
		random_task_id = md5(str(time.time())).hexdigest()
	except TypeError:
		random_task_id = md5(str(time.time()).encode()).hexdigest()
	self.taskID = config.get('task id', random_task_id, persistent = True) # FIXME!
	self.debugOut(""" ############################# Initialized Condor/GlideInWMS ############################# Config: %s taskID: %s Name: %s ############################# """ % (config.getConfigName(), self.taskID, name))
	# finalize config state by reading values or setting to defaults
	self.settings={
		'jdl': {
			'Universe' : config.get('Universe', 'vanilla'),
			'NotifyEmail' : config.get('NotifyEmail', ''),
			'ClassAdData' : config.getList('ClassAdData',[]),
			'JDLData' : config.getList('JDLData',[])
		},
		'pool' : {
			'hosts' : config.getList('PoolHostList',[])
		}
	}
	# prepare interfaces for local/remote/ssh pool access
	self._initPoolInterfaces(config)
	# load keys for condor pool ClassAds
	self.poolReqs = config.getDict('poolArgs req', {})[0]
	self.poolQuery = config.getDict('poolArgs query', {})[0]
	# Sandbox base path where individual job data is stored, staged and returned to
	self.sandPath = config.getPath('sandbox path', config.getWorkPath('sandbox'), mustExist = False)
	# history query is faster with split files - check if and how this is used
	# default condor_history command works WITHOUT explicitly specified file
	self.historyFile = None
	if self.remoteType == PoolType.LOCAL and getoutput( self.configValExec + ' ENABLE_HISTORY_ROTATION').lower() == 'true':
		self.historyFile = getoutput( self.configValExec + ' HISTORY')
		if not os.path.isfile(self.historyFile):
			self.historyFile = None
	# broker for selecting Sites
	self.brokerSite = config.getPlugin('site broker', 'UserBroker', cls = Broker, tags = [self], pargs = ('sites', 'sites', self.getSites))
	self.debugFlush()
def makeEnum(members = None, cls = None):
	"""Create (or extend) an enum-like class.

	Each name in `members` becomes a class attribute holding its index;
	`cls.members` keeps the name list and `cls.allMembers` the index list.
	If no class is given, a new one with a digest-based name is created.

	Fixes: avoid mutable default argument; encode before hashing since
	hashlib requires bytes on py3; use `is None`; materialize the index
	list so py3 matches the py2 `range()` list behavior.
	"""
	members = members or []
	if cls is None:
		digest = md5(str(members).encode()).hexdigest()[:4]
		cls = type('Enum_%s_%s' % (digest, str.join('_', members)), (), {})
	cls.members = members
	cls.allMembers = list(range(len(members)))
	for idx, member in enumerate(members):
		setattr(cls, member, idx)
	return cls
def translateEntry(meta):
	"""Translate a parameter-setting dictionary into a hash entry.

	Folds every stored key with a non-empty string value into an MD5
	digest and returns it together with the GC_PARAM id and active flag.
	"""
	digest = md5()
	for key in ifilter(lambda k: k in meta, keys_store):
		value = str(meta[key])
		if value:
			digest.update(str2bytes(key))
			digest.update(str2bytes(value))
	return {
		ParameterInfo.HASH: digest.hexdigest(),
		'GC_PARAM': meta['GC_PARAM'],
		ParameterInfo.ACTIVE: meta[ParameterInfo.ACTIVE],
	}
def _getJobsOutput(self, ids):
	"""Generator retrieving job output sandboxes; yields (jobNum, outputDir).

	Yields (None, dir) for orphaned directories found during error recovery
	and (jobNum, None) for jobs whose output could not be retrieved.
	Fixes: PEP 479 - 'raise StopIteration' inside a generator is a
	RuntimeError on py3.7+, use plain 'return'; py3 dict views have no
	remove(), so materialize a list; md5 requires bytes on py3.
	"""
	if len(ids) == 0:
		return
	basePath = os.path.join(self._outputPath, 'tmp')
	try:
		if len(ids) == 1:  # For single jobs create single subdir
			tmpPath = os.path.join(basePath, md5(str(ids[0][0]).encode()).hexdigest())
		else:
			tmpPath = basePath
		utils.ensureDirExists(tmpPath)
	except Exception:
		raise BackendError('Temporary path "%s" could not be created.' % tmpPath, BackendError)
	jobNumMap = dict(ids)
	jobs = self.writeWMSIds(ids)
	activity = Activity('retrieving %d job outputs' % len(ids))
	proc = LocalProcess(self._outputExec, '--noint', '--logfile', '/dev/stderr', '-i', jobs, '--dir', tmpPath)
	# yield output dirs
	todo = list(jobNumMap.values())  # list(): py3 dict views do not support remove()
	currentJobNum = None
	for line in imap(str.strip, proc.stdout.iter(timeout = 60)):
		if line.startswith(tmpPath):
			todo.remove(currentJobNum)
			outputDir = line.strip()
			if os.path.exists(outputDir):
				if 'GC_WC.tar.gz' in os.listdir(outputDir):
					# unpack wildcard tarball into the output directory
					wildcardTar = os.path.join(outputDir, 'GC_WC.tar.gz')
					try:
						tarfile.TarFile.open(wildcardTar, 'r:gz').extractall(outputDir)
						os.unlink(wildcardTar)
					except Exception:
						self._log.error('Can\'t unpack output files contained in %s', wildcardTar)
			yield (currentJobNum, line.strip())
			currentJobNum = None
		else:
			currentJobNum = jobNumMap.get(self._createId(line), currentJobNum)
	retCode = proc.status(timeout = 0, terminate = True)
	activity.finish()
	if retCode != 0:
		if 'Keyboard interrupt raised by user' in proc.stderr.read(timeout = 0):
			utils.removeFiles([jobs, basePath])
			return
		else:
			self._log.log_process(proc, files = {'jobs': SafeFile(jobs).read()})
			self._log.error('Trying to recover from error ...')
			for dirName in os.listdir(basePath):
				yield (None, os.path.join(basePath, dirName))
	# return unretrievable jobs
	for jobNum in todo:
		yield (jobNum, None)
	utils.removeFiles([jobs, basePath])
def _prepareSubmit(self, task, jobNumList, queryArguments):
	"""Write the JDL for the given jobs into the pool sandbox.

	Returns the path of the written JDL file. The schedd URI digest keeps
	JDL file names unique per schedd.
	Fix: encode the URI before hashing - hashlib rejects str on py3
	(assumes getURI() returns a text string - TODO confirm).
	"""
	jdlFilePath = os.path.join(
		self.parentPool.getSandboxPath(),
		'htc-%s.schedd-%s.jdl' % (self.parentPool.wmsName, md5(self.getURI().encode()).hexdigest()))
	utils.safeWrite(
		open(jdlFilePath, 'w'),
		lmap(lambda line: line + '\n', self._getJDLData(task, jobNumList, queryArguments)))
	return jdlFilePath
def md5sum(filename):
	"""Return the hex MD5 digest of the file at `filename`.

	Fixes: open in binary mode ('rb') - hashing must see raw bytes and
	text mode breaks on py3 (str vs bytes) and can mangle data on
	platforms with newline translation; use a context manager so the
	file handle is always closed.
	"""
	m = md5()
	blocksize = 4096 * 1024  # use 4M blocksize
	with open(filename, 'rb') as f:
		while True:
			s = f.read(blocksize)
			m.update(s)
			if len(s) != blocksize:  # short read means EOF
				break
	return m.hexdigest()
def _prepareSubmit(self, task, jobNumList, queryArguments):
	"""Write the JDL for staged jobs to a local path (remote schedd variant).

	Currently aborts with NotImplementedError - the JDL still needs to be
	moved to the remote host.
	Fixes: encode the URI before hashing (hashlib rejects str on py3);
	the unreachable return referenced the undefined name 'jdlFilePath'
	(NameError if ever reached) - corrected to 'localJdlFilePath'.
	"""
	localJdlFilePath = os.path.join(
		self.parentPool.getSandboxPath(),
		'htc-%s.schedd-%s.jdl' % (self.parentPool.wmsName, md5(self.getURI().encode()).hexdigest()))
	readyJobNumList = self._stageSubmitFiles(task, jobNumList)
	utils.safeWrite(
		open(localJdlFilePath, 'w'),
		lmap(lambda line: line + '\n', self._getJDLData(task, readyJobNumList, queryArguments)))
	raise NotImplementedError('JDL must get moved to remote')
	return localJdlFilePath  # unreachable until remote staging is implemented
def __init__(self, config, wmsName):
	"""Set up the legacy (py2-era) Condor/GlideInWMS backend.

	Reads backend settings via section-scoped config lookups, prepares
	pool access interfaces, the status query format and the site broker.
	NOTE(review): uses the py2-only `commands` module and `md5(str)` -
	this variant cannot run on py3.
	"""
	utils.vprint('Using batch system: Condor/GlideInWMS', -1)
	### WMSname=condor is a hardcoded hack until interface is clear
	BasicWMS.__init__(self, config, wmsName, 'condor')
	# special debug out/messages/annotations - may have noticeable effect on storage and performance!
	if config.get( self._getSections("backend"), "debugLog", ""):
		# the open file handle doubles as the "debug enabled" flag
		self.debug=open(config.get( self._getSections("backend"), "debugLog", ""),'a')
	else:
		self.debug=False
	######
	self.taskID = config.get('condor', 'task id', md5(str(time.time())).hexdigest(), persistent = True) # FIXME!
	self.debugOut(""" ############################# Initialized Condor/GlideInWMS ############################# Config: %s taskID: %s Name: %s ############################# """%(config.confName,self.taskID,wmsName))
	# finalize config state by reading values or setting to defaults
	self.settings={
		"jdl": {
			"Universe" : config.get( self._getSections("backend"), "Universe", "vanilla"),
			"NotifyEmail" : config.get( self._getSections("backend"), "NotifyEmail", ""),
			"ClassAdData" : config.getList( self._getSections("backend"), "ClassAdData",[]),
			"JDLData" : config.getList( self._getSections("backend"), "JDLData",[])
		},
		"pool" : {
			"hosts" : config.getList( self._getSections("backend"), "PoolHostList",[])
		}
	}
	# prepare interfaces for local/remote/ssh pool access
	self._initPoolInterfaces(config)
	# load keys for condor pool ClassAds
	self.poolReqs = config.getDict(self._getSections("backend"), 'poolArgs req', {})[0]
	self.poolQuery = config.getDict(self._getSections("backend"), 'poolArgs query', {})[0]
	self._formatStatusReturnQuery(config)
	# Sandbox base path where individual job data is stored, staged and returned to
	self.sandPath = config.getPath(self._getSections("local"), 'sandbox path', config.getWorkPath('sandbox'), mustExist = False)
	# history query is faster with split files - check if and how this is used
	# default condor_history command works WITHOUT explicitly specified file
	self.historyFile = None
	if self.remoteType == poolType.LOCAL and commands.getoutput( self.configValExec + " ENABLE_HISTORY_ROTATION").lower() == "true":
		self.historyFile = commands.getoutput( self.configValExec + " HISTORY")
		if not os.path.isfile(self.historyFile):
			self.historyFile = None
	# broker for selecting Sites
	self.brokerSite = config.getClass('site broker', 'UserBroker', cls = Broker, tags = [self]).getInstance('sites', 'sites', self.getSites)
	self.debugFlush()
def __init__(self, config, name):
	"""Set up the Condor/GlideInWMS backend.

	Wires check/cancel executors into BasicWMS, reads all backend settings
	from `config`, prepares pool access interfaces and the site broker.
	Fix: generate the fallback task id with the same py2/py3-safe
	md5 TypeError fallback used by the sibling constructor - the plain
	md5(str(...)) call raises TypeError on py3 (hashlib requires bytes).
	"""
	self._sandbox_helper = SandboxHelper(config)
	BasicWMS.__init__(self, config, name, checkExecutor = CheckJobsMissingState(config, Condor_CheckJobs(config)), cancelExecutor = CancelAndPurgeJobs(config, Condor_CancelJobs(config), LocalPurgeJobs(config, self._sandbox_helper)))
	# special debug out/messages/annotations - may have noticeable effect on storage and performance!
	debugLogFN = config.get('debugLog', '')
	self.debug = False
	if debugLogFN:
		self.debug = open(debugLogFN, 'a')
	######
	# md5() accepts str on py2 but requires bytes on py3 - fall back to the encoded form
	try:
		random_task_id = md5(str(time.time())).hexdigest()
	except TypeError:
		random_task_id = md5(str(time.time()).encode()).hexdigest()
	self.taskID = config.get('task id', random_task_id, persistent = True) # FIXME!
	self.debugOut(""" ############################# Initialized Condor/GlideInWMS ############################# Config: %s taskID: %s Name: %s ############################# """ % (config.getConfigName(), self.taskID, name))
	# finalize config state by reading values or setting to defaults
	self.settings={
		'jdl': {
			'Universe' : config.get('Universe', 'vanilla'),
			'NotifyEmail' : config.get('NotifyEmail', ''),
			'ClassAdData' : config.getList('ClassAdData',[]),
			'JDLData' : config.getList('JDLData',[])
		},
		'pool' : {
			'hosts' : config.getList('PoolHostList',[])
		}
	}
	# prepare interfaces for local/remote/ssh pool access
	self._initPoolInterfaces(config)
	# load keys for condor pool ClassAds
	self.poolReqs = config.getDict('poolArgs req', {})[0]
	self.poolQuery = config.getDict('poolArgs query', {})[0]
	# Sandbox base path where individual job data is stored, staged and returned to
	self.sandPath = config.getPath('sandbox path', config.getWorkPath('sandbox'), mustExist = False)
	# history query is faster with split files - check if and how this is used
	# default condor_history command works WITHOUT explicitly specified file
	self.historyFile = None
	if self.remoteType == PoolType.LOCAL and getoutput( self.configValExec + ' ENABLE_HISTORY_ROTATION').lower() == 'true':
		self.historyFile = getoutput( self.configValExec + ' HISTORY')
		if not os.path.isfile(self.historyFile):
			self.historyFile = None
	# broker for selecting Sites
	self.brokerSite = config.getPlugin('site broker', 'UserBroker', cls = Broker, tags = [self], pargs = ('sites', 'sites', self.getSites))
	self.debugFlush()
def makeEnum(members=None, cls=None, useHash=False):
	"""Create (or extend) an enum-like class with name<->value maps.

	Each name in `members` becomes a class attribute; with `useHash` the
	values are offset by an id derived from the member digest, otherwise
	they are plain indices. `enum2str`/`str2enum` translate between the two.

	Raises APIError when the definition is ambiguous (duplicate names or
	values). Fixes: py2-only tuple-unpacking lambdas (`lambda (idx, name):`)
	are a SyntaxError on py3; hashlib requires bytes on py3; avoid the
	mutable default argument.
	"""
	members = members or []
	if cls:
		enumID = md5((str(members) + "!" + cls.__name__).encode()).hexdigest()[:4]
	else:
		enumID = md5(str(members).encode()).hexdigest()[:4]
		cls = type("Enum_%s_%s" % (enumID, str.join("_", members)), (), {})
	if useHash:
		offset = int(enumID, 16)  # digest-derived offset makes values unique per enum
		values = [idx + offset for idx in range(len(members))]
	else:
		values = list(range(len(members)))
	cls.enumNames = members
	cls.enumValues = values
	cls._enumMapNV = dict(zip(cls.enumNames, cls.enumValues))
	cls._enumMapVN = dict(zip(cls.enumValues, cls.enumNames))
	if len(cls._enumMapNV) != len(cls._enumMapVN):
		raise APIError("Invalid enum definition!")
	cls.enum2str = cls._enumMapVN.get
	cls.str2enum = cls._enumMapNV.get
	for name, value in zip(cls.enumNames, cls.enumValues):
		setattr(cls, name, value)
	return cls
def hash_calc(filename):
	"""Return the hex MD5 digest of `filename`, reporting progress.

	Reads in 4 MiB chunks and updates a progress bar sized to the file.
	Fix: use a context manager so the file handle is closed even if an
	exception occurs during reading (the original leaked the handle).
	"""
	md5_obj = md5()
	blocksize = 4 * 1024 * 1024  # use 4M blocksize
	with open(filename, 'rb') as fp:
		pos = 0
		progress = ProgressBarActivity('Calculating checksum', os.path.getsize(filename))
		while True:
			buffer_str = fp.read(blocksize)
			md5_obj.update(buffer_str)
			pos += blocksize
			progress.update_progress(pos)
			if len(buffer_str) != blocksize:  # short read means EOF
				break
	progress.finish()
	return md5_obj.hexdigest()
def __init__(self, config, name):
	"""Initialize the task: requirements, task id, storage and parameters.

	Reads job requirements from the 'jobs' section, computes/persists the
	task id and date, configures storage limits and sandbox files, then
	sets up the parameter source manager.
	"""
	NamedPlugin.__init__(self, config, name)
	initSandbox = changeInitNeeded('sandbox')
	# Task requirements
	configJobs = config.changeView(viewClass = TaggedConfigView, addSections = ['jobs'], addTags = [self]) # Move this into parameter manager?
	self.wallTime = configJobs.getTime('wall time', onChange = None)
	self.cpuTime = configJobs.getTime('cpu time', self.wallTime, onChange = None)
	self.cpus = configJobs.getInt('cpus', 1, onChange = None)
	self.memory = configJobs.getInt('memory', -1, onChange = None)
	self.nodeTimeout = configJobs.getTime('node timeout', -1, onChange = initSandbox)
	# Compute / get task ID - persisted, so the random default is only used once
	self.taskID = config.get('task id', 'GC' + md5(str(time())).hexdigest()[:12], persistent = True)
	self.taskDate = config.get('task date', strftime('%Y-%m-%d'), persistent = True, onChange = initSandbox)
	self.taskConfigName = config.getConfigName()
	# Storage setup
	configStorage = config.changeView(viewClass = TaggedConfigView, setClasses = None, setNames = None, addSections = ['storage'], addTags = [self])
	self.taskVariables = {
		# Space limits
		'SCRATCH_UL': configStorage.getInt('scratch space used', 5000, onChange = initSandbox),
		'SCRATCH_LL': configStorage.getInt('scratch space left', 1, onChange = initSandbox),
		'LANDINGZONE_UL': configStorage.getInt('landing zone space used', 100, onChange = initSandbox),
		'LANDINGZONE_LL': configStorage.getInt('landing zone space left', 1, onChange = initSandbox),
	}
	configStorage.set('se output pattern', 'job_@GC_JOB_ID@_@X@')
	self.seMinSize = configStorage.getInt('se min size', -1, onChange = initSandbox)
	self.sbInputFiles = config.getPaths('input files', [], onChange = initSandbox)
	self.sbOutputFiles = config.getList('output files', [], onChange = initSandbox)
	self.gzipOut = config.getBool('gzip output', True, onChange = initSandbox)
	self.substFiles = config.getList('subst files', [], onChange = initSandbox)
	# NOTE(review): on py3 this map() is lazy - confirm consumers only iterate once
	self.dependencies = map(str.lower, config.getList('depends', [], onChange = initSandbox))
	# Get error messages from gc-run.lib comments
	self.errorDict = dict(self.updateErrorDict(utils.pathShare('gc-run.lib')))
	# Init parameter source manager
	pm = config.getPlugin('parameter factory', 'SimpleParameterFactory', cls = ParameterFactory, inherit = True).getInstance()
	configParam = config.changeView(viewClass = TaggedConfigView, addSections = ['parameters'], addTags = [self])
	self.setupJobParameters(configParam, pm)
	self.source = pm.getSource(configParam)
def bulkSubmissionBegin(self):
	"""Prepare bulk submission by creating a delegated proxy.

	Returns True when delegation succeeded (or is disabled); callers can
	then submit with the '-d' delegation id. The temporary log file is
	always removed via the finally clause.
	"""
	self._submitParams.update({ '-d': None })
	if self._discovery_module:
		self._submitParams.update({ '-e': self._discovery_module.getWMS() })
	if self._useDelegate == False:
		# delegation disabled - use automatic delegation and skip proxy creation
		self._submitParams.update({ '-a': ' ' })
		return True
	log = tempfile.mktemp('.log')
	try:
		# random delegation id; NOTE(review): md5(str) works on py2 only - py3 needs bytes
		dID = 'GCD' + md5(str(time.time())).hexdigest()[:10]
		activity = utils.ActivityLog('creating delegate proxy for job submission')
		proc = utils.LoggedProcess(self._delegateExec, '%s -d %s --noint --logfile "%s"' % (utils.QM(self._configVO, '--config "%s"' % self._configVO, ''), dID, log))
		output = proc.getOutput(wait = True)
		# only accept the delegation id if the tool reported success for it
		if ('glite-wms-job-delegate-proxy Success' in output) and (dID in output):
			self._submitParams.update({ '-d': dID })
		del activity
		if proc.wait() != 0:
			proc.logError(self.errorLog, log = log)
		return (self._submitParams.get('-d', None) != None)
	finally:
		utils.removeFiles([log])
def getHash(self):
	"""Return a hex MD5 fingerprint of this source's key and source list.

	Fix: encode the concatenation before hashing - hashlib requires
	bytes on py3 (plain str worked on py2 only); the digest is unchanged.
	"""
	return md5((str(self.key) + str(self.srcList)).encode()).hexdigest()
def _getJobsOutput(self, ids):
	"""Generator retrieving job output sandboxes (legacy py2 variant).

	Yields (jobNum, outputDir) per retrieved job, (None, dir) for orphaned
	directories during error recovery and (jobNum, None) for jobs whose
	output could not be fetched.
	NOTE(review): py2-only - dict.values().remove() and 'raise
	StopIteration' inside a generator both break on py3.
	"""
	if len(ids) == 0:
		raise StopIteration
	basePath = os.path.join(self._outputPath, 'tmp')
	try:
		if len(ids) == 1: # For single jobs create single subdir
			tmpPath = os.path.join(basePath, md5(ids[0][0]).hexdigest())
		else:
			tmpPath = basePath
		utils.ensureDirExists(tmpPath)
	except Exception:
		raise BackendError('Temporary path "%s" could not be created.' % tmpPath, RuntimeError)
	jobNumMap = dict(ids)
	jobs = self.writeWMSIds(ids)
	log = tempfile.mktemp('.log')
	activity = utils.ActivityLog('retrieving job outputs')
	proc = utils.LoggedProcess(self._outputExec, '--noint --logfile "%s" -i "%s" --dir "%s"' % (log, jobs, tmpPath))
	# yield output dirs
	todo = jobNumMap.values()
	currentJobNum = None
	for line in map(str.strip, proc.iter()):
		if line.startswith(tmpPath):
			todo.remove(currentJobNum)
			outputDir = line.strip()
			if os.path.exists(outputDir):
				if 'GC_WC.tar.gz' in os.listdir(outputDir):
					# unpack wildcard tarball into the output directory
					wildcardTar = os.path.join(outputDir, 'GC_WC.tar.gz')
					try:
						tarfile.TarFile.open(wildcardTar, 'r:gz').extractall(outputDir)
						os.unlink(wildcardTar)
					except Exception:
						utils.eprint("Can't unpack output files contained in %s" % wildcardTar)
						pass
			yield (currentJobNum, line.strip())
			currentJobNum = None
		else:
			# remember which job the following output lines belong to
			currentJobNum = jobNumMap.get(self._createId(line), currentJobNum)
	retCode = proc.wait()
	del activity
	if retCode != 0:
		if 'Keyboard interrupt raised by user' in proc.getError():
			utils.removeFiles([log, jobs, basePath])
			raise StopIteration
		else:
			proc.logError(self.errorLog, log = log)
			utils.eprint('Trying to recover from error ...')
			for dirName in os.listdir(basePath):
				yield (None, os.path.join(basePath, dirName))
	# return unretrievable jobs
	for jobNum in todo:
		yield (jobNum, None)
	utils.removeFiles([log, jobs, basePath])
def _getJobsOutput(self, allIds):
	"""Generator retrieving job output sandboxes in chunks, then purging.

	Splits `allIds` into chunks of `self._nJobsPerChunk`, retrieves each
	chunk and yields (jobNum, outputDir). Successfully fetched raw ids are
	purged from the WMS afterwards.
	NOTE(review): py2-only (dict.values().remove(), raise StopIteration in
	a generator); 'todo'/'done'/'log' used after the loop refer to the
	last chunk only - confirm this matches intended purge semantics.
	"""
	if len(allIds) == 0:
		raise StopIteration
	basePath = os.path.join(self._outputPath, 'tmp')
	try:
		if len(allIds) == 1: # For single jobs create single subdir
			basePath = os.path.join(basePath, md5(allIds[0][0]).hexdigest())
		utils.ensureDirExists(basePath)
	except Exception:
		raise BackendError('Temporary path "%s" could not be created.' % basePath, BackendError)
	activity = utils.ActivityLog('retrieving job outputs')
	for ids in imap(lambda x: allIds[x:x+self._nJobsPerChunk], irange(0, len(allIds), self._nJobsPerChunk)):
		jobNumMap = dict(ids)
		jobs = ' '.join(self._getRawIDs(ids))
		log = tempfile.mktemp('.log')
		#print self._outputExec, '--noint --logfile "%s" --dir "%s" %s' % (log, basePath, jobs)
		#import sys
		#sys.exit(1)
		proc = utils.LoggedProcess(self._outputExec, '--noint --logfile "%s" --dir "%s" %s' % (log, basePath, jobs))
		# yield output dirs
		todo = jobNumMap.values()
		done = []
		currentJobNum = None
		for line in imap(str.strip, proc.iter()):
			match = re.match(self._outputRegex, line)
			if match:
				# map the raw WMS id back to the grid-control job number
				currentJobNum = jobNumMap.get(self._createId(match.groupdict()['rawId']))
				todo.remove(currentJobNum)
				done.append(match.groupdict()['rawId'])
				outputDir = match.groupdict()['outputDir']
				if os.path.exists(outputDir):
					if 'GC_WC.tar.gz' in os.listdir(outputDir):
						# unpack wildcard tarball into the output directory
						wildcardTar = os.path.join(outputDir, 'GC_WC.tar.gz')
						try:
							tarfile.TarFile.open(wildcardTar, 'r:gz').extractall(outputDir)
							os.unlink(wildcardTar)
						except Exception:
							utils.eprint("Can't unpack output files contained in %s" % wildcardTar)
				yield (currentJobNum, outputDir)
				currentJobNum = None
		retCode = proc.wait()
		if retCode != 0:
			if 'Keyboard interrupt raised by user' in proc.getError():
				utils.removeFiles([log, basePath])
				raise StopIteration
			else:
				proc.logError(self.errorLog, log = log)
				utils.eprint('Trying to recover from error ...')
				for dirName in os.listdir(basePath):
					yield (None, os.path.join(basePath, dirName))
	del activity
	# return unretrievable jobs
	for jobNum in todo:
		yield (jobNum, None)
	purgeLog = tempfile.mktemp('.log')
	purgeProc = utils.LoggedProcess(self._purgeExec, '--noint --logfile "%s" %s' % (purgeLog, " ".join(done)))
	retCode = purgeProc.wait()
	if retCode != 0:
		if self.explainError(purgeProc, retCode):
			pass
		else:
			proc.logError(self.errorLog, log = purgeLog, jobs = done)
	utils.removeFiles([log, purgeLog, basePath])
def getHash(self):
	"""Return a hex MD5 fingerprint of the wrapped source's hash and repeat count.

	Fix: encode the concatenation before hashing - hashlib requires
	bytes on py3; the digest value is unchanged.
	"""
	return md5((self._psource.getHash() + str(self.times)).encode()).hexdigest()
def getHash(self):
	"""Return a hex MD5 fingerprint of the wrapped source's hash and slice range.

	Fix: encode the concatenation before hashing - hashlib requires
	bytes on py3; the digest value is unchanged.
	"""
	return md5((self._psource.getHash() + str([self._posStart, self._posEnd])).encode()).hexdigest()
def _getJobsOutput(self, ids):
	"""Generator retrieving job output sandboxes; yields (jobNum, outputDir).

	Yields (None, dir) for orphaned directories found during error recovery
	and (jobNum, None) for jobs whose output could not be retrieved.
	Fixes: PEP 479 - 'raise StopIteration' inside a generator is a
	RuntimeError on py3.7+, so plain 'return' is used; py3 dict views lack
	remove(), so the todo list is materialized; md5 requires bytes on py3.
	"""
	if len(ids) == 0:
		return
	basePath = os.path.join(self._outputPath, 'tmp')
	try:
		if len(ids) == 1:  # For single jobs create single subdir
			tmpPath = os.path.join(basePath, md5(str(ids[0][0]).encode()).hexdigest())
		else:
			tmpPath = basePath
		utils.ensureDirExists(tmpPath)
	except Exception:
		raise BackendError(
			'Temporary path "%s" could not be created.' % tmpPath, BackendError)
	jobNumMap = dict(ids)
	jobs = self.writeWMSIds(ids)
	activity = Activity('retrieving %d job outputs' % len(ids))
	proc = LocalProcess(self._outputExec, '--noint', '--logfile', '/dev/stderr', '-i', jobs, '--dir', tmpPath)
	# yield output dirs
	todo = list(jobNumMap.values())  # list(): py3 dict views do not support remove()
	currentJobNum = None
	for line in imap(str.strip, proc.stdout.iter(timeout=60)):
		if line.startswith(tmpPath):
			todo.remove(currentJobNum)
			outputDir = line.strip()
			if os.path.exists(outputDir):
				if 'GC_WC.tar.gz' in os.listdir(outputDir):
					# unpack wildcard tarball into the output directory
					wildcardTar = os.path.join(outputDir, 'GC_WC.tar.gz')
					try:
						tarfile.TarFile.open(wildcardTar, 'r:gz').extractall(outputDir)
						os.unlink(wildcardTar)
					except Exception:
						self._log.error(
							'Can\'t unpack output files contained in %s', wildcardTar)
			yield (currentJobNum, line.strip())
			currentJobNum = None
		else:
			currentJobNum = jobNumMap.get(self._createId(line), currentJobNum)
	retCode = proc.status(timeout=0, terminate=True)
	activity.finish()
	if retCode != 0:
		if 'Keyboard interrupt raised by user' in proc.stderr.read(timeout=0):
			utils.removeFiles([jobs, basePath])
			return
		else:
			self._log.log_process(proc, files={'jobs': SafeFile(jobs).read()})
			self._log.error('Trying to recover from error ...')
			for dirName in os.listdir(basePath):
				yield (None, os.path.join(basePath, dirName))
	# return unretrievable jobs
	for jobNum in todo:
		yield (jobNum, None)
	utils.removeFiles([jobs, basePath])
def getHash(self):
	"""Return a hex MD5 fingerprint over all chained parameter sources.

	Fixes: on py3, str(map(...)) would embed the map object's repr
	(including a memory address) instead of the element values, making the
	hash nondeterministic - a list comprehension reproduces the py2
	stringified list; encode before hashing since hashlib requires bytes.
	"""
	return md5(str([str(p.getMaxParameters()) + p.getHash() for p in self._psourceList]).encode()).hexdigest()
def getHash(self):
	"""Return a hex MD5 fingerprint of the plugin's hash and repeat count.

	Fix: encode the concatenation before hashing - hashlib requires
	bytes on py3; the digest value is unchanged.
	"""
	return md5((self.plugin.getHash() + str(self.times)).encode()).hexdigest()
def getHash(self):
	"""Return a hex MD5 fingerprint of the plugin's hash and slice range.

	Fix: encode the concatenation before hashing - hashlib requires
	bytes on py3; the digest value is unchanged.
	"""
	return md5((self.plugin.getHash() + str([self.posStart, self.posEnd])).encode()).hexdigest()
def getHash(self):
	"""Return a hex MD5 fingerprint of key, format, source and default.

	Fix: encode the concatenation before hashing - hashlib requires
	bytes on py3; the digest value is unchanged.
	"""
	return md5((str(self.key) + str([self.fmt, self.source, self.default])).encode()).hexdigest()
def getHash(self):
	"""Return a hex MD5 fingerprint of key and seed.

	Fix: encode the concatenation before hashing - hashlib requires
	bytes on py3; the digest value is unchanged.
	"""
	return md5((str(self.key) + str(self.seed)).encode()).hexdigest()
def getHash(self):
	"""Return a hex MD5 fingerprint of key and [low, high] bounds.

	Fix: encode the concatenation before hashing - hashlib requires
	bytes on py3; the digest value is unchanged.
	"""
	return md5((str(self.key) + str([self.low, self.high])).encode()).hexdigest()
def getHash(self):
	"""Return a hex MD5 fingerprint of key and value.

	Fix: encode the concatenation before hashing - hashlib requires
	bytes on py3; the digest value is unchanged.
	"""
	return md5((str(self.key) + str(self.value)).encode()).hexdigest()
def _prepareSubmit(self, task, jobNumList, queryArguments):
	"""Write the JDL for the given jobs into the pool sandbox.

	Returns the path of the written JDL file. The schedd URI digest keeps
	JDL file names unique per schedd.
	Fix: encode the URI before hashing - hashlib rejects str on py3
	(assumes getURI() returns a text string - TODO confirm).
	"""
	jdlFilePath = os.path.join(self.parentPool.getSandboxPath(),
		'htc-%s.schedd-%s.jdl' % (self.parentPool.wmsName, md5(self.getURI().encode()).hexdigest()))
	utils.safeWrite(open(jdlFilePath, 'w'),
		lmap(lambda line: line + '\n', self._getJDLData(task, jobNumList, queryArguments)))
	return jdlFilePath
def _prepareSubmit(self, task, jobNumList, queryArguments):
	"""Write the JDL for staged jobs to a local path (remote schedd variant).

	Currently aborts with NotImplementedError - the JDL still needs to be
	moved to the remote host.
	Fixes: encode the URI before hashing (hashlib rejects str on py3);
	the unreachable return referenced the undefined name 'jdlFilePath'
	(NameError if ever reached) - corrected to 'localJdlFilePath'.
	"""
	localJdlFilePath = os.path.join(self.parentPool.getSandboxPath(),
		'htc-%s.schedd-%s.jdl' % (self.parentPool.wmsName, md5(self.getURI().encode()).hexdigest()))
	readyJobNumList = self._stageSubmitFiles(task, jobNumList)
	utils.safeWrite(open(localJdlFilePath, 'w'),
		lmap(lambda line: line + '\n', self._getJDLData(task, readyJobNumList, queryArguments)))
	raise NotImplementedError('JDL must get moved to remote')
	return localJdlFilePath  # unreachable until remote staging is implemented
def generateKey(self, keys, base, path, metadata, events, seList, objStore):
	"""Return a hex MD5 key over the base key, SE list and selected metadata.

	Fixes: on py3, repr(map(...)) would embed the map object's repr
	(including a memory address) instead of the values, making the key
	nondeterministic - a list comprehension reproduces the py2 stringified
	list; encode before hashing since hashlib requires bytes on py3.
	"""
	metaValues = [metadata.get(k, None) for k in keys]
	return md5((repr(base) + repr(seList) + repr(metaValues)).encode()).hexdigest()