# Print text to stderr, optionally prefixed with a timestamp
def eprint(text='', newline=True, printTime=False):
    if printTime:
        sys.stderr.write('%s - ' % time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()))
    sys.stderr.write('%s%s' % (text, QM(newline, '\n', '')))


def getVersion():
    try:
        version = LoggedProcess('svnversion', '-c %s' % pathGC()).getOutput(True).strip()
        if version != '':
            if 'stable' in LoggedProcess('svn info', pathGC()).getOutput(True):
                return '%s - stable' % version
            return '%s - testing' % version
    except Exception:
        pass
    return 'unknown'
getVersion = lru_cache(getVersion)


def wait(timeout):
    shortStep = map(lambda x: (x, 1), range(max(timeout - 5, 0), timeout))
    for x, w in map(lambda x: (x, 5), range(0, timeout - 5, 5)) + shortStep:
        if abort():
            return False
        log = ActivityLog('waiting for %d seconds' % (timeout - x))
        time.sleep(w)
        del log
    return True


class ActivityLog:
    class Activity:
class TaskModule(NamedPlugin):
    configSections = NamedPlugin.configSections + ['task']
    tagName = 'task'

    # Read configuration options and init vars
    def __init__(self, config, name):
        NamedPlugin.__init__(self, config, name)
        initSandbox = changeInitNeeded('sandbox')
        self._varCheck = validNoVar(config)

        # Task requirements
        jobs_config = config.changeView(viewClass='TaggedConfigView',
            addSections=['jobs'], addTags=[self])
        # Move this into parameter manager?
        self.wallTime = jobs_config.getTime('wall time', onChange=None)
        self.cpuTime = jobs_config.getTime('cpu time', self.wallTime, onChange=None)
        self.cpus = jobs_config.getInt('cpus', 1, onChange=None)
        self.memory = jobs_config.getInt('memory', -1, onChange=None)
        self.nodeTimeout = jobs_config.getTime('node timeout', -1, onChange=initSandbox)

        # Compute / get task ID
        self.taskID = config.get('task id', 'GC' + md5_hex(str(time()))[:12], persistent=True)
        self.taskDate = config.get('task date', strftime('%Y-%m-%d'),
            persistent=True, onChange=initSandbox)
        self.taskConfigName = config.getConfigName()
        self._job_name_generator = config.getPlugin('job name generator',
            'DefaultJobName', cls=JobNamePlugin, pargs=(self,))

        # Storage setup
        storage_config = config.changeView(viewClass='TaggedConfigView',
            setClasses=None, setNames=None, addSections=['storage'], addTags=[self])
        self.taskVariables = {
            # Space limits
            'SCRATCH_UL': storage_config.getInt('scratch space used', 5000, onChange=initSandbox),
            'SCRATCH_LL': storage_config.getInt('scratch space left', 1, onChange=initSandbox),
            'LANDINGZONE_UL': storage_config.getInt('landing zone space used', 100, onChange=initSandbox),
            'LANDINGZONE_LL': storage_config.getInt('landing zone space left', 1, onChange=initSandbox),
        }
        storage_config.set('se output pattern', 'job_@GC_JOB_ID@_@X@')
        self.seMinSize = storage_config.getInt('se min size', -1, onChange=initSandbox)

        self.sbInputFiles = config.getPaths('input files', [], onChange=initSandbox)
        self.sbOutputFiles = config.getList('output files', [], onChange=initSandbox)
        self.gzipOut = config.getBool('gzip output', True, onChange=initSandbox)

        self.substFiles = config.getList('subst files', [], onChange=initSandbox)
        self.dependencies = lmap(str.lower, config.getList('depends', [], onChange=initSandbox))

        # Get error messages from gc-run.lib comments
        self.errorDict = {}
        self.updateErrorDict(utils.pathShare('gc-run.lib'))

        # Init parameter source manager
        self._setupJobParameters(config)
        self._pfactory = config.getPlugin('internal parameter factory', 'BasicParameterFactory',
            cls=ParameterFactory, tags=[self], inherit=True)
        self.source = config.getPlugin('parameter adapter', 'TrackedParameterAdapter',
            cls=ParameterAdapter, pargs=(self._pfactory.getSource(),))

    def _setupJobParameters(self, config):
        pass

    # Read comments with error codes at the beginning of file:
    # <code> - description
    def updateErrorDict(self, fileName):
        for line in ifilter(lambda x: x.startswith('#'), SafeFile(fileName).readlines()):
            tmp = lmap(str.strip, line.lstrip('#').split(' - ', 1))
            if tmp[0].isdigit() and (len(tmp) == 2):
                self.errorDict[int(tmp[0])] = tmp[1]

    # Get environment variables for gc_config.sh
    def getTaskConfig(self):
        taskConfig = {
            # Storage element
            'SE_MINFILESIZE': self.seMinSize,
            # Sandbox
            'SB_OUTPUT_FILES': str.join(' ', self.getSBOutFiles()),
            'SB_INPUT_FILES': str.join(' ', imap(lambda x: x.pathRel, self.getSBInFiles())),
            # Runtime
            'GC_JOBTIMEOUT': self.nodeTimeout,
            'GC_RUNTIME': self.getCommand(),
            # Seeds and substitutions
            'SUBST_FILES': str.join(' ', imap(os.path.basename, self.getSubstFiles())),
            'GC_SUBST_OLD_STYLE': str('__' in self._varCheck.markers).lower(),
            # Task infos
            'GC_TASK_CONF': self.taskConfigName,
            'GC_TASK_DATE': self.taskDate,
            'GC_TASK_ID': self.taskID,
            'GC_VERSION': utils.getVersion(),
        }
        return utils.mergeDicts([taskConfig, self.taskVariables])
    getTaskConfig = lru_cache(getTaskConfig)

    # Get job dependent environment variables
    def getJobConfig(self, jobNum):
        tmp = self.source.getJobInfo(jobNum)
        return dict(imap(lambda key: (str(key), tmp.get(key, '')), self.source.getJobKeys()))

    def getTransientVars(self):
        return {
            'GC_DATE': strftime("%F"),
            'GC_TIMESTAMP': strftime("%s"),
            'GC_GUID': strGuid(str.join("", imap(lambda x: "%02x" % x,
                imap(random.randrange, [256] * 16)))),
            'RANDOM': str(random.randrange(0, 900000000)),
        }

    def getVarNames(self):
        # Take task variables and the variables from the parameter source
        return lchain([self.getTaskConfig().keys(), self.source.getJobKeys()])

    def getVarMapping(self):
        # Transient variables - these variables are determined on the WN
        transients = ['GC_DATE', 'GC_TIMESTAMP', 'GC_GUID']
        # Alias vars: eg. __MY_JOB__ will access $GC_JOB_ID - used mostly for compatibility
        alias = {'DATE': 'GC_DATE', 'TIMESTAMP': 'GC_TIMESTAMP', 'GUID': 'GC_GUID',
            'MY_JOBID': 'GC_JOB_ID', 'MY_JOB': 'GC_JOB_ID', 'JOBID': 'GC_JOB_ID',
            'GC_JOBID': 'GC_JOB_ID', 'CONF': 'GC_CONF', 'TASK_ID': 'GC_TASK_ID'}
        varNames = self.getVarNames() + transients
        alias.update(dict(izip(varNames, varNames)))  # include reflexive mappings
        return alias

    def substVars(self, name, inp, jobNum=None, addDict=None, check=True):
        addDict = addDict or {}
        allVars = utils.mergeDicts([addDict, self.getTaskConfig()])
        if jobNum is not None:
            allVars.update(self.getJobConfig(jobNum))
        subst = lambda x: utils.replaceDict(x, allVars,
            ichain([self.getVarMapping().items(), izip(addDict, addDict)]))
        result = subst(subst(str(inp)))
        if check and self._varCheck.check(result):
            raise ConfigError('%s references unknown variables: %s' % (name, result))
        return result

    def validateVariables(self):
        example_vars = dict.fromkeys(self.getVarNames(), '')
        example_vars.update(dict.fromkeys(['X', 'XBASE', 'XEXT',
            'GC_DATE', 'GC_TIMESTAMP', 'GC_GUID', 'RANDOM'], ''))
        for name, value in ichain([self.getTaskConfig().items(), example_vars.items()]):
            self.substVars(name, value, None, example_vars)

    # Get job requirements
    def getRequirements(self, jobNum):
        return [(WMS.WALLTIME, self.wallTime), (WMS.CPUTIME, self.cpuTime),
            (WMS.MEMORY, self.memory), (WMS.CPUS, self.cpus)
        ] + self.source.getJobInfo(jobNum)[ParameterInfo.REQS]

    def getSEInFiles(self):
        return []

    # Get files for input sandbox
    def getSBInFiles(self):
        return lmap(lambda fn: utils.Result(pathAbs=fn, pathRel=os.path.basename(fn)),
            self.sbInputFiles)

    # Get files for output sandbox
    def getSBOutFiles(self):
        return list(self.sbOutputFiles)

    # Get files whose content will be subject to variable substitution
    def getSubstFiles(self):
        return list(self.substFiles)

    def getCommand(self):
        raise AbstractError

    def getJobArguments(self, jobNum):
        return ''

    def getMaxJobs(self):
        return self.source.getMaxJobs()

    def getDependencies(self):
        return list(self.dependencies)

    def getDescription(self, jobNum):  # (task name, job name, job type)
        return utils.Result(taskName=self.taskID, jobType=None,
            jobName=self._job_name_generator.getName(jobNum))

    def report(self, jobNum):
        keys = lfilter(lambda k: not k.untracked, self.source.getJobKeys())
        return utils.filterDict(self.source.getJobInfo(jobNum), kF=lambda k: k in keys)

    def canFinish(self):
        return self.source.canFinish()

    def canSubmit(self, jobNum):
        return self.source.canSubmit(jobNum)

    # Called on job submission
    def getSubmitInfo(self, jobNum):
        return {}

    # Intervene in job management - return None or (redoJobs, disableJobs)
    def getIntervention(self):
        return self.source.resync()
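
# The substitution machinery above resolves markers in two passes: substVars() merges the
# task-level and per-job dictionaries, maps aliases such as __MY_JOB__ onto GC_JOB_ID via
# getVarMapping(), and applies the replacement twice so that values which themselves contain
# markers are also resolved. The helper below is a minimal, self-contained sketch of that
# idea only - the marker syntax, function name and example values are assumptions, not
# grid-control code.
import re

def _demo_subst(text, values, aliases):
    def _lookup(match):
        key = aliases.get(match.group(2), match.group(2))
        return str(values.get(key, match.group(0)))
    _replace = lambda s: re.sub(r'(@|__)(\w+)(@|__)', _lookup, s)
    return _replace(_replace(text))  # two passes, like subst(subst(str(inp)))

# Example (illustrative values):
#   values  = {'GC_TASK_ID': 'GC1234', 'GC_JOB_ID': '7', 'OUTFILE': 'job_@GC_JOB_ID@.root'}
#   aliases = {'TASK_ID': 'GC_TASK_ID', 'MY_JOB': 'GC_JOB_ID'}
#   _demo_subst('@TASK_ID@ writes @OUTFILE@ for job __MY_JOB__', values, aliases)
#   -> 'GC1234 writes job_7.root for job 7'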
class HTCScheddSSH(HTCScheddCLIBase):
    adapterTypes = ["ssh", "gsissh"]
    _submitScale = 20
    _adapterMaxWait = 30

    def __init__(self, URI="", adapter=None, parentPool=None):
        HTCScheddCLIBase.__init__(self, URI=URI, adapter=adapter, parentPool=parentPool)
        self._stageDirCache = {}

    def getTimings(self):
        return utils.Result(waitOnIdle=60, waitBetweenSteps=10)

    def getJobsOutput(self, htcIDs):
        retrievedJobs = []
        for index, htcID in enumerate(htcIDs):
            self._log(logging.DEBUG3,
                "Retrieving job files (%d/%d): %s" % (index, len(htcIDs), htcID.gcJobNum))
            getProcess = self._adapter.LoggedGet(self.getStagingDir(htcID),
                self.parentPool.getSandboxPath(htcID.jobNum))
            if getProcess.wait(timeout=self._adapterMaxWait):
                getProcess.logError(self.parentPool.errorLog, brief=True)
                self._log(logging.INFO1, "Retrieval failed for job %d." % htcID.gcJobNum)
            else:
                retrievedJobs.append(htcID)
            try:
                self.cleanStagingDir(htcID=htcID)
            except Exception:
                self._log(logging.DEFAULT, 'Unable to clean staging dir')
        # clean up task dir if no job(dir)s remain
        try:
            statProcess = self._adapter.LoggedExecute(
                'find %s -maxdepth 1 -type d | wc -l' % self.getStagingDir(taskID=htcIDs[0].gctaskID))
            if statProcess.wait(timeout=self._adapterMaxWait):
                statProcess.logError(self.parentPool.errorLog, brief=True)
                raise BackendError('Failed to check remote dir for cleanup : %s @ %s' %
                    (self.getStagingDir(taskID=htcIDs[0].gctaskID), self.getDomain()))
            elif int(statProcess.getOutput()) == 1:
                self.cleanStagingDir(taskID=htcIDs[0].gctaskID)
        except Exception:
            self._log(logging.DEFAULT, 'unable to clean task dir')
        return retrievedJobs

    def _prepareSubmit(self, task, jobNumList, queryArguments):
        localJdlFilePath = os.path.join(self.parentPool.getSandboxPath(),
            'htc-%s.schedd-%s.jdl' % (self.parentPool.wmsName, md5(self.getURI()).hexdigest()))
        readyJobNumList = self._stageSubmitFiles(task, jobNumList)
        utils.safeWrite(open(localJdlFilePath, 'w'),
            lmap(lambda line: line + '\n', self._getJDLData(task, readyJobNumList, queryArguments)))
        raise NotImplementedError('JDL must get moved to remote')
        return localJdlFilePath

    def _getJDLData(self, task, jobNumList, queryArguments):
        taskFiles, proxyFile, jobFileMap = self._getSubmitFileMap(task, jobNumList)
        jdlData = self._getBaseJDLData(task, queryArguments)
        jdlData.extend([
            'Executable = %s' % taskFiles[0][2],
        ])
        if proxyFile:
            jdlData.extend([
                'use_x509userproxy = True',
                'x509userproxy = %s' % proxyFile[2],
            ])
        for jobNum in jobNumList:
            jdlData.extend(self._getRequirementJdlData(task, jobNum))
            jobStageDir = self.getStagingDir(htcID=HTCJobID(gcJobNum=jobNum, gcTaskID=task.taskID))
            jdlData.extend([
                '+GcJobNum = "%s"' % jobNum,
                'arguments = %s' % jobNum,
                'initialdir = %s' % jobStageDir,
                'Output = %s' % os.path.join(jobStageDir, 'gc.stdout'),
                'Error = %s' % os.path.join(jobStageDir, 'gc.stderr'),
                # HACK: ignore executable (In[0]), stdout (Out[0]) and stderr (Out[1])
                'transfer_input_files = %s' % ','.join(
                    [schd for descr, gc, schd in taskFiles[1:] + jobFileMap[jobNum]]),
                'transfer_output_files = %s' % ','.join(
                    [src for descr, src, trg in self.parentPool._getSandboxFilesOut(task)[2:]]),
                '+rawID = "%s"' % HTCJobID(gcJobNum=jobNum, gcTaskID=task.taskID,
                    clusterID='$(Cluster)', procID='$(Process)',
                    scheddURI=self.getURI(), typed=False).rawID,
            ])
        return jdlData

    # internal interfaces for HTC Pool/Schedds
    def _getSubmitFileMap(self, task, jobNumList):
        """
        Get listed files for submission

        Returns:
        taskFiles    iterable as (descr, gcPath, scheddPath)
                     files shared by all jobs
        jobFileMap   map of jobNum to iterable as (descr, gcPath, scheddPath)
                     files per individual job
        """
        taskFiles = []
        def mapSBFiles(descr, path, base):
            return (descr, path, os.path.join(self.getStagingDir(taskID=task.taskID), base))
        taskFiles.extend(ismap(mapSBFiles, self.parentPool._getSandboxFilesIn(task)))
        proxyFile = ()
        try:
            for authFile in self.parentPool.proxy.getauthFiles():
                proxyFile = ('User Proxy', authFile, os.path.join(
                    self.getStagingDir(taskID=task.taskID), os.path.basename(authFile)))
        except Exception:
            clear_current_exception()
        jobFileMap = {}
        for jobNum in jobNumList:
            jcFull, jcBase = self.getJobCfgPath(jobNum)
            jobFileMap[jobNum] = [('Job Config %d' % jobNum, jcFull,
                os.path.join(self.getStagingDir(taskID=task.taskID), jcBase))]
        return taskFiles, proxyFile, jobFileMap

    def _stageSubmitFiles(self, task, jobNumList):
        """
        Stage submission files at scheduler.
        """
        taskFiles, proxyFile, jobFileMap = self._getSubmitFileMap(task, jobNumList)
        self._log(logging.DEBUG1, "Staging task files.")
        stagedJobs = []
        if proxyFile:
            taskFiles.append(proxyFile)
        for index, fileInfoBlob in enumerate(taskFiles):
            self._log(logging.DEBUG3,
                "Staging task files (%d/%d): %s" % (index, len(taskFiles), fileInfoBlob[0]))
            putProcess = self._adapter.LoggedPut(fileInfoBlob[1], fileInfoBlob[2])
            if putProcess.wait(timeout=self._adapterMaxWait):
                putProcess.logError(self.parentPool.errorLog, brief=True)
                self._log(logging.INFO1,
                    "Staging failure for '%s'. Aborting submit." % fileInfoBlob[0])
                return stagedJobs
        for jobNum, jobFiles in jobFileMap.items():
            try:
                for fileInfoBlob in jobFiles:
                    self._log(logging.DEBUG3, "Staging job files: %s" % fileInfoBlob[0])
                    putProcess = self._adapter.LoggedPut(fileInfoBlob[1], fileInfoBlob[2])
                    if putProcess.wait(timeout=self._adapterMaxWait):
                        putProcess.logError(self.parentPool.errorLog, brief=True)
                        try:
                            self.cleanStagingDir(htcID=HTCJobID(jobNum, task.taskID))
                        except Exception:
                            self._log(logging.INFO1, 'unable to clean staging dir')
                        raise BackendError
            except BackendError:
                continue
            else:
                stagedJobs.append(jobNum)
        return stagedJobs

    def _getStagingToken(self, htcID=None, taskID=None):
        """Construct the key for a staging directory"""
        try:
            return 'taskID.%s/job_%s' % (htcID.gctaskID, htcID.gcJobNum)
        except AttributeError:
            if taskID:
                return 'taskID.%s' % taskID
        return ''
    _getStagingToken = lru_cache(_getStagingToken)

    def getStagingDir(self, htcID=None, taskID=None):
        token = self._getStagingToken(htcID=htcID, taskID=taskID)
        try:
            return self._stageDirCache[token]
        except KeyError:
            stageDirBase = os.path.join('GC.work', token, '')
        stageDirPath = self._adapter.getDomainAbsPath(stageDirBase)
        # -m 744 -> rwxr--r--
        mkdirProcess = self._adapter.LoggedExecute("mkdir -m 744 -p", stageDirPath)
        if mkdirProcess.wait(timeout=self._adapterMaxWait):
            mkdirProcess.logError(self.parentPool.errorLog, brief=True)
            raise BackendError('Failed to create remote dir : %s @ %s' %
                (stageDirPath, self.getDomain()))
        self._stageDirCache[token] = stageDirPath
        return stageDirPath

    def cleanStagingDir(self, htcID=None, taskID=None):
        token = self._getStagingToken(htcID=htcID, taskID=taskID)
        try:
            stageDirPath = self.getStagingDir(htcID=htcID, taskID=taskID)
        except BackendError:
            return
        rmdirProcess = self._adapter.LoggedExecute("rm -rf", stageDirPath)
        if rmdirProcess.wait(timeout=self._adapterMaxWait):
            rmdirProcess.logError(self.parentPool.errorLog, brief=True)
            raise BackendError('Failed to clean remote dir : %s @ %s' %
                (stageDirPath, self.getDomain()))
        del self._stageDirCache[token]
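
# The scheduler above keys every remote staging directory by a token of the form
# 'taskID.<task>/job_<num>' (or just 'taskID.<task>' for task-level files) under a 'GC.work'
# base, and caches the resolved path so the remote 'mkdir -p' runs only once per token.
# Below is a small self-contained sketch of that bookkeeping pattern; the class and the
# make_dir callable are illustrative stand-ins, not grid-control code.
import os

class _DemoStageDirCache:
    def __init__(self, make_dir):
        self._make_dir = make_dir  # callable that creates the directory (remote or local)
        self._cache = {}

    def _token(self, task_id, job_num=None):
        if job_num is not None:
            return 'taskID.%s/job_%s' % (task_id, job_num)
        return 'taskID.%s' % task_id

    def get(self, task_id, job_num=None):
        token = self._token(task_id, job_num)
        if token not in self._cache:
            path = os.path.join('GC.work', token, '')
            self._make_dir(path)  # only executed on the first request for this token
            self._cache[token] = path
        return self._cache[token]

# _DemoStageDirCache(make_dir=lambda p: None).get('GC1234', 7) -> 'GC.work/taskID.GC1234/job_7/'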
def getVersion():
    try:
        proc_ver = LocalProcess('svnversion', '-c', pathPKG())
        version = proc_ver.get_output(timeout=10).strip()
        if version != '':
            assert any(imap(str.isdigit, version))
            proc_branch = LocalProcess('svn info', pathPKG())
            if 'stable' in proc_branch.get_output(timeout=10):
                return '%s - stable' % version
            return '%s - testing' % version
    except Exception:
        pass
    return __import__('grid_control').__version__ + ' or later'
getVersion = lru_cache(getVersion)


def wait(timeout):
    shortStep = lmap(lambda x: (x, 1), irange(max(timeout - 5, 0), timeout))
    for x, w in lmap(lambda x: (x, 5), irange(0, timeout - 5, 5)) + shortStep:
        if abort():
            return False
        log = ActivityLog('waiting for %d seconds' % (timeout - x))
        time.sleep(w)
        del log
    return True


class ActivityLog:
    class Activity:
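
# wait() above sleeps in coarse 5 second chunks and switches to 1 second chunks for the
# final five seconds, so the ActivityLog countdown stays responsive near the end. The helper
# below (illustrative only, plain list comprehensions instead of lmap/irange) builds the
# same (elapsed-marker, sleep-duration) schedule that the loop iterates over.
def _demo_wait_schedule(timeout):
    shortStep = [(x, 1) for x in range(max(timeout - 5, 0), timeout)]
    return [(x, 5) for x in range(0, timeout - 5, 5)] + shortStep

# _demo_wait_schedule(12) -> [(0, 5), (5, 5), (7, 1), (8, 1), (9, 1), (10, 1), (11, 1)]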
class SSHProcessAdapter(ProcessAdapterInterface):
    uriScheme = ["ssh"]
    uriRepr = "[ssh://][<user>@]<host>[:<port>][/<path>]"

    def __init__(self, URI, **kwargs):
        """
        Required:
        URI           string
            remote target identity as [ssh://][<user>@]<host>[:<port>][/<path>]

        Optional:
        retryLimit    int
            limit for any failures before the connection is defunct
        needSocket    bool
            reject connection if socket cannot be established
        socketDir     string
            location for placing socket
        socketMinSec  float/int
            minimum remaining lifetime of socket before refresh
        socketCount   int
            maximum number of sockets in use
        """
        ProcessAdapterInterface.__init__(self, URI, **kwargs)
        (_, self._user, self._host, self._port, self._basepath) = self.resolveURI(URI, **kwargs)
        self._initInterfaces(**kwargs)
        self._initSockets(**kwargs)
        # always clean up on termination, even outside of context
        atexit.register(self.__exit__, None, None, None)
        # test connection once before usage
        self._validateConnection()
        self._basepath = self._basepath or self.LoggedExecute("pwd").getOutput().strip()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._log(logging.DEBUG1, "Exiting context for URI '%s'" % self.URI)
        for socket in self._socketProcs:
            if self._socketProcs[socket].poll() < 0:
                self._socketProcs[socket].kill()
                self._log(logging.DEBUG3, 'Terminated master for socket %s - PID: %s' %
                    (socket, self._socketProcs[socket].proc.pid))
        time.sleep(0.2)  # delay for sockets to disappear before further cleanup
        shutil.rmtree(self._socketDir)

    # Logged Processes
    def LoggedExecute(self, command, args='', niceCmd=None, niceArgs=None):
        return LoggedProcess(
            self._exeWrapper.cmd,
            args=self._exeWrapper.args(command=command, args=args),
            niceCmd=self._exeWrapper.niceCmd(command=(niceCmd or command)),
            niceArgs=self._exeWrapper.niceArg(args=(niceArgs or args)),
            shell=False,
        )

    def LoggedGet(self, source, destination):
        return LoggedProcess(
            self._copy.cmd,
            self._copy.args(source=self.getGlobalAbsPath(source), destination=destination),
            niceCmd=self._copy.niceCmd(),
            niceArgs=self._copy.niceArg(source=self.getGlobalAbsPath(source),
                destination=destination),
            shell=False,
        )

    def LoggedPut(self, source, destination):
        return LoggedProcess(
            self._copy.cmd,
            self._copy.args(source=source, destination=self.getGlobalAbsPath(destination)),
            niceCmd=self._copy.niceCmd(),
            niceArgs=self._copy.niceArg(source=source,
                destination=self.getGlobalAbsPath(destination)),
            shell=False,
        )

    def LoggedDelete(self, target):
        return LoggedProcess(
            self._delete.cmd,
            self._delete.args(target=target),
            niceCmd=self._delete.niceCmd(),
            niceArgs=self._delete.niceArg(target=target),
            shell=False,
        )

    def getDomain(self):
        return self._host

    def getType(self):
        return "ssh"

    def getProtocol(self):
        return "ssh"

    def getLoopbackURI(self, _cachedURI={}):
        try:
            return _cachedURI[self]
        except KeyError:
            _cachedURI[self] = None
            remoteHostname = self.LoggedExecute('hostname').getOutput(wait=True).strip()
            localHostname = socket.gethostname().strip()
            remoteAddress = socket.gethostbyname(remoteHostname)
            localAddress = socket.gethostbyname(localHostname)
            self._log(logging.DEBUG1,
                "Checking host/IP for loopback - local: '%s/%s', remote: '%s/%s'" %
                (localHostname, localAddress, remoteHostname, remoteAddress))
            if socket.gethostbyname(remoteHostname) == socket.gethostbyname(localHostname):
                _cachedURI[self] = LocalProcessAdapter.createURI({
                    'user': self._user,
                    'host': self._host,
                    'port': self._port,
                    'path': self._basepath,
                })
            return _cachedURI[self]

    def LoggedSocket(self, command="", args='', niceCmd=None, niceArgs=None):
        return LoggedProcess(
            self._socketWrapper.cmd,
            args=self._socketWrapper.args(command=command, args=args),
            niceCmd=self._socketWrapper.niceCmd(command=(niceCmd or command)),
            niceArgs=self._socketWrapper.niceArg(args=(niceArgs or args)),
            shell=False,
        )

    # general internal functions
    def resolveURI(cls, URI, **kwargs):
        reMatch = re.search(r'(?:(\w*)://)?(?:(\w*)@)?([\w.-]*)(?::(\d*))?(?:/(.*))?(.*)', URI)
        if not reMatch:
            raise ValueError("URI %s could not be parsed" % URI)
        (scheme, user, host, port, path, leftover) = reMatch.group(1, 2, 3, 4, 5, 6)
        cls._log(logging.DEBUG1, "Resolved URI '%s' as %s" % (URI, {
            'scheme': scheme, 'user': user, 'host': host,
            'port': port, 'path': path, 'remainder': leftover}))
        if scheme and (scheme not in cls.uriScheme):
            raise ValueError("Got URI of scheme '%s', expected '%s'." %
                (scheme, "' or '".join(cls.uriScheme)))
        if leftover:
            raise ValueError("URI %s yielded unexpected leftover '%s'. Expected URI form %s." %
                (URI, leftover, cls.uriRepr))
        if not host:
            raise ValueError("URI %s yielded no hostname. Expected URI form %s." %
                (URI, cls.uriRepr))
        return (scheme, user, host, port, path)
    resolveURI = classmethod(resolveURI)

    def _initInterfaces(self, **kwargs):
        def makeArgList(*args):
            # flatten the argument list: strings stay whole, other iterables are expanded,
            # empty entries are dropped
            argList = []
            for arg in args:
                if isinstance(arg, str) or not hasattr(arg, '__iter__'):
                    argList.append(arg)
                else:
                    argList.extend(arg)
            return [arg for arg in argList if arg]
        portArgs = lambda key: self._port and "-%s%s" % (key, self._port) or ""
        self._exeWrapper = CommandContainer(
            resolveInstallPath("ssh"),
            lambda **kwargs: makeArgList(self._getDefaultArgs(), self._getValidSocketArgs(),
                portArgs('p'), self._host,
                " ".join((kwargs["command"], kwargs.get("args", '')))),
            lambda **kwargs: "'%(command)s' [via ssh %(URI)s]" % {
                "command": kwargs.get("command", "<undefined command>"),
                "URI": self.URI,
            },
            lambda **kwargs: kwargs.get('args') and "Arguments: '%s'" % kwargs.get('args') or '')
        self._copy = CommandContainer(
            resolveInstallPath("scp"),
            lambda **kwargs: makeArgList(self._getDefaultArgs(), self._getValidSocketArgs(),
                "-r", portArgs('P'), kwargs["source"], kwargs["destination"]),
            lambda **kwargs: "'scp' [%s]" % self.URI,
            lambda **kwargs: "Transfer: '%(source)s' -> '%(destination)s'" % kwargs,
        )
        self._delete = CommandContainer(
            resolveInstallPath("ssh"),
            lambda **kwargs: makeArgList(self._getDefaultArgs(), self._getValidSocketArgs(),
                portArgs('p'), self._host, "rm -rf " + kwargs["target"]),
            lambda **kwargs: "'rm' [via ssh %s]" % self.URI,
            lambda **kwargs: "Target: '%(target)s'" % kwargs,
        )
        self._socketWrapper = CommandContainer(
            resolveInstallPath("ssh"),
            lambda **kwargs: makeArgList(self._getDefaultArgs(), self._getCurrentSocketArgs(),
                portArgs('p'), self._host,
                " ".join((kwargs["command"], kwargs.get("args", '')))),
            lambda **kwargs: "'%(command)s' [via ssh %(URI)s (master)]" % {
                "command": kwargs.get("command", "<undefined command>"),
                "URI": self.URI,
            },
            lambda **kwargs: kwargs.get('args') and "Arguments: '%s'" % kwargs.get('args') or '')

    # Interface specific internal methods
    def _initSockets(self, **kwargs):
        self._needSocket = kwargs.get("needSocket", True)
        self._socketMinSec = kwargs.get("socketMinSec", 300)
        self._socketCount = max(2, kwargs.get("socketCount", 2))
        self._socketIndex = 0
        self._socketMaxMiss = kwargs.get("socketMaxMiss", 2)
        self._socketMisses = 0
        # sockets should reside in secure, managed directory
        if kwargs.get("socketDir", "") and len(kwargs.get("socketDir")) < 105:
            self._socketDir = kwargs.get("socketDir")
            ensureDirExists(self._socketDir, name="SSH connection socket container directory")
        else:
            self._socketDir = tempfile.mkdtemp()
        self._log(logging.DEBUG1, 'Using socket directory %s' % self._socketDir)
        # create list of socket names and corresponding arguments to rotate through
        self._socketList = [os.path.join(self._socketDir, str(socketIndex))
            for socketIndex in irange(self._socketCount)]
        self._socketArgList = [["-oControlMaster=auto", "-oControlPath=%s" % socket]
            for socket in self._socketList]
        self._socketProcs = {}

    def _incrementSocket(self):
        self._socketIndex = (self._socketIndex + 1) % self._socketCount

    def _getCurrentSocket(self):
        return self._socketList[self._socketIndex]

    def _getCurrentSocketArgs(self):
        return self._socketArgList[self._socketIndex]

    def _getValidSocketArgs(self):
        if self._socketMisses >= self._socketMaxMiss:
            self._socketMisses -= 1
            return []
        # validate that current socket does exist and is fresh enough, else pick next
        try:
            if (time.time() - os.path.getctime(self._getCurrentSocket())) > self._socketMinSec:
                raise OSError
        except OSError:
            self._incrementSocket()
        while not self._validateControlMaster():
            self._socketMisses += 1
            if not self._needSocket:
                self._log(logging.INFO3, 'Failed to validate socket. (%d/%d)' %
                    (self._socketMisses, self._socketMaxMiss))
                if self._socketMisses == self._socketMaxMiss:
                    self._socketMisses += self._socketMaxMiss
                    self._log(logging.INFO2, 'Disabling failing sockets for %d operations.' %
                        self._socketMaxMiss)
                    return []
            if self._socketMisses == self._socketMaxMiss:
                raise BackendError("Repeated failure to create ControlMaster.")
        self._socketMisses = max(self._socketMisses - 1, 0)
        return self._getCurrentSocketArgs()

    def _validateControlMaster(self, timeout=20):
        # socket already exists, so Master is fresh or undying
        if os.path.exists(self._getCurrentSocket()):
            return True
        # create dummy background process, encapsulating sleep to stay alive regardless of SSH version
        socketProcess = self.LoggedSocket("sleep",
            "%d" % ((self._socketCount - 0.5) * self._socketMinSec))
        # validate socket exists
        waitTime = 0
        while not os.path.exists(self._getCurrentSocket()):
            if socketProcess.poll() > 0:
                self._log(logging.DEBUG1,
                    "Failure on ControlMaster socket creation [code: %s]." % socketProcess.poll())
                if self._errorLog:
                    socketProcess.logError(self._errorLog)
                return False
            time.sleep(0.5)
            waitTime += 0.5
            if waitTime == timeout:
                self._log(logging.DEBUG1,
                    "Timeout (%ds) on ControlMaster socket creation." % timeout)
                socketProcess.kill()
                if self._errorLog:
                    socketProcess.logError(self._errorLog)
                return False
        self._socketProcs[self._getCurrentSocket()] = socketProcess
        return True

    # Helper functions for SSH connections
    def _getDefaultArgs(self):
        """Provide arguments for ssh container"""
        argList = ["-v", "-oBatchMode=yes", "-oForwardX11=no"]
        return argList

    def getGlobalAbsPath(self, path):
        abspath = (self._user and self._user + "@" or "") + self._host + ":" + self.getDomainAbsPath(path)
        return abspath
    getGlobalAbsPath = lru_cache(getGlobalAbsPath)
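
# resolveURI() above parses SSH target strings with a single regular expression. The helper
# below applies the same expression outside the class so the group layout is easy to see;
# the function name and example URI are illustrative assumptions.
import re

def _demo_parse_ssh_uri(uri):
    match = re.search(r'(?:(\w*)://)?(?:(\w*)@)?([\w.-]*)(?::(\d*))?(?:/(.*))?(.*)', uri)
    scheme, user, host, port, path, leftover = match.group(1, 2, 3, 4, 5, 6)
    return {'scheme': scheme, 'user': user, 'host': host,
        'port': port, 'path': path, 'leftover': leftover}

# _demo_parse_ssh_uri('ssh://griduser@portal.example.org:2222/work/gc')
# -> {'scheme': 'ssh', 'user': 'griduser', 'host': 'portal.example.org',
#     'port': '2222', 'path': 'work/gc', 'leftover': ''}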
class ProcessAdapterInterface(Plugin):
    uriScheme = []
    _basepath = ""
    # Default loggers
    _logger = logging.getLogger('process.adapter')
    _log = _logger.log

    # python internals
    def __init__(self, URI, **kwargs):
        self.URI = URI
        self._errorLog = kwargs.get('errorLog')
        self._initLogger(**kwargs)
        self._log(logging.INFO1,
            'Establishing process adapter of type %s' % self.__class__.__name__)

    def __enter__(self):
        raise NotImplementedError

    def __exit__(self, exc_type, exc_value, traceback):
        raise NotImplementedError

    # public interfaces
    def LoggedExecute(self, command, args='', niceCmd=None, niceArgs=None):
        """Execute a command via the adapter shell"""
        raise AbstractError

    def LoggedGet(self, source, destination):
        """Move a source file/folder from the adapter domain to the local domain destination"""
        raise AbstractError

    def LoggedPut(self, source, destination):
        """Move a source file/folder from the local domain to the adapter domain destination"""
        raise AbstractError

    def LoggedDelete(self, targets):
        """Delete a file/folder in the adapter domain"""
        raise AbstractError

    def getDomain(self):
        """Get a descriptive representation of the adapter domain"""
        raise AbstractError

    def getType(self):
        """Get a descriptive representation of the adapter interfacing method"""
        raise AbstractError

    def getProtocol(self):
        """Get a descriptive representation of the adapter interfacing protocol"""
        raise AbstractError

    def isLoopback(self):
        """Check if this adapter is a loopback."""
        return bool(self.getLoopbackURI())

    def getLoopbackURI(self):
        """Return a local URI if this adapter is a loopback"""
        raise AbstractError

    def getDomainAbsPath(self, path):
        """Translate any path to an absolute one in the adapter domain"""
        abspath = path
        if not path.startswith("/") and self._basepath:
            abspath = self._basepath + "/" + abspath
        return abspath
    getDomainAbsPath = lru_cache(getDomainAbsPath)

    def getGlobalAbsPath(self, path):
        """Translate any path to an absolute one in the executing GC domain"""
        raise AbstractError

    # general internal functions
    def resolveURI(cls, URI=None, **kwargs):
        """
        Extract the adapter information for a given URI

        raises ValueError if the URI is not compatible
        """
        raise ValueError
    resolveURI = classmethod(resolveURI)

    def _initInterfaces(self, **kwargs):
        raise AbstractError

    def _initLogger(cls, **kwargs):
        cls._logger = logging.getLogger('process.adapter.%s' % cls.__name__)
        cls._log = cls._logger.log
    _initLogger = classmethod(_initLogger)

    def _validateConnection(self):
        """
        Test the connection of this adapter

        raises BackendError if the connection exits unsuccessfully
        raises InstallationError if stdout is not clean
        """
        self._log(logging.INFO2, "Validating adapter for URI '%s'" % self.URI)
        testProcess = self.LoggedExecute("exit 0")
        stdProcess = self.LoggedExecute("echo stdout; echo stderr >&2; exit 0")
        for proc in [testProcess, stdProcess]:
            if proc.wait() != os.EX_OK:
                if self._errorLog:
                    proc.logError(self._errorLog)
                raise BackendError("Failure when validating connection to '%s'." % self.getDomain())
        if len(testProcess.getOutput()) != 0 or stdProcess.getOutput() != "stdout\n":
            raise InstallationError(
                "Output of processes from adapter for URI '%s' is either muted or polluted." % self.URI)
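
# _validateConnection() above accepts an adapter only if a trivial command exits cleanly and
# stdout comes back neither muted nor mixed with stderr. The snippet below reproduces that
# check with a plain local subprocess as a stand-in for the remote adapter; it is a sketch
# of the contract, not part of the adapter code.
import subprocess

def _demo_check_clean_io():
    proc = subprocess.Popen(['sh', '-c', 'echo stdout; echo stderr >&2; exit 0'],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, _ = proc.communicate()
    if proc.returncode != 0:
        raise RuntimeError('connection test failed')
    return stdout == b'stdout\n'  # True -> stdout is neither muted nor polluted

# _demo_check_clean_io() -> True on a POSIX shell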