def _resolveInputSandbox(self, inputSandbox):
    """Resolve wildcards in the input sandbox file list.

    Plain files are kept as-is; entries containing '*' are expanded via
    ``ls -d``; directories (matched by a wildcard or given explicitly)
    are packed into ``<dirname>.tar.gz`` which is appended instead.
    This is currently linux specific (relies on ``ls`` and ``tar``).

    :param inputSandbox: iterable of sandbox entries (paths, possibly
                         containing shell wildcards)
    :return: list of resolved file / tarball names
    """
    resolvedIS = []
    # Pass 1: keep plain files (no wildcard, not a directory) untouched.
    for entry in inputSandbox:
        if not re.search(r'\*', entry) and not os.path.isdir(entry):
            resolvedIS.append(entry)

    for name in inputSandbox:
        if re.search(r'\*', name):
            # Expand the wildcard with the shell.
            cmd = 'ls -d ' + name
            output = shellCall(10, cmd)
            if not output['OK']:
                self.log.error('Could not perform: ', cmd)
            elif output['Value'][0]:
                self.log.error(" Failed getting the files ", output['Value'][2])
            else:
                files = output['Value'][1].split()
                for check in files:
                    if os.path.isfile(check):
                        self.log.verbose('Found file ' + check + ' appending to Input Sandbox')
                        resolvedIS.append(check)
                    if os.path.isdir(check):
                        if re.search('/$', check):
                            # users can specify e.g. /my/dir/lib/
                            check = check[:-1]
                        tarName = os.path.basename(check)
                        directory = os.path.dirname(check)  # if just the directory this is null
                        if directory:
                            cmd = 'tar cfz ' + tarName + '.tar.gz ' + ' -C ' + directory + ' ' + tarName
                        else:
                            cmd = 'tar cfz ' + tarName + '.tar.gz ' + tarName
                        output = shellCall(60, cmd)
                        if not output['OK']:
                            self.log.error('Could not perform: %s' % (cmd))
                        else:
                            # Fixed: only append the tarball when tar succeeded,
                            # consistent with the explicit-directory branch below.
                            resolvedIS.append(tarName + '.tar.gz')
                            self.log.verbose('Found directory ' + check + ', appending ' + check + '.tar.gz to Input Sandbox')

        if os.path.isdir(name):
            self.log.verbose('Found specified directory ' + name + ', appending ' + name + '.tar.gz to Input Sandbox')
            if re.search('/$', name):
                # users can specify e.g. /my/dir/lib/
                name = name[:-1]
            tarName = os.path.basename(name)
            directory = os.path.dirname(name)  # if just the directory this is null
            if directory:
                cmd = 'tar cfz ' + tarName + '.tar.gz ' + ' -C ' + directory + ' ' + tarName
            else:
                cmd = 'tar cfz ' + tarName + '.tar.gz ' + tarName
            output = shellCall(60, cmd)
            if not output['OK']:
                self.log.error('Could not perform: %s' % (cmd))
            else:
                resolvedIS.append(tarName + '.tar.gz')
    return resolvedIS
def getDiskSpace( path = '.' ): """ Get the free disk space in the partition containing the path. The disk space is reported in MBytes. Returned 0 in case of any error, e.g. path does not exist """ if not os.path.exists( path ): return -1 comm = 'df -P -m %s | tail -1' % path resultDF = shellCall( 10, comm ) if resultDF['OK'] and not resultDF['Value'][0]: output = resultDF['Value'][1] if output.find( ' /afs' ) >= 0 : # AFS disk space comm = 'fs lq | tail -1' resultAFS = shellCall( 10, comm ) if resultAFS['OK'] and not resultAFS['Value'][0]: output = resultAFS['Value'][1] fields = output.split() quota = long( fields[1] ) used = long( fields[2] ) space = ( quota - used ) / 1024 return int( space ) else: return -1 else: fields = output.split() try: value = int( fields[3] ) except Exception, error: print "Exception during disk space evaluation:", str( error ) value = -1 return value
def getDiskSpace(path='.'): """ Get the free disk space in the partition containing the path. The disk space is reported in MBytes. Returned 0 in case of any error, e.g. path does not exist """ if not os.path.exists(path): return -1 comm = 'df -P -m %s | tail -1' % path resultDF = shellCall(10, comm) if resultDF['OK'] and not resultDF['Value'][0]: output = resultDF['Value'][1] if output.find(' /afs') >= 0: # AFS disk space comm = 'fs lq | tail -1' resultAFS = shellCall(10, comm) if resultAFS['OK'] and not resultAFS['Value'][0]: output = resultAFS['Value'][1] fields = output.split() quota = long(fields[1]) used = long(fields[2]) space = (quota - used) / 1024 return int(space) else: return -1 else: fields = output.split() try: value = int(fields[3]) except Exception, error: print "Exception during disk space evaluation:", str(error) value = -1 return value
def _resolveInputSandbox( self, inputSandbox ):
  """ Internal function. Resolves wildcards for input sandbox files.

      Plain files are kept as-is; entries containing '*' are expanded
      via ``ls -d``; directories (matched by a wildcard or given
      explicitly) are packed into ``<dirname>.tar.gz`` which is
      appended instead. This is currently linux specific and should be
      modified.

      :param inputSandbox: iterable of sandbox entries (paths, possibly
                           containing shell wildcards)
      :return: list of resolved file / tarball names
  """
  resolvedIS = []
  # Pass 1: keep plain files (no wildcard, not a directory) untouched.
  for i in inputSandbox:
    if not re.search( r'\*', i ) and not os.path.isdir( i ):
      resolvedIS.append( i )

  for name in inputSandbox:
    if re.search( r'\*', name ):
      # Expand the wildcard with the shell.
      cmd = 'ls -d ' + name
      output = shellCall( 10, cmd )
      if not output['OK']:
        self.log.error( 'Could not perform: ', cmd )
      elif output['Value'][0]:
        self.log.error(" Failed getting the files ", output['Value'][2])
      else:
        files = output['Value'][1].split()
        for check in files:
          if os.path.isfile( check ):
            self.log.verbose( 'Found file ' + check + ' appending to Input Sandbox' )
            resolvedIS.append( check )
          if os.path.isdir( check ):
            if re.search( '/$', check ):
              # users can specify e.g. /my/dir/lib/
              check = check[:-1]
            tarname = os.path.basename( check )
            directory = os.path.dirname( check )  # if just the directory this is null
            if directory:
              cmd = 'tar cfz ' + tarname + '.tar.gz ' + ' -C ' + directory + ' ' + tarname
            else:
              cmd = 'tar cfz ' + tarname + '.tar.gz ' + tarname
            output = shellCall( 60, cmd )
            if not output['OK']:
              self.log.error( 'Could not perform: %s' % ( cmd ) )
            else:
              # Fixed: only append the tarball when tar succeeded,
              # consistent with the explicit-directory branch below.
              resolvedIS.append( tarname + '.tar.gz' )
              self.log.verbose( 'Found directory ' + check + ', appending ' + check + '.tar.gz to Input Sandbox' )

    if os.path.isdir( name ):
      self.log.verbose( 'Found specified directory ' + name + ', appending ' + name + '.tar.gz to Input Sandbox' )
      if re.search( '/$', name ):
        # users can specify e.g. /my/dir/lib/
        name = name[:-1]
      tarname = os.path.basename( name )
      directory = os.path.dirname( name )  # if just the directory this is null
      if directory:
        cmd = 'tar cfz ' + tarname + '.tar.gz ' + ' -C ' + directory + ' ' + tarname
      else:
        cmd = 'tar cfz ' + tarname + '.tar.gz ' + tarname
      output = shellCall( 60, cmd )
      if not output['OK']:
        self.log.error( 'Could not perform: %s' % ( cmd ) )
      else:
        resolvedIS.append( tarname + '.tar.gz' )
  return resolvedIS
def submitJob(self, executableFile, proxy, numberOfJobs=1):
    """Submit one or more pilot jobs to the Torque batch system via qsub.

    Method to submit job, should be overridden in sub-class.

    :param executableFile: path to the executable (pilot) script
    :param proxy: proxy string to ship with the job; if empty/None the
                  executable is submitted directly
    :param numberOfJobs: how many identical jobs to submit
    :return: S_OK(list of batch job ID strings) or S_ERROR on any failure
    """
    self.log.info("Executable file path: %s" % executableFile)
    # Make sure the script is readable and executable (5 == R_OK | X_OK).
    if not os.access(executableFile, 5):
        os.chmod(executableFile, 0755)

    # Perform any other actions from the site admin.
    if self.ceParameters.has_key('AdminCommands'):
        commands = self.ceParameters['AdminCommands'].split(';')
        for command in commands:
            self.log.verbose('Executing site admin command: %s' % command)
            result = shellCall(30, command, callbackFunction=self.sendOutput)
            # A failed shell call or a non-zero exit status aborts the submission.
            if not result['OK'] or result['Value'][0]:
                self.log.error('Error during "%s":' % command, result)
                return S_ERROR('Error executing %s CE AdminCommands' % CE_NAME)

    # If no proxy is supplied, the executable can be submitted directly;
    # otherwise a wrapper script is needed to get the proxy to the
    # execution node. The wrapper script makes debugging more complicated
    # and thus it is recommended to transfer a proxy inside the
    # executable if possible.
    if proxy:
        self.log.verbose('Setting up proxy for payload')
        submitFile = self.makeProxyExecutableFile(executableFile, proxy)
    else:  # no proxy
        submitFile = executableFile

    # Submit submitFile to the batch system.
    cmd = "qsub -o %(output)s -e %(error)s -q %(queue)s -N DIRACPilot %(executable)s" % \
        {'output': self.batchOutput,
         'error': self.batchError,
         'queue': self.queue,
         'executable': os.path.abspath(submitFile)}

    self.log.verbose('CE submission command: %s' % (cmd))

    batchIDList = []
    for i in range(numberOfJobs):
        result = shellCall(30, cmd)
        if not result['OK'] or result['Value'][0]:
            self.log.warn('===========>Torque CE result NOT OK')
            self.log.debug(result)
            return S_ERROR(result['Value'])
        else:
            self.log.debug('Torque CE result OK')
        # qsub prints the batch job ID on stdout.
        batchID = result['Value'][1].strip()
        batchIDList.append(batchID)
        self.submittedJobs += 1
    return S_OK(batchIDList)
def sudoExecute(self, executableFile, payloadProxy, payloadUsername, payloadUID, payloadGID):
    """Run sudo with checking of the exit status code.

    :param executableFile: wrapper script to run as the payload user
    :param payloadProxy: path to the payload's proxy file
    :param payloadUsername: unix account of the payload user
    :param payloadUID: numeric uid of the payload user
    :param payloadGID: numeric gid of the payload user's per-user group
    :return: result dict of the shell call; ['Value'] is
             (status, stdout, stderr). Statuses above 128 are wrapped
             in an S_ERROR carrying the same tuple.
    """
    # We now implement a file giveaway using groups, to avoid any need to
    # sudo to root. Each payload user must have their own group. The pilot
    # user must be a member of all of these groups. This allows the pilot
    # user to set the group of the payloadProxy file to be that of the
    # payload user. The payload user can then read it and make a copy of
    # it (/tmp/x509up_uNNNN) that it owns. Some grid commands check that
    # the proxy is owned by the current user so the copy stage is necessary.

    # 1) Make sure the payload user can read its proxy via its per-user group.
    os.chown(payloadProxy, -1, payloadGID)
    os.chmod(payloadProxy, stat.S_IRUSR + stat.S_IWUSR + stat.S_IRGRP)

    # 2) Now create a copy of the proxy owned by the payload user.
    result = shellCall(
        0,
        '/usr/bin/sudo -u %s sh -c "cp -f %s /tmp/x509up_u%d ; chmod 0400 /tmp/x509up_u%d"'
        % (payloadUsername, payloadProxy, payloadUID, payloadUID),
        callbackFunction=self.sendOutput,
    )

    # 3) Make sure the current directory is +rwx by the pilot's group
    #    (needed for InstallDIRAC but not for LHCbInstallDIRAC, for example).
    os.chmod(".", os.stat(".").st_mode | stat.S_IRWXG)

    # Run the executable (the wrapper in fact), forwarding the pilot's
    # environment variables explicitly through sudo.
    cmd = "/usr/bin/sudo -u %s " % payloadUsername
    cmd += "PATH=$PATH "
    cmd += "DIRACSYSCONFIG=/scratch/%s/pilot.cfg " % os.environ.get("USER", "")
    cmd += "LD_LIBRARY_PATH=$LD_LIBRARY_PATH "
    cmd += "PYTHONPATH=$PYTHONPATH "
    cmd += "X509_CERT_DIR=$X509_CERT_DIR "
    cmd += "X509_USER_PROXY=/tmp/x509up_u%d sh -c '%s'" % (payloadUID, executableFile)
    self.log.info("CE submission command is: %s" % cmd)
    self.runningJobs += 1
    result = shellCall(0, cmd, callbackFunction=self.sendOutput)
    self.runningJobs -= 1
    if not result["OK"]:
        # Normalize the failed result so callers can still unpack 'Value'.
        result["Value"] = (0, "", "")
        return result
    resultTuple = result["Value"]
    status = resultTuple[0]
    stdOutput = resultTuple[1]
    stdError = resultTuple[2]
    self.log.info("Status after the sudo execution is %s" % str(status))
    # Statuses above 128 indicate the command was killed by a signal
    # (status - 128) -- report these as errors.
    if status > 128:
        error = S_ERROR(status)
        error["Value"] = (status, stdOutput, stdError)
        return error
    return result
def submitJob( self, executableFile, proxy, numberOfJobs = 1 ):
  """Submit one or more pilot jobs to the Torque batch system via qsub.

     Method to submit job, should be overridden in sub-class.

     :param executableFile: path to the executable (pilot) script
     :param proxy: proxy string to ship with the job; if empty/None the
                   executable is submitted directly
     :param numberOfJobs: how many identical jobs to submit
     :return: S_OK( list of batch job ID strings ) or S_ERROR on failure
  """
  self.log.info( "Executable file path: %s" % executableFile )
  # Make sure the script is readable and executable (5 == R_OK | X_OK).
  if not os.access( executableFile, 5 ):
    os.chmod( executableFile, 0755 )

  # Perform any other actions from the site admin.
  if self.ceParameters.has_key( 'AdminCommands' ):
    commands = self.ceParameters['AdminCommands'].split( ';' )
    for command in commands:
      self.log.verbose( 'Executing site admin command: %s' % command )
      result = shellCall( 30, command, callbackFunction = self.sendOutput )
      # A failed shell call or a non-zero exit status aborts the submission.
      if not result['OK'] or result['Value'][0]:
        self.log.error( 'Error during "%s":' % command, result )
        return S_ERROR( 'Error executing %s CE AdminCommands' % CE_NAME )

  # If no proxy is supplied, the executable can be submitted directly;
  # otherwise a wrapper script is needed to get the proxy to the execution
  # node. The wrapper script makes debugging more complicated and thus it
  # is recommended to transfer a proxy inside the executable if possible.
  if proxy:
    self.log.verbose( 'Setting up proxy for payload' )
    submitFile = self.makeProxyExecutableFile( executableFile, proxy )
  else:  # no proxy
    submitFile = executableFile

  # Submit submitFile to the batch system.
  cmd = "qsub -o %(output)s -e %(error)s -q %(queue)s -N DIRACPilot %(executable)s" % \
      {'output': self.batchOutput,
       'error': self.batchError,
       'queue': self.queue,
       'executable': os.path.abspath( submitFile ) }

  self.log.verbose( 'CE submission command: %s' % ( cmd ) )

  batchIDList = []
  for i in range( numberOfJobs ):
    result = shellCall( 30, cmd )
    if not result['OK'] or result['Value'][0]:
      self.log.warn( '===========>Torque CE result NOT OK' )
      self.log.debug( result )
      return S_ERROR( result['Value'] )
    else:
      self.log.debug( 'Torque CE result OK' )
    # qsub prints the batch job ID on stdout.
    batchID = result['Value'][1].strip()
    batchIDList.append( batchID )
    self.submittedJobs += 1
  return S_OK( batchIDList )
def mysqlCleanUp(self):
    """Does mysql cleanup. Remove socket and tmpdir with mysql db.

    Called at the end of Mokka execution, whatever the status is.
    Shuts down the per-job mysqld via mysqladmin, waits for its socket
    to disappear, then removes the temporary database directory.
    """
    currentdir = os.getcwd()
    os.chdir(os.path.join(self.softDir, "mysql4grid"))
    self.log.verbose('clean up db')
    # Ask the private mysqld (listening on our per-job socket) to shut down.
    MySQLcleanUpComm = "mysqladmin --no-defaults -hlocalhost --socket=%s/mysql.sock -uroot -p%s shutdown" % (self.MokkaTMPDir, self.rootpass)
    self.result = shellCall(0, MySQLcleanUpComm, callbackFunction = self.redirectLogOutput, bufferLimit = 20971520)
    resultTuple = self.result['Value']
    status = resultTuple[0]
    self.log.info( "Status after the shutdown execution is %s" % str( status ) )
    ## kill mysql (kept for reference -- shutdown via mysqladmin is used instead)
    #mysqlkillcomm = "cat mysql.pid | kill -9 "#%(self.MokkaTMPDir)
    #mysqlkillcomm = "kill -9 %s"%(self.mysqldPID)
    #self.result = shellCall(0,mysqlkillcomm,callbackFunction=self.redirectLogOutput,bufferLimit=20971520)
    #resultTuple = self.result['Value']
    # Have to sleep for a while to let time for the socket to go away.
    sleepComm = """
while [ -n "$socket_grep" ] ; do
  socket_grep=$(netstat -ln 2>/dev/null | grep "%s/mysql.sock")
  echo -n .
  sleep 1
done
""" % (self.MokkaTMPDir)
    self.result = shellCall(0, sleepComm, callbackFunction = self.redirectLogOutput, bufferLimit = 20971520)
    os.chdir(currentdir)
    # NOTE(review): resultTuple still holds the *shutdown* result here, so the
    # wait-loop's own exit status is never inspected -- confirm this is intended.
    status = resultTuple[0]
    #self.log.info( "Status after the application execution is %s" % str( status ) )
    failed = False
    if status != 0:
        self.log.error( "MySQL-cleanup execution completed with errors:" )
        failed = True
    else:
        self.log.info( "MySQL-cleanup execution completed successfully")
    # cleanup script also removes tmp
    # NOTE(review): 'failed' is recorded but never returned or re-used below.
    if (os.path.exists(self.MokkaTMPDir)):
        try:
            self.log.verbose('Removing tmp dir')
            # Remove the whole DB root; best-effort (ignore_errors=True).
            shutil.rmtree(self.mokkaDBroot, True)
            #shutil.rmtree(self.MokkaTMPDir,True)
            #shutil.rmtree(self.MokkaDataDir,True)
        except OSError, (errno, strerror):
            self.log.error("I/O error(%s): %s" % (errno, strerror))
def __getFileMetadata(self, urls): gLogger.debug( "RFIOStorage.__getPathMetadata: Attempting to get additional metadata for %s files." % (len(urls))) # Check whether the files that exist are staged comm = "stager_qry -S %s" % self.spaceToken successful = {} for pfn in urls: successful[pfn] = {} comm = "%s -M %s" % (comm, pfn) res = shellCall(self.timeout, comm) if not res['OK']: errStr = "RFIOStorage.__getFileMetadata: Completely failed to get cached status." gLogger.error(errStr, res['Message']) return S_ERROR(errStr) else: _returncode, stdout, _stderr = res['Value'] for line in stdout.splitlines(): pfn = line.split()[0] status = line.split()[-1] if status in ['STAGED', 'CANBEMIGR']: successful[pfn]['Cached'] = True for pfn in urls: if 'Cached' not in successful[pfn]: successful[pfn]['Cached'] = False # Now for the files that exist get the tape segment (i.e. whether they have been migrated) and related checksum comm = "nsls -lT --checksum" for pfn in urls: comm = "%s %s" % (comm, pfn) res = shellCall(self.timeout, comm) if not res['OK']: errStr = "RFIOStorage.__getFileMetadata: Completely failed to get migration status." gLogger.error(errStr, res['Message']) return S_ERROR(errStr) else: _returncode, stdout, _stderr = res['Value'] for line in stdout.splitlines(): pfn = line.split()[-1] checksum = line.split()[-2] successful[pfn]['Migrated'] = True successful[pfn]['Checksum'] = checksum for pfn in urls: if 'Migrated' not in successful[pfn]: successful[pfn]['Migrated'] = False # Update all the metadata with the common one for lfn in successful: successful[lfn] = self._addCommonMetadata(successful[lfn]) resDict = {'Failed': {}, 'Successful': successful} return S_OK(resDict)
def removeFile(self, path):
    """Remove physically the file specified by its path.

    Files are removed in chunks of 100: first from the stager
    (stager_rm), then from the name server (nsrm). A chunk fails or
    succeeds as a whole.

    :param path: pfn or list of pfns
    :return: S_OK({'Failed': {pfn: error}, 'Successful': {pfn: True}})
    """
    res = self.__checkArgumentFormat(path)
    if not res['OK']:
        return res
    urls = res['Value']
    successful = {}
    failed = {}
    # Work in chunks of 100 pfns to keep the command lines bounded.
    listOfLists = breakListIntoChunks(urls, 100)
    for urls in listOfLists:
        gLogger.debug(
            "RFIOStorage.removeFile: Attempting to remove %s files." % len(urls))
        # Step 1: remove the disk replicas from the stager.
        comm = 'stager_rm -S %s' % self.spaceToken
        for url in urls:
            comm = "%s -M %s" % (comm, url)
        res = shellCall(100, comm)
        if res['OK']:
            returncode, _stdout, stderr = res['Value']
            # Exit codes 0 and 1 are both acceptable.
            if returncode in [0, 1]:
                # Step 2: remove the entries from the name server.
                comm = 'nsrm -f'
                for url in urls:
                    comm = "%s %s" % (comm, url)
                res = shellCall(100, comm)
                if res['OK']:
                    returncode, _stdout, stderr = res['Value']
                    if returncode in [0, 1]:
                        # Whole chunk removed successfully.
                        for pfn in urls:
                            successful[pfn] = True
                    else:
                        errStr = "RFIOStorage.removeFile. Completely failed to remove files from the nameserver."
                        gLogger.error(errStr, stderr)
                        for pfn in urls:
                            failed[pfn] = errStr
                else:
                    errStr = "RFIOStorage.removeFile. Completely failed to remove files from the nameserver."
                    gLogger.error(errStr, res['Message'])
                    for pfn in urls:
                        failed[pfn] = errStr
            else:
                errStr = "RFIOStorage.removeFile. Completely failed to remove files from the stager."
                gLogger.error(errStr, stderr)
                for pfn in urls:
                    failed[pfn] = errStr
        else:
            errStr = "RFIOStorage.removeFile. Completely failed to remove files from the stager."
            gLogger.error(errStr, res['Message'])
            for pfn in urls:
                failed[pfn] = errStr
    resDict = {'Failed': failed, 'Successful': successful}
    return S_OK(resDict)
def submitJob(self, executableFile, proxy, localID):
    """Submit a single pilot job to the Torque batch system via qsub.

    Method to submit job, should be overridden in sub-class.

    :param executableFile: path to the executable (pilot) script
    :param proxy: proxy string to ship with the job; if empty/None the
                  executable is submitted directly
    :param localID: local job identifier
                    (NOTE(review): not referenced in this implementation
                    -- confirm it is needed for the interface only)
    :return: S_OK(batch job ID string) or S_ERROR on any failure
    """
    self.log.info("Executable file path: %s" % executableFile)
    # Make sure the script is readable and executable (5 == R_OK | X_OK).
    if not os.access(executableFile, 5):
        os.chmod(executableFile, 0755)

    # Perform any other actions from the site admin.
    if self.ceParameters.has_key("AdminCommands"):
        commands = self.ceParameters["AdminCommands"].split(";")
        for command in commands:
            self.log.verbose("Executing site admin command: %s" % command)
            result = shellCall(0, command, callbackFunction=self.sendOutput)
            # A failed shell call or a non-zero exit status aborts the submission.
            if not result["OK"] or result["Value"][0]:
                self.log.error('Error during "%s":' % command, result)
                return S_ERROR("Error executing %s CE AdminCommands" % CE_NAME)

    # If no proxy is supplied, the executable can be submitted directly;
    # otherwise a wrapper script is needed to get the proxy to the
    # execution node. The wrapper script makes debugging more complicated
    # and thus it is recommended to transfer a proxy inside the
    # executable if possible.
    if proxy:
        self.log.verbose("Setting up proxy for payload")
        submitFile = self.makeProxyExecutableFile(executableFile, proxy)
    else:  # no proxy
        submitFile = executableFile

    # Submit submitFile to the batch system.
    cmd = "qsub -o %(output)s -e %(error)s -q %(queue)s -N DIRACPilot %(executable)s" % {
        "output": self.batchOutput,
        "error": self.batchError,
        "queue": self.queue,
        "executable": os.path.abspath(submitFile),
    }

    self.log.verbose("CE submission command: %s" % (cmd))

    result = shellCall(0, cmd, callbackFunction=self.sendOutput)
    if not result["OK"] or result["Value"][0]:
        self.log.warn("===========>Torque CE result NOT OK")
        self.log.debug(result)
        return S_ERROR(result["Value"])
    else:
        self.log.debug("Torque CE result OK")

    # qsub prints the batch job ID on stdout.
    batchID = result["Value"][1]
    self.submittedJobs += 1

    return S_OK(batchID)
def __getFileMetadata( self, urls ): gLogger.debug( "RFIOStorage.__getPathMetadata: Attempting to get additional metadata for %s files." % ( len( urls ) ) ) # Check whether the files that exist are staged comm = "stager_qry -S %s" % self.spaceToken successful = {} for pfn in urls: successful[pfn] = {} comm = "%s -M %s" % ( comm, pfn ) res = shellCall( self.timeout, comm ) if not res['OK']: errStr = "RFIOStorage.__getFileMetadata: Completely failed to get cached status." gLogger.error( errStr, res['Message'] ) return S_ERROR( errStr ) else: _returncode, stdout, _stderr = res['Value'] for line in stdout.splitlines(): pfn = line.split()[0] status = line.split()[-1] if status in ['STAGED', 'CANBEMIGR']: successful[pfn]['Cached'] = True for pfn in urls: if not successful[pfn].has_key( 'Cached' ): successful[pfn]['Cached'] = False # Now for the files that exist get the tape segment (i.e. whether they have been migrated) and related checksum comm = "nsls -lT --checksum" for pfn in urls: comm = "%s %s" % ( comm, pfn ) res = shellCall( self.timeout, comm ) if not res['OK']: errStr = "RFIOStorage.__getFileMetadata: Completely failed to get migration status." gLogger.error( errStr, res['Message'] ) return S_ERROR( errStr ) else: _returncode, stdout, _stderr = res['Value'] for line in stdout.splitlines(): pfn = line.split()[-1] checksum = line.split()[-2] successful[pfn]['Migrated'] = True successful[pfn]['Checksum'] = checksum for pfn in urls: if not successful[pfn].has_key( 'Migrated' ): successful[pfn]['Migrated'] = False # Update all the metadata with the common one for lfn in successful: successful[lfn] = self._addCommonMetadata( successful[lfn] ) resDict = {'Failed':{}, 'Successful':successful} return S_OK( resDict )
def removeFile( self, path ):
  """Remove physically the file specified by its path.

     Files are removed in chunks of 100: first from the stager
     (stager_rm), then from the name server (nsrm). A chunk fails or
     succeeds as a whole.

     :param path: pfn or list of pfns
     :return: S_OK( {'Failed': {pfn: error}, 'Successful': {pfn: True}} )
  """
  res = self.__checkArgumentFormat( path )
  if not res['OK']:
    return res
  urls = res['Value']
  successful = {}
  failed = {}
  # Work in chunks of 100 pfns to keep the command lines bounded.
  listOfLists = breakListIntoChunks( urls, 100 )
  for urls in listOfLists:
    gLogger.debug( "RFIOStorage.removeFile: Attempting to remove %s files." % len( urls ) )
    # Step 1: remove the disk replicas from the stager.
    comm = 'stager_rm -S %s' % self.spaceToken
    for url in urls:
      comm = "%s -M %s" % ( comm, url )
    res = shellCall( 100, comm )
    if res['OK']:
      returncode, _stdout, stderr = res['Value']
      # Exit codes 0 and 1 are both acceptable.
      if returncode in [0, 1]:
        # Step 2: remove the entries from the name server.
        comm = 'nsrm -f'
        for url in urls:
          comm = "%s %s" % ( comm, url )
        res = shellCall( 100, comm )
        if res['OK']:
          returncode, _stdout, stderr = res['Value']
          if returncode in [0, 1]:
            # Whole chunk removed successfully.
            for pfn in urls:
              successful[pfn] = True
          else:
            errStr = "RFIOStorage.removeFile. Completely failed to remove files from the nameserver."
            gLogger.error( errStr, stderr )
            for pfn in urls:
              failed[pfn] = errStr
        else:
          errStr = "RFIOStorage.removeFile. Completely failed to remove files from the nameserver."
          gLogger.error( errStr, res['Message'] )
          for pfn in urls:
            failed[pfn] = errStr
      else:
        errStr = "RFIOStorage.removeFile. Completely failed to remove files from the stager."
        gLogger.error( errStr, stderr )
        for pfn in urls:
          failed[pfn] = errStr
    else:
      errStr = "RFIOStorage.removeFile. Completely failed to remove files from the stager."
      gLogger.error( errStr, res['Message'] )
      for pfn in urls:
        failed[pfn] = errStr
  resDict = {'Failed':failed, 'Successful':successful}
  return S_OK( resDict )
def removeDirectory(self, path, recursive=False):
    """Remove a directory on the physical storage together with all its
    files and subdirectories.

    :param path: directory url or list of urls
    :param recursive: kept for interface compatibility (nsrm -r is
                      always recursive here)
    :return: S_OK({'Failed': {url: error}, 'Successful': {url: counters}})
    """
    checked = self.__checkArgumentFormat(path)
    if not checked['OK']:
        return checked
    urls = checked['Value']
    gLogger.debug(
        "RFIOStorage.removeDirectory: Attempting to remove %s directories." % len(urls))
    successful = {}
    failed = {}
    for url in urls:
        res = shellCall(100, "nsrm -r %s" % url)
        if not res['OK']:
            errStr = "RFIOStorage.removeDirectory: Completely failed to remove directory."
            gLogger.error(errStr, "%s %s" % (url, res['Message']))
            failed[url] = res['Message']
            continue
        rc, _stdout, stderr = res['Value']
        if rc in (0, 1):
            # Exit codes 0 and 1 both count as removed; file/size counters
            # are not tracked by this backend so they stay at zero.
            successful[url] = {'FilesRemoved': 0, 'SizeRemoved': 0}
        else:
            failed[url] = stderr
    return S_OK({'Failed': failed, 'Successful': successful})
def _executeCommand( self ):
  """ Execute self.command (uses shellCall) and archive its output.

      stdout/stderr are streamed through self.callbackFunction while the
      command runs, then written to self.applicationLog in the current
      working directory.

      :raises RuntimeError: if the shell call itself fails or the
                            command exits with a non-zero status
  """
  failed = False
  outputDict = shellCall( 0, self.command,
                          env = self.environment,
                          callbackFunction = self.callbackFunction,
                          bufferLimit = self.bufferLimit )
  if not outputDict['OK']:
    # Fixed: the original logged the error but then crashed unpacking
    # outputDict['Value'], which is absent when 'OK' is False.
    self.log.error( 'Shell call execution failed:', '\n' + str( outputDict['Message'] ) )
    raise RuntimeError( "'%s' Execution Failed: %s" % ( os.path.basename( self.executable ),
                                                        outputDict['Message'] ) )
  status, stdout, stderr = outputDict['Value'][0:3]
  if status:
    failed = True
    self.log.error( "Non-zero status while executing", "%s: %s" % ( status, self.command ) )
  else:
    self.log.info( "%s execution completed with status %s" % ( self.executable, status ) )

  self.log.verbose( stdout )
  self.log.verbose( stderr )
  if os.path.exists( self.applicationLog ):
    self.log.verbose( 'Removing existing %s' % self.applicationLog )
    os.remove( self.applicationLog )
  # Archive the captured output; 'with' guarantees the file is closed
  # even if a write fails.
  with open( '%s/%s' % ( os.getcwd(), self.applicationLog ), 'w' ) as fopen:
    fopen.write( "<<<<<<<<<< %s Standard Output >>>>>>>>>>\n\n%s " % ( self.executable, stdout ) )
    if stderr:
      fopen.write( "<<<<<<<<<< %s Standard Error >>>>>>>>>>\n\n%s " % ( self.executable, stderr ) )
  self.log.info( "Output written to %s, execution complete." % ( self.applicationLog ) )

  if failed:
    raise RuntimeError( "'%s' Exited With Status %s" % ( os.path.basename( self.executable ), status ) )
def getEOSFile(self, lfn): """ Use xrdcp to get the files from EOS """ prependpath = "/eos/experiment/clicdp/grid" if not lfn.startswith(prependpath): lfile = prependpath + lfn else: lfile = lfn self.log.info("Getting %s" % lfile) if os.path.exists("overlayinput.sh"): os.unlink("overlayinput.sh") with open("overlayinput.sh","w") as script: script.write('#!/bin/sh \n') script.write('################################\n') script.write('# Dynamically generated script #\n') script.write('################################\n') if 'X509_USER_PROXY' in os.environ: script.write("cp %s /tmp/x509up_u%s \n" % (os.environ['X509_USER_PROXY'], os.getuid())) script.write("xrdcp -s root://eospublic.cern.ch/%s ./ \n" % lfile.rstrip() ) script.write('declare -x appstatus=$?\n') script.write('exit $appstatus\n') os.chmod("overlayinput.sh", 0755) comm = 'sh -c "./overlayinput.sh"' self.result = shellCall(600, comm, callbackFunction = self.redirectLogOutput, bufferLimit = 20971520) localfile = os.path.basename(lfile) if os.path.exists(localfile): return S_OK(localfile) return S_ERROR("Failed")
def glexecExecute( self, executableFile, glexecLocation ):
  """Run glexec with checking of the exit status code.

     :param executableFile: payload to run under glexec (may be empty)
     :param glexecLocation: path to the glexec binary (may be empty, in
                            which case the payload runs directly)
     :return: shellCall result dict; statuses >= 127 are wrapped in an
              S_ERROR carrying the same (status, stdout, stderr) tuple
  """
  if glexecLocation:
    if executableFile:
      # Wrap the payload in a login shell under glexec.
      cmd = "%s /bin/bash -lc '%s'" % ( glexecLocation, executableFile )
    else:
      # No payload: invoke glexec alone.
      cmd = '%s' % ( glexecLocation )
  else:
    cmd = executableFile

  self.log.info( 'CE submission command is: %s' % cmd )
  result = shellCall( 0, cmd, callbackFunction = self.sendOutput )
  if not result['OK']:
    # Normalize the failed result so callers can still unpack 'Value'.
    result['Value'] = ( 0, '', '' )
    return result

  status, stdOutput, stdError = result['Value'][0:3]
  self.log.info( "Status after the glexec execution is %s" % str( status ) )
  if status >= 127:
    error = S_ERROR( status )
    error['Value'] = ( status, stdOutput, stdError )
    return error
  return result
def getKEKFile(self, lfn):
    """Copy a file from the KEK SE (mounted under /grid) using cp,
    via a generated helper script.

    :param lfn: logical file name, appended to the /grid mount point
    :return: S_OK(local file name) if the copy produced the file,
             S_ERROR otherwise
    """
    sePath = '/grid' + lfn
    LOG.info("Getting %s" % sePath)
    # The copy can be slow; keep the watchdog from killing the job.
    self.__disableWatchDog()

    scriptName = "overlayinput.sh"
    if os.path.exists(scriptName):
        os.unlink(scriptName)

    lines = ['#!/bin/sh \n',
             '###############################\n',
             '# Dynamically generated scrip #\n',
             '###############################\n',
             "cp %s ./ -s\n" % sePath.rstrip(),
             'declare -x appstatus=$?\n',
             'exit $appstatus\n']
    with open(scriptName, "w") as script:
        script.writelines(lines)
    os.chmod(scriptName, 0o755)

    self.result = shellCall(600, 'sh -c "./overlayinput.sh"',
                            callbackFunction=self.redirectLogOutput,
                            bufferLimit=20971520)

    localfile = os.path.basename(sePath)
    if os.path.exists(localfile):
        return S_OK(localfile)
    return S_ERROR("Failed")
def glexecExecute(self, executableFile, glexecLocation):
    """Run glexec with checking of the exit status code.

    :param executableFile: payload to run under glexec (may be empty)
    :param glexecLocation: path to the glexec binary (may be empty, in
                           which case the payload runs directly)
    :return: shellCall result dict; statuses >= 127 are wrapped in an
             S_ERROR carrying the same (status, stdout, stderr) tuple
    """
    if glexecLocation:
        if executableFile:
            # Wrap the payload in a login shell under glexec.
            cmd = "%s /bin/bash -lc '%s'" % (glexecLocation, executableFile)
        else:
            # No payload: invoke glexec alone.
            cmd = '%s' % (glexecLocation)
    else:
        cmd = executableFile

    self.log.info('CE submission command is: %s' % cmd)
    result = shellCall(0, cmd, callbackFunction=self.sendOutput)
    if not result['OK']:
        # Normalize the failed result so callers can still unpack 'Value'.
        result['Value'] = (0, '', '')
        return result

    status, stdOutput, stdError = result['Value'][0:3]
    self.log.info("Status after the glexec execution is %s" % str(status))
    if status >= 127:
        error = S_ERROR(status)
        error['Value'] = (status, stdOutput, stdError)
        return error
    return result
def prestageFile(self, path):
    """Issue a prestage (stage-in from tape) request for file(s).

    A unique user tag (space token + timestamp) labels the request so
    its progress can be queried later.

    :param path: pfn or list of pfns
    :return: S_OK({'Failed': {pfn: error}, 'Successful': {pfn: userTag}})
    """
    res = self.__checkArgumentFormat(path)
    if not res['OK']:
        return res
    urls = res['Value']
    userTag = '%s-%s' % (self.spaceToken, time.time())
    # Build a single stager_get command covering all pfns.
    comm = "stager_get -S %s -U %s " % (self.spaceToken, userTag)
    for url in urls:
        comm = "%s -M %s" % (comm, url)
    res = shellCall(100, comm)
    successful = {}
    failed = {}
    if res['OK']:
        returncode, stdout, stderr = res['Value']
        # Exit codes 0 and 1 are both acceptable.
        if returncode in [0, 1]:
            for line in stdout.splitlines():
                # stager_get reports one "<pfn> <SUBREQUEST_...>" line per file.
                if re.search('SUBREQUEST_READY', line):
                    pfn, _status = line.split()
                    successful[pfn] = userTag
                elif re.search('SUBREQUEST_FAILED', line):
                    pfn, _status, err = line.split(' ', 2)
                    failed[pfn] = err
        else:
            errStr = "RFIOStorage.prestageFile: Got unexpected return code from stager_get."
            gLogger.error(errStr, stderr)
            return S_ERROR(errStr)
    else:
        errStr = "RFIOStorage.prestageFile: Completely failed to issue stage requests."
        gLogger.error(errStr, res['Message'])
        return S_ERROR(errStr)
    resDict = {'Failed': failed, 'Successful': successful}
    return S_OK(resDict)
def getNodeInformation(self):
    """Try to obtain system HostName, CPU, Model, cache and memory.

    This information is not essential to the running of the jobs but
    will be reported if available.

    :return: S_OK-style dict populated with node info keys, or S_ERROR
             with a 'Message' if anything fails
    """
    result = S_OK()
    try:
        result["HostName"] = socket.gethostname()
        with open("/proc/cpuinfo", "r") as cpuInfo:
            info = cpuInfo.readlines()
            # NOTE(review): fixed line indices into /proc/cpuinfo are
            # hardware/kernel dependent -- confirm they hold on target nodes.
            result["CPU(MHz)"] = info[7].split(':')[1].replace(
                ' ', '').replace('\n', '')
            result["ModelName"] = info[4].split(':')[1].replace(
                ' ', '').replace('\n', '')
            result["CacheSize(kB)"] = info[8].split(':')[1].replace(
                ' ', '').replace('\n', '')
        with open("/proc/meminfo", "r") as memInfo:
            info = memInfo.readlines()
            result["Memory(kB)"] = info[3].split(':')[1].replace(
                ' ', '').replace('\n', '')
        account = 'Unknown'
        localID = shellCall(10, 'whoami')
        if localID['OK']:
            account = localID['Value'][1].strip()
        result["LocalAccount"] = account
    except Exception as x:
        self.log.fatal(
            'Watchdog failed to obtain node information with Exception:')
        self.log.fatal(str(x))
        result = S_ERROR()
        result[
            'Message'] = 'Failed to obtain system information for ' + self.systemFlag
        return result
    return result
def isFile( self, path ):
  """Check if the given path exists and it is a file (not a directory).

     :param path: pfn or list of pfns
     :return: S_OK( {'Failed': {pfn: error}, 'Successful': {pfn: bool}} )
  """
  res = self.__checkArgumentFormat( path )
  if not res['OK']:
    return res
  urls = res['Value']
  gLogger.debug( "RFIOStorage.isFile: Determining whether %s paths are files." % len( urls ) )
  successful = {}
  failed = {}
  # One nsls -ld call lists all the paths in long format.
  comm = "nsls -ld"
  for url in urls:
    comm = " %s %s" % ( comm, url )
  res = shellCall( self.timeout, comm )
  if not res['OK']:
    return res
  returncode, stdout, stderr = res['Value']
  # Exit codes 0 and 1 are both acceptable (1: some paths had errors).
  if returncode in [0, 1]:
    for line in stdout.splitlines():
      # Long-listing format: a leading 'd' in the permissions column
      # marks a directory.
      permissions, subdirs, owner, group, size, month, date, timeYear, pfn = line.split()
      if permissions[0] != 'd':
        successful[pfn] = True
      else:
        successful[pfn] = False
    for line in stderr.splitlines():
      # Errors come back as "<pfn>: <message>".
      pfn, error = line.split( ': ' )
      url = pfn.strip()
      failed[url] = error
  else:
    errStr = "RFIOStorage.isFile: Completely failed to determine whether path is file."
    gLogger.error( errStr, "%s %s" % ( self.name, stderr ) )
    return S_ERROR( errStr )
  resDict = {'Failed':failed, 'Successful':successful}
  return S_OK( resDict )
def exists( self, path ):
  """ Check if the given path exists. The 'path' variable can be a
      string or a list of strings.

      :param path: pfn or list of pfns
      :return: S_OK( {'Failed': {}, 'Successful': {pfn: bool}} )
               Note: a non-existing path is reported in 'Successful'
               with value False, not in 'Failed' -- 'Failed' is only
               populated on hard errors (and is empty in practice here).
  """
  res = self.__checkArgumentFormat( path )
  if not res['OK']:
    return res
  urls = res['Value']
  gLogger.debug( "RFIOStorage.exists: Determining the existance of %s files." % len( urls ) )
  # One nsls -d call covers all the paths.
  comm = "nsls -d"
  for url in urls:
    comm = " %s %s" % ( comm, url )
  res = shellCall( self.timeout, comm )
  successful = {}
  failed = {}
  if res['OK']:
    returncode, stdout, stderr = res['Value']
    # Exit codes 0 and 1 are both acceptable (1: some paths not found).
    if returncode in [0, 1]:
      # Paths listed on stdout exist ...
      for line in stdout.splitlines():
        url = line.strip()
        successful[url] = True
      # ... while those reported on stderr ("<pfn>: <error>") do not.
      for line in stderr.splitlines():
        pfn, _ = line.split( ': ' )
        url = pfn.strip()
        successful[url] = False
    else:
      errStr = "RFIOStorage.exists: Completely failed to determine the existance files."
      gLogger.error( errStr, "%s %s" % ( self.name, stderr ) )
      return S_ERROR( errStr )
  else:
    errStr = "RFIOStorage.exists: Completely failed to determine the existance files."
    gLogger.error( errStr, "%s %s" % ( self.name, res['Message'] ) )
    return S_ERROR( errStr )
  resDict = {'Failed':failed, 'Successful':successful}
  return S_OK( resDict )
def getKEKFile(self, lfn): """ Use cp to get the files from kek-se """ prependpath = '/grid' lfile = prependpath + lfn self.log.info("Getting %s" % lfile) self.__disableWatchDog() if os.path.exists("overlayinput.sh"): os.unlink("overlayinput.sh") with open("overlayinput.sh", "w") as script: script.write('#!/bin/sh \n') script.write('###############################\n') script.write('# Dynamically generated scrip #\n') script.write('###############################\n') script.write("cp %s ./ -s\n" % lfile.rstrip()) script.write('declare -x appstatus=$?\n') script.write('exit $appstatus\n') os.chmod("overlayinput.sh", 0755) comm = 'sh -c "./overlayinput.sh"' self.result = shellCall(600, comm, callbackFunction = self.redirectLogOutput, bufferLimit = 20971520) localfile = os.path.basename(lfile) if os.path.exists(localfile): return S_OK(localfile) return S_ERROR("Failed")
def prestageFile( self, path ):
    """ Issue prestage request for file

    :param path: PFN string or list of PFNs
    :returns: S_OK({'Successful': {pfn: userTag}, 'Failed': {pfn: error}})
              where userTag identifies this stage request for later queries
    """
    res = self.__checkArgumentFormat( path )
    if not res['OK']:
        return res
    urls = res['Value']
    # Unique tag so the caller can track this particular staging request.
    userTag = '%s-%s' % ( self.spaceToken, time.time() )
    comm = "stager_get -S %s -U %s " % ( self.spaceToken, userTag )
    for url in urls:
        comm = "%s -M %s" % ( comm, url )
    res = shellCall( 100, comm )
    successful = {}
    failed = {}
    if res['OK']:
        returncode, stdout, stderr = res['Value']
        # stager_get exits 1 when some sub-requests failed; still parseable.
        if returncode in [0, 1]:
            for line in stdout.splitlines():
                if re.search( 'SUBREQUEST_READY', line ):
                    pfn, _status = line.split()
                elif re.search( 'SUBREQUEST_FAILED', line ):
                    pfn, _status, err = line.split( ' ', 2 )
                    failed[pfn] = err
        else:
            errStr = "RFIOStorage.prestageFile: Got unexpected return code from stager_get."
            gLogger.error( errStr, stderr )
            return S_ERROR( errStr )
    else:
        errStr = "RFIOStorage.prestageFile: Completely failed to issue stage requests."
        gLogger.error( errStr, res['Message'] )
        return S_ERROR( errStr )
    resDict = {'Failed':failed, 'Successful':successful}
    return S_OK( resDict )
def getEOSFile(self, lfn):
    """ Use xrdcp to get the files from EOS

    Generates a one-shot shell script that copies the grid proxy into the
    default /tmp location (xrdcp picks it up there) and fetches the file from
    the public EOS endpoint.

    :param str lfn: logical file name; prefixed with the EOS grid path unless
                    it already carries it
    :returns: S_OK(local basename) if the file appeared locally, else S_ERROR
    """
    prependpath = "/eos/experiment/clicdp/grid"
    if not lfn.startswith(prependpath):
        lfile = prependpath + lfn
    else:
        lfile = lfn
    LOG.info("Getting %s" % lfile)
    if os.path.exists("overlayinput.sh"):
        os.unlink("overlayinput.sh")
    with open("overlayinput.sh","w") as script:
        script.write('#!/bin/sh \n')
        script.write('################################\n')
        script.write('# Dynamically generated script #\n')
        script.write('################################\n')
        if 'X509_USER_PROXY' in os.environ:
            # xrootd looks for the proxy at /tmp/x509up_u<uid> by default.
            script.write("cp %s /tmp/x509up_u%s \n" % (os.environ['X509_USER_PROXY'], os.getuid()))
        script.write("xrdcp -s root://eospublic.cern.ch/%s ./ \n" % lfile.rstrip() )
        script.write('declare -x appstatus=$?\n')
        script.write('exit $appstatus\n')
    os.chmod("overlayinput.sh", 0o755)
    comm = 'sh -c "./overlayinput.sh"'
    self.result = shellCall(600, comm, callbackFunction = self.redirectLogOutput, bufferLimit = 20971520)
    localfile = os.path.basename(lfile)
    if os.path.exists(localfile):
        return S_OK(localfile)
    return S_ERROR("Failed")
def execute(self):
    """ This is where magic happens

    Fetches the wrapper script, copies it locally, makes it executable and
    runs it through bash.

    :returns: S_OK on zero exit status, S_ERROR otherwise
    """
    res = self.getVariables()
    if not res['OK']:
        return res
    # In the future, you'll want to put the entire wrapper here, but because
    # I'm lazy, we will just call it
    res = self.getWrapperLocation()
    if not res['OK']:
        return res
    loc = res['Value']
    exec_name = os.path.basename(loc)
    try:
        shutil.copy(loc, os.path.join(".", exec_name))
    except EnvironmentError as err:
        # Was a bare 'except:' which also swallowed KeyboardInterrupt etc.;
        # only OS/IO failures can come out of shutil.copy here.
        return S_ERROR("Could not copy the executable to run directory: %s" % err)
    os.chmod(exec_name, 0o755)  # executable for all (0o755: py2.6+/py3 octal)
    comm = 'bash "./%s"' % exec_name
    self.log.info("Will execute", comm)
    res = shellCall(0, comm, self.callBack)
    if not res['OK']:
        return res
    resultTuple = res['Value']
    status = resultTuple[0]
    if status:
        self.log.error("Command exited with status %s" % status)
        return S_ERROR("Failed with status %s" % status)
    return S_OK()
def _executeCommand( self ):
    """ execute the self.command (uses shellCall)

    Runs the command, mirrors stdout/stderr into self.applicationLog, and
    raises on failure.

    :raises RuntimeError: when shellCall itself fails or the command exits
                          with a non-zero status
    """
    failed = False
    outputDict = shellCall( 0, self.command,
                            env = self.environment,
                            callbackFunction = self.callbackFunction,
                            bufferLimit = self.bufferLimit )
    if not outputDict['OK']:
        # shellCall failed outright: there is no (status, stdout, stderr)
        # tuple to unpack, so raise here instead of crashing with a KeyError
        # on the missing 'Value' below (the original fell through).
        self.log.error( 'Shell call execution failed:' )
        self.log.error( outputDict['Message'] )
        raise RuntimeError( "'%s' execution failed: %s" % ( os.path.basename( self.executable ),
                                                            outputDict['Message'] ) )
    status, stdout, stderr = outputDict['Value'][0:3]
    if status:
        failed = True
        self.log.error( "Non-zero status %s while executing %s" % ( status, self.command ) )
    else:
        self.log.info( "%s execution completed with status %s" % ( self.executable, status ) )
    self.log.verbose( stdout )
    self.log.verbose( stderr )
    if os.path.exists( self.applicationLog ):
        self.log.verbose( 'Removing existing %s' % self.applicationLog )
        os.remove( self.applicationLog )
    # Context manager guarantees the log is closed even if a write fails.
    with open( '%s/%s' % ( os.getcwd(), self.applicationLog ), 'w' ) as fopen:
        fopen.write( "<<<<<<<<<< %s Standard Output >>>>>>>>>>\n\n%s " % ( self.executable, stdout ) )
        if stderr:
            fopen.write( "<<<<<<<<<< %s Standard Error >>>>>>>>>>\n\n%s " % ( self.executable, stderr ) )
    self.log.info( "Output written to %s, execution complete." % ( self.applicationLog ) )
    if failed:
        # Parenthesized raise: valid on both python 2 and 3 (the original
        # 'raise RuntimeError, ...' form is py2-only).
        raise RuntimeError( "'%s' Exited With Status %s" % ( os.path.basename( self.executable ), status ) )
def removeDirectory( self, path, recursive = False ):
    """Remove a directory on the physical storage together with all its files and
    subdirectories.

    :returns: S_OK({'Successful': {url: counts}, 'Failed': {url: error}})
    """
    res = self.__checkArgumentFormat( path )
    if not res['OK']:
        return res
    urls = res['Value']
    gLogger.debug( "RFIOStorage.removeDirectory: Attempting to remove %s directories." % len( urls ) )
    successful = {}
    failed = {}
    for url in urls:
        removal = shellCall( 100, "nsrm -r %s" % url )
        if not removal['OK']:
            errStr = "RFIOStorage.removeDirectory: Completely failed to remove directory."
            gLogger.error( errStr, "%s %s" % ( url, removal['Message'] ) )
            failed[url] = removal['Message']
            continue
        rc, _stdout, stderr = removal['Value']
        # nsrm exits 0 or 1 on success; no per-file accounting is available,
        # hence the zeroed counters.
        if rc in ( 0, 1 ):
            successful[url] = {'FilesRemoved':0, 'SizeRemoved':0}
        else:
            failed[url] = stderr
    return S_OK( {'Failed':failed, 'Successful':successful} )
def exists(self, path):
    """Check if the given path exists. The 'path' variable can be a string or a list of strings.

    :returns: S_OK({'Successful': {url: bool}, 'Failed': {}}) — missing paths
              are reported as Successful=False, not as Failed.
    """
    res = self.__checkArgumentFormat(path)
    if not res["OK"]:
        return res
    urls = res["Value"]
    gLogger.debug(
        "RFIOStorage.exists: Determining the existance of %s files." % len(urls))
    # Single "nsls -d" over all URLs; existing paths land on stdout, errors
    # as "<path>: <message>" lines on stderr.
    comm = "nsls -d"
    for url in urls:
        comm = " %s %s" % (comm, url)
    res = shellCall(self.timeout, comm)
    successful = {}
    failed = {}
    if res["OK"]:
        returncode, stdout, stderr = res["Value"]
        # Exit 1 = some paths missing; still a usable partial result.
        if returncode in [0, 1]:
            for line in stdout.splitlines():
                url = line.strip()
                successful[url] = True
            for line in stderr.splitlines():
                pfn, _ = line.split(": ")
                url = pfn.strip()
                successful[url] = False
        else:
            errStr = "RFIOStorage.exists: Completely failed to determine the existance files."
            gLogger.error(errStr, "%s %s" % (self.name, stderr))
            return S_ERROR(errStr)
    else:
        errStr = "RFIOStorage.exists: Completely failed to determine the existance files."
        gLogger.error(errStr, "%s %s" % (self.name, res["Message"]))
        return S_ERROR(errStr)
    resDict = {"Failed": failed, "Successful": successful}
    return S_OK(resDict)
def isFile( self, path ):
    """Check if the given path exists and it is a file

    :returns: S_OK({'Successful': {pfn: bool}, 'Failed': {pfn: error}})
              where True means regular file, False means directory
    """
    res = self.__checkArgumentFormat( path )
    if not res['OK']:
        return res
    urls = res['Value']
    gLogger.debug( "RFIOStorage.isFile: Determining whether %s paths are files." % len( urls ) )
    successful = {}
    failed = {}
    comm = "nsls -ld"
    for url in urls:
        comm = " %s %s" % ( comm, url )
    res = shellCall( self.timeout, comm )
    if not res['OK']:
        return res
    returncode, stdout, stderr = res['Value']
    # Exit 1 = some paths missing; their errors are parsed from stderr below.
    if returncode in [0, 1]:
        for line in stdout.splitlines():
            # Assumes an ls -l style 9-field line; the leading permission
            # character distinguishes directories ('d') from files.
            permissions, _subdirs, _owner, _group, _size, _month, _date, _timeYear, pfn = line.split()
            if permissions[0] != 'd':
                successful[pfn] = True
            else:
                successful[pfn] = False
        for line in stderr.splitlines():
            # stderr line format assumed: "<pfn>: <error message>"
            pfn, error = line.split( ': ' )
            url = pfn.strip()
            failed[url] = error
    else:
        errStr = "RFIOStorage.isFile: Completely failed to determine whether path is file."
        gLogger.error( errStr, "%s %s" % ( self.name, stderr ) )
        return S_ERROR( errStr )
    resDict = {'Failed':failed, 'Successful':successful}
    return S_OK( resDict )
def monitorProxy( self, pilotProxy, payloadProxy, payloadUsername, payloadUID, payloadGID ):
    """ Monitor the payload proxy and renew as necessary.

    :returns: the _monitorProxy result when nothing was renewed, otherwise
              S_OK('Proxy checked') after re-copying the renewed proxy
    """
    retVal = self._monitorProxy( pilotProxy, payloadProxy )
    if not retVal['OK']:
        # Failed to renew the proxy, nothing else to be done
        return retVal
    if not retVal['Value']:
        # No need to renew the proxy, nothing else to be done
        return retVal
    self.log.info( 'Re-executing sudo to make renewed payload proxy available as before' )
    # New version of the proxy file, so we have to do the copy again
    # 1) Make sure the payload user can read its proxy via its per-user group
    os.chown( payloadProxy, -1, payloadGID )
    os.chmod( payloadProxy, stat.S_IRUSR + stat.S_IWUSR + stat.S_IRGRP )
    # 2) Now recreate the copy of the proxy owned by the payload user
    # NOTE(review): the shellCall result is never checked; a failed sudo copy
    # still returns S_OK below -- confirm this best-effort behavior is intended.
    result = shellCall( 0,
                        '/usr/bin/sudo -u %s sh -c "cp -f %s /tmp/x509up_u%d ; chmod 0400 /tmp/x509up_u%d"' % ( payloadUsername, payloadProxy, payloadUID, payloadUID ),
                        callbackFunction = self.sendOutput )
    return S_OK( 'Proxy checked' )
def getNodeInformation(self):
    """Try to obtain system HostName, CPU, Model, cache and memory.

    This information is not essential to the running of the jobs but will be
    reported if available.

    :returns: S_OK-dict extended with the node keys, or S_ERROR on any failure
    """
    result = S_OK()
    try:
        # Context managers replace the manual open/close pairs.
        with open("/proc/cpuinfo", "r") as cpuInfo:
            info = cpuInfo.readlines()
        result["HostName"] = socket.gethostname()
        # Fixed /proc/cpuinfo line indices assumed: 4=model name, 6=cpu MHz,
        # 7=cache size -- TODO confirm, fragile across kernel versions.
        # str methods replace the deprecated string.split/string.replace calls;
        # behavior is identical.
        result["CPU(MHz)"] = info[6].split(":")[1].replace(" ", "").replace("\n", "")
        result["ModelName"] = info[4].split(":")[1].replace(" ", "").replace("\n", "")
        result["CacheSize(kB)"] = info[7].split(":")[1].replace(" ", "").replace("\n", "")
        with open("/proc/meminfo", "r") as memInfo:
            info = memInfo.readlines()
        # Line index 3 of /proc/meminfo assumed to hold the wanted figure.
        result["Memory(kB)"] = info[3].split(":")[1].replace(" ", "").replace("\n", "")
        account = 'Unknown'
        localID = shellCall(10, 'whoami')
        if localID['OK']:
            account = localID['Value'][1].strip()
        result["LocalAccount"] = account
    except Exception as x:
        # 'except Exception, x' is py2-only syntax; 'as' works on py2.6+/py3.
        self.log.fatal('Watchdog failed to obtain node information with Exception:')
        self.log.fatal(str(x))
        result = S_ERROR()
        result['Message'] = 'Failed to obtain system information for ' + self.systemFlag
    return result
def testTimeouts(self):
    """Verify that systemCall, shellCall and pythonCall all report the same
    timeout error structure when the work exceeds self.timeout."""
    expected = {'Message': 'Timed out after 3 seconds', 'OK': False}

    # systemCall takes the command as a sequence
    self.assertEqual(systemCall(timeout=self.timeout, cmdSeq=self.cmd), expected)

    # shellCall takes the same command joined into one string
    self.assertEqual(shellCall(timeout=self.timeout, cmdSeq=" ".join(self.cmd)), expected)

    def pyfunc(name):
        time.sleep(10)
        return name

    # pythonCall wraps a deliberately slow python function
    self.assertEqual(pythonCall(self.timeout, pyfunc, "Krzysztof"), expected)
def monitorProxy(self, pilotProxy, payloadProxy, payloadUsername, payloadUID, payloadGID):
    """ Monitor the payload proxy and renew as necessary.

    :returns: the _monitorProxy result when nothing was renewed, otherwise
              S_OK('Proxy checked') after re-copying the renewed proxy
    """
    retVal = self._monitorProxy(pilotProxy, payloadProxy)
    if not retVal['OK']:
        # Failed to renew the proxy, nothing else to be done
        return retVal
    if not retVal['Value']:
        # No need to renew the proxy, nothing else to be done
        return retVal
    self.log.info(
        'Re-executing sudo to make renewed payload proxy available as before'
    )
    # New version of the proxy file, so we have to do the copy again
    # 1) Make sure the payload user can read its proxy via its per-user group
    os.chown(payloadProxy, -1, payloadGID)
    os.chmod(payloadProxy, stat.S_IRUSR + stat.S_IWUSR + stat.S_IRGRP)
    # 2) Now recreate the copy of the proxy owned by the payload user
    # NOTE(review): the shellCall result is never checked; a failed sudo copy
    # still returns S_OK below -- confirm this best-effort behavior is intended.
    result = shellCall(
        0,
        '/usr/bin/sudo -u %s sh -c "cp -f %s /tmp/x509up_u%d ; chmod 0400 /tmp/x509up_u%d"' %
        (payloadUsername, payloadProxy, payloadUID, payloadUID),
        callbackFunction=self.sendOutput)
    return S_OK('Proxy checked')
def __getFileMetadata(self, urls):
    """Gather stager (Cached) and nameserver (Migrated, Checksum) metadata.

    :param urls: iterable of PFNs assumed to already exist on the storage
    :returns: S_OK({'Failed': {}, 'Successful': {pfn: metadata dict}})
    """
    gLogger.debug(
        "RFIOStorage.__getPathMetadata: Attempting to get additional metadata for %s files."
        % (len(urls))
    )
    # Check whether the files that exist are staged
    comm = "stager_qry -S %s" % self.spaceToken
    successful = {}
    for pfn in urls:
        successful[pfn] = {}
        comm = "%s -M %s" % (comm, pfn)
    res = shellCall(self.timeout, comm)
    if not res["OK"]:
        errStr = "RFIOStorage.__getFileMetadata: Completely failed to get cached status."
        gLogger.error(errStr, res["Message"])
        return S_ERROR(errStr)
    _returncode, stdout, _stderr = res["Value"]
    for line in stdout.splitlines():
        pfn = line.split()[0]
        status = line.split()[-1]
        if status in ["STAGED", "CANBEMIGR"]:
            successful[pfn]["Cached"] = True
    for pfn in urls:
        # dict.has_key() was removed in python 3; 'in' is the equivalent test.
        if "Cached" not in successful[pfn]:
            successful[pfn]["Cached"] = False
    # Now for the files that exist get the tape segment (i.e. whether they
    # have been migrated) and related checksum
    comm = "nsls -lT --checksum"
    for pfn in urls:
        comm = "%s %s" % (comm, pfn)
    res = shellCall(self.timeout, comm)
    if not res["OK"]:
        errStr = "RFIOStorage.__getFileMetadata: Completely failed to get migration status."
        gLogger.error(errStr, res["Message"])
        return S_ERROR(errStr)
    _returncode, stdout, _stderr = res["Value"]
    for line in stdout.splitlines():
        # Assumes checksum and pfn are the last two whitespace-separated fields.
        pfn = line.split()[-1]
        checksum = line.split()[-2]
        successful[pfn]["Migrated"] = True
        successful[pfn]["Checksum"] = checksum
    for pfn in urls:
        if "Migrated" not in successful[pfn]:
            successful[pfn]["Migrated"] = False
    resDict = {"Failed": {}, "Successful": successful}
    return S_OK(resDict)
def getCEStatus(self):
    """ Method to return information on running and pending jobs.

    We hope to satisfy both instances that use robot proxies and those which
    use proper configurations.

    :returns: S_OK-dict with SubmittedJobs, RunningJobs, WaitingJobs, or an
              error result
    """
    result = self._prepareProxy()
    if not result["OK"]:
        gLogger.error("ARCComputingElement: failed to set up proxy", result["Message"])
        return result
    # Read X509_USER_PROXY only after _prepareProxy succeeded; the original
    # accessed the environment first and could raise KeyError when proxy
    # setup had failed.
    self.usercfg.ProxyPath(os.environ["X509_USER_PROXY"])

    # Try to find out which VO we are running for.
    vo = ""
    res = getVOfromProxyGroup()
    if res["OK"]:
        vo = res["Value"]

    result = S_OK()
    result["SubmittedJobs"] = 0
    if not vo:
        # Presumably the really proper way forward once the infosys-discuss WG
        # comes up with a solution and it is implemented. Needed for DIRAC
        # instances which use robot certificates for pilots.
        endpoints = [
            arc.Endpoint(
                "ldap://" + self.ceHost + "/MDS-Vo-name=local,o=grid",
                arc.Endpoint.COMPUTINGINFO,
                "org.nordugrid.ldapng",
            )
        ]
        retriever = arc.ComputingServiceRetriever(self.usercfg, endpoints)
        retriever.wait()  # Takes a bit of time to get and parse the ldap information
        targets = retriever.GetExecutionTargets()
        ceStats = targets[0].ComputingShare
        gLogger.debug("Running jobs for CE %s : %s" % (self.ceHost, ceStats.RunningJobs))
        gLogger.debug("Waiting jobs for CE %s : %s" % (self.ceHost, ceStats.WaitingJobs))
        result["RunningJobs"] = ceStats.RunningJobs
        result["WaitingJobs"] = ceStats.WaitingJobs
    else:
        # The system which works properly at present for ARC CEs that are
        # configured correctly. But for this we need the VO to be known - ask
        # me (Raja) for the whole story if interested.
        cmd = 'ldapsearch -x -LLL -H ldap://%s:2135 -b mds-vo-name=resource,o=grid "(GlueVOViewLocalID=%s)"' % (
            self.ceHost,
            vo.lower(),
        )
        res = shellCall(0, cmd)
        if not res["OK"]:
            gLogger.debug("Could not query CE %s - is it down?" % self.ceHost)
            return res
        try:
            ldapValues = res["Value"][1].split("\n")
            running = [lValue for lValue in ldapValues if "GlueCEStateRunningJobs" in lValue]
            waiting = [lValue for lValue in ldapValues if "GlueCEStateWaitingJobs" in lValue]
            result["RunningJobs"] = int(running[0].split(":")[1])
            result["WaitingJobs"] = int(waiting[0].split(":")[1])
        except IndexError:
            # Attribute lines missing from the ldap answer.
            res = S_ERROR("Unknown ldap failure for site %s" % self.ceHost)
            return res
    return result
def logWorkingDirectory(self):
    """Log the content of the working directory (recursive 'ls -laR')."""
    res = shellCall(0, 'ls -laR')
    if res['OK'] and res['Value'][0] == 0:
        self.log.info('The contents of the working directory...')
        self.log.info(str(res['Value'][1]))
    else:
        # shellCall may fail outright, in which case there is no 'Value' key at
        # all -- the original unconditionally read res['Value'][2] and raised a
        # KeyError instead of logging. Report whatever detail exists.
        if res['OK']:
            detail = str(res['Value'][2])
        else:
            detail = str(res.get('Message', 'unknown error'))
        self.log.error('Failed to list the working directory', detail)
def test_calls(timeout, expected):
    """All three call wrappers must agree on success/failure for 'timeout'."""
    # systemCall takes the command as a sequence
    assert systemCall(timeout, cmdSeq=cmd)["OK"] == expected
    # shellCall takes the same command joined into one string
    assert shellCall(timeout, cmdSeq=" ".join(cmd))["OK"] == expected
    # pythonCall wraps a python function
    assert pythonCall(timeout, pyfunc, "something")["OK"] == expected
def __getProcGroupLinux(self, pid):
    """Return the process group ID (pgrp) for the given PID via 'ps'.

    NOTE(review): the previous docstring said "UID", but 'ps -o pgrp' prints
    the process group, not the user ID.

    :param pid: process ID handed straight to 'ps -p'
    :returns: S_OK(pgrp string, whitespace included) or the shellCall error
    """
    result = shellCall(0, 'ps --no-headers -o pgrp -p %s' % (pid))
    if not result['OK']:
        # NOTE(review): when shellCall failed but still carries a 'Value',
        # this deliberately(?) falls through to the S_OK below -- confirm.
        if not 'Value' in result:
            return result
    return S_OK(result['Value'][1])
def __getProcGroupLinux( self, pid ):
    """Return the process group ID (pgrp) for the given PID via 'ps'.

    NOTE(review): the previous docstring said "UID", but 'ps -o pgrp' prints
    the process group, not the user ID.

    :param pid: process ID handed straight to 'ps -p'
    :returns: S_OK(pgrp string, whitespace included) or the shellCall error
    """
    result = shellCall( 10, 'ps --no-headers -o pgrp -p %s' % ( pid ) )
    if not result['OK']:
        # NOTE(review): when shellCall failed but still carries a 'Value',
        # this deliberately(?) falls through to the S_OK below -- confirm.
        if not 'Value' in result:
            return result
    return S_OK( result['Value'][1] )
def execute(self):
    """ Main execution function.

    Runs self.executable (optionally via the python interpreter, with
    self.arguments appended) and captures stdout/stderr into self.logFile.

    :returns: S_OK on zero exit status, S_ERROR otherwise
    """
    failed = False
    result = self.resolveInputVariables()
    if not result['OK']:
        return result
    self.log.info('Script Module Instance Name: %s' % (self.name))
    cmd = self.executable
    if os.path.exists(os.path.basename(self.executable)):
        self.executable = os.path.basename(self.executable)
        # 5 == R_OK | X_OK: make the local copy executable if it is not.
        if not os.access('%s/%s' % (os.getcwd(), self.executable), 5):
            os.chmod('%s/%s' % (os.getcwd(), self.executable), 0o755)
        cmd = '%s/%s' % (os.getcwd(), self.executable)
    # Escaped dot: the original '.py$' also matched names like 'runpy' since
    # '.' matched any character.
    if re.search(r'\.py$', self.executable):
        cmd = '%s %s' % (sys.executable, self.executable)
    if self.arguments:
        cmd = '%s %s' % (cmd, self.arguments)
    self.log.info('Command is: %s' % cmd)
    outputDict = shellCall(0, cmd)
    if not outputDict['OK']:
        # shellCall failed outright: no (status, stdout, stderr) to unpack,
        # so return instead of crashing on the missing 'Value' below.
        self.log.error('Shell call execution failed:')
        self.log.error(outputDict['Message'])
        return S_ERROR(outputDict['Message'])
    status, stdout, stderr = outputDict['Value'][0:3]
    if status:
        failed = True
        self.log.error('Non-zero status %s while executing %s' % (status, cmd))
    else:
        self.log.info('%s execution completed with status %s' % (self.executable, status))
    self.log.verbose(stdout)
    self.log.verbose(stderr)
    if os.path.exists(self.logFile):
        self.log.verbose('Removing existing %s' % self.logFile)
        os.remove(self.logFile)
    # Context manager guarantees the log file is closed.
    with open('%s/%s' % (os.getcwd(), self.logFile), 'w') as fopen:
        fopen.write('<<<<<<<<<< %s Standard Output >>>>>>>>>>\n\n%s ' % (self.executable, stdout))
        if stderr:
            fopen.write('<<<<<<<<<< %s Standard Error >>>>>>>>>>\n\n%s ' % (self.executable, stderr))
    self.log.info('Output written to %s, execution complete.' % (self.logFile))
    if failed:
        return S_ERROR('Exit Status %s' % (status))
    return S_OK()
def uploadProxy(self, proxy=False, useDNAsUserName=False):
    """ Upload a proxy to myproxy service.

    proxy param can be:
      : Default -> use current proxy
      : string -> upload file specified as proxy
      : X509Chain -> use chain

    :returns: S_OK() on success, S_ERROR with the myproxy-init detail otherwise
    """
    retVal = File.multiProxyArgument(proxy)
    if not retVal['OK']:
        return retVal
    proxyDict = retVal['Value']
    chain = proxyDict['chain']
    proxyLocation = proxyDict['file']
    timeLeft = int(chain.getRemainingSecs()['Value'] / 3600)
    cmdArgs = ['-n']
    cmdArgs.append('-s "%s"' % self._secServer)
    # Ask for one hour less than what the credential still has.
    cmdArgs.append('-c "%s"' % (timeLeft - 1))
    cmdArgs.append('-t "%s"' % self._secMaxProxyHours)
    cmdArgs.append('-C "%s"' % proxyLocation)
    cmdArgs.append('-y "%s"' % proxyLocation)
    if useDNAsUserName:
        cmdArgs.append('-d')
    else:
        retVal = self._getUsername(chain)
        if not retVal['OK']:
            File.deleteMultiProxy(proxyDict)
            return retVal
        mpUsername = retVal['Value']
        cmdArgs.append('-l "%s"' % mpUsername)
    mpEnv = self._getExternalCmdEnvironment()
    # Hack to upload properly
    mpEnv['GT_PROXY_MODE'] = 'old'
    cmd = "myproxy-init %s" % " ".join(cmdArgs)
    result = shellCall(self._secCmdTimeout, cmd, env=mpEnv)
    File.deleteMultiProxy(proxyDict)
    if not result['OK']:
        # Fixed: the original formatted the stale retVal['Message'] here; the
        # shellCall failure detail lives in result['Message'].
        errMsg = "Call to myproxy-init failed: %s" % result['Message']
        return S_ERROR(errMsg)
    status, output, error = result['Value']
    # Clean-up files
    if status:
        errMsg = "Call to myproxy-init failed"
        # Fixed: the original interpolated the whole 'result' dict as StdOut;
        # report the actual command output instead.
        extErrMsg = 'Command: %s; StdOut: %s; StdErr: %s' % (cmd, output, error)
        return S_ERROR("%s %s" % (errMsg, extErrMsg))
    return S_OK()
def __ssh_call(self, command, timeout):
    """Run 'command' over ssh, driving interactive prompts when possible.

    Uses pexpect when available (handles host-key confirmation and password
    entry); otherwise falls back to a plain shellCall, which only works for
    passwordless logins.

    :param command: full ssh command line to spawn
    :param timeout: seconds before giving up; falsy values are coerced to 999
    :returns: S_OK((status, stdout, stderr)) or S_ERROR
    """
    try:
        import pexpect
        expectFlag = True
    except BaseException:
        # pexpect not installed: non-interactive fallback.
        from DIRAC.Core.Utilities.Subprocess import shellCall
        expectFlag = False
    if not timeout:
        timeout = 999
    if expectFlag:
        ssh_newkey = "Are you sure you want to continue connecting"
        try:
            child = pexpect.spawn(command, timeout=timeout, encoding="utf-8")
            # First event: timeout, unknown host key, immediate EOF
            # (passwordless), or a password prompt.
            i = child.expect(
                [pexpect.TIMEOUT, ssh_newkey, pexpect.EOF, "assword: "])
            if i == 0:  # Timeout
                return S_OK((-1, child.before, "SSH login failed"))
            elif i == 1:  # SSH does not have the public key. Just accept it.
                child.sendline("yes")
                child.expect("assword: ")
                i = child.expect([pexpect.TIMEOUT, "assword: "])
                if i == 0:  # Timeout
                    return S_OK((-1, str(child.before) + str(child.after), "SSH login failed"))
                elif i == 1:
                    child.sendline(self.password)
                    child.expect(pexpect.EOF)
                    return S_OK((0, child.before, ""))
            elif i == 2:
                # Passwordless login, get the output
                return S_OK((0, child.before, ""))
            # Password prompt seen on the first expect: answer it if we can.
            if self.password:
                child.sendline(self.password)
                child.expect(pexpect.EOF)
                return S_OK((0, child.before, ""))
            return S_ERROR((-2, child.before, ""))
        except Exception as x:
            res = (-1, "Encountered exception %s: %s" % (Exception, str(x)))
            return S_ERROR(res)
    else:
        # Try passwordless login
        result = shellCall(timeout, command)
        # print ( "!!! SSH command: %s returned %s\n" % (command, result) )
        # ssh exit status 255 means the connection itself failed.
        if result["Value"][0] == 255:
            return S_ERROR(
                (-1, "Cannot connect to host %s" % self.host, ""))
        return result
def listDirectory( self, path ):
    """ List the supplied path. First checks whether the path is a directory then gets the contents.

    :returns: S_OK({'Successful': {dir: {'SubDirs':..., 'Files':...}},
                    'Failed': {dir: error}})
    """
    res = self.__checkArgumentFormat( path )
    if not res['OK']:
        return res
    urls = res['Value']
    gLogger.debug( "RFIOStorage.listDirectory: Attempting to list %s directories." % len( urls ) )
    res = self.isDirectory( urls )
    if not res['OK']:
        return res
    successful = {}
    failed = res['Value']['Failed']
    directories = []
    for url, isDirectory in res['Value']['Successful'].items():
        if isDirectory:
            directories.append( url )
        else:
            errStr = "RFIOStorage.listDirectory: Directory does not exist."
            gLogger.error( errStr, url )
            failed[url] = errStr
    for directory in directories:
        comm = "nsls -l %s" % directory
        res = shellCall( self.timeout, comm )
        if res['OK']:
            returncode, stdout, stderr = res['Value']
            if not returncode == 0:
                errStr = "RFIOStorage.listDirectory: Failed to list directory."
                gLogger.error( errStr, "%s %s" % ( directory, stderr ) )
                failed[directory] = errStr
            else:
                subDirs = {}
                files = {}
                successful[directory] = {}
                for line in stdout.splitlines():
                    # Unused ls fields renamed with a leading underscore for
                    # consistency with the sibling implementation.
                    permissions, _subdirs, _owner, _group, size, _month, _date, _timeYear, pfn = line.split()
                    if not pfn == 'dirac_directory':
                        path = "%s/%s" % ( directory, pfn )
                        if permissions[0] == 'd':
                            # If the subpath is a directory
                            subDirs[path] = True
                        elif permissions[0] == 'm':
                            # In the case that the path is a migrated file
                            files[path] = {'Size':int( size ), 'Migrated':1}
                        else:
                            # In the case that the path is not migrated file
                            files[path] = {'Size':int( size ), 'Migrated':0}
                successful[directory]['SubDirs'] = subDirs
                successful[directory]['Files'] = files
        else:
            errStr = "RFIOStorage.listDirectory: Completely failed to list directory."
            gLogger.error( errStr, "%s %s" % ( directory, res['Message'] ) )
            return S_ERROR( errStr )
    resDict = {'Failed':failed, 'Successful':successful}
    return S_OK( resDict )
def getRALFile(self, lfn): """ Use rfcp to get the files from RAL castor """ prependpath = '/castor/ads.rl.ac.uk/prod' if not lfn.count('ads.rl.ac.uk/prod'): lfile = prependpath + lfn else: lfile = lfn self.log.info("Getting %s" % lfile) ###Don't check for CPU time as other wise, job can get killed self.__disableWatchDog() #command = "rfcp %s ./"%file #comm = [] #comm.append("cp $X509_USER_PROXY /tmp/x509up_u%s"%os.getuid()) if os.environ.has_key('X509_USER_PROXY'): comm2 = ["cp", os.environ['X509_USER_PROXY'],"/tmp/x509up_u%s" % os.getuid()] res = subprocess.Popen(comm2, stdout = subprocess.PIPE).communicate() print res #comm.append("xrdcp root://ccdcacsn179.in2p3.fr:1094%s ./ -s"%file) #command = string.join(comm,";") #logfile = file(self.applicationLog,"w") os.environ['CNS_HOST'] = 'castorns.ads.rl.ac.uk' #comm4= ['declare','-x','CNS_HOST=castorns.ads.rl.ac.uk'] #res = subprocess.Popen(comm4,stdout=logfile,stderr=subprocess.STDOUT) #print res os.environ['STAGE_SVCCLASS'] = 'ilcTape' # comm5= ['declare','-x','STAGE_SVCCLASS=ilcTape'] # res = subprocess.call(comm5) # print res os.environ['STAGE_HOST'] = 'cgenstager.ads.rl.ac.uk' # comm6=['declare','-x','STAGE_HOST=cgenstager.ads.rl.ac.uk'] # res = subprocess.call(comm6) # print res basename = os.path.basename(lfile) if os.path.exists("overlayinput.sh"): os.unlink("overlayinput.sh") script = file("overlayinput.sh","w") script.write('#!/bin/sh \n') script.write('###############################\n') script.write('# Dynamically generated scrip #\n') script.write('###############################\n') script.write("/usr/bin/rfcp 'rfio://cgenstager.ads.rl.ac.uk:9002?svcClass=ilcTape&path=%s' %s\n" % (lfile, basename)) script.write('declare -x appstatus=$?\n') script.write('exit $appstatus\n') script.close() os.chmod("overlayinput.sh", 0755) comm = 'sh -c "./overlayinput.sh"' self.result = shellCall(600, comm, callbackFunction = self.redirectLogOutput, bufferLimit = 20971520) localfile = os.path.basename(lfile) if 
os.path.exists(localfile): return S_OK(localfile) return S_ERROR("Failed")
def listDirectory( self, path ):
    """ List the supplied path. First checks whether the path is a directory then gets the contents.

    :returns: S_OK({'Successful': {dir: {'SubDirs':..., 'Files':...}},
                    'Failed': {dir: error}})
    """
    res = self.__checkArgumentFormat( path )
    if not res['OK']:
        return res
    urls = res['Value']
    gLogger.debug( "RFIOStorage.listDirectory: Attempting to list %s directories." % len( urls ) )
    res = self.isDirectory( urls )
    if not res['OK']:
        return res
    successful = {}
    failed = res['Value']['Failed']
    directories = []
    for url, isDirectory in res['Value']['Successful'].items():
        if isDirectory:
            directories.append( url )
        else:
            errStr = "RFIOStorage.listDirectory: Directory does not exist."
            gLogger.error( errStr, url )
            failed[url] = errStr
    for directory in directories:
        comm = "nsls -l %s" % directory
        res = shellCall( self.timeout, comm )
        if res['OK']:
            returncode, stdout, stderr = res['Value']
            if not returncode == 0:
                errStr = "RFIOStorage.listDirectory: Failed to list directory."
                gLogger.error( errStr, "%s %s" % ( directory, stderr ) )
                failed[directory] = errStr
            else:
                subDirs = {}
                files = {}
                successful[directory] = {}
                for line in stdout.splitlines():
                    # ls -l style 9-field line; the permission char encodes the
                    # entry type ('d' directory, 'm' migrated file).
                    permissions, _subdirs, _owner, _group, size, _month, _date, _timeYear, pfn = line.split()
                    if not pfn == 'dirac_directory':
                        path = "%s/%s" % ( directory, pfn )
                        if permissions[0] == 'd':
                            # If the subpath is a directory
                            subDirs[path] = True
                        elif permissions[0] == 'm':
                            # In the case that the path is a migrated file
                            files[path] = {'Size':int( size ), 'Migrated':1}
                        else:
                            # In the case that the path is not migrated file
                            files[path] = {'Size':int( size ), 'Migrated':0}
                successful[directory]['SubDirs'] = subDirs
                successful[directory]['Files'] = files
        else:
            errStr = "RFIOStorage.listDirectory: Completely failed to list directory."
            gLogger.error( errStr, "%s %s" % ( directory, res['Message'] ) )
            return S_ERROR( errStr )
    resDict = {'Failed':failed, 'Successful':successful}
    return S_OK( resDict )
def getCASTORFile(self, lfn):
    """ Use xrdcp or rfcp to get the files from castor

    Generates a one-shot shell script that tries xrdcp against castorpublic
    first, then falls back to rfcp when the local copy came out empty.

    :param str lfn: logical file name; the CERN castor grid prefix is
                    prepended unless already present
    :returns: S_OK({'Successful': lfn} or {'Failed': lfn}) depending on
              whether the local copy exists
    """
    prependpath = "/castor/cern.ch/grid"
    if not lfn.count("castor/cern.ch"):
        lfile = prependpath + lfn
    else:
        lfile = lfn
    self.log.info("Getting %s" % lfile)
    basename = os.path.basename(lfile)
    if os.path.exists("overlayinput.sh"):
        os.unlink("overlayinput.sh")
    # Context manager replaces the removed file() builtin + manual close.
    with open("overlayinput.sh", "w") as script:
        script.write('#!/bin/sh \n')
        script.write('###############################\n')
        script.write('# Dynamically generated scrip #\n')
        script.write('###############################\n')
        script.write("cp %s /tmp/x509up_u%s \n" % (os.environ['X509_USER_PROXY'], os.getuid()))
        script.write('declare -x STAGE_SVCCLASS=ilcdata\n')
        script.write('declare -x STAGE_HOST=castorpublic\n')
        script.write("xrdcp -s root://castorpublic.cern.ch/%s ./ -OSstagerHost=castorpublic\&svcClass=ilcdata\n" % lfile.rstrip())
        # Fallback: xrdcp sometimes leaves an empty file behind.
        script.write(""" if [ ! -s %s ]; then echo "Using rfcp instead" rfcp %s ./ fi\n""" % (basename, lfile))
        script.write('declare -x appstatus=$?\n')
        script.write('exit $appstatus\n')
    os.chmod("overlayinput.sh", 0o755)
    comm = 'sh -c "./overlayinput.sh"'
    self.result = shellCall(600, comm, callbackFunction=self.redirectLogOutput,
                            bufferLimit=20971520)
    status = 0
    if not os.path.exists(os.path.basename(lfile)):
        status = 1
    mydict = {}
    mydict['Failed'] = []
    mydict['Successful'] = []
    # NOTE(review): on each branch the list is overwritten with the bare lfn
    # string, not appended -- confirm callers expect a scalar here.
    if status:
        mydict['Failed'] = lfn
    else:
        mydict['Successful'] = lfn
    return S_OK(mydict)
def __makeDir(self, path):
    """Create 'path' in the storage name space via 'nsmkdir' (mode 775).

    :returns: S_OK() on success, S_ERROR(stderr) or the shellCall error
    """
    res = shellCall(100, "nsmkdir -m 775 %s" % path)
    if not res['OK']:
        return res
    rc, _stdout, stderr = res['Value']
    if rc != 0:
        return S_ERROR(stderr)
    return S_OK()
def getLoadAverage(self):
    """Obtains the load average (1-minute figure from /proc/loadavg)."""
    res = shellCall(5, '/bin/cat /proc/loadavg')
    if not res['OK']:
        self.log.warn('Could not obtain load average')
        return S_ERROR('Could not obtain load average')
    # First whitespace-separated field of /proc/loadavg is the 1-minute value.
    firstField = res['Value'][1].split()[0]
    return S_OK(float(firstField))
def getLoadAverage( self ):
    """Obtains the load average (BSD/macOS variant via 'sysctl vm.loadavg').

    :returns: S_OK(load average as float) or S_ERROR
    """
    comm = 'sysctl vm.loadavg'
    loadAvgDict = shellCall( 5, comm )
    if loadAvgDict['OK']:
        # s.split() replaces the deprecated string.split(s); behavior is
        # identical. Field index 3 assumed from the sysctl output layout
        # "vm.loadavg: { a b c }" -- TODO confirm which interval it selects.
        la = float( loadAvgDict['Value'][1].split()[3] )
        return S_OK( la )
    else:
        return S_ERROR( 'Could not obtain load average' )
def getLoadAverage(self):
    """Obtains the load average (BSD/macOS variant via 'sysctl vm.loadavg')."""
    res = shellCall(5, 'sysctl vm.loadavg')
    if not res['OK']:
        return S_ERROR('Could not obtain load average')
    # Field 3 of the "vm.loadavg: { a b c }" output line.
    fields = res['Value'][1].split()
    return S_OK(float(fields[3]))
def sudoExecute( self, executableFile, payloadProxy, payloadUsername, payloadUID, payloadGID ):
    """Run sudo with checking of the exit status code.

    :returns: the shellCall result dict; statuses above 128 are wrapped in an
              S_ERROR that still carries the (status, stdout, stderr) tuple
    """
    # We now implement a file giveaway using groups, to avoid any need to sudo to root.
    # Each payload user must have their own group. The pilot user must be a member
    # of all of these groups. This allows the pilot user to set the group of the
    # payloadProxy file to be that of the payload user. The payload user can then
    # read it and make a copy of it (/tmp/x509up_uNNNN) that it owns. Some grid
    # commands check that the proxy is owned by the current user so the copy stage
    # is necessary.
    # 1) Make sure the payload user can read its proxy via its per-user group
    os.chown( payloadProxy, -1, payloadGID )
    os.chmod( payloadProxy, stat.S_IRUSR + stat.S_IWUSR + stat.S_IRGRP )
    # 2) Now create a copy of the proxy owned by the payload user
    result = shellCall( 0,
                        '/usr/bin/sudo -u %s sh -c "cp -f %s /tmp/x509up_u%d ; chmod 0400 /tmp/x509up_u%d"' % ( payloadUsername, payloadProxy, payloadUID, payloadUID ),
                        callbackFunction = self.sendOutput )
    # 3) Make sure the current directory is +rwx by the pilot's group
    #    (needed for InstallDIRAC but not for LHCbInstallDIRAC, for example)
    os.chmod('.', os.stat('.').st_mode | stat.S_IRWXG)
    # Run the executable (the wrapper in fact) under the payload account,
    # forwarding the environment pieces grid tools need.
    cmd = "/usr/bin/sudo -u %s PATH=$PATH DIRACSYSCONFIG=/scratch/%s/pilot.cfg LD_LIBRARY_PATH=$LD_LIBRARY_PATH PYTHONPATH=$PYTHONPATH X509_CERT_DIR=$X509_CERT_DIR X509_USER_PROXY=/tmp/x509up_u%d sh -c '%s'" % ( payloadUsername, os.environ['USER'], payloadUID, executableFile )
    self.log.info( 'CE submission command is: %s' % cmd )
    result = shellCall( 0, cmd, callbackFunction = self.sendOutput )
    if not result['OK']:
        result['Value'] = ( 0, '', '' )
        return result
    resultTuple = result['Value']
    status = resultTuple[0]
    stdOutput = resultTuple[1]
    stdError = resultTuple[2]
    self.log.info( "Status after the sudo execution is %s" % str( status ) )
    if status > 128:
        # Signal-style exit (>128): flag as error but keep the full output
        # tuple available to the caller.
        error = S_ERROR( status )
        error['Value'] = ( status, stdOutput, stdError )
        return error
    return result
def __makeDir( self, path ):
    """Create 'path' in the storage name space via 'nsmkdir' (mode 775).

    :returns: S_OK() on success, S_ERROR(stderr) or the shellCall error
    """
    result = shellCall( 100, "nsmkdir -m 775 %s" % path )
    if not result['OK']:
        return result
    returncode, _stdout, stderr = result['Value']
    return S_OK() if returncode == 0 else S_ERROR( stderr )
def getLoadAverage(self):
    """Obtains the load average (1-minute figure from /proc/loadavg).

    :returns: S_OK(float) or S_ERROR
    """
    comm = '/bin/cat /proc/loadavg'
    loadAvgDict = shellCall( 5, comm )
    if loadAvgDict['OK']:
        # s.split() replaces the deprecated py2-only string.split(s) helper;
        # behavior is identical and matches the sibling implementation.
        return S_OK( float( loadAvgDict['Value'][1].split()[0] ) )
    else:
        self.log.warn( 'Could not obtain load average' )
        return S_ERROR( 'Could not obtain load average' )
def getRALFile(self, lfn): """ Use rfcp to get the files from RAL castor """ prependpath = '/castor/ads.rl.ac.uk/prod' if not lfn.count('ads.rl.ac.uk/prod'): lfile = prependpath + lfn else: lfile = lfn self.log.info("Getting %s" % lfile) ###Don't check for CPU time as other wise, job can get killed self.__disableWatchDog() #command = "rfcp %s ./"%file #comm = [] #comm.append("cp $X509_USER_PROXY /tmp/x509up_u%s"%os.getuid()) if 'X509_USER_PROXY' in os.environ: comm2 = ["cp", os.environ['X509_USER_PROXY'],"/tmp/x509up_u%s" % os.getuid()] res = subprocess.Popen(comm2, stdout = subprocess.PIPE).communicate() print res #comm.append("xrdcp root://ccdcacsn179.in2p3.fr:1094%s ./ -s"%file) #command = string.join(comm,";") #logfile = file(self.applicationLog,"w") os.environ['CNS_HOST'] = 'castorns.ads.rl.ac.uk' #comm4= ['declare','-x','CNS_HOST=castorns.ads.rl.ac.uk'] #res = subprocess.Popen(comm4,stdout=logfile,stderr=subprocess.STDOUT) #print res os.environ['STAGE_SVCCLASS'] = 'ilcTape' # comm5= ['declare','-x','STAGE_SVCCLASS=ilcTape'] # res = subprocess.call(comm5) # print res os.environ['STAGE_HOST'] = 'cgenstager.ads.rl.ac.uk' # comm6=['declare','-x','STAGE_HOST=cgenstager.ads.rl.ac.uk'] # res = subprocess.call(comm6) # print res basename = os.path.basename(lfile) if os.path.exists("overlayinput.sh"): os.unlink("overlayinput.sh") with open("overlayinput.sh","w") as script: script.write('#!/bin/sh \n') script.write('###############################\n') script.write('# Dynamically generated scrip #\n') script.write('###############################\n') script.write("/usr/bin/rfcp 'rfio://cgenstager.ads.rl.ac.uk:9002?svcClass=ilcTape&path=%s' %s\n" % (lfile, basename)) script.write('declare -x appstatus=$?\n') script.write('exit $appstatus\n') os.chmod("overlayinput.sh", 0755) comm = 'sh -c "./overlayinput.sh"' self.result = shellCall(600, comm, callbackFunction = self.redirectLogOutput, bufferLimit = 20971520) localfile = os.path.basename(lfile) if 
os.path.exists(localfile): return S_OK(localfile) return S_ERROR("Failed")
def uploadProxy(self, proxy=False, useDNAsUserName=False):
  """Upload a proxy to the myproxy service.

  :param proxy: False (default) -> use the current proxy;
                string -> upload the file specified as proxy;
                X509Chain -> use the chain
  :param useDNAsUserName: pass -d to myproxy-init so the certificate DN
                          is used as the myproxy user name
  :returns: S_OK() on success, S_ERROR otherwise
  """
  retVal = File.multiProxyArgument(proxy)
  if not retVal["OK"]:
    return retVal
  proxyDict = retVal["Value"]
  chain = proxyDict["chain"]
  proxyLocation = proxyDict["file"]
  timeLeft = int(chain.getRemainingSecs()["Value"] / 3600)
  cmdArgs = ["-n"]
  cmdArgs.append('-s "%s"' % self._secServer)
  # Request one hour less than the proxy actually has left
  cmdArgs.append('-c "%s"' % (timeLeft - 1))
  cmdArgs.append('-t "%s"' % self._secMaxProxyHours)
  cmdArgs.append('-C "%s"' % proxyLocation)
  cmdArgs.append('-y "%s"' % proxyLocation)
  if useDNAsUserName:
    cmdArgs.append("-d")
  else:
    retVal = self._getUsername(chain)
    if not retVal["OK"]:
      File.deleteMultiProxy(proxyDict)
      return retVal
    mpUsername = retVal["Value"]
    cmdArgs.append('-l "%s"' % mpUsername)
  mpEnv = self._getExternalCmdEnvironment()
  # Hack to upload properly
  mpEnv["GT_PROXY_MODE"] = "old"
  cmd = "myproxy-init %s" % " ".join(cmdArgs)
  result = shellCall(self._secCmdTimeout, cmd, env=mpEnv)
  File.deleteMultiProxy(proxyDict)
  if not result["OK"]:
    # Bug fix: the message came from stale retVal["Message"] instead of
    # the failed shellCall result itself.
    errMsg = "Call to myproxy-init failed: %s" % result["Message"]
    return S_ERROR(errMsg)
  status, output, error = result["Value"]
  # Clean-up files
  if status:
    errMsg = "Call to myproxy-init failed"
    # Bug fix: StdOut previously interpolated the whole result dict
    # instead of the command's stdout.
    extErrMsg = "Command: %s; StdOut: %s; StdErr: %s" % (cmd, output, error)
    return S_ERROR("%s %s" % (errMsg, extErrMsg))
  return S_OK()
def __getDirectorySize(self, path):
  """Return the total size, in bytes, of the directory at *path*.

  Shells out to ``du -sb``; any failure — an unsuccessful shellCall or a
  non-zero du exit code — is reported as a size of 0.
  """
  duCommand = "du -sb %s" % path
  res = shellCall(0, duCommand)
  if not res['OK']:
    return 0
  exitCode = res['Value'][0]
  stdout = res['Value'][1]
  if exitCode:
    return 0
  # du prints "<bytes>\t<path>"; the first token is the size
  return int(stdout.split()[0])