def getCEStatus(self):
        """ Return the number of waiting and running jobs of the Torque queue.

        Runs ``qstat -Q <self.execQueue>`` through SSH and extracts the job
        counters from the summary line that starts with ``self.queue``.

        :return: S_OK structure carrying the extra keys 'SubmittedJobs',
                 'WaitingJobs' and 'RunningJobs'. The failed SSH result is
                 returned as is if the call fails; S_ERROR is returned if
                 qstat exits with a non-zero status or its output does not
                 contain a parsable line for the queue.
        """

        result = S_OK()
        result['SubmittedJobs'] = self.submittedJobs

        ssh = SSH(parameters=self.ceParameters)
        cmd = ["qstat", "-Q", self.execQueue]
        ret = ssh.sshCall(10, cmd)

        if not ret['OK']:
            self.log.error('Timeout', ret['Message'])
            return ret

        status = ret['Value'][0]
        stdout = ret['Value'][1]
        stderr = ret['Value'][2]

        self.log.debug("status:", status)
        self.log.debug("stdout:", stdout)
        self.log.debug("stderr:", stderr)

        if status:
            self.log.error('Failed qstat execution:', stderr)
            return S_ERROR(stderr)

        # NOTE(review): qstat is asked about self.execQueue but the output is
        # searched for self.queue - confirm both always name the same queue.
        # The queue name is escaped so that characters such as '.' or '+' in
        # it are matched literally.
        matched = re.search(
            re.escape(self.queue) +
            r"\D+(\d+)\D+(\d+)\W+(\w+)\W+(\w+)\D+(\d+)\D+(\d+)"
            r"\D+(\d+)\D+(\d+)\D+(\d+)\D+(\d+)\W+(\w+)",
            stdout)

        # re.search() returns None when the queue line is absent or malformed
        if matched is None:
            return S_ERROR("Error retrieving information from qstat:" +
                           stdout + stderr)

        # Groups 5 and 6 are captured with \d+, so int() cannot fail here
        waitingJobs = int(matched.group(5))
        runningJobs = int(matched.group(6))

        result['WaitingJobs'] = waitingJobs
        result['RunningJobs'] = runningJobs

        self.log.verbose('Waiting Jobs: ', waitingJobs)
        self.log.verbose('Running Jobs: ', runningJobs)

        return result
  def getJobStatus( self, jobIDList ):
    """ Get the status information for the given list of jobs

        The jobs are queried with qstat through SSH, 100 jobs per call.

        :param jobIDList: job references; each one is parsed with pfnparse()
                          and the part of its 'FileName' before the first '.'
                          is the stamp handed to qstat
        :return: S_OK( dict ) mapping every valid job reference to 'Done',
                 'Running', 'Waiting' or 'Unknown'. References rejected by
                 pfnparse() are logged and left out. The failed SSH result or
                 S_ERROR is returned as soon as one qstat call fails.
    """

    # Torque single-letter job states -> status reported to the caller
    statusMap = { 'E': 'Done', 'C': 'Done',
                  'R': 'Running',
                  'S': 'Waiting', 'W': 'Waiting', 'Q': 'Waiting',
                  'H': 'Waiting', 'T': 'Waiting' }

    resultDict = {}
    ssh = SSH( parameters = self.ceParameters )

    for jobList in breakListIntoChunks( jobIDList, 100 ):

      jobDict = {}
      for job in jobList:
        result = pfnparse( job )
        if not result['OK']:
          self.log.error( 'Invalid job id', job )
          continue
        stamp = result['Value']['FileName'].split( '.' )[0]
        jobDict[stamp] = job

      if not jobDict:
        # Nothing valid in this chunk, so there is nothing to ask qstat about
        continue

      cmd = [ 'qstat', ' '.join( jobDict ) ]
      result = ssh.sshCall( 10, cmd )
      if not result['OK']:
        return result

      status = result['Value'][0]
      if status == -1:
        return S_ERROR( 'Timeout while SSH call' )
      elif status != 0:
        return S_ERROR( 'Error while SSH call' )

      lines = result['Value'][1].replace( '\r', '' ).split( '\n' )
      for stamp, job in jobDict.items():
        resultDict[job] = 'Unknown'
        prefix = stamp + '.'
        for line in lines:
          fields = line.split()
          # Compare whole tokens: a plain substring test would let stamp "12"
          # pick up the line of job "123" or a time column such as "00:12:01"
          if not any( field == stamp or field.startswith( prefix ) for field in fields ):
            continue
          if 'Unknown' in line:
            resultDict[job] = 'Unknown'
          elif len( fields ) > 4 and fields[4] in statusMap:
            # the fifth column of a qstat job line is the job state
            resultDict[job] = statusMap[fields[4]]

    return S_OK( resultDict )
Exemple #3
0
    def getJobStatus(self, jobIDList):
        """ Get the status information for the given list of jobs

        The jobs are queried with qstat through SSH, 100 jobs per call.

        :param jobIDList: job references; each one is parsed with pfnparse()
                          and the part of its 'FileName' before the first '.'
                          is the stamp handed to qstat
        :return: S_OK(dict) mapping every valid job reference to 'Done',
                 'Running', 'Waiting' or 'Unknown'. References rejected by
                 pfnparse() are logged and left out. The failed SSH result or
                 S_ERROR is returned as soon as one qstat call fails.
        """

        # Torque single-letter job states -> status reported to the caller
        statusMap = {
            'E': 'Done',
            'C': 'Done',
            'R': 'Running',
            'S': 'Waiting',
            'W': 'Waiting',
            'Q': 'Waiting',
            'H': 'Waiting',
            'T': 'Waiting',
        }

        resultDict = {}
        ssh = SSH(parameters=self.ceParameters)

        for jobList in breakListIntoChunks(jobIDList, 100):

            jobDict = {}
            for job in jobList:
                result = pfnparse(job)
                if not result['OK']:
                    self.log.error('Invalid job id', job)
                    continue
                stamp = result['Value']['FileName'].split('.')[0]
                jobDict[stamp] = job

            if not jobDict:
                # Nothing valid in this chunk, so there is nothing to ask
                # qstat about
                continue

            cmd = ['qstat', ' '.join(jobDict)]
            result = ssh.sshCall(10, cmd)
            if not result['OK']:
                return result

            status = result['Value'][0]
            if status == -1:
                return S_ERROR('Timeout while SSH call')
            elif status != 0:
                return S_ERROR('Error while SSH call')

            lines = result['Value'][1].replace('\r', '').split('\n')
            for stamp, job in jobDict.items():
                resultDict[job] = 'Unknown'
                prefix = stamp + '.'
                for line in lines:
                    fields = line.split()
                    # Compare whole tokens: a plain substring test would let
                    # stamp "12" pick up the line of job "123" or a time
                    # column such as "00:12:01"
                    if not any(field == stamp or field.startswith(prefix)
                               for field in fields):
                        continue
                    if 'Unknown' in line:
                        resultDict[job] = 'Unknown'
                    elif len(fields) > 4 and fields[4] in statusMap:
                        # the fifth column of a qstat job line is the state
                        resultDict[job] = statusMap[fields[4]]

        return S_OK(resultDict)
  def getCEStatus( self ):
    """ Return the number of waiting and running jobs of the Torque queue.

        Runs ``qstat -Q <self.execQueue>`` through SSH and extracts the job
        counters from the summary line that starts with ``self.queue``.

        :return: S_OK structure carrying the extra keys 'SubmittedJobs',
                 'WaitingJobs' and 'RunningJobs'. The failed SSH result is
                 returned as is if the call fails; S_ERROR is returned if
                 qstat exits with a non-zero status or its output does not
                 contain a parsable line for the queue.
    """

    result = S_OK()
    result['SubmittedJobs'] = self.submittedJobs

    ssh = SSH( parameters = self.ceParameters )
    cmd = ["qstat", "-Q" , self.execQueue ]
    ret = ssh.sshCall( 10, cmd )

    if not ret['OK']:
      self.log.error( 'Timeout', ret['Message'] )
      return ret

    status = ret['Value'][0]
    stdout = ret['Value'][1]
    stderr = ret['Value'][2]

    self.log.debug( "status:", status )
    self.log.debug( "stdout:", stdout )
    self.log.debug( "stderr:", stderr )

    if status:
      self.log.error( 'Failed qstat execution:', stderr )
      return S_ERROR( stderr )

    # NOTE(review): qstat is asked about self.execQueue but the output is
    # searched for self.queue - confirm both always name the same queue.
    # The queue name is escaped so that characters such as '.' or '+' in it
    # are matched literally.
    pattern = re.escape( self.queue ) + \
              r"\D+(\d+)\D+(\d+)\W+(\w+)\W+(\w+)\D+(\d+)\D+(\d+)" \
              r"\D+(\d+)\D+(\d+)\D+(\d+)\D+(\d+)\W+(\w+)"
    matched = re.search( pattern, stdout )

    # re.search() returns None when the queue line is absent or malformed
    if matched is None:
      return S_ERROR( "Error retrieving information from qstat:" + stdout + stderr )

    # Groups 5 and 6 are captured with \d+, so int() cannot fail here
    waitingJobs = int( matched.group( 5 ) )
    runningJobs = int( matched.group( 6 ) )

    result['WaitingJobs'] = waitingJobs
    result['RunningJobs'] = runningJobs

    self.log.verbose( 'Waiting Jobs: ', waitingJobs )
    self.log.verbose( 'Running Jobs: ', runningJobs )

    return result
    def getCEStatus(self):
        """ Return the number of waiting and running jobs of the LSF queue.

        Runs ``bjobs -q <self.execQueue> -a`` through SSH and counts the jobs
        by the value of their STAT column.

        :return: S_OK structure carrying the extra keys 'SubmittedJobs',
                 'WaitingJobs' and 'RunningJobs'. The failed SSH result is
                 returned as is if the call fails; S_ERROR is returned if
                 bjobs exits with a non-zero status.
        """

        result = S_OK()
        result['SubmittedJobs'] = self.submittedJobs

        ssh = SSH(parameters=self.ceParameters)
        cmd = ["bjobs", "-q", self.execQueue, "-a"]
        ret = ssh.sshCall(100, cmd)

        if not ret['OK']:
            self.log.error('Timeout', ret['Message'])
            return ret

        status = ret['Value'][0]
        stdout = ret['Value'][1]
        stderr = ret['Value'][2]

        self.log.debug("status:", status)
        self.log.debug("stdout:", stdout)
        self.log.debug("stderr:", stderr)

        if status:
            self.log.error('Failed bjobs execution:', stderr)
            return S_ERROR(stderr)

        # NOTE(review): 'USUSP' is counted as running here; confirm whether
        # 'SSUSP' was meant instead (or as well).
        waitingStates = ('PEND', 'PSUSP')
        runningStates = ('RUN', 'USUSP')

        waitingJobs = 0
        runningJobs = 0
        for line in stdout.split("\n"):
            fields = line.split()
            if len(fields) < 3:
                # blank or truncated line: no STAT column to look at
                continue
            # Only the STAT column (third field) is inspected. Searching the
            # whole line would also count jobs whose name, user or host
            # merely contains "RUN" or "PEND".
            lsfStatus = fields[2]
            if lsfStatus in waitingStates:
                waitingJobs += 1
            elif lsfStatus in runningStates:
                runningJobs += 1

        result['WaitingJobs'] = waitingJobs
        result['RunningJobs'] = runningJobs

        self.log.verbose('Waiting Jobs: ', waitingJobs)
        self.log.verbose('Running Jobs: ', runningJobs)

        return result
  def getCEStatus( self ):
    """ Return the number of waiting and running jobs of the LSF queue.

        Runs ``bjobs -q <self.execQueue> -a`` through SSH and counts the jobs
        by the value of their STAT column.

        :return: S_OK structure carrying the extra keys 'SubmittedJobs',
                 'WaitingJobs' and 'RunningJobs'. The failed SSH result is
                 returned as is if the call fails; S_ERROR is returned if
                 bjobs exits with a non-zero status.
    """

    result = S_OK()
    result['SubmittedJobs'] = self.submittedJobs

    ssh = SSH( parameters = self.ceParameters )
    cmd = ["bjobs", "-q" , self.execQueue , "-a" ]
    ret = ssh.sshCall( 100, cmd )

    if not ret['OK']:
      self.log.error( 'Timeout', ret['Message'] )
      return ret

    status = ret['Value'][0]
    stdout = ret['Value'][1]
    stderr = ret['Value'][2]

    self.log.debug( "status:", status )
    self.log.debug( "stdout:", stdout )
    self.log.debug( "stderr:", stderr )

    if status:
      self.log.error( 'Failed bjobs execution:', stderr )
      return S_ERROR( stderr )

    # NOTE(review): 'USUSP' is counted as running here; confirm whether
    # 'SSUSP' was meant instead (or as well).
    waitingStates = ( 'PEND', 'PSUSP' )
    runningStates = ( 'RUN', 'USUSP' )

    waitingJobs = 0
    runningJobs = 0
    for line in stdout.split( "\n" ):
      fields = line.split()
      if len( fields ) < 3:
        # blank or truncated line: no STAT column to look at
        continue
      # Only the STAT column (third field) is inspected. Searching the whole
      # line would also count jobs whose name, user or host merely contains
      # "RUN" or "PEND".
      lsfStatus = fields[2]
      if lsfStatus in waitingStates:
        waitingJobs += 1
      elif lsfStatus in runningStates:
        runningJobs += 1

    result['WaitingJobs'] = waitingJobs
    result['RunningJobs'] = runningJobs

    self.log.verbose( 'Waiting Jobs: ', waitingJobs )
    self.log.verbose( 'Running Jobs: ', runningJobs )

    return result
    def getJobStatus(self, jobIDList):
        """ Get the status information for the given list of jobs

        The jobs are queried with bjobs through SSH, 100 jobs per call.

        :param jobIDList: job references; each one is parsed with pfnparse()
                          and its 'FileName' is the job number handed to bjobs
        :return: S_OK(dict) mapping every usable job reference to 'Done',
                 'Running', 'Waiting' or 'Unknown'. References rejected by
                 pfnparse() are logged and left out. The failed SSH result is
                 returned as soon as one bjobs call fails.
        """

        # LSF STAT column values -> status reported to the caller
        statusMap = {
            'DONE': 'Done',
            'EXIT': 'Done',
            'RUN': 'Running',
            'SSUSP': 'Running',
            'PEND': 'Waiting',
            'PSUSP': 'Waiting',
            'UNKWN': 'Unknown',
        }

        resultDict = {}
        ssh = SSH(parameters=self.ceParameters)

        for jobList in breakListIntoChunks(jobIDList, 100):
            jobDict = {}
            for job in jobList:
                result = pfnparse(job)
                if not result['OK']:
                    # A failed parse carries no 'Value' to read from
                    self.log.error('Invalid job id', job)
                    continue
                jobNumber = result['Value']['FileName']
                if jobNumber:
                    jobDict[jobNumber] = job

            if not jobDict:
                # Nothing usable in this chunk, so there is nothing to ask
                # bjobs about
                continue

            cmd = ['bjobs', ' '.join(jobDict)]
            result = ssh.sshCall(100, cmd)
            if not result['OK']:
                return result

            lines = result['Value'][1].replace('\r', '').split('\n')
            for jobNumber, job in jobDict.items():
                resultDict[job] = 'Unknown'
                for line in lines:
                    fields = line.split()
                    # The JOBID column must equal the job number: a substring
                    # test would let "123" pick up the line of job "1234"
                    if not fields or fields[0] != jobNumber:
                        continue
                    # The third column is STAT; shorter lines have none
                    if len(fields) > 2 and fields[2] in statusMap:
                        resultDict[job] = statusMap[fields[2]]

        return S_OK(resultDict)
  def getJobStatus( self, jobIDList ):
    """ Get the status information for the given list of jobs

        The jobs are queried with bjobs through SSH, 100 jobs per call.

        :param jobIDList: job references; each one is parsed with pfnparse()
                          and its 'FileName' is the job number handed to bjobs
        :return: S_OK( dict ) mapping every usable job reference to 'Done',
                 'Running', 'Waiting' or 'Unknown'. References rejected by
                 pfnparse() are logged and left out. The failed SSH result is
                 returned as soon as one bjobs call fails.
    """

    # LSF STAT column values -> status reported to the caller
    statusMap = { 'DONE': 'Done', 'EXIT': 'Done',
                  'RUN': 'Running', 'SSUSP': 'Running',
                  'PEND': 'Waiting', 'PSUSP': 'Waiting',
                  'UNKWN': 'Unknown' }

    resultDict = {}
    ssh = SSH( parameters = self.ceParameters )

    for jobList in breakListIntoChunks( jobIDList, 100 ):
      jobDict = {}
      for job in jobList:
        result = pfnparse( job )
        if not result['OK']:
          # A failed parse carries no 'Value' to read from
          self.log.error( 'Invalid job id', job )
          continue
        jobNumber = result['Value']['FileName']
        if jobNumber:
          jobDict[jobNumber] = job

      if not jobDict:
        # Nothing usable in this chunk, so there is nothing to ask bjobs about
        continue

      cmd = [ 'bjobs', ' '.join( jobDict ) ]
      result = ssh.sshCall( 100, cmd )
      if not result['OK']:
        return result

      lines = result['Value'][1].replace( '\r', '' ).split( '\n' )
      for jobNumber, job in jobDict.items():
        resultDict[job] = 'Unknown'
        for line in lines:
          fields = line.split()
          # The JOBID column must equal the job number: a substring test
          # would let "123" pick up the line of job "1234"
          if not fields or fields[0] != jobNumber:
            continue
          # The third column is STAT; shorter lines have none
          if len( fields ) > 2 and fields[2] in statusMap:
            resultDict[job] = statusMap[fields[2]]

    return S_OK( resultDict )
    def submitJob_old(self, executableFile, proxy, numberOfJobs=1):
        """ Method to submit job
    """

        self.log.verbose("Executable file path: %s" % executableFile)
        if not os.access(executableFile, 5):
            os.chmod(executableFile, 0755)

        # if no proxy is supplied, the executable can be submitted directly
        # otherwise a wrapper script is needed to get the proxy to the execution node
        # The wrapper script makes debugging more complicated and thus it is
        # recommended to transfer a proxy inside the executable if possible.
        if proxy:
            self.log.verbose('Setting up proxy for payload')

            compressedAndEncodedProxy = base64.encodestring(
                bz2.compress(proxy.dumpAllToString()['Value'])).replace(
                    '\n', '')
            compressedAndEncodedExecutable = base64.encodestring(
                bz2.compress(open(executableFile, "rb").read(),
                             9)).replace('\n', '')

            wrapperContent = """#!/usr/bin/env python
# Wrapper script for executable and proxy
import os, tempfile, sys, base64, bz2, shutil
try:
  workingDirectory = tempfile.mkdtemp( suffix = '_wrapper', prefix= 'TORQUE_' )
  os.chdir( workingDirectory )
  open( 'proxy', "w" ).write(bz2.decompress( base64.decodestring( "%(compressedAndEncodedProxy)s" ) ) )
  open( '%(executable)s', "w" ).write(bz2.decompress( base64.decodestring( "%(compressedAndEncodedExecutable)s" ) ) )
  os.chmod('proxy',0600)
  os.chmod('%(executable)s',0700)
  os.environ["X509_USER_PROXY"]=os.path.join(workingDirectory, 'proxy')
except Exception, x:
  print >> sys.stderr, x
  sys.exit(-1)
cmd = "./%(executable)s"
print 'Executing: ', cmd
sys.stdout.flush()
os.system( cmd )

shutil.rmtree( workingDirectory )

""" % { 'compressedAndEncodedProxy': compressedAndEncodedProxy, \
              'compressedAndEncodedExecutable': compressedAndEncodedExecutable, \
              'executable': os.path.basename( executableFile ) }

            fd, name = tempfile.mkstemp(suffix='_wrapper.py',
                                        prefix='TORQUE_',
                                        dir=os.getcwd())
            wrapper = os.fdopen(fd, 'w')
            wrapper.write(wrapperContent)
            wrapper.close()

            submitFile = name

        else:  # no proxy
            submitFile = executableFile

        ssh = SSH(parameters=self.ceParameters)
        # Copy the executable
        os.chmod(submitFile, stat.S_IRUSR | stat.S_IXUSR)
        sFile = os.path.basename(submitFile)
        result = ssh.scpCall(
            10, submitFile,
            '%s/%s' % (self.executableArea, os.path.basename(submitFile)))
        # submit submitFile to the batch system
        cmd = "i=0; while [ $i -lt %(numberOfJobs)d ]; do qsub -o %(output)s -e %(error)s -q %(queue)s -N DIRACPilot %(submitOptions)s %(executable)s; let i=i+1; done; rm -f %(executable)s" % \
          {'numberOfJobs': numberOfJobs, \
           'output': self.batchOutput, \
           'error': self.batchError, \
           'queue': self.queue, \
           'submitOptions': self.submitOptions, \
           'executable': '%s/%s' % ( self.executableArea, os.path.basename( submitFile ) ) }

        self.log.verbose('CE submission command: %s' % (cmd))

        result = ssh.sshCall(10, cmd)

        if not result['OK'] or result['Value'][0] != 0:
            self.log.warn('===========> SSHTorque CE result NOT OK')
            self.log.debug(result)
            return S_ERROR(result['Value'])
        else:
            self.log.debug('Torque CE result OK')

        batchIDList = result['Value'][1].strip().replace('\r', '').split('\n')

        self.submittedJobs += 1

        return S_OK(batchIDList)
  def submitJob_old( self, executableFile, proxy, numberOfJobs = 1 ):
    """ Method to submit job
    """

    self.log.verbose( "Executable file path: %s" % executableFile )
    if not os.access( executableFile, 5 ):
      os.chmod( executableFile, 0755 )

    # if no proxy is supplied, the executable can be submitted directly
    # otherwise a wrapper script is needed to get the proxy to the execution node
    # The wrapper script makes debugging more complicated and thus it is
    # recommended to transfer a proxy inside the executable if possible.
    if proxy:
      self.log.verbose( 'Setting up proxy for payload' )

      compressedAndEncodedProxy = base64.encodestring( bz2.compress( proxy.dumpAllToString()['Value'] ) ).replace( '\n', '' )
      compressedAndEncodedExecutable = base64.encodestring( bz2.compress( open( executableFile, "rb" ).read(), 9 ) ).replace( '\n', '' )

      wrapperContent = """#!/usr/bin/env python
# Wrapper script for executable and proxy
import os, tempfile, sys, base64, bz2, shutil
try:
  workingDirectory = tempfile.mkdtemp( suffix = '_wrapper', prefix= 'TORQUE_' )
  os.chdir( workingDirectory )
  open( 'proxy', "w" ).write(bz2.decompress( base64.decodestring( "%(compressedAndEncodedProxy)s" ) ) )
  open( '%(executable)s', "w" ).write(bz2.decompress( base64.decodestring( "%(compressedAndEncodedExecutable)s" ) ) )
  os.chmod('proxy',0600)
  os.chmod('%(executable)s',0700)
  os.environ["X509_USER_PROXY"]=os.path.join(workingDirectory, 'proxy')
except Exception, x:
  print >> sys.stderr, x
  sys.exit(-1)
cmd = "./%(executable)s"
print 'Executing: ', cmd
sys.stdout.flush()
os.system( cmd )

shutil.rmtree( workingDirectory )

""" % { 'compressedAndEncodedProxy': compressedAndEncodedProxy, \
        'compressedAndEncodedExecutable': compressedAndEncodedExecutable, \
        'executable': os.path.basename( executableFile ) }

      fd, name = tempfile.mkstemp( suffix = '_wrapper.py', prefix = 'TORQUE_', dir = os.getcwd() )
      wrapper = os.fdopen( fd, 'w' )
      wrapper.write( wrapperContent )
      wrapper.close()

      submitFile = name

    else: # no proxy
      submitFile = executableFile

    ssh = SSH( parameters = self.ceParameters )
    # Copy the executable
    os.chmod( submitFile, stat.S_IRUSR | stat.S_IXUSR )
    sFile = os.path.basename( submitFile )
    result = ssh.scpCall( 10, submitFile, '%s/%s' % ( self.executableArea, os.path.basename( submitFile ) ) )
    # submit submitFile to the batch system
    cmd = "i=0; while [ $i -lt %(numberOfJobs)d ]; do qsub -o %(output)s -e %(error)s -q %(queue)s -N DIRACPilot %(submitOptions)s %(executable)s; let i=i+1; done; rm -f %(executable)s" % \
      {'numberOfJobs': numberOfJobs, \
       'output': self.batchOutput, \
       'error': self.batchError, \
       'queue': self.queue, \
       'submitOptions': self.submitOptions, \
       'executable': '%s/%s' % ( self.executableArea, os.path.basename( submitFile ) ) }

    self.log.verbose( 'CE submission command: %s' % ( cmd ) )

    result = ssh.sshCall( 10, cmd )

    if not result['OK'] or result['Value'][0] != 0:
      self.log.warn( '===========> SSHTorque CE result NOT OK' )
      self.log.debug( result )
      return S_ERROR( result['Value'] )
    else:
      self.log.debug( 'Torque CE result OK' )

    batchIDList = result['Value'][1].strip().replace( '\r', '' ).split( '\n' )

    self.submittedJobs += 1

    return S_OK( batchIDList )