Ejemplo n.º 1
0
  def export_killPilot(self, pilotRefList ):
    """ Kill the specified pilots

        The pilots are regrouped per owner, group, site, CE and queue, and one
        kill request is sent per group through the matching Computing Element.

        :param pilotRefList: a single pilot reference (string) or an iterable of
                             pilot references
        :return: S_OK() when every kill request succeeded; an error structure when
                 the pilot info, the queue description, the CE or the owner proxy
                 cannot be obtained, or when at least one kill request failed
    """
    # Make a list if it is not yet. isinstance() is used instead of an exact type
    # match so that string subclasses are not split into single characters.
    if isinstance( pilotRefList, tuple( StringTypes ) ):
      pilotRefs = [pilotRefList]
    else:
      pilotRefs = list( pilotRefList )

    # Regroup pilots per site and per owner
    pilotRefDict = {}
    for pilotReference in pilotRefs:
      result = pilotDB.getPilotInfo(pilotReference)
      if not result['OK'] or not result[ 'Value' ]:
        return S_ERROR('Failed to get info for pilot ' + pilotReference)

      pilotDict = result['Value'][pilotReference]
      owner = pilotDict['OwnerDN']
      group = pilotDict['OwnerGroup']
      queue = '@@@'.join( [owner, group, pilotDict['GridSite'], pilotDict['DestinationSite'], pilotDict['Queue']] )
      gridType = pilotDict['GridType']
      queueEntry = pilotRefDict.setdefault( queue, { 'PilotList': [] } )
      queueEntry['PilotList'].append( pilotReference )
      # The grid type of the last pilot seen for this key is the one used below
      queueEntry['GridType'] = gridType

    # Do the work now queue by queue
    ceFactory = ComputingElementFactory()
    failed = []
    for key, pilotDict in pilotRefDict.items():

      owner, group, site, ceName, queue = key.split( '@@@' )
      result = getQueue( site, ceName, queue )
      if not result['OK']:
        return result
      queueDict = result['Value']
      gridType = pilotDict['GridType']
      result = ceFactory.getCE( gridType, ceName, queueDict )
      if not result['OK']:
        return result
      ce = result['Value']

      # Only these grid types get the pilot owner proxy set on the CE
      if gridType in ["LCG","gLite","CREAM"]:
        vomsGroup = getGroupOption( group, 'VOMSRole', group )
        ret = gProxyManager.getPilotProxyFromVOMSGroup( owner, vomsGroup )
        if not ret['OK']:
          gLogger.error( ret['Message'] )
          gLogger.error( 'Could not get proxy:', 'User "%s", Group "%s"' % ( owner, vomsGroup ) )
          return S_ERROR("Failed to get the pilot's owner proxy")
        ce.setProxy( ret['Value'] )

      pilotList = pilotDict['PilotList']
      result = ce.killJob( pilotList )
      if not result['OK']:
        # Keep going with the other queues, but do not lose the reason of the failure
        gLogger.error( 'Failed to kill pilots:', result.get( 'Message', 'Unknown error' ) )
        failed.extend( pilotList )

    if failed:
      return S_ERROR('Failed to kill at least some pilots')

    return S_OK()
Ejemplo n.º 2
0
    def export_getPilotLoggingInfo(self, pilotReference):
        """ Get the pilot logging info for the Grid job reference

        The pilot owner and group are read from the pilot database, the group is
        passed through getGroupOption( group, 'VOMSRole', group ) and the resulting
        value is used to request the pilot proxy with which the logging info is
        retrieved.

        :param pilotReference: the Grid job reference of the pilot
        :return: the result of getPilotLoggingInfo, or S_ERROR when the pilot
                 info or the owner proxy cannot be obtained
        """
        infoResult = pilotDB.getPilotInfo(pilotReference)
        if not (infoResult['OK'] and infoResult['Value']):
            return S_ERROR('Failed to determine owner for pilot ' +
                           pilotReference)

        pilotInfo = infoResult['Value'][pilotReference]
        ownerDN = pilotInfo['OwnerDN']
        ownerGroup = pilotInfo['OwnerGroup']

        vomsGroup = getGroupOption(ownerGroup, 'VOMSRole', ownerGroup)
        proxyResult = gProxyManager.getPilotProxyFromVOMSGroup(ownerDN,
                                                               vomsGroup)
        if proxyResult['OK']:
            return getPilotLoggingInfo(proxyResult['Value'],
                                       pilotInfo['GridType'], pilotReference)

        # No proxy: report both the underlying message and the identity used
        gLogger.error(proxyResult['Message'])
        gLogger.error('Could not get proxy:',
                      'User "%s", Group "%s"' % (ownerDN, vomsGroup))
        return S_ERROR("Failed to get the pilot's owner proxy")
Ejemplo n.º 3
0
  def __getGridJobOutput(self,pilotReference):
    """ Get the pilot job standard output and standard error files for the Grid
        job reference

        When the database lookup of the stored output succeeds, that output is
        returned, or an error if both streams are empty. When the lookup fails,
        the output is fetched with the pilot owner proxy for the LCG, gLite and
        CREAM grid types, stored in the database and returned.

        :param pilotReference: the Grid job reference of the pilot
        :return: S_OK( dict ) with the StdOut, StdErr, OwnerDN, OwnerGroup and
                 FileList keys, or S_ERROR
    """

    infoResult = pilotDB.getPilotInfo( pilotReference )
    if not ( infoResult['OK'] and infoResult['Value'] ):
      return S_ERROR('Failed to get info for pilot ' + pilotReference)

    pilotInfo = infoResult['Value'][pilotReference]
    ownerDN = pilotInfo['OwnerDN']
    ownerGroup = pilotInfo['OwnerGroup']

    # FIXME: What if the OutputSandBox is not StdOut and StdErr, what do we do with other files?
    cached = pilotDB.getPilotOutput( pilotReference )
    if cached['OK']:
      stdOut = cached['Value']['StdOut']
      stdErr = cached['Value']['StdErr']
      if not ( stdOut or stdErr ):
        return S_ERROR('Empty pilot output found')
      return S_OK( { 'StdOut': stdOut,
                     'StdErr': stdErr,
                     'OwnerDN': ownerDN,
                     'OwnerGroup': ownerGroup,
                     'FileList': [] } )

    gridType = pilotInfo['GridType']
    if gridType not in ["LCG","gLite","CREAM"]:
      return S_ERROR('Can not retrieve pilot output for the Grid %s ' % gridType)

    vomsGroup = getGroupOption( ownerGroup, 'VOMSRole', ownerGroup )
    proxyResult = gProxyManager.getPilotProxyFromVOMSGroup( ownerDN, vomsGroup )
    if not proxyResult['OK']:
      gLogger.error( proxyResult['Message'] )
      gLogger.error( 'Could not get proxy:', 'User "%s", Group "%s"' % ( ownerDN, vomsGroup ) )
      return S_ERROR("Failed to get the pilot's owner proxy")

    fetched = getPilotOutput( proxyResult['Value'], gridType, pilotReference, pilotInfo['PilotStamp'] )
    if not fetched['OK']:
      return S_ERROR('Failed to get pilot output: '+fetched['Message'])
    # FIXME: What if the OutputSandBox is not StdOut and StdErr, what do we do with other files?
    stdOut = fetched['StdOut']
    stdErr = fetched['StdErr']
    fileList = fetched['FileList']

    # A failure to store the output is only logged, the output is still returned
    stored = pilotDB.storePilotOutput( pilotReference, stdOut, stdErr )
    if not stored['OK']:
      gLogger.error('Failed to store pilot output:',stored['Message'])

    # In this branch OwnerGroup carries the value returned by getGroupOption
    return S_OK( { 'StdOut': stdOut,
                   'StdErr': stdErr,
                   'OwnerDN': ownerDN,
                   'OwnerGroup': vomsGroup,
                   'FileList': fileList } )
Ejemplo n.º 4
0
def getPilotProxy(pilotDict):
    """Get a proxy bound to a pilot.

    :param dict pilotDict: pilot description; its "OwnerDN" and "OwnerGroup"
        entries are read
    :return: S_OK(proxy) or S_ERROR when the proxy manager request fails
    """
    ownerDN, ownerGroup = pilotDict["OwnerDN"], pilotDict["OwnerGroup"]

    vomsGroup = getGroupOption(ownerGroup, "VOMSRole", ownerGroup)
    proxyResult = gProxyManager.getPilotProxyFromVOMSGroup(ownerDN, vomsGroup)
    if proxyResult["OK"]:
        return S_OK(proxyResult["Value"])

    gLogger.error(
        "Could not get proxy:",
        'User "%s" Group "%s" : %s' % (ownerDN, vomsGroup, proxyResult["Message"]),
    )
    return S_ERROR("Failed to get the pilot's owner proxy")
Ejemplo n.º 5
0
def killPilotsInQueues(pilotRefDict):
    """Kill pilots queue by queue.

    :param dict pilotRefDict: a dict of pilots in queues; each key is an
        "owner@@@group@@@site@@@ce@@@queue" string and each value holds the
        "GridType" and the "PilotList" of that queue
    :return: the list of pilot references whose kill request failed. When the
        queue description, the CE or the owner proxy cannot be obtained, the
        corresponding error structure is returned instead of a list.
    """

    factory = ComputingElementFactory()
    notKilled = []
    for queueKey, queueInfo in pilotRefDict.items():

        ownerDN, ownerGroup, siteName, ceName, queueName = queueKey.split("@@@")
        queueResult = getQueue(siteName, ceName, queueName)
        if not queueResult["OK"]:
            return queueResult
        queueDict = queueResult["Value"]
        gridType = queueInfo["GridType"]
        ceResult = factory.getCE(gridType, ceName, queueDict)
        if not ceResult["OK"]:
            return ceResult
        computingElement = ceResult["Value"]

        # FIXME: quite hacky. Should be either removed, or based on some flag
        if gridType in ["CREAM", "ARC", "Globus", "HTCondorCE"]:
            vomsGroup = getGroupOption(ownerGroup, "VOMSRole", ownerGroup)
            proxyResult = gProxyManager.getPilotProxyFromVOMSGroup(ownerDN, vomsGroup)
            if not proxyResult["OK"]:
                gLogger.error(
                    "Could not get proxy:",
                    'User "%s" Group "%s" : %s' % (ownerDN, vomsGroup, proxyResult["Message"]),
                )
                return S_ERROR("Failed to get the pilot's owner proxy")
            computingElement.setProxy(proxyResult["Value"])

        # A failed kill request marks every pilot of the queue as failed
        queuePilots = queueInfo["PilotList"]
        if not computingElement.killJob(queuePilots)["OK"]:
            notKilled.extend(queuePilots)

    return notKilled
Ejemplo n.º 6
0
  def export_getPilotLoggingInfo(self,pilotReference):
    """ Get the pilot logging info for the Grid job reference

        :param pilotReference: the Grid job reference of the pilot
        :return: the result of getPilotLoggingInfo, or S_ERROR when the pilot
                 info or the owner proxy cannot be obtained
    """

    lookup = pilotDB.getPilotInfo( pilotReference )
    if not ( lookup['OK'] and lookup['Value'] ):
      return S_ERROR('Failed to determine owner for pilot ' + pilotReference)

    pilotRecord = lookup['Value'][pilotReference]
    ownerDN = pilotRecord['OwnerDN']
    ownerGroup = pilotRecord['OwnerGroup']

    # NOTE(review): the owner group is handed to getPilotProxyFromVOMSGroup as is,
    # without a VOMSRole lookup - confirm that this is what the proxy manager expects
    proxyAnswer = gProxyManager.getPilotProxyFromVOMSGroup( ownerDN, ownerGroup )
    if proxyAnswer['OK']:
      return getPilotLoggingInfo( proxyAnswer['Value'], pilotRecord['GridType'], pilotReference )

    gLogger.error( proxyAnswer['Message'] )
    gLogger.error( 'Could not get proxy:', 'User "%s", Group "%s"' % ( ownerDN, ownerGroup ) )
    return S_ERROR("Failed to get the pilot's owner proxy")
Ejemplo n.º 7
0
  def __getGridJobOutput(self,pilotReference):
    """ Get the pilot job standard output and standard error files for the Grid
        job reference

        The output stored in the pilot database is returned when it is not empty.
        Otherwise the output is fetched either with the pilot owner proxy (LCG,
        gLite and CREAM grid types) or through the Computing Element of the
        pilot (any other grid type), stored in the database when the standard
        output is not empty, and returned.

        :param pilotReference: the Grid job reference of the pilot
        :return: S_OK( dict ) with the StdOut, StdErr, OwnerDN, OwnerGroup and
                 FileList keys, or an error structure
    """

    infoResult = pilotDB.getPilotInfo( pilotReference )
    if not ( infoResult['OK'] and infoResult['Value'] ):
      return S_ERROR('Failed to get info for pilot ' + pilotReference)

    pilotInfo = infoResult['Value'][pilotReference]
    ownerDN = pilotInfo['OwnerDN']
    ownerGroup = pilotInfo['OwnerGroup']

    def packOutput( stdOut, stdErr, groupName, fileList ):
      """ Build the S_OK structure returned to the caller """
      return S_OK( { 'StdOut': stdOut,
                     'StdErr': stdErr,
                     'OwnerDN': ownerDN,
                     'OwnerGroup': groupName,
                     'FileList': fileList } )

    def cacheOutput( stdOut, stdErr ):
      """ Store a non-empty standard output in the database; failures are only logged """
      if stdOut:
        stored = pilotDB.storePilotOutput( pilotReference, stdOut, stdErr )
        if not stored['OK']:
          gLogger.error('Failed to store pilot output:',stored['Message'])

    # FIXME: What if the OutputSandBox is not StdOut and StdErr, what do we do with other files?
    cached = pilotDB.getPilotOutput( pilotReference )
    if cached['OK']:
      stdOut = cached['Value']['StdOut']
      stdErr = cached['Value']['StdErr']
      if stdOut or stdErr:
        return packOutput( stdOut, stdErr, ownerGroup, [] )
      gLogger.warn( 'Empty pilot output found for %s' % pilotReference )

    gridType = pilotInfo['GridType']
    if gridType in ["LCG","gLite","CREAM"]:
      vomsGroup = getGroupOption( ownerGroup, 'VOMSRole', ownerGroup )
      proxyResult = gProxyManager.getPilotProxyFromVOMSGroup( ownerDN, vomsGroup )
      if not proxyResult['OK']:
        gLogger.error( proxyResult['Message'] )
        gLogger.error( 'Could not get proxy:', 'User "%s", Group "%s"' % ( ownerDN, vomsGroup ) )
        return S_ERROR("Failed to get the pilot's owner proxy")

      fetched = getPilotOutput( proxyResult['Value'], gridType, pilotReference, pilotInfo['PilotStamp'] )
      if not fetched['OK']:
        return S_ERROR('Failed to get pilot output: '+fetched['Message'])
      # FIXME: What if the OutputSandBox is not StdOut and StdErr, what do we do with other files?
      stdOut = fetched['StdOut']
      stdErr = fetched['StdErr']
      fileList = fetched['FileList']
      cacheOutput( stdOut, stdErr )
      # In this branch OwnerGroup carries the value returned by getGroupOption
      return packOutput( stdOut, stdErr, vomsGroup, fileList )

    # Instantiate the appropriate CE
    ceFactory = ComputingElementFactory()
    queueResult = getQueue( pilotInfo['GridSite'], pilotInfo['DestinationSite'], pilotInfo['Queue'] )
    if not queueResult['OK']:
      return queueResult
    ceResult = ceFactory.getCE( gridType, pilotInfo['DestinationSite'], queueResult['Value'] )
    if not ceResult['OK']:
      return ceResult
    computingElement = ceResult['Value']

    # The CE is addressed with "<reference>:::<stamp>" when a stamp is known
    pilotStamp = pilotInfo['PilotStamp']
    jobReference = pilotReference
    if pilotStamp:
      jobReference = jobReference + ':::' + pilotStamp
    outputResult = computingElement.getJobOutput( jobReference )
    if not outputResult['OK']:
      return outputResult
    stdOut, stdErr = outputResult['Value']
    cacheOutput( stdOut, stdErr )
    return packOutput( stdOut, stdErr, ownerGroup, [] )
Ejemplo n.º 8
0
    def export_killPilot(self, pilotRefList):
        """ Kill the specified pilots

        The pilots are regrouped per owner, group, site, CE and queue, and one
        kill request is sent per group through the matching Computing Element.

        :param pilotRefList: a single pilot reference (string) or an iterable
                             of pilot references
        :return: S_OK() when every kill request succeeded; an error structure
                 when the pilot info, the queue description, the CE or the
                 owner proxy cannot be obtained, or when at least one kill
                 request failed
        """
        # Make a list if it is not yet. isinstance() is used instead of an
        # exact type match so that string subclasses are not split into
        # single characters.
        if isinstance(pilotRefList, tuple(StringTypes)):
            pilotRefs = [pilotRefList]
        else:
            pilotRefs = list(pilotRefList)

        # Regroup pilots per site and per owner
        pilotRefDict = {}
        for pilotReference in pilotRefs:
            result = pilotDB.getPilotInfo(pilotReference)
            if not result['OK'] or not result['Value']:
                return S_ERROR('Failed to get info for pilot ' +
                               pilotReference)

            pilotDict = result['Value'][pilotReference]
            owner = pilotDict['OwnerDN']
            group = pilotDict['OwnerGroup']
            queue = '@@@'.join([
                owner, group, pilotDict['GridSite'],
                pilotDict['DestinationSite'], pilotDict['Queue']
            ])
            gridType = pilotDict['GridType']
            queueEntry = pilotRefDict.setdefault(queue, {'PilotList': []})
            queueEntry['PilotList'].append(pilotReference)
            # The grid type of the last pilot seen for this key is the one
            # used below
            queueEntry['GridType'] = gridType

        # Do the work now queue by queue
        ceFactory = ComputingElementFactory()
        failed = []
        for key, pilotDict in pilotRefDict.items():

            owner, group, site, ceName, queue = key.split('@@@')
            result = getQueue(site, ceName, queue)
            if not result['OK']:
                return result
            queueDict = result['Value']
            gridType = pilotDict['GridType']
            result = ceFactory.getCE(gridType, ceName, queueDict)
            if not result['OK']:
                return result
            ce = result['Value']

            # Only these grid types get the pilot owner proxy set on the CE
            if gridType in ["LCG", "gLite", "CREAM"]:
                vomsGroup = getGroupOption(group, 'VOMSRole', group)
                ret = gProxyManager.getPilotProxyFromVOMSGroup(
                    owner, vomsGroup)
                if not ret['OK']:
                    gLogger.error(ret['Message'])
                    gLogger.error(
                        'Could not get proxy:',
                        'User "%s", Group "%s"' % (owner, vomsGroup))
                    return S_ERROR("Failed to get the pilot's owner proxy")
                ce.setProxy(ret['Value'])

            pilotList = pilotDict['PilotList']
            result = ce.killJob(pilotList)
            if not result['OK']:
                # Keep going with the other queues, but do not lose the
                # reason of the failure
                gLogger.error('Failed to kill pilots:',
                              result.get('Message', 'Unknown error'))
                failed.extend(pilotList)

        if failed:
            return S_ERROR('Failed to kill at least some pilots')

        return S_OK()
Ejemplo n.º 9
0
    def __getGridJobOutput(self, pilotReference):
        """ Get the pilot job standard output and standard error files for the
        Grid job reference

        The output stored in the pilot database is returned when it is not
        empty. Otherwise the output is fetched either with the pilot owner
        proxy (LCG, gLite and CREAM grid types) or through the Computing
        Element of the pilot (any other grid type), stored in the database
        when the standard output is not empty, and returned.

        :param pilotReference: the Grid job reference of the pilot
        :return: S_OK( dict ) with the StdOut, StdErr, OwnerDN, OwnerGroup
                 and FileList keys, or an error structure
        """

        infoResult = pilotDB.getPilotInfo(pilotReference)
        if not (infoResult['OK'] and infoResult['Value']):
            return S_ERROR('Failed to get info for pilot ' + pilotReference)

        pilotInfo = infoResult['Value'][pilotReference]
        ownerDN = pilotInfo['OwnerDN']
        ownerGroup = pilotInfo['OwnerGroup']

        # FIXME: What if the OutputSandBox is not StdOut and StdErr, what do we do with other files?
        cached = pilotDB.getPilotOutput(pilotReference)
        if cached['OK']:
            stdOut = cached['Value']['StdOut']
            stdErr = cached['Value']['StdErr']
            if stdOut or stdErr:
                return S_OK({
                    'StdOut': stdOut,
                    'StdErr': stdErr,
                    'OwnerDN': ownerDN,
                    'OwnerGroup': ownerGroup,
                    'FileList': []
                })
            gLogger.warn('Empty pilot output found for %s' % pilotReference)

        gridType = pilotInfo['GridType']
        if gridType in ["LCG", "gLite", "CREAM"]:
            vomsGroup = getGroupOption(ownerGroup, 'VOMSRole', ownerGroup)
            proxyResult = gProxyManager.getPilotProxyFromVOMSGroup(
                ownerDN, vomsGroup)
            if not proxyResult['OK']:
                gLogger.error(proxyResult['Message'])
                gLogger.error(
                    'Could not get proxy:',
                    'User "%s", Group "%s"' % (ownerDN, vomsGroup))
                return S_ERROR("Failed to get the pilot's owner proxy")

            fetched = getPilotOutput(proxyResult['Value'], gridType,
                                     pilotReference, pilotInfo['PilotStamp'])
            if not fetched['OK']:
                return S_ERROR('Failed to get pilot output: ' +
                               fetched['Message'])
            # FIXME: What if the OutputSandBox is not StdOut and StdErr, what do we do with other files?
            stdOut = fetched['StdOut']
            stdErr = fetched['StdErr']
            fileList = fetched['FileList']
            # In this branch OwnerGroup carries the value returned by
            # getGroupOption
            returnedGroup = vomsGroup
        else:
            # Instantiate the appropriate CE
            ceFactory = ComputingElementFactory()
            queueResult = getQueue(pilotInfo['GridSite'],
                                   pilotInfo['DestinationSite'],
                                   pilotInfo['Queue'])
            if not queueResult['OK']:
                return queueResult
            ceResult = ceFactory.getCE(gridType,
                                       pilotInfo['DestinationSite'],
                                       queueResult['Value'])
            if not ceResult['OK']:
                return ceResult
            computingElement = ceResult['Value']

            # The CE is addressed with "<reference>:::<stamp>" when a stamp
            # is known
            pilotStamp = pilotInfo['PilotStamp']
            jobReference = pilotReference
            if pilotStamp:
                jobReference = jobReference + ':::' + pilotStamp
            outputResult = computingElement.getJobOutput(jobReference)
            if not outputResult['OK']:
                return outputResult
            stdOut, stdErr = outputResult['Value']
            fileList = []
            returnedGroup = ownerGroup

        # Only a non-empty standard output is stored; a storage failure is
        # logged and does not prevent returning the output
        if stdOut:
            stored = pilotDB.storePilotOutput(pilotReference, stdOut, stdErr)
            if not stored['OK']:
                gLogger.error('Failed to store pilot output:',
                              stored['Message'])

        return S_OK({
            'StdOut': stdOut,
            'StdErr': stdErr,
            'OwnerDN': ownerDN,
            'OwnerGroup': returnedGroup,
            'FileList': fileList
        })
Ejemplo n.º 10
0
  def getJobOutput( self, jobID, _localDir = None ):
    """ Get the specified job standard output and error files. The output is returned
        as strings.

        :param jobID: the job identifier in the "<pilot reference>:::<stamp>" form
        :param _localDir: not used by this implementation
        :return: S_OK( ( output, error ) ), or S_ERROR when the stamp is missing, the
                 pilot owner proxy cannot be obtained or a globus-job-get-output
                 command fails
    """

    pilotRef, stamp = jobID, ''
    if ':::' in jobID:
      pilotRef, stamp = jobID.split( ':::' )
    if not stamp:
      return S_ERROR( 'Pilot stamp not defined for %s' % pilotRef )

    ## somehow when this is called from the WMSAdministrator we don't
    ## get the right proxy, so we do all this stuff here now. Probably
    ## should be fixed in the WMSAdministrator?

    ## Because this function is called from the WMSAdminsitrator, the
    ## gridEnv that is picked up is not the one from the SiteDirector
    ## Definition, but from Computing/CEDefaults
    infoResult = PilotAgentsDB().getPilotInfo( pilotRef )
    if not ( infoResult['OK'] and infoResult['Value'] ):
      return S_ERROR('Failed to determine owner for pilot ' + pilotRef)
    pilotInfo = infoResult['Value'][pilotRef]
    ownerDN = pilotInfo['OwnerDN']
    vomsGroup = getGroupOption( pilotInfo['OwnerGroup'], 'VOMSRole', pilotInfo['OwnerGroup'] )
    proxyResult = gProxyManager.getPilotProxyFromVOMSGroup( ownerDN, vomsGroup )
    if not proxyResult['OK']:
      self.log.error( proxyResult['Message'] )
      self.log.error( 'Could not get proxy:', 'User "%s", Group "%s"' % ( ownerDN, vomsGroup ) )
      return S_ERROR("Failed to get the pilot's owner proxy")
    self.proxy = proxyResult['Value']

    def fetchStream( option, placeholder, failureMessage ):
      """ Run globus-job-get-output for one stream and return ( text, errorMessage );
          errorMessage is None when the stream text could be determined
      """
      cmdResult = executeGridCommand( self.proxy, ['globus-job-get-output', option, pilotRef], self.gridEnv )
      if not cmdResult['OK']:
        return None, failureMessage
      status = cmdResult['Value'][0]
      if not status:
        return cmdResult['Value'][1], None
      # Status 1 with this message is reported as a placeholder text, not as an error
      if status == 1 and "No such file or directory" in cmdResult['Value'][2]:
        return placeholder, None
      return None, '\n'.join( cmdResult['Value'][1:] )

    self.log.verbose("Getting output for: %s " % pilotRef)
    output, failure = fetchStream( '-out',
                                   "Standard Output is not available on the Globus service",
                                   'Failed to retrieve output for %s' % jobID )
    if failure is not None:
      return S_ERROR( failure )

    error, failure = fetchStream( '-err',
                                  "Standard Error is not available on the Globus service",
                                  'Failed to retrieve error for %s' % jobID )
    if failure is not None:
      return S_ERROR( failure )

    return S_OK( ( output, error ) )
Ejemplo n.º 11
0
  def getJobOutput(self, jobID, _localDir=None):
    """ Get the specified job standard output and error files. The output is returned
        as strings.

        :param jobID: the job identifier in the "<pilot reference>:::<stamp>" form
        :param _localDir: not used by this implementation
        :return: S_OK((output, error)), or S_ERROR when the stamp is missing, the
                 pilot owner proxy cannot be obtained or a globus-job-get-output
                 command fails
    """

    pilotRef, stamp = jobID, ''
    if ':::' in jobID:
      pilotRef, stamp = jobID.split(':::')
    if not stamp:
      return S_ERROR('Pilot stamp not defined for %s' % pilotRef)

    # somehow when this is called from the WMSAdministrator we don't
    # get the right proxy, so we do all this stuff here now. Probably
    # should be fixed in the WMSAdministrator?

    # Because this function is called from the WMSAdminsitrator, the
    # gridEnv that is picked up is not the one from the SiteDirector
    # Definition, but from Computing/CEDefaults
    infoResult = PilotAgentsDB().getPilotInfo(pilotRef)
    if not (infoResult['OK'] and infoResult['Value']):
      return S_ERROR('Failed to determine owner for pilot ' + pilotRef)
    pilotInfo = infoResult['Value'][pilotRef]
    ownerDN = pilotInfo['OwnerDN']
    vomsGroup = getGroupOption(pilotInfo['OwnerGroup'], 'VOMSRole', pilotInfo['OwnerGroup'])
    proxyResult = gProxyManager.getPilotProxyFromVOMSGroup(ownerDN, vomsGroup)
    if not proxyResult['OK']:
      self.log.error(proxyResult['Message'])
      self.log.error('Could not get proxy:', 'User "%s", Group "%s"' % (ownerDN, vomsGroup))
      return S_ERROR("Failed to get the pilot's owner proxy")
    self.proxy = proxyResult['Value']

    # Standard output
    self.log.verbose("Getting output for: %s " % pilotRef)
    outResult = executeGridCommand(self.proxy, ['globus-job-get-output', '-out', pilotRef], self.gridEnv)
    if not outResult['OK']:
      return S_ERROR('Failed to retrieve output for %s' % jobID)
    outValue = outResult['Value']
    if not outValue[0]:
      output = outValue[1]
    elif outValue[0] == 1 and "No such file or directory" in outValue[2]:
      # Reported as a placeholder text rather than as an error
      output = "Standard Output is not available on the Globus service"
    else:
      return S_ERROR('\n'.join(outValue[1:]))

    # Standard error
    errResult = executeGridCommand(self.proxy, ['globus-job-get-output', '-err', pilotRef], self.gridEnv)
    if not errResult['OK']:
      return S_ERROR('Failed to retrieve error for %s' % jobID)
    errValue = errResult['Value']
    if not errValue[0]:
      error = errValue[1]
    elif errValue[0] == 1 and "No such file or directory" in errValue[2]:
      error = "Standard Error is not available on the Globus service"
    else:
      return S_ERROR('\n'.join(errValue[1:]))

    return S_OK((output, error))
Ejemplo n.º 12
0
def getGridJobOutput(pilotReference):
    """ Get the pilot job standard output and standard error files for the Grid job reference

    The output stored in the pilot database is returned when it is not empty.
    Otherwise it is fetched through the Computing Element of the pilot, using
    the pilot owner proxy and a temporary working directory, stored in the
    database when the standard output is not empty, and returned.

    :param str pilotReference: a grid (job) pilot reference
    :return: S_OK(dict) with the StdOut, StdErr, OwnerDN, OwnerGroup and
        FileList keys, or an error structure
    """

    result = pilotAgentsDB.getPilotInfo(pilotReference)
    if not result['OK'] or not result['Value']:
        return S_ERROR('Failed to get info for pilot ' + pilotReference)

    pilotDict = result['Value'][pilotReference]
    owner = pilotDict['OwnerDN']
    group = pilotDict['OwnerGroup']

    # FIXME: What if the OutputSandBox is not StdOut and StdErr, what do we do with other files?
    result = pilotAgentsDB.getPilotOutput(pilotReference)
    if result['OK']:
        stdout = result['Value']['StdOut']
        error = result['Value']['StdErr']
        if stdout or error:
            return S_OK({
                'StdOut': stdout,
                'StdErr': error,
                'OwnerDN': owner,
                'OwnerGroup': group,
                'FileList': [],
            })
        gLogger.warn('Empty pilot output found', 'for %s' % pilotReference)

    # Instantiate the appropriate CE
    ceFactory = ComputingElementFactory()
    result = getQueue(pilotDict['GridSite'], pilotDict['DestinationSite'],
                      pilotDict['Queue'])
    if not result['OK']:
        return result
    queueDict = result['Value']
    queueDict['GridEnv'] = getGridEnv()
    workingDirectory = mkdtemp()
    queueDict['WorkingDirectory'] = workingDirectory

    # The temporary directory must be removed on every exit path, including
    # the proxy failure return and unexpected exceptions.
    try:
        result = ceFactory.getCE(pilotDict['GridType'],
                                 pilotDict['DestinationSite'], queueDict)
        if not result['OK']:
            return result
        ce = result['Value']

        groupVOMS = getGroupOption(group, 'VOMSRole', group)
        result = gProxyManager.getPilotProxyFromVOMSGroup(owner, groupVOMS)
        if not result['OK']:
            gLogger.error(
                'Could not get proxy:', 'User "%s" Group "%s" : %s' %
                (owner, groupVOMS, result['Message']))
            return S_ERROR("Failed to get the pilot's owner proxy")
        ce.setProxy(result['Value'])

        # The CE is addressed with "<reference>:::<stamp>" when a stamp is known
        pilotStamp = pilotDict['PilotStamp']
        pRef = pilotReference
        if pilotStamp:
            pRef = pRef + ':::' + pilotStamp
        result = ce.getJobOutput(pRef)
        if not result['OK']:
            return result
        stdout, error = result['Value']

        # Only a non-empty standard output is stored; a storage failure is
        # logged and does not prevent returning the output
        if stdout:
            result = pilotAgentsDB.storePilotOutput(pilotReference, stdout, error)
            if not result['OK']:
                gLogger.error('Failed to store pilot output:', result['Message'])

        return S_OK({
            'StdOut': stdout,
            'StdErr': error,
            'OwnerDN': owner,
            'OwnerGroup': group,
            'FileList': [],
        })
    finally:
        shutil.rmtree(workingDirectory)
Ejemplo n.º 13
0
    def execute(self):
        """The PilotAgent execution method.

        Reads the agent options, opens a pilot database connection and, for
        every (owner DN, owner group, grid type, broker) group with an
        eligible grid type, queries the status of the selected pilots in
        chunks of MAX_JOBS_QUERY. Pilots reported with a final status are
        collected and passed to accountPilots; the others get their status
        updated in the database. Old pilots are handled at the end.

        The database connection is closed on every exit path, including the
        early returns and unexpected exceptions.

        :return: S_OK() on completion or when there is no pilot group to
                 process; the failing result when the connection, the pilot
                 groups or a pilot selection cannot be obtained
        """

        self.pilotStalledDays = self.am_getOption('PilotStalledDays', 3)
        self.gridEnv = self.am_getOption('GridEnv')
        if not self.gridEnv:
            # No specific option found, try a general one
            setup = gConfig.getValue('/DIRAC/Setup', '')
            if setup:
                instance = gConfig.getValue(
                    '/DIRAC/Setups/%s/WorkloadManagement' % setup, '')
                if instance:
                    self.gridEnv = gConfig.getValue(
                        '/Systems/WorkloadManagement/%s/GridEnv' % instance,
                        '')

        result = self.pilotDB._getConnection()
        if not result['OK']:
            return result
        connection = result['Value']

        try:
            result = self.pilotDB.getPilotGroups(
                self.identityFieldsList, {'Status': self.queryStateList})
            if not result['OK']:
                self.log.error('Fail to get identities Groups',
                               result['Message'])
                return result
            if not result['Value']:
                return S_OK()
            pilotGroups = result['Value']

            pilotsToAccount = {}

            for ownerDN, ownerGroup, gridType, broker in pilotGroups:

                if gridType not in self.eligibleGridTypes:
                    continue

                self.log.verbose('Getting pilots for %s:%s @ %s %s' %
                                 (ownerDN, ownerGroup, gridType, broker))

                condDict1 = {
                    'Status': 'Done',
                    'StatusReason': 'Report from JobAgent',
                    'OwnerDN': ownerDN,
                    'OwnerGroup': ownerGroup,
                    'GridType': gridType,
                    'Broker': broker
                }

                condDict2 = {
                    'Status': self.queryStateList,
                    'OwnerDN': ownerDN,
                    'OwnerGroup': ownerGroup,
                    'GridType': gridType,
                    'Broker': broker
                }

                for condDict in [condDict1, condDict2]:
                    result = self.clearWaitingPilots(condDict)
                    if not result['OK']:
                        self.log.warn('Failed to clear Waiting Pilot Jobs')

                    result = self.pilotDB.selectPilots(condDict)
                    if not result['OK']:
                        self.log.warn('Failed to get the Pilot Agents')
                        return result
                    if not result['Value']:
                        continue
                    refList = result['Value']

                    ret = gProxyManager.getPilotProxyFromVOMSGroup(
                        ownerDN, ownerGroup)
                    if not ret['OK']:
                        self.log.error(ret['Message'])
                        self.log.error(
                            'Could not get proxy:',
                            'User "%s", Group "%s"' % (ownerDN, ownerGroup))
                        continue
                    proxy = ret['Value']

                    self.log.verbose(
                        "Getting status for %s pilots for owner %s and group %s"
                        % (len(refList), ownerDN, ownerGroup))

                    for start_index in range(0, len(refList), MAX_JOBS_QUERY):
                        refsToQuery = refList[start_index:start_index +
                                              MAX_JOBS_QUERY]
                        self.log.verbose(
                            'Querying %d pilots of %s starting at %d' %
                            (len(refsToQuery), len(refList), start_index))
                        result = self.getPilotStatus(proxy, gridType,
                                                     refsToQuery)
                        if not result['OK']:
                            if result['Message'] == 'Broker not Available':
                                # No point in querying the remaining chunks
                                self.log.error('Broker %s not Available' %
                                               broker)
                                break
                            self.log.warn('Failed to get pilot status:')
                            self.log.warn('%s:%s @ %s' %
                                          (ownerDN, ownerGroup, gridType))
                            continue

                        statusDict = result['Value']
                        for pRef in statusDict:
                            pDict = statusDict[pRef]
                            if not pDict:
                                continue
                            if pDict['isParent']:
                                self.log.verbose('Clear parametric parent %s' %
                                                 pRef)
                                result = self.clearParentJob(
                                    pRef, pDict, connection)
                                if not result['OK']:
                                    self.log.warn(result['Message'])
                                else:
                                    self.log.info(
                                        'Parameteric parent removed: %s' %
                                        pRef)
                            if pDict['FinalStatus']:
                                self.log.verbose(
                                    'Marking Status for %s to %s' %
                                    (pRef, pDict['Status']))
                                pilotsToAccount[pRef] = pDict
                            else:
                                self.log.verbose(
                                    'Setting Status for %s to %s' %
                                    (pRef, pDict['Status']))
                                self.pilotDB.setPilotStatus(
                                    pRef,
                                    pDict['Status'],
                                    pDict['DestinationSite'],
                                    updateTime=pDict['StatusDate'],
                                    conn=connection)

                        # Flush the pending pilots in batches
                        if len(pilotsToAccount) > 100:
                            self.accountPilots(pilotsToAccount, connection)
                            pilotsToAccount = {}

            self.accountPilots(pilotsToAccount, connection)
            # Now handle pilots not updated in the last N days (most likely the Broker is no
            # longer available) and declare them Deleted.
            self.handleOldPilots(connection)

            return S_OK()
        finally:
            connection.close()
Ejemplo n.º 14
0
  def execute( self ):
    """Run one agent cycle: refresh the status of the pilots tracked in the PilotDB.

    The 'PilotStalledDays' and 'GridEnv' options are (re)read first. Then, for
    every ( ownerDN, ownerGroup, gridType, broker ) combination returned by
    pilotDB.getPilotGroups whose grid type is in self.eligibleGridTypes, the
    matching pilots are selected and their status is queried in slices of
    MAX_JOBS_QUERY references. Pilots whose status dictionary has 'FinalStatus'
    set are handed to self.accountPilots; the others are written back with
    pilotDB.setPilotStatus.

    The DB connection obtained at the start is closed on every exit path,
    including early error returns and exceptions.

    :return: the failed result dictionary when the DB connection, the pilot
             groups or a pilot selection cannot be obtained; S_OK() otherwise.
    """

    self.pilotStalledDays = self.am_getOption( 'PilotStalledDays', 3 )
    self.gridEnv = self.am_getOption( 'GridEnv' )
    if not self.gridEnv:
      # No specific option found, try a general one
      setup = gConfig.getValue( '/DIRAC/Setup', '' )
      if setup:
        instance = gConfig.getValue( '/DIRAC/Setups/%s/WorkloadManagement' % setup, '' )
        if instance:
          self.gridEnv = gConfig.getValue( '/Systems/WorkloadManagement/%s/GridEnv' % instance, '' )

    result = self.pilotDB._getConnection()
    if not result['OK']:
      return result
    connection = result['Value']

    # From here on the connection is open: the try/finally guarantees it is
    # released even when we bail out early or a helper raises.
    try:
      result = self.pilotDB.getPilotGroups( self.identityFieldsList,
                                           {'Status': self.queryStateList } )
      if not result['OK']:
        self.log.error( 'Fail to get identities Groups', result['Message'] )
        return result
      if not result['Value']:
        return S_OK()
      pilotGroups = result['Value']

      # Pilots in a final state, accumulated across groups and flushed in batches
      pilotsToAccount = {}

      for ownerDN, ownerGroup, gridType, broker in pilotGroups:

        if gridType not in self.eligibleGridTypes:
          continue

        self.log.verbose( 'Getting pilots for %s:%s @ %s %s' % ( ownerDN, ownerGroup, gridType, broker ) )

        condDict1 = {'Status':'Done',
                     'StatusReason':'Report from JobAgent',
                     'OwnerDN':ownerDN,
                     'OwnerGroup':ownerGroup,
                     'GridType':gridType,
                     'Broker':broker}

        condDict2 = {'Status':self.queryStateList,
                     'OwnerDN':ownerDN,
                     'OwnerGroup':ownerGroup,
                     'GridType':gridType,
                     'Broker':broker}

        for condDict in [ condDict1, condDict2 ]:
          # Best effort: a failure here is only reported, the selection goes on
          result = self.clearWaitingPilots( condDict )
          if not result['OK']:
            self.log.warn( 'Failed to clear Waiting Pilot Jobs' )

          result = self.pilotDB.selectPilots( condDict )
          if not result['OK']:
            self.log.warn( 'Failed to get the Pilot Agents' )
            return result
          if not result['Value']:
            continue
          refList = result['Value']

          ret = gProxyManager.getPilotProxyFromVOMSGroup( ownerDN, ownerGroup )
          if not ret['OK']:
            self.log.error( ret['Message'] )
            self.log.error( 'Could not get proxy:', 'User "%s", Group "%s"' % ( ownerDN, ownerGroup ) )
            continue
          proxy = ret['Value']

          self.log.verbose( "Getting status for %s pilots for owner %s and group %s" % ( len( refList ),
                                                                                        ownerDN, ownerGroup ) )

          for start_index in range( 0, len( refList ), MAX_JOBS_QUERY ):
            refsToQuery = refList[ start_index : start_index + MAX_JOBS_QUERY ]
            self.log.verbose( 'Querying %d pilots of %s starting at %d' %
                              ( len( refsToQuery ), len( refList ), start_index ) )
            result = self.getPilotStatus( proxy, gridType, refsToQuery )
            if not result['OK']:
              if result['Message'] == 'Broker not Available':
                # No point in querying the remaining slices through this broker
                self.log.error( 'Broker %s not Available' % broker )
                break
              self.log.warn( 'Failed to get pilot status:' )
              self.log.warn( '%s:%s @ %s' % ( ownerDN, ownerGroup, gridType ) )
              continue

            statusDict = result[ 'Value' ]
            for pRef in statusDict:
              pDict = statusDict[ pRef ]
              if not pDict:
                # Nothing was reported for this reference
                continue
              if pDict['isParent']:
                self.log.verbose( 'Clear parametric parent %s' % pRef )
                result = self.clearParentJob( pRef, pDict, connection )
                if not result['OK']:
                  self.log.warn( result['Message'] )
                else:
                  self.log.info( 'Parametric parent removed: %s' % pRef )
              if pDict[ 'FinalStatus' ]:
                self.log.verbose( 'Marking Status for %s to %s' % ( pRef, pDict['Status'] ) )
                pilotsToAccount[ pRef ] = pDict
              else:
                self.log.verbose( 'Setting Status for %s to %s' % ( pRef, pDict['Status'] ) )
                self.pilotDB.setPilotStatus( pRef,
                                             pDict['Status'],
                                             pDict['DestinationSite'],
                                             updateTime = pDict['StatusDate'],
                                             conn = connection )

            # Flush the accounting batch once it grows beyond 100 entries
            if len( pilotsToAccount ) > 100:
              self.accountPilots( pilotsToAccount, connection )
              pilotsToAccount = {}

      self.accountPilots( pilotsToAccount, connection )
      # Now handle pilots not updated in the last N days (most likely the Broker is no
      # longer available) and declare them Deleted.
      self.handleOldPilots( connection )
    finally:
      connection.close()

    return S_OK()
Ejemplo n.º 15
0
    def __getGridJobOutput(self, pilotReference):
        """Return the standard output and standard error of a Grid pilot job.

        The output already stored in the pilot DB is used when that lookup
        succeeds; otherwise, for the LCG, gLite and CREAM grid types, it is
        fetched with the pilot owner's proxy and then stored in the DB.

        :param pilotReference: reference of the pilot whose output is wanted
        :return: S_OK with a dictionary holding 'StdOut', 'StdErr', 'OwnerDN',
                 'OwnerGroup' and 'FileList', or S_ERROR when the pilot info,
                 the proxy or the output cannot be obtained, when the stored
                 output is empty, or when the grid type is not handled.
        """
        infoResult = pilotDB.getPilotInfo(pilotReference)
        if not (infoResult['OK'] and infoResult['Value']):
            return S_ERROR('Failed to get info for pilot ' + pilotReference)

        pilotInfo = infoResult['Value'][pilotReference]
        ownerDN = pilotInfo['OwnerDN']
        ownerGroup = pilotInfo['OwnerGroup']

        # First try the copy of the output kept in the pilot DB.
        # FIXME: What if the OutputSandBox is not StdOut and StdErr, what do we do with other files?
        storedResult = pilotDB.getPilotOutput(pilotReference)
        if storedResult['OK']:
            stdOut = storedResult['Value']['StdOut']
            stdErr = storedResult['Value']['StdErr']
            if not (stdOut or stdErr):
                return S_ERROR('Empty pilot output found')
            return S_OK({'StdOut': stdOut,
                         'StdErr': stdErr,
                         'OwnerDN': ownerDN,
                         'OwnerGroup': ownerGroup,
                         'FileList': []})

        # Nothing usable in the DB: only these grid types can be queried directly.
        gridType = pilotInfo['GridType']
        if gridType not in ("LCG", "gLite", "CREAM"):
            return S_ERROR('Can not retrieve pilot output for the Grid %s ' %
                           gridType)

        # From here on the group is the 'VOMSRole' option of the owner group
        # (falling back to the group itself); it is also what gets reported.
        ownerGroup = getGroupOption(ownerGroup, 'VOMSRole', ownerGroup)
        proxyResult = gProxyManager.getPilotProxyFromVOMSGroup(ownerDN, ownerGroup)
        if not proxyResult['OK']:
            gLogger.error(proxyResult['Message'])
            gLogger.error('Could not get proxy:',
                          'User "%s", Group "%s"' % (ownerDN, ownerGroup))
            return S_ERROR("Failed to get the pilot's owner proxy")
        proxy = proxyResult['Value']

        gridResult = getPilotOutput(proxy, gridType, pilotReference,
                                    pilotInfo['PilotStamp'])
        if not gridResult['OK']:
            return S_ERROR('Failed to get pilot output: ' +
                           gridResult['Message'])

        # NOTE(review): the fields are read from the top level of the result,
        # not from its 'Value' entry - confirm against getPilotOutput.
        # FIXME: What if the OutputSandBox is not StdOut and StdErr, what do we do with other files?
        stdOut = gridResult['StdOut']
        stdErr = gridResult['StdErr']
        outputFiles = gridResult['FileList']

        # Storing the output is best effort: a failure is logged, not returned.
        storeResult = pilotDB.storePilotOutput(pilotReference, stdOut, stdErr)
        if not storeResult['OK']:
            gLogger.error('Failed to store pilot output:',
                          storeResult['Message'])

        return S_OK({'StdOut': stdOut,
                     'StdErr': stdErr,
                     'OwnerDN': ownerDN,
                     'OwnerGroup': ownerGroup,
                     'FileList': outputFiles})