Exemple #1
0
  def test_CreateAndSubmit( self ):
    """ Create a job wrapper for a sample job and submit it to an 'InProcess' CE

        Each step returns a result dictionary; the test fails as soon as one
        of them does not carry a true 'OK' entry.
    """
    jobParams = {'JobID': '1',
                 'JobType': 'Merge',
                 'CPUTime': '1000000',
                 'Executable': '$DIRACROOT/scripts/dirac-jobexec',
                 'Arguments': "helloWorld.xml -o LogLevel=DEBUG pilot.cfg",
                 'ExtraOptions': 'pilot.cfg',
                 'InputSandbox': ['helloWorld.xml', 'exe-script.py']}
    resourceParams = {}
    optimizerParams = {}

    # Disabled variant: wrapper created without extra options
    #     res = createJobWrapper( 1, jobParams, resourceParams, optimizerParams, logLevel = 'DEBUG' )
    #     self.assertTrue( res['OK'] )
    #     wrapperFile = res['Value']

    ceInstance = ComputingElementFactory().getCE( 'InProcess' )
    # assertTrue is used instead of the deprecated assert_ alias
    self.assertTrue( ceInstance['OK'] )
    computingElement = ceInstance['Value']

    #     res = computingElement.submitJob( wrapperFile, self.payloadProxy )
    #     self.assertTrue( res['OK'] )

    res = createJobWrapper( 2, jobParams, resourceParams, optimizerParams,
                            extraOptions = 'pilot.cfg', logLevel = 'DEBUG' )
    self.assertTrue( res['OK'] )
    wrapperFile = res['Value']

    res = computingElement.submitJob( wrapperFile, self.payloadProxy )
    self.assertTrue( res['OK'] )
Exemple #2
0
    def _setUpworkloadCE(self):
        """Get application queue and configure it

        Reads the site, CE and queue names from the ``/LocalSite`` section of
        the configuration, builds the corresponding ComputingElement and hands
        it the current proxy chain together with its remaining lifetime.

        :return: S_OK(ComputingElement instance), or the failing result of the
                 first step that did not succeed
        """
        # Local import: only S_OK is known to be available at module level here.
        # NOTE(review): drop this if the module already imports S_ERROR.
        from DIRAC import S_ERROR

        # Get CE parameters
        workloadSite = gConfig.getValue("/LocalSite/Site")
        workloadCEName = gConfig.getValue("/LocalSite/GridCE")
        workloadQueue = gConfig.getValue("/LocalSite/CEQueue")

        result = getQueue(workloadSite, workloadCEName, workloadQueue)
        if not result["OK"]:
            return result
        ceParams = result["Value"]
        ceType = ceParams["CEType"]

        # Build CE
        result = ComputingElementFactory().getCE(ceName=workloadCEName, ceType=ceType, ceParametersDict=ceParams)
        if not result["OK"]:
            return result
        workloadCE = result["Value"]

        # Add a proxy to the CE.
        # A failed lookup carries no usable "Value", so it is checked on its own
        # before the chain is inspected.
        result = getProxyInfo()
        if not result["OK"]:
            return result
        proxy = result["Value"].get("chain")
        if not proxy:
            return S_ERROR("No proxy chain available to configure the workload CE")

        result = proxy.getRemainingSecs()
        if not result["OK"]:
            return result
        lifetime_secs = result["Value"]
        workloadCE.setProxy(proxy, lifetime_secs)

        return S_OK(workloadCE)
  def export_killPilot(self, pilotRefList ):
    """ Kill the specified pilots

        pilotRefList is either one pilot reference (a string) or an iterable
        of references. The pilots are grouped by owner, group, site, CE and
        queue, and each group is killed through one CE object.

        Returns S_OK() when every kill call succeeded; otherwise S_ERROR or
        the failing result of the queue / CE lookup.
    """
    # Make a list if it is not yet
    pilotRefs = list( pilotRefList )
    if type( pilotRefList ) in StringTypes:
      pilotRefs = [pilotRefList]

    # Regroup pilots per site and per owner
    groupedPilots = {}
    for pilotRef in pilotRefs:
      infoResult = pilotDB.getPilotInfo( pilotRef )
      if not ( infoResult['OK'] and infoResult['Value'] ):
        return S_ERROR('Failed to get info for pilot ' + pilotRef)

      info = infoResult['Value'][pilotRef]
      keyParts = [info['OwnerDN'], info['OwnerGroup'], info['GridSite'],
                  info['DestinationSite'], info['Queue']]
      groupKey = '@@@'.join( keyParts )
      pilotGridType = info['GridType']
      groupEntry = groupedPilots.setdefault( groupKey, {} )
      groupEntry.setdefault( 'PilotList', [] ).append( pilotRef )
      groupEntry['GridType'] = pilotGridType

    # Do the work now queue by queue
    ceFactory = ComputingElementFactory()
    failed = []
    for groupKey, groupEntry in groupedPilots.items():

      owner, group, site, ceName, queue = groupKey.split( '@@@' )
      queueResult = getQueue( site, ceName, queue )
      if not queueResult['OK']:
        return queueResult
      gridType = groupEntry['GridType']
      ceResult = ceFactory.getCE( gridType, ceName, queueResult['Value'] )
      if not ceResult['OK']:
        return ceResult
      computingElement = ceResult['Value']

      # These grid flavours get the pilot owner's proxy set on the CE
      if gridType in ["LCG","gLite","CREAM"]:
        group = getGroupOption(group,'VOMSRole',group)
        proxyResult = gProxyManager.getPilotProxyFromVOMSGroup( owner, group )
        if not proxyResult['OK']:
          gLogger.error( proxyResult['Message'] )
          gLogger.error( 'Could not get proxy:', 'User "%s", Group "%s"' % ( owner, group ) )
          return S_ERROR("Failed to get the pilot's owner proxy")
        computingElement.setProxy( proxyResult['Value'] )

      pilotList = groupEntry['PilotList']
      killResult = computingElement.killJob( pilotList )
      if not killResult['OK']:
        failed.extend( pilotList )

    if failed:
      return S_ERROR('Failed to kill at least some pilots')

    return S_OK()
Exemple #4
0
    def initialize(self, loops=0):
        """Set the agent defaults and create the Computing Element instance.

        :param loops: value stored in the 'MaxCycles' agent option
        :return: S_OK() on success, otherwise the failing CE factory result
        """
        # Disable monitoring
        self.am_setOption('MonitoringEnabled', False)
        # self.log.setLevel('debug') #temporary for debugging
        self.am_setOption('MaxCycles', loops)

        # A CE named in the local configuration overrides the agent option
        ceIdentifier = self.am_getOption('CEUniqueID', 'InProcess')
        siteLocalCE = gConfig.getValue('/LocalSite/LocalCE', '')
        if siteLocalCE:
            self.log.info('Defining CE from local configuration = %s' % siteLocalCE)
            ceIdentifier = siteLocalCE

        factory = ComputingElementFactory()
        self.ceName = ceIdentifier
        creation = factory.getCE(ceIdentifier)
        if not creation['OK']:
            self.log.warn(creation['Message'])
            return creation
        self.computingElement = creation['Value']

        # Four directory levels above this module
        rootDir = __file__
        for _ in range(4):
            rootDir = os.path.dirname(rootDir)
        self.diracRoot = rootDir

        # Localsite options
        self.siteRoot = gConfig.getValue('/LocalSite/Root', os.getcwd())
        self.siteName = gConfig.getValue('/LocalSite/Site', 'Unknown')
        self.pilotReference = gConfig.getValue('/LocalSite/PilotReference', 'Unknown')
        self.defaultProxyLength = gConfig.getValue('/Registry/DefaultProxyLifeTime', 86400 * 5)

        # Agent options
        # This is the factor to convert raw CPU to Normalized units (based on the CPU Model)
        self.cpuFactor = gConfig.getValue('/LocalSite/CPUNormalizationFactor', 0.0)
        defaultWrapperLocation = 'DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperTemplate.py'
        wrapperLocation = self.am_getOption('JobWrapperTemplate', defaultWrapperLocation)
        self.jobWrapperTemplate = os.path.join(self.diracRoot, wrapperLocation)
        self.jobSubmissionDelay = self.am_getOption('SubmissionDelay', 10)
        self.defaultLogLevel = self.am_getOption('DefaultLogLevel', 'info')
        self.fillingMode = self.am_getOption('FillingModeFlag', False)
        self.stopOnApplicationFailure = self.am_getOption('StopOnApplicationFailure', True)
        self.stopAfterFailedMatches = self.am_getOption('StopAfterFailedMatches', 10)
        self.jobCount = 0
        self.matchFailedCount = 0

        # Timeleft
        self.timeLeftUtil = TimeLeft()
        self.timeLeft = gConfig.getValue('/Resources/Computing/CEDefaults/MaxCPUTime', 0.0)
        self.gridCEQueue = gConfig.getValue('/Resources/Computing/CEDefaults/GridCEQueue', '')
        self.timeLeftError = ''
        self.scaledCPUTime = 0.0
        self.pilotInfoReportedFlag = False
        return S_OK()
Exemple #5
0
    def initialize(self, loops=0):
        """Refresh the agent parameters and create the Computing Element instance.

        Every option is read with the current attribute value as its default.

        :param loops: value stored in the 'MaxCycles' agent option
        :return: S_OK() on success, otherwise the failing result of the CE
                 creation or of the CE description lookup
        """
        # Disable monitoring
        self.am_setOption('MonitoringEnabled', False)
        # self.log.setLevel('debug') #temporary for debugging
        self.am_setOption('MaxCycles', loops)

        # A CE named in the local configuration overrides the agent option
        selectedCE = self.am_getOption('CEType', 'InProcess')
        siteLocalCE = gConfig.getValue('/LocalSite/LocalCE', '')
        if siteLocalCE:
            self.log.info('Defining CE from local configuration = %s' % siteLocalCE)
            selectedCE = siteLocalCE

        # Create backend Computing Element
        factory = ComputingElementFactory()
        self.ceName = selectedCE
        creation = factory.getCE(selectedCE)
        if not creation['OK']:
            self.log.warn(creation['Message'])
            return creation
        self.computingElement = creation['Value']

        description = self.computingElement.getDescription()
        if not description['OK']:
            self.log.warn("Can not get the CE description")
            return description
        # The CE description is read first, then the configuration value
        self.timeLeft = description['Value'].get('CPUTime', self.timeLeft)
        self.timeLeft = gConfig.getValue('/Resources/Computing/CEDefaults/MaxCPUTime', self.timeLeft)

        self.initTimes = os.times()

        # Localsite options
        self.siteName = gConfig.getValue('/LocalSite/Site', self.siteName)
        self.pilotReference = gConfig.getValue('/LocalSite/PilotReference', self.pilotReference)
        self.defaultProxyLength = gConfig.getValue('/Registry/DefaultProxyLifeTime', self.defaultProxyLength)

        # Agent options
        # This is the factor to convert raw CPU to Normalized units (based on the CPU Model)
        self.cpuFactor = gConfig.getValue('/LocalSite/CPUNormalizationFactor', self.cpuFactor)
        self.jobSubmissionDelay = self.am_getOption('SubmissionDelay', self.jobSubmissionDelay)
        self.fillingMode = self.am_getOption('FillingModeFlag', self.fillingMode)
        self.minimumTimeLeft = self.am_getOption('MinimumTimeLeft', self.minimumTimeLeft)
        self.stopOnApplicationFailure = self.am_getOption('StopOnApplicationFailure', self.stopOnApplicationFailure)
        self.stopAfterFailedMatches = self.am_getOption('StopAfterFailedMatches', self.stopAfterFailedMatches)
        self.extraOptions = gConfig.getValue('/AgentJobRequirements/ExtraOptions', self.extraOptions)

        # Timeleft
        self.timeLeftUtil = TimeLeft()
        return S_OK()
Exemple #6
0
  def getQueues( self, resourceDict ):
    """ Get the list of relevant CEs and their descriptions

        Rebuilds self.queueDict with one entry per '<ce>_<queue>' found in
        resourceDict (site -> CE -> parameters, including a 'Queues' entry).
        The 'Queues' entry is popped from each CE dictionary, and the queue
        parameter dictionaries are completed in place.

        Returns S_OK(), or the first failing result of the CE creation or of
        the CE validity check.
    """

    self.queueDict = {}
    ceFactory = ComputingElementFactory()

    for site in resourceDict:
      for ce in resourceDict[site]:
        ceDict = resourceDict[site][ce]
        queuesOfCE = ceDict.pop( 'Queues' )
        for queue in queuesOfCE:
          queueName = '%s_%s' % ( ce, queue )
          queueEntry = {}
          self.queueDict[queueName] = queueEntry

          # Same dictionary object as in the input: it is completed in place
          params = queuesOfCE[queue]
          queueEntry['ParametersDict'] = params
          params['Queue'] = queue
          params['Site'] = site
          params['GridEnv'] = self.gridEnv
          params['Setup'] = gConfig.getValue( '/DIRAC/Setup', 'unknown' )

          # Evaluate the CPU limit of the queue according to the Glue convention
          # To Do: should be a utility
          if "maxCPUTime" in params and "SI00" in params:
            # For some sites there are crazy values in the CS
            maxCPUTime = min( max( float( params['maxCPUTime'] ), 0 ), 86400 * 12.5 )
            si00 = float( params['SI00'] )
            queueCPUTime = 60. / 250. * maxCPUTime * si00
            params['CPUTime'] = int( queueCPUTime )

          # One working directory per queue name
          qwDir = os.path.join( self.workingDirectory, queue )
          if not os.path.exists( qwDir ):
            os.makedirs( qwDir )
          params['WorkingDirectory'] = qwDir

          # Queue parameters take precedence over the CE level ones
          ceQueueDict = dict( ceDict )
          ceQueueDict.update( params )
          creation = ceFactory.getCE( ceName = ce,
                                      ceType = ceDict['CEType'],
                                      ceParametersDict = ceQueueDict )
          if not creation['OK']:
            return creation

          queueEntry['CE'] = creation['Value']
          queueEntry['CEName'] = ce
          queueEntry['CEType'] = ceDict['CEType']
          queueEntry['Site'] = site
          queueEntry['QueueName'] = queue

          validity = queueEntry['CE'].isValid()
          if not validity['OK']:
            self.log.fatal( validity['Message'] )
            return validity
          if 'BundleProxy' in params:
            queueEntry['BundleProxy'] = True

    return S_OK()
Exemple #7
0
  def __createCE( self, ceName ):
    """
    Ask the ComputingElementFactory for the CE called ceName and return the
    factory result unchanged; a failure is also logged as a warning.
    """
    self.log.info( "Creating %s CE" % ( ceName ) )

    creation = ComputingElementFactory().getCE( ceName )
    if not creation['OK']:
      self.log.warn( creation['Message'] )
    return creation
Exemple #8
0
  def initialize( self, loops = 0 ):
    """ Set the agent defaults and create the Computing Element instance

        loops is stored in the 'MaxCycles' agent option.
        Returns S_OK() on success, otherwise the failing result of the CE
        creation or of the CE description lookup.
    """
    # Disable monitoring
    self.am_setOption( 'MonitoringEnabled', False )
    # self.log.setLevel('debug') #temporary for debugging
    self.am_setOption( 'MaxCycles', loops )

    # A CE named in the local configuration overrides the agent option
    selectedCE = self.am_getOption( 'CEType', 'InProcess' )
    siteLocalCE = gConfig.getValue( '/LocalSite/LocalCE', '' )
    if siteLocalCE:
      self.log.info( 'Defining CE from local configuration = %s' % siteLocalCE )
      selectedCE = siteLocalCE

    # Create backend Computing Element
    factory = ComputingElementFactory()
    self.ceName = selectedCE
    creation = factory.getCE( selectedCE )
    if not creation['OK']:
      self.log.warn( creation['Message'] )
      return creation
    self.computingElement = creation['Value']

    description = self.computingElement.getDescription()
    if not description['OK']:
      self.log.warn( "Can not get the CE description" )
      return description
    # The CE description is read first, then the configuration value
    self.timeLeft = description['Value'].get( 'CPUTime', 0.0 )
    self.timeLeft = gConfig.getValue( '/Resources/Computing/CEDefaults/MaxCPUTime',
                                      self.timeLeft )

    self.initTimes = os.times()

    # Localsite options
    self.siteName = gConfig.getValue( '/LocalSite/Site', 'Unknown' )
    self.pilotReference = gConfig.getValue( '/LocalSite/PilotReference', 'Unknown' )
    self.defaultProxyLength = gConfig.getValue( '/Registry/DefaultProxyLifeTime', 86400 * 5 )

    # Agent options
    # This is the factor to convert raw CPU to Normalized units (based on the CPU Model)
    self.cpuFactor = gConfig.getValue( '/LocalSite/CPUNormalizationFactor', 0.0 )
    self.jobSubmissionDelay = self.am_getOption( 'SubmissionDelay', 10 )
    self.fillingMode = self.am_getOption( 'FillingModeFlag', False )
    self.minimumTimeLeft = self.am_getOption( 'MinimumTimeLeft', 1000 )
    self.stopOnApplicationFailure = self.am_getOption( 'StopOnApplicationFailure', True )
    self.stopAfterFailedMatches = self.am_getOption( 'StopAfterFailedMatches', 10 )
    self.jobCount = 0
    self.matchFailedCount = 0
    self.extraOptions = gConfig.getValue( '/AgentJobRequirements/ExtraOptions', '' )

    # Timeleft
    self.timeLeftUtil = TimeLeft()
    self.timeLeftError = ''
    self.scaledCPUTime = 0.0
    self.pilotInfoReportedFlag = False
    return S_OK()
Exemple #9
0
    def initialize(self, loops=0):
        """Set the agent defaults and create the Computing Element instance.

        :param loops: value stored in the 'MaxCycles' agent option
        :return: S_OK() on success, otherwise the failing CE factory result
        """
        # Disable monitoring
        self.am_setOption('MonitoringEnabled', False)
        # self.log.setLevel('debug') #temporary for debugging
        self.am_setOption('MaxCycles', loops)

        # A CE named in the local configuration overrides the agent option
        selectedCE = self.am_getOption('CEType', 'InProcess')
        siteLocalCE = gConfig.getValue('/LocalSite/LocalCE', '')
        if siteLocalCE:
            self.log.info('Defining CE from local configuration = %s' % siteLocalCE)
            selectedCE = siteLocalCE

        factory = ComputingElementFactory()
        self.ceName = selectedCE
        creation = factory.getCE(selectedCE)
        if not creation['OK']:
            self.log.warn(creation['Message'])
            return creation

        self.initTimes = os.times()
        self.computingElement = creation['Value']

        # Localsite options
        self.siteName = gConfig.getValue('/LocalSite/Site', 'Unknown')
        self.pilotReference = gConfig.getValue('/LocalSite/PilotReference', 'Unknown')
        self.defaultProxyLength = gConfig.getValue('/Registry/DefaultProxyLifeTime', 86400 * 5)

        # Agent options
        # This is the factor to convert raw CPU to Normalized units (based on the CPU Model)
        self.cpuFactor = gConfig.getValue('/LocalSite/CPUNormalizationFactor', 0.0)
        self.jobSubmissionDelay = self.am_getOption('SubmissionDelay', 10)
        self.fillingMode = self.am_getOption('FillingModeFlag', False)
        self.stopOnApplicationFailure = self.am_getOption('StopOnApplicationFailure', True)
        self.stopAfterFailedMatches = self.am_getOption('StopAfterFailedMatches', 10)
        self.jobCount = 0
        self.matchFailedCount = 0
        self.extraOptions = gConfig.getValue('/AgentJobRequirements/ExtraOptions', '')

        # Timeleft
        self.timeLeftUtil = TimeLeft()
        self.timeLeft = gConfig.getValue('/Resources/Computing/CEDefaults/MaxCPUTime', 0.0)
        self.timeLeftError = ''
        self.scaledCPUTime = 0.0
        self.pilotInfoReportedFlag = False
        return S_OK()
Exemple #10
0
  def __createCE( self, ceName ):
    """
    Create the CE called ceName through the ComputingElementFactory.
    The factory result is returned as is; a failure is logged as a warning.
    """
    self.log.info( "Creating %s CE" % ( ceName ) )

    factoryResult = ComputingElementFactory().getCE( ceName )
    if not factoryResult['OK']:
      self.log.warn( factoryResult['Message'] )
    return factoryResult
Exemple #11
0
  def initialize( self, loops = 0 ):
    """ Set the agent defaults and create the Computing Element instance

        loops is stored in the 'MaxCycles' agent option.
        Returns S_OK() on success, otherwise the failing CE factory result.
    """
    # Disable monitoring
    self.am_setOption( 'MonitoringEnabled', False )
    # self.log.setLevel('debug') #temporary for debugging
    self.am_setOption( 'MaxCycles', loops )

    # A CE named in the local configuration overrides the agent option
    selectedCE = self.am_getOption( 'CEType', 'InProcess' )
    siteLocalCE = gConfig.getValue( '/LocalSite/LocalCE', '' )
    if siteLocalCE:
      self.log.info( 'Defining CE from local configuration = %s' % siteLocalCE )
      selectedCE = siteLocalCE

    factory = ComputingElementFactory()
    self.ceName = selectedCE
    creation = factory.getCE( selectedCE )
    if not creation['OK']:
      self.log.warn( creation['Message'] )
      return creation

    self.initTimes = os.times()
    self.computingElement = creation['Value']

    # Four directory levels above this module
    rootDir = __file__
    for _ in range( 4 ):
      rootDir = os.path.dirname( rootDir )
    self.diracRoot = rootDir

    # Localsite options
    self.siteRoot = gConfig.getValue( '/LocalSite/Root', os.getcwd() )
    self.siteName = gConfig.getValue( '/LocalSite/Site', 'Unknown' )
    self.pilotReference = gConfig.getValue( '/LocalSite/PilotReference', 'Unknown' )
    self.defaultProxyLength = gConfig.getValue( '/Registry/DefaultProxyLifeTime', 86400 * 5 )

    # Agent options
    # This is the factor to convert raw CPU to Normalized units (based on the CPU Model)
    self.cpuFactor = gConfig.getValue( '/LocalSite/CPUNormalizationFactor', 0.0 )
    defaultWrapperLocation = 'DIRAC/WorkloadManagementSystem/JobWrapper/JobWrapperTemplate.py'
    wrapperLocation = self.am_getOption( 'JobWrapperTemplate', defaultWrapperLocation )
    self.jobWrapperTemplate = os.path.join( self.diracRoot, wrapperLocation )
    self.jobSubmissionDelay = self.am_getOption( 'SubmissionDelay', 10 )
    self.defaultLogLevel = self.am_getOption( 'DefaultLogLevel', 'info' )
    self.fillingMode = self.am_getOption( 'FillingModeFlag', False )
    self.stopOnApplicationFailure = self.am_getOption( 'StopOnApplicationFailure', True )
    self.stopAfterFailedMatches = self.am_getOption( 'StopAfterFailedMatches', 10 )
    self.jobCount = 0
    self.matchFailedCount = 0

    # Timeleft
    self.timeLeftUtil = TimeLeft()
    self.timeLeft = gConfig.getValue( '/Resources/Computing/CEDefaults/MaxCPUTime', 0.0 )
    self.gridCEQueue = gConfig.getValue( '/Resources/Computing/CEDefaults/GridCEQueue', '' )
    self.timeLeftError = ''
    self.scaledCPUTime = 0.0
    self.pilotInfoReportedFlag = False
    return S_OK()
Exemple #12
0
 def addComputingElement(self, ceList):
   """
     Make sure every CE named in ceList has an entry in
     self.computingElementDict, creating the missing CE objects through the
     ComputingElementFactory.

     Processing stops at the first CE that can not be created: the error is
     logged and nothing is returned.
   """
   for ceName in ceList:
     if ceName in self.computingElementDict:
       continue
     creation = ComputingElementFactory( ).getCE( ceName = ceName )
     if not creation['OK']:
       self.log.error('Can not create CE object:', creation['Message'])
       return
     ceObject = creation['Value']
     # The registry entry is the configuration dictionary of the CE itself
     ceEntry = ceObject.ceConfigDict
     self.computingElementDict[ceName] = ceEntry
     # add the 'CE' instance at the end to avoid being overwritten
     ceEntry['CE'] = ceObject
Exemple #13
0
 def addComputingElement(self, ceList):
   """
     Register in self.computingElementDict every CE of ceList that is not
     known yet, instantiating it through the ComputingElementFactory.

     On the first creation failure the error is logged and the method
     returns without handling the remaining names.
   """
   unknownCEs = ( name for name in ceList if name not in self.computingElementDict )
   for ceName in unknownCEs:
     factoryResult = ComputingElementFactory( ).getCE( ceName = ceName )
     if not factoryResult['OK']:
       self.log.error('Can not create CE object:', factoryResult['Message'])
       return
     ceObject = factoryResult['Value']
     # The registry entry is the configuration dictionary of the CE itself
     ceEntry = ceObject.ceConfigDict
     self.computingElementDict[ceName] = ceEntry
     # add the 'CE' instance at the end to avoid being overwritten
     ceEntry['CE'] = ceObject
Exemple #14
0
    def _initializeComputingElement(self, localCE):
        """Create the ComputingElement named by localCE and configure it.

        :param localCE: CE name, optionally followed by "/" and an inner
                        submission type
        :return: S_OK() on success, otherwise the failing CE factory result
        """
        factory = ComputingElementFactory()

        # It might be "Pool/Singularity", or simply "Pool"
        ceParts = localCE.split("/")
        self.ceName = ceParts[0]
        # The second part is only used when there are exactly two parts
        innerType = ceParts[1] if len(ceParts) == 2 else self.innerCESubmissionType
        self.innerCESubmissionType = innerType

        creation = factory.getCE(self.ceName)
        if not creation["OK"]:
            self.log.warn("Can't instantiate a CE", creation["Message"])
            return creation

        self.computingElement = creation["Value"]
        innerParameters = {"InnerCESubmissionType": self.innerCESubmissionType}
        self.computingElement.setParameters(innerParameters)

        return S_OK()
Exemple #15
0
def getCREAMPilotOutput(proxy,pilotRef,pilotStamp):
  """ Retrieve the output of a pilot through a temporary CREAM Computing Element

      :param proxy: proxy handed to the CE with setProxy()
      :param pilotRef: pilot reference
      :param pilotStamp: pilot stamp, appended to the reference with ':::'
      :return: S_OK structure carrying 'FileList', 'StdOut' and 'StdErr';
               the failing CE factory result; or S_ERROR when the output
               can not be retrieved
  """
  gridEnv = getGridEnv()
  tmpdir = mkdtemp()
  try:
    # NOTE(review): 'Qeuue' looks like a misspelt placeholder queue name;
    # it is kept as is because the CE receives it at runtime - confirm before changing
    result = ComputingElementFactory().getCE(ceName = 'CREAMSite',ceType = 'CREAM',
                                         ceParametersDict = {'GridEnv':gridEnv,
                                                             'Queue':'Qeuue',
                                                             'OutputURL':"gsiftp://localhost",
                                                             'WorkingDirectory':tmpdir})
    if not result['OK']:
      return result
    ce = result['Value']
    ce.reset()
    ce.setProxy(proxy)
    fullPilotRef = ":::".join([pilotRef,pilotStamp])
    result = ce.getJobOutput( fullPilotRef )
  finally:
    # Always remove the scratch directory, also when a CE call raises;
    # cleanup is best effort so it can not hide the actual outcome
    shutil.rmtree(tmpdir, ignore_errors = True)

  if not result['OK']:
    return S_ERROR( 'Failed to get pilot output: %s' % result['Message'] )
  output, error = result['Value']
  fileList = outputSandboxFiles
  result = S_OK()
  result['FileList'] = fileList
  result['StdOut'] = output
  result['StdErr'] = error
  return result
Exemple #16
0
def getPilotCE(pilotDict):
    """Build the ComputingElement a pilot was submitted to.

    The queue description is looked up from the pilot's "GridSite",
    "DestinationSite" and "Queue" entries, then completed with the grid
    environment and a freshly created working directory. That directory is
    removed again only when the CE can not be created.

    :param pilotDict: pilot description
    :return: S_OK(CE object), or the failing result of the queue lookup or
             of the CE creation
    """
    ceFactory = ComputingElementFactory()

    queueResult = getQueue(
        pilotDict["GridSite"],
        pilotDict["DestinationSite"],
        pilotDict["Queue"],
    )
    if not queueResult["OK"]:
        return queueResult

    queueDict = queueResult["Value"]
    queueDict["GridEnv"] = getGridEnv()
    queueDict["WorkingDirectory"] = mkdtemp()

    ceResult = ceFactory.getCE(pilotDict["GridType"], pilotDict["DestinationSite"], queueDict)
    if ceResult["OK"]:
        return S_OK(ceResult["Value"])

    shutil.rmtree(queueDict["WorkingDirectory"])
    return ceResult
Exemple #17
0
    def test_CreateAndSubmit(self):
        """Create a job wrapper for a sample job and submit it to an InProcess CE.

        The "pilot.cfg" extra options are only passed on when that file is
        present in the current directory.
        """
        jobParams = {
            "JobID": "1",
            "JobType": "Merge",
            "CPUTime": "1000000",
            "Executable": "dirac-jobexec",
            "Arguments": "helloWorld.xml -o LogLevel=DEBUG --cfg pilot.cfg",
            "InputSandbox": ["helloWorld.xml", "exe-script.py"],
        }
        resourceParams = {}
        optimizerParams = {}

        #     res = createJobWrapper( 1, jobParams, resourceParams, optimizerParams, logLevel = 'DEBUG' )
        #     self.assertTrue( res['OK'] )
        #     wrapperFile = res['Value']

        ceInstance = ComputingElementFactory().getCE("InProcess")
        self.assertTrue(ceInstance["OK"])
        computingElement = ceInstance["Value"]

        #     res = computingElement.submitJob( wrapperFile, self.payloadProxy )
        #     self.assertTrue( res['OK'] )

        # Collect the keyword arguments first, so the wrapper is created in one place
        wrapperOptions = {}
        if "pilot.cfg" in os.listdir("."):
            jobParams.setdefault("ExtraOptions", "pilot.cfg")
            wrapperOptions["extraOptions"] = "pilot.cfg"
        wrapperOptions["logLevel"] = "DEBUG"

        res = createJobWrapper(2, jobParams, resourceParams, optimizerParams, **wrapperOptions)
        self.assertTrue(res["OK"], res.get("Message"))
        wrapperFile = res["Value"][0]

        res = computingElement.submitJob(wrapperFile, self.payloadProxy)
        self.assertTrue(res["OK"], res.get("Message"))
Exemple #18
0
def killPilotsInQueues(pilotRefDict):
    """Kill pilots queue by queue.

    :param dict pilotRefDict: pilots per queue; each key is
        "owner@@@group@@@site@@@ce@@@queue" and each value holds "GridType"
        and "PilotList"
    :return: the list of pilots whose kill call failed; when a queue lookup,
        a CE creation or a proxy retrieval fails, the corresponding error
        result is returned instead
    """

    ceFactory = ComputingElementFactory()
    failed = []
    for queueKey, queuePilots in pilotRefDict.items():

        owner, group, site, ceName, queue = queueKey.split("@@@")
        queueResult = getQueue(site, ceName, queue)
        if not queueResult["OK"]:
            return queueResult

        gridType = queuePilots["GridType"]
        ceResult = ceFactory.getCE(gridType, ceName, queueResult["Value"])
        if not ceResult["OK"]:
            return ceResult
        computingElement = ceResult["Value"]

        # FIXME: quite hacky. Should be either removed, or based on some flag
        if gridType in ["CREAM", "ARC", "Globus", "HTCondorCE"]:
            group = getGroupOption(group, "VOMSRole", group)
            proxyResult = gProxyManager.getPilotProxyFromVOMSGroup(owner, group)
            if not proxyResult["OK"]:
                details = 'User "%s" Group "%s" : %s' % (owner, group, proxyResult["Message"])
                gLogger.error("Could not get proxy:", details)
                return S_ERROR("Failed to get the pilot's owner proxy")
            computingElement.setProxy(proxyResult["Value"])

        pilotList = queuePilots["PilotList"]
        killResult = computingElement.killJob(pilotList)
        if not killResult["OK"]:
            failed.extend(pilotList)

    return failed
Exemple #19
0
def test__getCEDict(mocker, ceType, expectedType, expectedNumberElement):
    """Check the type and the size of what JobAgent()._getCEDict() returns."""
    # The agent is built without running the AgentModule constructor
    mocker.patch("DIRAC.WorkloadManagementSystem.Agent.JobAgent.AgentModule.__init__")

    agent = JobAgent("Test", "Test1")
    agent.log = gLogger
    agent.log.setLevel("DEBUG")

    creation = ComputingElementFactory().getCE(ceType)
    assert creation["OK"]

    computingElement = creation["Value"]
    computingElement.ceParameters["MultiProcessorStrategy"] = True
    computingElement.ceParameters["NumberOfProcessors"] = 4

    outcome = agent._getCEDict(computingElement)
    assert outcome["OK"]

    returnedValue = outcome["Value"]
    assert isinstance(returnedValue, expectedType)
    assert len(returnedValue) == expectedNumberElement
Exemple #20
0
def test__checkCEAvailability(mocker, ceType, mockCEReply, expectedResult):
    """Check JobAgent()._checkCEAvailability() against a patched ComputingElement.available().

    ``mockCEReply`` is the value returned by the patched ``available()``. The
    outcome is compared with ``expectedResult`` on "OK" and, when those keys are
    present in ``expectedResult``, on "Value" and "Message".
    """
    mocker.patch("DIRAC.WorkloadManagementSystem.Agent.JobAgent.AgentModule.__init__")
    mocker.patch(
        "DIRAC.Resources.Computing.ComputingElement.ComputingElement.available",
        return_value=mockCEReply,
    )

    agent = JobAgent("Test", "Test1")
    agent.log = gLogger
    agent.log.setLevel("DEBUG")

    ceResult = ComputingElementFactory().getCE(ceType)
    assert ceResult["OK"]

    availability = agent._checkCEAvailability(ceResult["Value"])
    assert availability["OK"] == expectedResult["OK"]
    # "Value" and "Message" are only compared when the expectation provides them
    for field in ("Value", "Message"):
        if field in expectedResult:
            assert availability[field] == expectedResult[field]
Exemple #21
0
    def getQueues(self, resourceDict):
        """Build self.queueDict from the sites, CEs and queues found in resourceDict.

        resourceDict is walked as site -> CE -> CE options. The "Queues" entry is
        removed from the CE options and maps queue names to their parameters.
        For every queue the parameters are completed in place (Queue, Site,
        GridEnv, Setup, WorkingDirectory, and CPUTime / Platform when they can be
        derived), a ComputingElement is obtained from the factory and the whole
        description is stored in self.queueDict under "<ce>_<queue>".
        self.platforms and self.sites are extended along the way.

        :param dict resourceDict: sites, CEs and queues to consider
        :return: S_OK(), or the failed result of the CE creation / validation
        """
        self.queueDict = {}
        ceFactory = ComputingElementFactory()

        for site in resourceDict:
            for ce, ceDict in resourceDict[site].items():
                queues = ceDict.pop("Queues")
                for queue, params in queues.items():
                    queueName = "%s_%s" % (ce, queue)
                    # params is the very dict stored as "ParametersDict": it is updated in place
                    entry = {"ParametersDict": params}
                    self.queueDict[queueName] = entry

                    params["Queue"] = queue
                    params["Site"] = site
                    params["GridEnv"] = self.gridEnv
                    params["Setup"] = gConfig.getValue("/DIRAC/Setup", "unknown")

                    # Evaluate the CPU limit of the queue according to the Glue convention
                    # To Do: should be a utility
                    if "maxCPUTime" in params and "SI00" in params:
                        # For some sites there are crazy values in the CS, hence the clamping
                        maxCPUTime = min(max(float(params["maxCPUTime"]), 0), 86400 * 12.5)
                        si00 = float(params["SI00"])
                        params["CPUTime"] = int(60.0 / 250.0 * maxCPUTime * si00)

                    qwDir = os.path.join(self.workingDirectory, queue)
                    if not os.path.exists(qwDir):
                        os.makedirs(qwDir)
                    params["WorkingDirectory"] = qwDir

                    # Platform: queue level first, then CE level, then built from the CE "OS"
                    platform = ""
                    if "Platform" in params:
                        platform = params["Platform"]
                    elif "Platform" in ceDict:
                        platform = ceDict["Platform"]
                    elif "OS" in ceDict:
                        platform = "_".join([ceDict.get("architecture", "x86_64"), ceDict["OS"]])
                    if platform and platform not in self.platforms:
                        self.platforms.append(platform)

                    if platform and "Platform" not in params:
                        result = Resources.getDIRACPlatform(platform)
                        if result["OK"]:
                            params["Platform"] = result["Value"]

                    # Queue parameters take precedence over the CE ones
                    ceQueueDict = dict(ceDict)
                    ceQueueDict.update(params)
                    result = ceFactory.getCE(ceName=ce, ceType=ceDict["CEType"], ceParametersDict=ceQueueDict)
                    if not result["OK"]:
                        return result

                    entry["CE"] = result["Value"]
                    entry["CEName"] = ce
                    entry["CEType"] = ceDict["CEType"]
                    entry["Site"] = site
                    entry["QueueName"] = queue

                    result = entry["CE"].isValid()
                    if not result["OK"]:
                        self.log.fatal(result["Message"])
                        return result

                    # The mere presence of the option, at queue or CE level, enables the flag
                    if "BundleProxy" in params or "BundleProxy" in ceDict:
                        entry["BundleProxy"] = True

                    if site not in self.sites:
                        self.sites.append(site)

        return S_OK()
  def getQueues( self, resourceDict ):
    """ Get the list of relevant CEs and their descriptions

        resourceDict is walked as site -> CE -> CE options; the 'Queues' entry is
        removed from the CE options and maps queue names to their parameters.
        For every queue the parameters are completed in place (Queue, Site, GridEnv,
        Setup, Tag, WorkingDirectory, and CPUTime / Platform when they can be derived),
        a ComputingElement is created, or taken from self.queueCECache when the
        queue definition hash did not change, and the description is stored in
        self.queueDict under '<ce>_<queue>'. self.platforms and self.sites are
        extended along the way.

        :param dict resourceDict: sites, CEs and queues to consider
        :return: S_OK(), or the failed result of the CE creation / validation
    """

    self.queueDict = {}
    ceFactory = ComputingElementFactory()

    for site in resourceDict:
      for ce in resourceDict[site]:
        ceDict = resourceDict[site][ce]
        ceTags = ceDict.get( 'Tag' )
        if isinstance( ceTags, basestring ):
          ceTags = fromChar( ceTags )
        qDict = ceDict.pop( 'Queues' )
        for queue in qDict:
          queueName = '%s_%s' % ( ce, queue )
          # queueParams is the very dict stored as 'ParametersDict': it is updated in place
          queueParams = qDict[queue]
          self.queueDict[queueName] = { 'ParametersDict': queueParams }
          queueParams['Queue'] = queue
          queueParams['Site'] = site
          queueParams['GridEnv'] = self.gridEnv
          queueParams['Setup'] = gConfig.getValue( '/DIRAC/Setup', 'unknown' )
          # Evaluate the CPU limit of the queue according to the Glue convention
          # To Do: should be a utility
          if "maxCPUTime" in queueParams and "SI00" in queueParams:
            maxCPUTime = float( queueParams['maxCPUTime'] )
            # For some sites there are crazy values in the CS
            maxCPUTime = max( maxCPUTime, 0 )
            maxCPUTime = min( maxCPUTime, 86400 * 12.5 )
            si00 = float( queueParams['SI00'] )
            queueCPUTime = 60. / 250. * maxCPUTime * si00
            queueParams['CPUTime'] = int( queueCPUTime )

          # Tags defined at the queue level and at the CE level are merged
          queueTags = queueParams.get( 'Tag' )
          if queueTags and isinstance( queueTags, basestring ):
            queueTags = fromChar( queueTags )
            queueParams['Tag'] = queueTags
          if ceTags:
            if queueTags:
              queueParams['Tag'] = list( set( ceTags + queueTags ) )
            else:
              # Each queue gets its own copy: the memory tags appended below would
              # otherwise be added to the shared ceTags list and leak into every
              # following queue of the same CE
              queueParams['Tag'] = list( ceTags )

          maxMemory = queueParams.get( 'MaxRAM', None )
          if maxMemory:
            # MaxRAM value is supposed to be in MB
            # Floor division keeps the range bound an integer
            maxMemoryList = range( 1, int( maxMemory ) // 1000 + 1 )
            memoryTags = [ '%dGB' % mem for mem in maxMemoryList ]
            if memoryTags:
              queueParams.setdefault( 'Tag', [] )
              queueParams['Tag'] += memoryTags

          qwDir = os.path.join( self.workingDirectory, queue )
          if not os.path.exists( qwDir ):
            os.makedirs( qwDir )
          queueParams['WorkingDirectory'] = qwDir

          # Platform: queue level first, then CE level, then built from the CE 'OS'
          platform = ''
          if "Platform" in queueParams:
            platform = queueParams['Platform']
          elif "Platform" in ceDict:
            platform = ceDict['Platform']
          elif "OS" in ceDict:
            architecture = ceDict.get( 'architecture', 'x86_64' )
            OS = ceDict['OS']
            platform = '_'.join( [architecture, OS] )
          if platform and platform not in self.platforms:
            self.platforms.append( platform )

          if "Platform" not in queueParams and platform:
            result = Resources.getDIRACPlatform( platform )
            if result['OK']:
              queueParams['Platform'] = result['Value'][0]

          # Queue parameters take precedence over the CE ones
          ceQueueDict = dict( ceDict )
          ceQueueDict.update( queueParams )

          # Generate the CE object for the queue or pick the already existing one
          # if the queue definition did not change
          queueHash = self.__generateQueueHash( ceQueueDict )
          if queueName in self.queueCECache and self.queueCECache[queueName]['Hash'] == queueHash:
            queueCE = self.queueCECache[queueName]['CE']
          else:
            result = ceFactory.getCE( ceName = ce,
                                      ceType = ceDict['CEType'],
                                      ceParametersDict = ceQueueDict )
            if not result['OK']:
              return result
            self.queueCECache.setdefault( queueName, {} )
            self.queueCECache[queueName]['Hash'] = queueHash
            self.queueCECache[queueName]['CE'] = result['Value']
            queueCE = self.queueCECache[queueName]['CE']

          self.queueDict[queueName]['CE'] = queueCE
          self.queueDict[queueName]['CEName'] = ce
          self.queueDict[queueName]['CEType'] = ceDict['CEType']
          self.queueDict[queueName]['Site'] = site
          self.queueDict[queueName]['QueueName'] = queue
          self.queueDict[queueName]['Platform'] = platform
          result = self.queueDict[queueName]['CE'].isValid()
          if not result['OK']:
            self.log.fatal( result['Message'] )
            return result

          # The queue level setting, when present, hides the CE level one
          if 'BundleProxy' in queueParams:
            if queueParams['BundleProxy'].lower() in ['true','yes','1']:
              self.queueDict[queueName]['BundleProxy'] = True
          elif 'BundleProxy' in ceDict:
            if ceDict['BundleProxy'].lower() in ['true','yes','1']:
              self.queueDict[queueName]['BundleProxy'] = True

          if site not in self.sites:
            self.sites.append( site )

    return S_OK()
Exemple #23
0
  def getQueues( self ):
    """ Fill self.queueDict with the queues of the direct submission CEs of self.siteNames

        For each site the CEs are read from the CS section
        /Resources/Sites/<grid>/<site>/CEs. A CE is kept when it is listed in the
        'CEs' agent option (or when that option is empty), when its SubmissionMode
        is 'direct' and when its CEType is in the 'CETypes' agent option. Each queue
        of a kept CE gets its parameters completed, a ComputingElement created
        through the factory and an entry in self.queueDict under '<ce>_<queue>'.

        :return: S_OK(), S_ERROR() on a failed CS look-up or the failed result of
                 the CE creation / validation
    """

    ceFactory = ComputingElementFactory()
    ceTypes = self.am_getOption( 'CETypes', [] )
    ceConfList = self.am_getOption( 'CEs', [] )

    for siteName in self.siteNames:
      gridType = siteName.split( '.' )[0]

      # Look up CE definitions in the site CS description
      result = gConfig.getSections( '/Resources/Sites/%s/%s/CEs' % ( gridType, siteName ) )
      if not result['OK']:
        return S_ERROR( 'Failed to look up the CS for the site %s CEs' % siteName )
      if not result['Value']:
        return S_ERROR( 'No CEs found for site %s' % siteName )
      siteCEs = result['Value']

      selectedCEs = []
      for ce in siteCEs:
        # A non empty 'CEs' option restricts the CEs to consider
        if ceConfList and ce not in ceConfList:
          continue
        ceType = gConfig.getValue( '/Resources/Sites/%s/%s/CEs/%s/CEType' % ( gridType, siteName, ce ), 'Unknown' )
        result = gConfig.getOptionsDict( '/Resources/Sites/%s/%s/CEs/%s' % ( gridType, siteName, ce ) )
        if not result['OK']:
          return S_ERROR( 'Failed to look up the CS for ce %s' % ce )
        ceDict = result['Value']
        isDirect = "SubmissionMode" in ceDict and ceDict['SubmissionMode'].lower() == "direct"
        if isDirect and ceType in ceTypes:
          selectedCEs.append( ( ce, ceType, ceDict ) )

      for ce, ceType, ceDict in selectedCEs:
        section = '/Resources/Sites/%s/%s/CEs/%s/Queues' % ( gridType, siteName, ce )
        result = gConfig.getSections( section )
        if not result['OK']:
          return S_ERROR( 'Failed to look up the CS for queues' )
        if not result['Value']:
          return S_ERROR( 'No Queues found for site %s, ce %s' % ( siteName, ce ) )

        for queue in result['Value']:
          optionsResult = gConfig.getOptionsDict( '%s/%s' % ( section, queue ) )
          if not optionsResult['OK']:
            return S_ERROR( 'Failed to look up the CS for ce,queue %s,%s' % ( ce, queue ) )

          queueName = '%s_%s' % ( ce, queue )
          # params is the very dict stored as 'ParametersDict': it is updated in place
          params = optionsResult['Value']
          entry = { 'ParametersDict': params }
          self.queueDict[queueName] = entry

          params['Queue'] = queue
          params['Site'] = siteName
          params['GridEnv'] = self.gridEnv
          params['Setup'] = gConfig.getValue( '/DIRAC/Setup', 'unknown' )

          # Evaluate the CPU limit of the queue according to the Glue convention
          # To Do: should be a utility
          if "maxCPUTime" in params and "SI00" in params:
            # For some sites there are crazy values in the CS, hence the clamping
            maxCPUTime = min( max( float( params['maxCPUTime'] ), 0 ), 86400 * 12.5 )
            si00 = float( params['SI00'] )
            params['CPUTime'] = int( 60. / 250. * maxCPUTime * si00 )

          qwDir = os.path.join( self.workingDirectory, queue )
          if not os.path.exists( qwDir ):
            os.mkdir( qwDir )
          params['WorkingDirectory'] = qwDir

          # Queue parameters take precedence over the CE ones
          queueDict = dict( ceDict )
          queueDict.update( params )
          ceResult = ceFactory.getCE( ceName = ce,
                                      ceType = ceType,
                                      ceParametersDict = queueDict )
          if not ceResult['OK']:
            return ceResult

          entry['CE'] = ceResult['Value']
          entry['CEName'] = ce
          entry['CEType'] = ceType
          entry['Site'] = siteName
          entry['QueueName'] = queue

          validity = entry['CE'].isValid()
          if not validity['OK']:
            self.log.fatal( validity['Message'] )
            return validity

          # The mere presence of the option enables the flag
          if 'BundleProxy' in params:
            entry['BundleProxy'] = True

    return S_OK()
Exemple #24
0
    def initialize(self, loops=0):
        """Set the agent options, create the Computing Element and read the local settings.

        The CE type comes from the 'CEType' agent option (default: self.ceName)
        unless /LocalSite/LocalCE is defined, in which case the latter is used.

        :param int loops: value given to the 'MaxCycles' agent option
        :return: S_OK(), or the failed result of the CE creation / description
        """
        # Disable monitoring, logLevel INFO, limited cycles
        self.am_setOption('MonitoringEnabled', False)
        self.am_setOption('MaxCycles', loops)

        ceType = self.am_getOption('CEType', self.ceName)
        localCE = gConfig.getValue('/LocalSite/LocalCE', '')
        if localCE:
            self.log.info('Defining CE from local configuration', '= %s' % localCE)
            ceType = localCE

        # Create backend Computing Element
        ceFactory = ComputingElementFactory()
        # It might be "Pool/Singularity", or simply "Pool"
        ceTypeParts = ceType.split('/')
        self.ceName = ceTypeParts[0]
        self.innerCESubmissionType = (
            ceTypeParts[1] if len(ceTypeParts) == 2 else self.innerCESubmissionType
        )

        creation = ceFactory.getCE(self.ceName)
        if not creation['OK']:
            self.log.warn("Can't instantiate a CE", creation['Message'])
            return creation
        self.computingElement = creation['Value']
        self.computingElement.ceParameters['InnerCESubmissionType'] = self.innerCESubmissionType

        description = self.computingElement.getDescription()
        if not description['OK']:
            self.log.warn("Can not get the CE description")
            return description
        # When the description is a list, only its first element is looked at
        ceDict = description['Value']
        if isinstance(ceDict, list):
            ceDict = ceDict[0]
        self.timeLeft = ceDict.get('CPUTime', self.timeLeft)
        self.timeLeft = gConfig.getValue('/Resources/Computing/CEDefaults/MaxCPUTime', self.timeLeft)

        self.initTimes = os.times()

        # Localsite options
        self.siteName = gConfig.getValue('/LocalSite/Site', self.siteName)
        self.pilotReference = gConfig.getValue('/LocalSite/PilotReference', self.pilotReference)
        self.defaultProxyLength = gConfig.getValue('/Registry/DefaultProxyLifeTime', self.defaultProxyLength)

        # Agent options
        # This is the factor to convert raw CPU to Normalized units (based on the CPU Model)
        self.cpuFactor = gConfig.getValue('/LocalSite/CPUNormalizationFactor', self.cpuFactor)
        self.jobSubmissionDelay = self.am_getOption('SubmissionDelay', self.jobSubmissionDelay)
        self.fillingMode = self.am_getOption('FillingModeFlag', self.fillingMode)
        self.minimumTimeLeft = self.am_getOption('MinimumTimeLeft', self.minimumTimeLeft)
        self.stopOnApplicationFailure = self.am_getOption('StopOnApplicationFailure', self.stopOnApplicationFailure)
        self.stopAfterFailedMatches = self.am_getOption('StopAfterFailedMatches', self.stopAfterFailedMatches)
        self.extraOptions = gConfig.getValue('/AgentJobRequirements/ExtraOptions', self.extraOptions)

        # Timeleft
        self.timeLeftUtil = TimeLeft()
        return S_OK()
Exemple #25
0
    def __getGridJobOutput(pilotReference):
        """ Get the pilot job standard output and standard error files for the Grid
        job reference

        The output already stored in pilotDB is returned when it is not empty.
        Otherwise it is fetched with getWMSPilotOutput for "gLite" pilots, or
        through a ComputingElement built for the pilot queue for any other grid
        type, and stored in pilotDB when a standard output was obtained.

        :param str pilotReference: the pilot reference
        :return: S_OK(dict) with the keys StdOut, StdErr, OwnerDN, OwnerGroup and
                 FileList, or S_ERROR / the failed result of the underlying call
        """

        result = pilotDB.getPilotInfo(pilotReference)
        if not result['OK'] or not result['Value']:
            return S_ERROR('Failed to get info for pilot ' + pilotReference)

        pilotDict = result['Value'][pilotReference]
        owner = pilotDict['OwnerDN']
        group = pilotDict['OwnerGroup']

        # FIXME: What if the OutputSandBox is not StdOut and StdErr, what do we do with other files?
        result = pilotDB.getPilotOutput(pilotReference)
        if result['OK']:
            stdout = result['Value']['StdOut']
            error = result['Value']['StdErr']
            if stdout or error:
                resultDict = {}
                resultDict['StdOut'] = stdout
                resultDict['StdErr'] = error
                resultDict['OwnerDN'] = owner
                resultDict['OwnerGroup'] = group
                resultDict['FileList'] = []
                return S_OK(resultDict)
            else:
                gLogger.warn('Empty pilot output found for %s' %
                             pilotReference)

        gridType = pilotDict['GridType']
        if gridType == "gLite":
            result = getWMSPilotOutput(pilotReference,
                                       proxyUserDN=owner,
                                       proxyUserGroup=group)  #pylint: disable=unexpected-keyword-arg
            if not result['OK']:
                return S_ERROR('Failed to get pilot output: ' +
                               result['Message'])
            # FIXME: What if the OutputSandBox is not StdOut and StdErr, what do we do with other files?
            stdout = result['StdOut']
            error = result['StdErr']
            fileList = result['FileList']
            if stdout:
                result = pilotDB.storePilotOutput(pilotReference, stdout,
                                                  error)
                if not result['OK']:
                    # Best effort: the output is returned even if it could not be stored
                    gLogger.error('Failed to store pilot output:',
                                  result['Message'])

            resultDict = {}
            resultDict['StdOut'] = stdout
            resultDict['StdErr'] = error
            resultDict['OwnerDN'] = owner
            resultDict['OwnerGroup'] = group
            resultDict['FileList'] = fileList
            return S_OK(resultDict)

        # Instantiate the appropriate CE
        ceFactory = ComputingElementFactory()
        result = getQueue(pilotDict['GridSite'],
                          pilotDict['DestinationSite'], pilotDict['Queue'])
        if not result['OK']:
            return result
        queueDict = result['Value']
        gridEnv = getGridEnv()
        queueDict['GridEnv'] = gridEnv
        workingDirectory = mkdtemp()
        queueDict['WorkingDirectory'] = workingDirectory
        # The temporary working directory is removed on every way out of this
        # section, including the proxy retrieval failure and unexpected exceptions
        try:
            result = ceFactory.getCE(gridType, pilotDict['DestinationSite'],
                                     queueDict)
            if not result['OK']:
                return result
            ce = result['Value']
            groupVOMS = getGroupOption(group, 'VOMSRole', group)
            result = gProxyManager.getPilotProxyFromVOMSGroup(owner, groupVOMS)
            if not result['OK']:
                gLogger.error(result['Message'])
                gLogger.error('Could not get proxy:',
                              'User "%s", Group "%s"' % (owner, groupVOMS))
                return S_ERROR("Failed to get the pilot's owner proxy")
            proxy = result['Value']
            ce.setProxy(proxy)
            pilotStamp = pilotDict['PilotStamp']
            pRef = pilotReference
            if pilotStamp:
                pRef = pRef + ':::' + pilotStamp
            result = ce.getJobOutput(pRef)
            if not result['OK']:
                return result
            stdout, error = result['Value']
            if stdout:
                result = pilotDB.storePilotOutput(pilotReference, stdout,
                                                  error)
                if not result['OK']:
                    # Best effort: the output is returned even if it could not be stored
                    gLogger.error('Failed to store pilot output:',
                                  result['Message'])

            resultDict = {}
            resultDict['StdOut'] = stdout
            resultDict['StdErr'] = error
            resultDict['OwnerDN'] = owner
            resultDict['OwnerGroup'] = group
            resultDict['FileList'] = []
            return S_OK(resultDict)
        finally:
            shutil.rmtree(workingDirectory)
Exemple #26
0
def getQueuesResolved(
    siteDict, queueCECache, gridEnv=None, setup=None, workingDir="", checkPlatform=False, instantiateCEs=False
):
    """Build the dictionary of queues described in siteDict.

    siteDict is walked as site -> CE -> CE options. The "Queues" entry is removed
    from the CE options and maps queue names to their parameters, which are
    completed in place. The result is keyed by "<ce>_<queue>".

    :param dict siteDict: sites, CEs and queues to consider
    :param dict queueCECache: CE objects and queue hashes kept between calls, keyed by
                              queue name; updated when instantiateCEs is set
    :param gridEnv: value stored as the "GridEnv" queue parameter
    :param setup: value stored as the "Setup" queue parameter
    :param str workingDir: not used in this function
    :param bool checkPlatform: call setPlatform() for each queue
    :param bool instantiateCEs: create (or reuse) a CE object for each queue; queues
                                whose CE cannot be created or is not valid are left out
    :return: S_OK(dict of queue descriptions)
    """
    queueDict = {}
    ceFactory = ComputingElementFactory()

    for site, ceSection in siteDict.items():
        for ce, ceDict in ceSection.items():
            pilotRunDirectory = ceDict.get("PilotRunDirectory", "")
            # ceMaxRAM = ceDict.get('MaxRAM', None)
            queues = ceDict.pop("Queues")
            for queue, params in queues.items():
                queueName = "%s_%s" % (ce, queue)
                # params is the very dict stored as "ParametersDict": it is updated in place
                entry = {"ParametersDict": params}
                queueDict[queueName] = entry

                params["Queue"] = queue
                params["GridCE"] = ce
                params["Site"] = site
                params["GridEnv"] = gridEnv
                params["Setup"] = setup

                # Evaluate the CPU limit of the queue according to the Glue convention
                computeQueueCPULimit(params)

                # Tags & RequiredTags defined on the Queue level and on the CE level are concatenated
                # This also converts them from a string to a list if required.
                resolveTags(ceDict, params)

                # Some parameters can be defined on the CE level and are inherited by all Queues
                setAdditionalParams(ceDict, params)

                if pilotRunDirectory:
                    params["JobExecDir"] = pilotRunDirectory

                # Queue parameters take precedence over the CE ones
                ceQueueDict = dict(ceDict)
                ceQueueDict.update(params)

                if instantiateCEs:
                    # Generate the CE object for the queue or pick the already existing one
                    # if the queue definition did not change
                    queueHash = generateQueueHash(ceQueueDict)
                    if queueName in queueCECache and queueCECache[queueName]["Hash"] == queueHash:
                        queueCE = queueCECache[queueName]["CE"]
                    else:
                        creation = ceFactory.getCE(ceName=ce, ceType=ceDict["CEType"], ceParametersDict=ceQueueDict)
                        if not creation["OK"]:
                            # The queue is dropped, the other ones are still processed
                            queueDict.pop(queueName)
                            continue
                        cached = queueCECache.setdefault(queueName, {})
                        cached["Hash"] = queueHash
                        cached["CE"] = creation["Value"]
                        queueCE = cached["CE"]

                    params.update(queueCE.ceParameters)
                    entry["CE"] = queueCE
                    validity = entry["CE"].isValid()
                    if not validity["OK"]:
                        # An invalid CE is neither returned nor kept in the cache
                        queueDict.pop(queueName)
                        queueCECache.pop(queueName)
                        continue

                entry["CEName"] = ce
                entry["CEType"] = ceDict["CEType"]
                entry["Site"] = site
                entry["QueueName"] = queue
                entry["QueryCEFlag"] = ceDict.get("QueryCEFlag", "false")

                if checkPlatform:
                    setPlatform(ceDict, params)

                # The queue level setting, when present, hides the CE level one
                bundleProxy = params.get("BundleProxy", ceDict.get("BundleProxy"))
                if bundleProxy and bundleProxy.lower() in ["true", "yes", "1"]:
                    entry["BundleProxy"] = True

    return S_OK(queueDict)
Exemple #27
0
  def getQueues( self, resourceDict ):
    """ Build self.queueDict from the sites, CEs and queues found in resourceDict

        resourceDict is walked as site -> CE -> CE options; the 'Queues' entry is
        removed from the CE options and maps queue names to their parameters.
        For every queue the parameters are completed in place (Queue, Site, GridEnv,
        Setup, Tag, MaxRAM, JobExecDir, WorkingDirectory, and CPUTime / Platform when
        they can be derived), a ComputingElement is created, or taken from
        self.queueCECache when the queue definition hash did not change, and the
        description is stored in self.queueDict under '<ce>_<queue>'.
        self.platforms and self.sites are extended along the way.

        :param dict resourceDict: sites, CEs and queues to consider
        :return: S_OK(), or the failed result of the CE creation / validation
    """

    self.queueDict = {}
    ceFactory = ComputingElementFactory()

    for site in resourceDict:
      for ce in resourceDict[site]:
        ceDict = resourceDict[site][ce]
        pilotRunDirectory = ceDict.get( 'PilotRunDirectory', '' )
        ceMaxRAM = ceDict.get( 'MaxRAM', None )
        ceTags = ceDict.get( 'Tag', [] )
        if isinstance( ceTags, basestring ):
          ceTags = fromChar( ceTags )
        qDict = ceDict.pop( 'Queues' )

        for queue in qDict:
          queueName = '%s_%s' % ( ce, queue )
          # params is the very dict stored as 'ParametersDict': it is updated in place
          params = qDict[queue]
          entry = { 'ParametersDict': params }
          self.queueDict[queueName] = entry

          params['Queue'] = queue
          params['Site'] = site
          params['GridEnv'] = self.gridEnv
          params['Setup'] = gConfig.getValue( '/DIRAC/Setup', 'unknown' )

          # Evaluate the CPU limit of the queue according to the Glue convention
          # To Do: should be a utility
          if "maxCPUTime" in params and "SI00" in params:
            # For some sites there are crazy values in the CS, hence the clamping
            maxCPUTime = min( max( float( params['maxCPUTime'] ), 0 ), 86400 * 12.5 )
            si00 = float( params['SI00'] )
            params['CPUTime'] = int( 60. / 250. * maxCPUTime * si00 )

          # Tags defined at the queue level and at the CE level are merged
          queueTags = params.get( 'Tag' )
          if queueTags and isinstance( queueTags, basestring ):
            queueTags = fromChar( queueTags )
            params['Tag'] = queueTags
          if ceTags:
            params['Tag'] = list( set( ceTags + queueTags ) ) if queueTags else ceTags

          # The queue MaxRAM, when set, takes precedence over the CE one
          maxRAM = params.get( 'MaxRAM' ) or ceMaxRAM
          if maxRAM:
            params['MaxRAM'] = maxRAM
          if pilotRunDirectory:
            params['JobExecDir'] = pilotRunDirectory

          qwDir = os.path.join( self.workingDirectory, queue )
          mkDir( qwDir )
          params['WorkingDirectory'] = qwDir

          # Platform: queue level first, then CE level, then built from the CE 'OS'
          platform = ''
          if "Platform" in params:
            platform = params['Platform']
          elif "Platform" in ceDict:
            platform = ceDict['Platform']
          elif "OS" in ceDict:
            platform = '_'.join( [ ceDict.get( 'architecture', 'x86_64' ), ceDict['OS'] ] )
          if platform and platform not in self.platforms:
            self.platforms.append( platform )

          if platform and "Platform" not in params:
            result = Resources.getDIRACPlatform( platform )
            if result['OK']:
              params['Platform'] = result['Value'][0]

          # Queue parameters take precedence over the CE ones
          ceQueueDict = dict( ceDict )
          ceQueueDict.update( params )

          # Generate the CE object for the queue or pick the already existing one
          # if the queue definition did not change
          queueHash = self.__generateQueueHash( ceQueueDict )
          if queueName in self.queueCECache and self.queueCECache[queueName]['Hash'] == queueHash:
            queueCE = self.queueCECache[queueName]['CE']
          else:
            result = ceFactory.getCE( ceName = ce,
                                      ceType = ceDict['CEType'],
                                      ceParametersDict = ceQueueDict )
            if not result['OK']:
              return result
            cached = self.queueCECache.setdefault( queueName, {} )
            cached['Hash'] = queueHash
            cached['CE'] = result['Value']
            queueCE = cached['CE']

          entry['CE'] = queueCE
          entry['CEName'] = ce
          entry['CEType'] = ceDict['CEType']
          entry['Site'] = site
          entry['QueueName'] = queue
          entry['Platform'] = platform

          result = entry['CE'].isValid()
          if not result['OK']:
            self.log.fatal( result['Message'] )
            return result

          # The queue level setting, when present, hides the CE level one
          if 'BundleProxy' in params:
            if params['BundleProxy'].lower() in ['true','yes','1']:
              entry['BundleProxy'] = True
          elif 'BundleProxy' in ceDict:
            if ceDict['BundleProxy'].lower() in ['true','yes','1']:
              entry['BundleProxy'] = True

          if site not in self.sites:
            self.sites.append( site )

    return S_OK()
  def __getGridJobOutput( self, pilotReference ):
    """ Retrieve the standard output and standard error of a pilot job.

        The output already stored in pilotDB is returned when it is not empty.
        Otherwise the output is fetched, depending on the pilot GridType:

          - LCG, gLite, CREAM: through getPilotOutput() using the pilot proxy
            obtained for the owner and the VOMSRole option of the owner group
          - anything else: through a ComputingElement object created for the
            queue the pilot was sent to

        A freshly fetched, non-empty standard output is stored in pilotDB.

        :param pilotReference: pilot job reference (string)
        :return: S_OK( dict ) with the keys StdOut, StdErr, OwnerDN, OwnerGroup
                 and FileList, or S_ERROR
    """

    infoResult = pilotDB.getPilotInfo( pilotReference )
    if not ( infoResult['OK'] and infoResult['Value'] ):
      return S_ERROR( 'Failed to get info for pilot ' + pilotReference )

    pilotInfo = infoResult['Value'][pilotReference]
    ownerDN = pilotInfo['OwnerDN']
    ownerGroup = pilotInfo['OwnerGroup']

    # FIXME: What if the OutputSandBox is not StdOut and StdErr, what do we do with other files?
    storedResult = pilotDB.getPilotOutput( pilotReference )
    if storedResult['OK']:
      storedOut = storedResult['Value']['StdOut']
      storedErr = storedResult['Value']['StdErr']
      if storedOut or storedErr:
        return S_OK( { 'StdOut': storedOut,
                       'StdErr': storedErr,
                       'OwnerDN': ownerDN,
                       'OwnerGroup': ownerGroup,
                       'FileList': [] } )
      gLogger.warn( 'Empty pilot output found for %s' % pilotReference )

    gridType = pilotInfo['GridType']
    if gridType not in ["LCG", "gLite", "CREAM"]:
      # Instantiate the appropriate CE and ask it for the output
      ceFactory = ComputingElementFactory()
      queueResult = getQueue( pilotInfo['GridSite'], pilotInfo['DestinationSite'], pilotInfo['Queue'] )
      if not queueResult['OK']:
        return queueResult
      queueDescription = queueResult['Value']
      ceResult = ceFactory.getCE( gridType, pilotInfo['DestinationSite'], queueDescription )
      if not ceResult['OK']:
        return ceResult
      computingElement = ceResult['Value']
      stamp = pilotInfo['PilotStamp']
      # The CE is addressed with "<reference>:::<stamp>" when a stamp is defined
      jobReference = pilotReference + ':::' + stamp if stamp else pilotReference
      outputResult = computingElement.getJobOutput( jobReference )
      if not outputResult['OK']:
        return outputResult
      stdout, error = outputResult['Value']
      if stdout:
        storeResult = pilotDB.storePilotOutput( pilotReference, stdout, error )
        if not storeResult['OK']:
          gLogger.error( 'Failed to store pilot output:', storeResult['Message'] )

      return S_OK( { 'StdOut': stdout,
                     'StdErr': error,
                     'OwnerDN': ownerDN,
                     'OwnerGroup': ownerGroup,
                     'FileList': [] } )

    # Grid middleware case: the output is retrieved with the pilot owner proxy
    vomsGroup = getGroupOption( ownerGroup, 'VOMSRole', ownerGroup )
    proxyResult = gProxyManager.getPilotProxyFromVOMSGroup( ownerDN, vomsGroup )
    if not proxyResult['OK']:
      gLogger.error( proxyResult['Message'] )
      gLogger.error( 'Could not get proxy:', 'User "%s", Group "%s"' % ( ownerDN, vomsGroup ) )
      return S_ERROR( "Failed to get the pilot's owner proxy" )
    pilotProxy = proxyResult['Value']

    stamp = pilotInfo['PilotStamp']
    outputResult = getPilotOutput( pilotProxy, gridType, pilotReference, stamp )
    if not outputResult['OK']:
      return S_ERROR( 'Failed to get pilot output: ' + outputResult['Message'] )
    # FIXME: What if the OutputSandBox is not StdOut and StdErr, what do we do with other files?
    stdout = outputResult['StdOut']
    error = outputResult['StdErr']
    retrievedFiles = outputResult['FileList']
    if stdout:
      storeResult = pilotDB.storePilotOutput( pilotReference, stdout, error )
      if not storeResult['OK']:
        gLogger.error( 'Failed to store pilot output:', storeResult['Message'] )

    # The reported OwnerGroup is the value resolved through the VOMSRole option
    return S_OK( { 'StdOut': stdout,
                   'StdErr': error,
                   'OwnerDN': ownerDN,
                   'OwnerGroup': vomsGroup,
                   'FileList': retrievedFiles } )
Exemple #29
0
    def export_killPilot(pilotRefList):
        """ Kill the specified pilots

        The pilots are regrouped by owner, group, site, CE and queue. For each
        such group a ComputingElement object is created and asked to kill the
        pilots of the group in one call.

        :param pilotRefList: a single pilot reference (string) or an iterable
                             of pilot references
        :return: S_OK() when all the kill requests succeeded, S_ERROR if the
                 pilot info, the queue description, the CE object or the owner
                 proxy could not be obtained, or if at least one kill request
                 failed
        """
        # Make a list if it is not yet. isinstance() also covers subclasses of
        # the string types and avoids splitting a string into characters first.
        if isinstance(pilotRefList, StringTypes):
            pilotRefs = [pilotRefList]
        else:
            pilotRefs = list(pilotRefList)

        # Regroup pilots per site and per owner
        pilotRefDict = {}
        for pilotReference in pilotRefs:
            result = pilotDB.getPilotInfo(pilotReference)
            if not result['OK'] or not result['Value']:
                return S_ERROR('Failed to get info for pilot ' +
                               pilotReference)

            pilotDict = result['Value'][pilotReference]
            owner = pilotDict['OwnerDN']
            group = pilotDict['OwnerGroup']
            queue = '@@@'.join([
                owner, group, pilotDict['GridSite'],
                pilotDict['DestinationSite'], pilotDict['Queue']
            ])
            queueEntry = pilotRefDict.setdefault(queue, {})
            queueEntry.setdefault('PilotList', []).append(pilotReference)
            queueEntry['GridType'] = pilotDict['GridType']

        # Do the work now queue by queue
        ceFactory = ComputingElementFactory()
        failed = []
        for key, pilotDict in pilotRefDict.items():

            owner, group, site, ce, queue = key.split('@@@')
            result = getQueue(site, ce, queue)
            if not result['OK']:
                return result
            queueDict = result['Value']
            gridType = pilotDict['GridType']
            result = ceFactory.getCE(gridType, ce, queueDict)
            if not result['OK']:
                return result
            ce = result['Value']

            # FIXME: quite hacky. Should be either removed, or based on some flag
            if gridType in ["LCG", "gLite", "CREAM", "ARC", "Globus"]:
                group = getGroupOption(group, 'VOMSRole', group)
                ret = gProxyManager.getPilotProxyFromVOMSGroup(owner, group)
                if not ret['OK']:
                    gLogger.error(ret['Message'])
                    gLogger.error('Could not get proxy:',
                                  'User "%s", Group "%s"' % (owner, group))
                    return S_ERROR("Failed to get the pilot's owner proxy")
                proxy = ret['Value']
                ce.setProxy(proxy)

            pilotList = pilotDict['PilotList']
            result = ce.killJob(pilotList)
            if not result['OK']:
                # Keep going with the other queues, but do not lose the reason
                gLogger.error('Failed to kill pilots:',
                              result.get('Message', ''))
                failed.extend(pilotList)

        if failed:
            return S_ERROR('Failed to kill at least some pilots')

        return S_OK()
Exemple #30
0
    def __getGridJobOutput(self, pilotReference):
        """ Retrieve the standard output and standard error of a pilot job.

        The output already stored in pilotDB is returned when it is not empty.
        Otherwise the output is fetched, depending on the pilot GridType:

          - LCG, gLite, CREAM: through getPilotOutput() using the pilot proxy
            obtained for the owner and the VOMSRole option of the owner group
          - anything else: through a ComputingElement object created from the
            description of the pilot queue

        A freshly fetched, non-empty standard output is stored in pilotDB.

        :param pilotReference: pilot job reference (string)
        :return: S_OK( dict ) with the keys StdOut, StdErr, OwnerDN, OwnerGroup
                 and FileList, or S_ERROR
        """

        infoResult = pilotDB.getPilotInfo(pilotReference)
        if not (infoResult['OK'] and infoResult['Value']):
            return S_ERROR('Failed to get info for pilot ' + pilotReference)

        pilotInfo = infoResult['Value'][pilotReference]
        ownerDN = pilotInfo['OwnerDN']
        ownerGroup = pilotInfo['OwnerGroup']

        # FIXME: What if the OutputSandBox is not StdOut and StdErr, what do we do with other files?
        storedResult = pilotDB.getPilotOutput(pilotReference)
        if storedResult['OK']:
            storedOut = storedResult['Value']['StdOut']
            storedErr = storedResult['Value']['StdErr']
            if storedOut or storedErr:
                return S_OK({
                    'StdOut': storedOut,
                    'StdErr': storedErr,
                    'OwnerDN': ownerDN,
                    'OwnerGroup': ownerGroup,
                    'FileList': [],
                })
            gLogger.warn('Empty pilot output found for %s' % pilotReference)

        gridType = pilotInfo['GridType']
        if gridType not in ["LCG", "gLite", "CREAM"]:
            # Instantiate the appropriate CE and ask it for the output
            ceFactory = ComputingElementFactory()
            queueResult = Resources(group=ownerGroup).getQueueDescription(
                pilotInfo['Queue'])
            if not queueResult['OK']:
                return queueResult
            queueDescription = queueResult['Value']
            ceResult = ceFactory.getCE(gridType, pilotInfo['DestinationSite'],
                                       queueDescription)
            if not ceResult['OK']:
                return ceResult
            computingElement = ceResult['Value']
            stamp = pilotInfo['PilotStamp']
            # The CE is addressed with "<reference>:::<stamp>" when a stamp is defined
            if stamp:
                jobReference = pilotReference + ':::' + stamp
            else:
                jobReference = pilotReference
            outputResult = computingElement.getJobOutput(jobReference)
            if not outputResult['OK']:
                return outputResult
            stdout, error = outputResult['Value']
            if stdout:
                storeResult = pilotDB.storePilotOutput(pilotReference, stdout,
                                                       error)
                if not storeResult['OK']:
                    gLogger.error('Failed to store pilot output:',
                                  storeResult['Message'])

            return S_OK({
                'StdOut': stdout,
                'StdErr': error,
                'OwnerDN': ownerDN,
                'OwnerGroup': ownerGroup,
                'FileList': [],
            })

        # Grid middleware case: the output is retrieved with the pilot owner proxy
        vomsGroup = getGroupOption(ownerGroup, 'VOMSRole', ownerGroup)
        proxyResult = gProxyManager.getPilotProxyFromVOMSGroup(ownerDN,
                                                               vomsGroup)
        if not proxyResult['OK']:
            gLogger.error(proxyResult['Message'])
            gLogger.error('Could not get proxy:',
                          'User "%s", Group "%s"' % (ownerDN, vomsGroup))
            return S_ERROR("Failed to get the pilot's owner proxy")
        pilotProxy = proxyResult['Value']

        stamp = pilotInfo['PilotStamp']
        outputResult = getPilotOutput(pilotProxy, gridType, pilotReference,
                                      stamp)
        if not outputResult['OK']:
            return S_ERROR('Failed to get pilot output: ' +
                           outputResult['Message'])
        # FIXME: What if the OutputSandBox is not StdOut and StdErr, what do we do with other files?
        stdout = outputResult['StdOut']
        error = outputResult['StdErr']
        retrievedFiles = outputResult['FileList']
        if stdout:
            storeResult = pilotDB.storePilotOutput(pilotReference, stdout,
                                                   error)
            if not storeResult['OK']:
                gLogger.error('Failed to store pilot output:',
                              storeResult['Message'])

        # The reported OwnerGroup is the value resolved through the VOMSRole option
        return S_OK({
            'StdOut': stdout,
            'StdErr': error,
            'OwnerDN': ownerDN,
            'OwnerGroup': vomsGroup,
            'FileList': retrievedFiles,
        })
Exemple #31
0
    def getQueues(self):
        """ Get the list of relevant CEs and their descriptions

        For every site in self.siteNames the CEs are read from the
        /Resources/Sites section of the configuration. A CE is retained when
        it is listed in the 'CEs' agent option (or that option is empty), its
        SubmissionMode is "direct" and its CEType is one of the 'CETypes'
        agent option values. For each queue of a retained CE an entry
        "<ce>_<queue>" is created in self.queueDict holding the queue
        parameters, the ComputingElement object and the CE/site/queue names.

        :return: S_OK() on success, S_ERROR (or the failing result) as soon as
                 a configuration lookup, the CE creation or the CE validation
                 fails
        """

        self.queueDict = {}
        ceFactory = ComputingElementFactory()
        ceTypes = self.am_getOption('CETypes', [])
        ceConfList = self.am_getOption('CEs', [])

        for siteName in self.siteNames:
            # Look up CE definitions in the site CS description
            ceList = []
            gridType = siteName.split('.')[0]
            result = gConfig.getSections('/Resources/Sites/%s/%s/CEs' %
                                         (gridType, siteName))
            if not result['OK']:
                return S_ERROR('Failed to look up the CS for the site %s CEs' %
                               siteName)
            if not result['Value']:
                return S_ERROR('No CEs found for site %s' % siteName)
            ceTotalList = result['Value']
            for ce in ceTotalList:
                # An empty 'CEs' option means that all the CEs are eligible
                if ceConfList and ce not in ceConfList:
                    continue
                ceType = gConfig.getValue(
                    '/Resources/Sites/%s/%s/CEs/%s/CEType' %
                    (gridType, siteName, ce), 'Unknown')
                result = gConfig.getOptionsDict(
                    '/Resources/Sites/%s/%s/CEs/%s' %
                    (gridType, siteName, ce))
                if not result['OK']:
                    return S_ERROR('Failed to look up the CS for ce %s' %
                                   ce)
                ceDict = result['Value']
                if "SubmissionMode" in ceDict and ceDict[
                        'SubmissionMode'].lower() == "direct":
                    if ceType in ceTypes:
                        ceList.append((ce, ceType, ceDict))

            for ce, ceType, ceDict in ceList:
                section = '/Resources/Sites/%s/%s/CEs/%s/Queues' % (
                    gridType, siteName, ce)
                result = gConfig.getSections(section)
                if not result['OK']:
                    return S_ERROR('Failed to look up the CS for queues')
                if not result['Value']:
                    return S_ERROR('No Queues found for site %s, ce %s' %
                                   (siteName, ce))

                queues = result['Value']
                for queue in queues:
                    result = gConfig.getOptionsDict('%s/%s' % (section, queue))
                    if not result['OK']:
                        return S_ERROR(
                            'Failed to look up the CS for ce,queue %s,%s' %
                            (ce, queue))

                    queueName = '%s_%s' % (ce, queue)
                    # queueParams is the very dictionary stored in self.queueDict,
                    # so the updates below are visible through both names
                    queueParams = result['Value']
                    self.queueDict[queueName] = {}
                    self.queueDict[queueName]['ParametersDict'] = queueParams
                    queueParams['Queue'] = queue
                    queueParams['Site'] = siteName
                    queueParams['GridEnv'] = self.gridEnv
                    queueParams['Setup'] = gConfig.getValue('/DIRAC/Setup',
                                                            'unknown')
                    # Evaluate the CPU limit of the queue according to the Glue convention
                    # To Do: should be a utility
                    if "maxCPUTime" in queueParams and "SI00" in queueParams:
                        maxCPUTime = float(queueParams['maxCPUTime'])
                        # For some sites there are crazy values in the CS
                        maxCPUTime = max(maxCPUTime, 0)
                        maxCPUTime = min(maxCPUTime, 86400 * 12.5)
                        si00 = float(queueParams['SI00'])
                        queueCPUTime = 60. / 250. * maxCPUTime * si00
                        queueParams['CPUTime'] = int(queueCPUTime)
                    qwDir = os.path.join(self.workingDirectory, queue)
                    if not os.path.exists(qwDir):
                        os.makedirs(qwDir)
                    queueParams['WorkingDirectory'] = qwDir
                    # Queue level options take precedence over the CE level ones
                    queueDict = dict(ceDict)
                    queueDict.update(queueParams)
                    result = ceFactory.getCE(ceName=ce,
                                             ceType=ceType,
                                             ceParametersDict=queueDict)
                    if not result['OK']:
                        return result
                    self.queueDict[queueName]['CE'] = result['Value']
                    self.queueDict[queueName]['CEName'] = ce
                    self.queueDict[queueName]['CEType'] = ceType
                    self.queueDict[queueName]['Site'] = siteName
                    self.queueDict[queueName]['QueueName'] = queue
                    result = self.queueDict[queueName]['CE'].isValid()
                    if not result['OK']:
                        self.log.fatal(result['Message'])
                        return result
                    # Enable the proxy bundling only for an affirmative option
                    # value: the mere presence of the option (e.g. "False")
                    # must not switch it on
                    bundleProxy = str(queueParams.get('BundleProxy', ''))
                    if bundleProxy.lower() in ['true', 'yes', '1']:
                        self.queueDict[queueName]['BundleProxy'] = True

        return S_OK()