예제 #1
파일: CSAPI.py 프로젝트: JanEbbing/DIRAC
    def getOpsSection():
      """ Locate the Shifter section of the Operations configuration.

      The VO-specific locations are probed first (only when a VO is defined),
      followed by the VO-agnostic ones, so that installations without a VO can
      still resolve a section.

      :return: S_OK( path of the first existing section ), or
               S_ERROR( "No shifter section" ) when no candidate exists
      """
      vo = CSGlobals.getVO()
      setup = CSGlobals.getSetup()

      # Candidates ordered from the most specific to the most generic one
      candidates = []
      if vo:
        candidates.append( '/Operations/%s/%s/Shifter' % ( vo, setup ) )
        candidates.append( '/Operations/%s/Defaults/Shifter' % vo )
      candidates.append( '/Operations/%s/Shifter' % setup )
      candidates.append( '/Operations/Defaults/Shifter' )

      for section in candidates:
        if gConfig.getSections( section )['OK']:
          return S_OK( section )

      return S_ERROR( "No shifter section" )
예제 #2
 def __getSearchPaths( self ):
   """ Build the list of Operations sections to search.

   The generic sections come first; the VO-specific ones are appended only
   when a VO is known, either already stored on the helper or obtained from
   CSGlobals.getVO() (in which case it is remembered on the helper).

   :return: list of configuration section paths
   """
   paths = [ "/Operations/Defaults", "/Operations/%s" % self.__setup ]
   if not self.__vo:
     globalVO = CSGlobals.getVO()
     if not globalVO:
       return paths
     # Reuse the value already fetched instead of querying CSGlobals again
     self.__vo = globalVO
   paths.append( "/Operations/%s/Defaults" % self.__vo )
   paths.append( "/Operations/%s/%s" % ( self.__vo, self.__setup ) )
   return paths
예제 #3
  def _curlDownload( self, granularity, site, tests ):
    """ Download SAM status for entity using the SAM DB programmatic interface

    :param granularity: 'Site'/'Sites' or 'Resource'/'Resources'; selects the
                        JSP page that is queried
    :param site: value sent as the Site_name query parameter
    :param tests: when None, "&only_ss" is appended to the query
    :return: body of the HTTP response, as returned by read()
    :raise ValueError: if granularity is not one of the supported values
    """

    samdbpi_url = "http://lcg-sam.cern.ch:8080/same-pi/"
    # Set your method
    if granularity in ( 'Site', 'Sites' ):
      samdbpi_method = "site_status.jsp?"
    elif granularity in ( 'Resource', 'Resources' ):
      samdbpi_method = "service_endpoint_status.jsp?"
    else:
      # Fail with an explicit message instead of an unbound local further down
      raise ValueError( "Unsupported granularity: %s" % granularity )
    # Set your site
    samdbpi_site = site
    # set test
    samdbpi_test = ""
    if tests is None:
      samdbpi_test = "&only_ss"

    # The first configured extension is sent as the VO name
    extension = CSGlobals.getCSExtensions()[0]

    samdb_ep = samdbpi_url + samdbpi_method + "VO_name=" + extension + "&Site_name=" + samdbpi_site + samdbpi_test

    req = urllib2.Request( samdb_ep )
    samPage = urllib2.urlopen( req )
    try:
      return samPage.read()
    finally:
      # Always release the connection, even if reading fails
      samPage.close()
  def __getInstallFlags(self):
    """ Get the flags to pass to dirac-install.py inside the container.
        Returns a string containing the command line flags.

        Raises IndexError if the Pilot/Version option is empty, since its
        first entry is used unconditionally.
    """
    instOpts = []
    setup = gConfig.getValue("/DIRAC/Setup", "unknown")
    opsHelper = Operations.Operations(setup=setup)

    installationName = opsHelper.getValue("Pilot/Installation", "")
    if installationName:
      instOpts.append('-V %s' % installationName)

    # Only the first listed version is requested
    diracVersions = opsHelper.getValue("Pilot/Version", [])
    instOpts.append("-r '%s'" % diracVersions[0])

    # Python version of the running interpreter, e.g. "27"
    pyVer = "%u%u" % (sys.version_info.major, sys.version_info.minor)
    instOpts.append("-i %s" % pyVer)

    pilotExtensionsList = opsHelper.getValue("Pilot/Extensions", [])
    extensionsList = []
    if pilotExtensionsList:
      # An explicit 'None' first entry means "install no extension"
      if pilotExtensionsList[0] != 'None':
        extensionsList = pilotExtensionsList
    else:
      # Nothing configured for pilots: fall back to the CS extensions
      extensionsList = CSGlobals.getCSExtensions()
    if extensionsList:
      instOpts.append("-e '%s'" % ','.join([ext for ext in extensionsList if 'Web' not in ext]))
    if 'ContainerExtraOpts' in self.ceParameters:
      # NOTE(review): the body of this branch is missing from the reviewed
      # source; appending the option verbatim is the presumed intent - confirm.
      instOpts.append(self.ceParameters['ContainerExtraOpts'])
    return ' '.join(instOpts)
예제 #5
 def __discoverSettings( self ):
   """ Resolve the VO and the setup this helper works with.

   VO precedence: global VO from CSGlobals, then the user supplied VO, then
   the VO resolved from the user group (False when none is found).
   Setup precedence: user supplied setup, then the setup from CSGlobals.
   """
   #Set the VO
   globalVO = CSGlobals.getVO()
   if globalVO:
     self.__vo = globalVO
   elif self.__uVO:
     self.__vo = self.__uVO
   else:
     # Last resort: derive the VO from the group
     self.__vo = Registry.getVOForGroup( self.__uGroup )
     if not self.__vo:
       self.__vo = False
   #Set the setup
   self.__setup = False
   if self.__uSetup:
     self.__setup = self.__uSetup
   else:
     self.__setup = CSGlobals.getSetup()
예제 #6
 def __generateRootModules( self, baseModules ):
   """ Iterate over all the possible root modules

   The baseModules list is stored as is (no copy is made) and extended in
   place with "<ext>DIRAC" for the configured extensions, visited in
   reverse order, followed by the empty string.

   :param baseModules: list of root module names to start from
   """
   self.__rootModules = baseModules
   for rootModule in reversed( CSGlobals.getCSExtensions() ):
     # NOTE(review): the membership test uses the un-suffixed name while the
     # suffixed one is appended, and names already ending in "DIRAC" are
     # never appended - confirm this is intended.
     if rootModule[-5:] != "DIRAC" and rootModule not in self.__rootModules:
       self.__rootModules.append( "%sDIRAC" % rootModule )
   self.__rootModules.append( "" )
예제 #7
파일: ObjectLoader.py 프로젝트: bmb/DIRAC
 def __rootModules( self ):
   """ Iterate over all the possible root modules

   Yields every configured extension, suffixed with "DIRAC" when the name
   does not already end with it, then 'DIRAC' and finally the empty string.
   """
   for rootModule in CSGlobals.getCSExtensions():
     if not rootModule.endswith( "DIRAC" ):
       rootModule = "%sDIRAC" % rootModule
     yield rootModule
   yield 'DIRAC'
   yield ''
예제 #8
파일: Resources.py 프로젝트: graciani/DIRAC
 def __discoverSettings( self ):
   """ Resolve the VO this helper works with.

   Precedence: global VO from CSGlobals, then the user supplied VO, then the
   VO resolved from the user group (None when none is found).
   """
   #Set the VO
   globalVO = CSGlobals.getVO()
   if globalVO:
     self.__vo = globalVO
   elif self.__uVO:
     self.__vo = self.__uVO
   else:
     # Last resort: derive the VO from the group
     self.__vo = Registry.getVOForGroup( self.__uGroup )
     if not self.__vo:
       self.__vo = None
예제 #9
  def __generateRootModules( self, baseModules ):
    """ Iterate over all the possible root modules

    The baseModules list is stored as is (no copy is made), extended in
    place with "<ext>DIRAC" for the configured extensions and the empty
    string, and finally reversed.

    :param baseModules: list of root module names to start from
    """
    self.__rootModules = baseModules
    for rootModule in reversed( CSGlobals.getCSExtensions() ):
      if rootModule[-5:] != "DIRAC" and rootModule not in self.__rootModules:
        self.__rootModules.append( "%sDIRAC" % rootModule )
    self.__rootModules.append( "" )

    # Reversing the order because we want first to look in the extension(s)
    # NOTE(review): the statement implementing this comment is missing from
    # the reviewed source; an in-place reverse is the presumed intent - confirm.
    self.__rootModules.reverse()
예제 #10
파일: DErrno.py 프로젝트: marianne013/DIRAC
def includeExtensionErrors():
  """ Merge all the errors of all the extensions into the errors of these modules
      Should be called only at the initialization of DIRAC, so by the parseCommandLine,
      dirac-agent.py, dirac-service.py, dirac-executor.py
  """

  def __recurseImport( modName, parentModule = None, fullName = False ):
    """ Internal function to load modules

        :param modName: dotted module name, or the list of its remaining components
        :param parentModule: already imported parent package, if any
        :param fullName: full dotted name, carried along the recursion
        :return: the imported leaf module, or None if a component cannot be imported
    """
    if isinstance( modName, basestring ):
      modName = modName.split( "." )
    if not fullName:
      fullName = ".".join( modName )
    try:
      if parentModule:
        impData = imp.find_module( modName[0], parentModule.__path__ )
      else:
        impData = imp.find_module( modName[0] )
      try:
        impModule = imp.load_module( modName[0], *impData )
      finally:
        # find_module may hand back an open file; close it even if loading fails
        if impData[0]:
          impData[0].close()
    except ImportError:
      return None
    if len( modName ) == 1:
      return impModule
    return __recurseImport( modName[1:], impModule, fullName = fullName )

  from DIRAC.ConfigurationSystem.Client.Helpers import CSGlobals
  allExtensions = CSGlobals.getCSExtensions()

  thisModule = sys.modules[__name__]
  for extension in allExtensions:
    # NOTE(review): the handler of this try block is not visible in the
    # reviewed source; skipping a faulty extension is the presumed
    # best-effort behaviour - confirm.
    try:
      ext_derrno = __recurseImport( '%sDIRAC.Core.Utilities.DErrno' % extension )
      if not ext_derrno:
        continue

      # The next 3 dictionary MUST be present for consistency.
      # Read all of them before merging so an incomplete extension is not
      # merged partially.
      extraNames = ext_derrno.extra_dErrName
      extraCodes = ext_derrno.extra_dErrorCode
      extraStrings = ext_derrno.extra_dStrError

      # Global name of errors
      thisModule.__dict__.update( extraNames )
      # Dictionary with the error codes
      thisModule.dErrorCode.update( extraCodes )
      # Error description string
      thisModule.dStrError.update( extraStrings )

      # extra_compatErrorString is optional
      extraCompat = getattr( ext_derrno, 'extra_compatErrorString', {} )
      for err in extraCompat:
        thisModule.compatErrorString.setdefault( err, [] ).extend( extraCompat[err] )
    except Exception:
      continue

예제 #11
 def __discoverSettings( self ):
   """ Resolve the VO and the setup this helper works with.

   VO precedence: global VO from CSGlobals, the user supplied VO, the VO
   resolved from the user group (False when none is found) and finally the
   VO obtained from the proxy group.
   Setup precedence: user supplied setup, then the setup from CSGlobals.
   """
   #Set the VO
   globalVO = CSGlobals.getVO()
   if globalVO:
     self.__vo = globalVO
   elif self.__uVO:
     self.__vo = self.__uVO
   elif self.__uGroup:
     self.__vo = Registry.getVOForGroup( self.__uGroup )
     if not self.__vo:
       self.__vo = False
   else:
     # Nothing supplied by the caller: look at the proxy
     result = getVOfromProxyGroup()
     if result['OK']:
       self.__vo = result['Value']
   #Set the setup
   self.__setup = False
   if self.__uSetup:
     self.__setup = self.__uSetup
   else:
     self.__setup = CSGlobals.getSetup()
예제 #12
 def setHandlers( cls, handlers ):
   """ Register the handlers and compute the ordered list of extensions.

   Handlers are indexed by their LOCATION attribute with surrounding "/"
   stripped. The extensions list holds every installed extension except
   "DIRAC" and "WebAppDIRAC", which are always appended last, in that order.

   :param handlers: mapping whose values are handler objects exposing LOCATION
   """
   cls.__handlers = {}
   for k in handlers:
     handler = handlers[ k ]
     cls.__handlers[ handler.LOCATION.strip("/") ] = handler
   #Calculate extensions
   cls.__extensions = []
   for ext in CSGlobals.getInstalledExtensions():
     # The two base modules are appended explicitly below
     if ext in ( "WebAppDIRAC", "DIRAC" ):
       continue
     cls.__extensions.append( ext )
   cls.__extensions.append( "DIRAC" )
   cls.__extensions.append( "WebAppDIRAC" )
예제 #13
def loadObjects( path, reFilter = None, parentClass = None ):
  """ Load the classes found under a path in DIRAC and its extensions.

  :param str path: the path to the system, for example DIRAC/AccountingSystem
  :param object reFilter: compiled regular expression used to select the files
  :param object parentClass: class the loaded classes must derive from
  :return: dict mapping the name of each class to the class object
  """
  if not reFilter:
    reFilter = re.compile( r".*[a-z1-9]\.py$" )
  pathList = List.fromChar( path, "/" )

  parentModuleList = [ "%sDIRAC" % ext for ext in CSGlobals.getCSExtensions() ] + [ 'DIRAC' ]
  objectsToLoad = {}
  #Find which object files match
  for parentModule in parentModuleList:
    objDir = os.path.join( DIRAC.rootPath, parentModule, *pathList )
    if not os.path.isdir( objDir ):
      continue
    for objFile in os.listdir( objDir ):
      if reFilter.match( objFile ):
        # Strip the ".py" suffix
        pythonClassName = objFile[:-3]
        # The first module providing a name wins (extensions come first)
        if pythonClassName not in objectsToLoad:
          gLogger.info( "Adding to load queue %s/%s/%s" % ( parentModule, path, pythonClassName ) )
          objectsToLoad[ pythonClassName ] = parentModule

  #Load them!
  loadedObjects = {}

  for pythonClassName in objectsToLoad:
    parentModule = objectsToLoad[ pythonClassName ]
    try:
      #Where parentModule can be DIRAC, pathList is something like [ "AccountingSystem", "Client", "Types" ]
      #And the python class name is.. well, the python class name
      objPythonPath = "%s.%s.%s" % ( parentModule, ".".join( pathList ), pythonClassName )
      # A non-empty fromlist makes __import__ return the leaf module
      objModule = __import__( objPythonPath,
                              globals(),
                              locals(), [ pythonClassName ] )
      objClass = getattr( objModule, pythonClassName )
    except Exception as e:
      gLogger.error( "Can't load type", "%s/%s: %s" % ( parentModule, pythonClassName, str( e ) ) )
      continue
    # The parent class itself is not reported
    if parentClass == objClass:
      continue
    if parentClass and not issubclass( objClass, parentClass ):
      gLogger.warn( "%s is not a subclass of %s. Skipping" % ( objClass, parentClass ) )
      continue
    gLogger.info( "Loaded %s" % objPythonPath )
    loadedObjects[ pythonClassName ] = objClass

  return loadedObjects
예제 #14
 def __discoverSettings( self ):
   """ Resolve the VO and the setup stored in the thread data.

   VO: the user supplied VO, otherwise the VO resolved from the user group.
   Setup: the user supplied setup, otherwise the setup from CSGlobals.

   :raise RuntimeError: when no VO can be resolved from the group
   """
   #Set the VO
   self.__threadData.vo = False
   if self.__threadData.uVO:
     self.__threadData.vo = self.__threadData.uVO
   else:
     self.__threadData.vo = Registry.getVOForGroup( self.__threadData.uGroup )
     if not self.__threadData.vo:
       raise RuntimeError( "Don't know how to discover VO. Please check your VO and groups configuration" )
   #Set the setup
   self.__threadData.setup = False
   if self.__threadData.uSetup:
     self.__threadData.setup = self.__threadData.uSetup
   else:
     self.__threadData.setup = CSGlobals.getSetup()
예제 #15
def loadWebAppCFGFiles():
  """
  Load WebApp/web.cfg definitions

  The web.cfg of every extension is merged into a single CFG, visiting
  WebAppDIRAC first, then DIRAC, then the extensions in reverse order of
  CSGlobals.getCSExtensions().
  """
  exts = []
  for ext in CSGlobals.getCSExtensions():
    if ext == "DIRAC":
      continue
    if ext[-5:] != "DIRAC":
      ext = "%sDIRAC" % ext
    if ext != "WebAppDIRAC":
      exts.append( ext )
  exts.append( "DIRAC" )
  exts.append( "WebAppDIRAC" )
  webCFG = CFG()
  for modName in reversed( exts ):
    try:
      modPath = imp.find_module( modName )[1]
    except ImportError:
      continue
    gLogger.verbose( "Found module %s at %s" % ( modName, modPath ) )
    cfgPath = os.path.join( modPath, "WebApp", "web.cfg" )
    if not os.path.isfile( cfgPath ):
      gLogger.verbose( "Inexistant %s" % cfgPath )
      continue
    try:
      modCFG = CFG().loadFromFile( cfgPath )
    except Exception as excp:
      gLogger.error( "Could not load %s: %s" % ( cfgPath, excp ) )
      continue
    gLogger.verbose( "Loaded %s" % cfgPath )
    # Breadth-first walk over the sections below BASECS
    expl = [ BASECS ]
    while len( expl ):
      current = expl.pop( 0 )
      if not modCFG.isSection( current ):
        continue
      if modCFG.getOption( "%s/AbsoluteDefinition" % current, False ):
        gLogger.verbose( "%s:%s is an absolute definition" % ( modName, current ) )
        # An absolute definition replaces what was merged so far
        # NOTE(review): the original handler is not visible in the reviewed
        # source; ignoring a failed delete is the presumed intent - confirm.
        try:
          webCFG.deleteKey( current )
        except Exception:
          pass
        modCFG.deleteKey( "%s/AbsoluteDefinition" % current )
      else:
        for sec in modCFG[ current ].listSections():
          expl.append( "%s/%s" % ( current, sec ) )
    #Add the modCFG
    webCFG = webCFG.mergeWith( modCFG )
  # NOTE(review): the visible code does not use webCFG after the merge;
  # presumably it is loaded into the configuration afterwards - confirm.
예제 #16
 def getPaths( self, dirName ):
   """
   Get lists of paths for all installed and enabled extensions

   :param dirName: name of the directory looked up below <module>/WebApp
   :return: list of existing <module>/WebApp/<dirName> directories, with the
            WebAppDIRAC one always appended last
   """
   pathList = []
   for extName in CSGlobals.getCSExtensions():
     # endswith also handles names shorter than the "DIRAC" suffix
     if not extName.endswith( "DIRAC" ):
       extName = "%sDIRAC" % extName
     # WebAppDIRAC is appended explicitly at the end
     if extName == "WebAppDIRAC":
       continue
     try:
       modFile, modPath, desc = imp.find_module( extName )
     except ImportError:
       continue
     # find_module may return an open file object; release it
     if modFile:
       modFile.close()
     staticPath = os.path.join( modPath, "WebApp", dirName )
     if os.path.isdir( staticPath ):
       pathList.append( staticPath )
   #Add WebAppDirac to the end
   pathList.append( os.path.join( WebAppDIRAC.rootPath, "WebApp", dirName ) )
   return pathList
예제 #17
 def generatePath( self, option, vo = False, setup = False ):
   """
   Generate the CS path for an option
   if vo is not defined, the helper's vo will be used for multi VO installations
   if setup evaluates False (except None) -> The helpers setup will  be used
   if setup is defined -> whatever is defined will be used as setup
   if setup is None -> Defaults will be used

   :param option: option path appended to the generated section
   :return: "/Operations[/<vo>]/<setup or Defaults>/<option>"
   """
   path = "/Operations"
   # The VO level is only added when no global VO is configured
   if not CSGlobals.getVO():
     if not vo:
       vo = self.__vo
     if vo:
       path += "/%s" % vo
   # None is the explicit request for Defaults, so it is left untouched
   if not setup and setup is not None:
     setup = self.__setup
   if setup:
     path += "/%s" % setup
   else:
     path += "/Defaults"
   return "%s/%s" % ( path, option )
예제 #18
  def submitJobs( self ):
    """ Go through defined computing elements and submit jobs if necessary

    :return: S_OK() once all queues were processed or when no workload
             matches, otherwise the failing result structure
    """

    # Check that there is some work at all
    setup = CSGlobals.getSetup()
    tqDict = { 'Setup':setup,
               'CPUTime': 9999999,
               'SubmitPool' : self.defaultSubmitPools }
    if self.vo:
      tqDict['Community'] = self.vo
    if self.voGroups:
      tqDict['OwnerGroup'] = self.voGroups

    result = Resources.getCompatiblePlatforms( self.platforms )
    if not result['OK']:
      return result
    tqDict['Platform'] = result['Value']
    tqDict['Site'] = self.sites
    tqDict['Tag'] = []
    self.log.verbose( 'Checking overall TQ availability with requirements' )
    self.log.verbose( tqDict )

    rpcMatcher = RPCClient( "WorkloadManagement/Matcher" )
    result = rpcMatcher.getMatchingTaskQueues( tqDict )
    if not result[ 'OK' ]:
      return result
    if not result['Value']:
      self.log.verbose( 'No Waiting jobs suitable for the director' )
      return S_OK()

    # Collect the sites requested by the waiting workload
    jobSites = set()
    anySite = False
    testSites = set()
    totalWaitingJobs = 0
    for tqID in result['Value']:
      tqInfo = result['Value'][tqID]
      if "Sites" in tqInfo:
        for site in tqInfo['Sites']:
          if site.lower() != 'any':
            jobSites.add( site )
          else:
            anySite = True
      else:
        # No site requirement at all: any site can run the jobs
        anySite = True
      if "JobTypes" in tqInfo:
        if "Sites" in tqInfo:
          for site in tqInfo['Sites']:
            if site.lower() != 'any':
              testSites.add( site )
      totalWaitingJobs += tqInfo['Jobs']

    tqIDList = result['Value'].keys()
    result = pilotAgentsDB.countPilots( { 'TaskQueueID': tqIDList,
                                          'Status': WAITING_PILOT_STATUS },
                                           None )
    totalWaitingPilots = 0
    if result['OK']:
      totalWaitingPilots = result['Value']
    self.log.info( 'Total %d jobs in %d task queues with %d waiting pilots' % (totalWaitingJobs, len( tqIDList ), totalWaitingPilots ) )

    # Check if the site is allowed in the mask
    result = jobDB.getSiteMask()
    if not result['OK']:
      return S_ERROR( 'Can not get the site mask' )
    siteMaskList = result['Value']

    queues = list( self.queueDict.keys() )
    random.shuffle( queues )
    totalSubmittedPilots = 0
    matchedQueues = 0
    for queue in queues:

      # Check if the queue failed previously
      failedCount = self.failedQueues.setdefault( queue, 0 ) % self.failedQueueCycleFactor
      if failedCount != 0:
        self.log.warn( "%s queue failed recently, skipping %d cycles" % ( queue, self.failedQueueCycleFactor - failedCount ) )
        self.failedQueues[queue] += 1
        continue

      queueInfo = self.queueDict[queue]
      ce = queueInfo['CE']
      ceName = queueInfo['CEName']
      ceType = queueInfo['CEType']
      queueName = queueInfo['QueueName']
      siteName = queueInfo['Site']
      platform = queueInfo['Platform']
      siteMask = siteName in siteMaskList

      if not anySite and siteName not in jobSites:
        self.log.verbose( "Skipping queue %s at %s: no workload expected" % (queueName, siteName) )
        continue
      if not siteMask and siteName not in testSites:
        self.log.verbose( "Skipping queue %s at site %s not in the mask" % (queueName, siteName) )
        continue

      if 'CPUTime' in queueInfo['ParametersDict'] :
        queueCPUTime = int( queueInfo['ParametersDict']['CPUTime'] )
      else:
        self.log.warn( 'CPU time limit is not specified for queue %s, skipping...' % queue )
        continue
      if queueCPUTime > self.maxQueueLength:
        queueCPUTime = self.maxQueueLength

      # Prepare the queue description to look for eligible jobs
      ceDict = ce.getParameterDict()
      ceDict[ 'GridCE' ] = ceName
      if not siteMask:
        ceDict['JobType'] = "Test"
      if self.vo:
        ceDict['Community'] = self.vo
      if self.voGroups:
        ceDict['OwnerGroup'] = self.voGroups

      # This is a hack to get rid of !
      ceDict['SubmitPool'] = self.defaultSubmitPools
      result = Resources.getCompatiblePlatforms( platform )
      if not result['OK']:
        continue
      ceDict['Platform'] = result['Value']

      # Get the number of eligible jobs for the target site/queue
      result = rpcMatcher.getMatchingTaskQueues( ceDict )
      if not result['OK']:
        self.log.error( 'Could not retrieve TaskQueues from TaskQueueDB', result['Message'] )
        return result
      taskQueueDict = result['Value']
      if not taskQueueDict:
        self.log.verbose( 'No matching TQs found for %s' % queue )
        continue

      matchedQueues += 1
      totalTQJobs = 0
      tqIDList = taskQueueDict.keys()
      for tq in taskQueueDict:
        totalTQJobs += taskQueueDict[tq]['Jobs']

      self.log.verbose( '%d job(s) from %d task queue(s) are eligible for %s queue' % (totalTQJobs, len( tqIDList ), queue) )

      # Get the number of already waiting pilots for these task queues
      totalWaitingPilots = 0
      if self.pilotWaitingFlag:
        lastUpdateTime = dateTime() - self.pilotWaitingTime * second
        result = pilotAgentsDB.countPilots( { 'TaskQueueID': tqIDList,
                                              'Status': WAITING_PILOT_STATUS },
                                              None, lastUpdateTime )
        if not result['OK']:
          self.log.error( 'Failed to get Number of Waiting pilots', result['Message'] )
          totalWaitingPilots = 0
        else:
          totalWaitingPilots = result['Value']
          self.log.verbose( 'Waiting Pilots for TaskQueue %s:' % tqIDList, totalWaitingPilots )
      if totalWaitingPilots >= totalTQJobs:
        self.log.verbose( "%d waiting pilots already for all the available jobs" % totalWaitingPilots )
        continue

      self.log.verbose( "%d waiting pilots for the total of %d eligible jobs for %s" % (totalWaitingPilots, totalTQJobs, queue) )

      # Get the working proxy
      cpuTime = queueCPUTime + 86400
      self.log.verbose( "Getting pilot proxy for %s/%s %d long" % ( self.pilotDN, self.pilotGroup, cpuTime ) )
      result = gProxyManager.getPilotProxyFromDIRACGroup( self.pilotDN, self.pilotGroup, cpuTime )
      if not result['OK']:
        return result
      self.proxy = result['Value']
      ce.setProxy( self.proxy, cpuTime - 60 )

      # Get the number of available slots on the target site/queue
      totalSlots = self.__getQueueSlots( queue )
      if totalSlots == 0:
        self.log.debug( '%s: No slots available' % queue )
        continue

      pilotsToSubmit = max( 0, min( totalSlots, totalTQJobs - totalWaitingPilots ) )
      self.log.info( '%s: Slots=%d, TQ jobs=%d, Pilots: waiting %d, to submit=%d' % \
                              ( queue, totalSlots, totalTQJobs, totalWaitingPilots, pilotsToSubmit ) )

      # Limit the number of pilots to submit to MAX_PILOTS_TO_SUBMIT
      pilotsToSubmit = min( self.maxPilotsToSubmit, pilotsToSubmit )

      while pilotsToSubmit > 0:
        self.log.info( 'Going to submit %d pilots to %s queue' % ( pilotsToSubmit, queue ) )

        bundleProxy = queueInfo.get( 'BundleProxy', False )
        jobExecDir = ''
        if ceType == 'CREAM':
          jobExecDir = '.'
        jobExecDir = queueInfo.get( 'JobExecDir', jobExecDir )
        httpProxy = queueInfo.get( 'HttpProxy', '' )

        result = self.__getExecutable( queue, pilotsToSubmit, bundleProxy, httpProxy, jobExecDir )
        if not result['OK']:
          return result

        executable, pilotSubmissionChunk = result['Value']
        result = ce.submitJob( executable, '', pilotSubmissionChunk )
        os.unlink( executable )
        if not result['OK']:
          self.log.error( 'Failed submission to queue %s:\n' % queue, result['Message'] )
          # Leave the submission loop and remember the failure
          pilotsToSubmit = 0
          self.failedQueues[queue] += 1
          continue

        pilotsToSubmit = pilotsToSubmit - pilotSubmissionChunk
        # Add pilots to the PilotAgentsDB assign pilots to TaskQueue proportionally to the
        # task queue priorities
        pilotList = result['Value']
        self.queueSlots[queue]['AvailableSlots'] -= len( pilotList )
        totalSubmittedPilots += len( pilotList )
        self.log.info( 'Submitted %d pilots to %s@%s' % ( len( pilotList ), queueName, ceName ) )
        stampDict = {}
        if 'PilotStampDict' in result:
          stampDict = result['PilotStampDict']
        # Cumulative priorities, used for a weighted random choice
        tqPriorityList = []
        sumPriority = 0.
        for tq in taskQueueDict:
          sumPriority += taskQueueDict[tq]['Priority']
          tqPriorityList.append( ( tq, sumPriority ) )
        pilotsByTQ = {}
        for pilotID in pilotList:
          rndm = random.random() * sumPriority
          # Fall back to the last task queue if no interval matches
          # (e.g. when all priorities are zero)
          tqID = tqPriorityList[-1][0]
          for tq, prio in tqPriorityList:
            if rndm < prio:
              tqID = tq
              break
          pilotsByTQ.setdefault( tqID, [] ).append( pilotID )

        for tqID, tqPilots in pilotsByTQ.items():
          result = pilotAgentsDB.addPilotTQReference( tqPilots,
                                                      tqID,
                                                      self.pilotDN,
                                                      self.pilotGroup,
                                                      self.localhost,
                                                      ceType,
                                                      '',
                                                      stampDict )
          if not result['OK']:
            self.log.error( 'Failed add pilots to the PilotAgentsDB: ', result['Message'] )
            continue
          for pilot in tqPilots:
            result = pilotAgentsDB.setPilotStatus( pilot, 'Submitted', ceName,
                                                  'Successfully submitted by the SiteDirector',
                                                  siteName, queueName )
            if not result['OK']:
              self.log.error( 'Failed to set pilot status: ', result['Message'] )

    self.log.info( "%d pilots submitted in total in this cycle, %d matched queues" % ( totalSubmittedPilots, matchedQueues ) )
    return S_OK()
예제 #19
    def submitJobs(self):
        """ Go through defined computing elements and submit jobs if necessary

        NOTE(review): several statements of this method appear to be missing
        from the reviewed source (branch keywords, call openers/closers and
        some call arguments); the affected places are flagged below.
        """

        # Check that there is some work at all
        setup = CSGlobals.getSetup()
        tqDict = {"Setup": setup, "CPUTime": 9999999, "SubmitPool": self.defaultSubmitPools}
        if self.vo:
            tqDict["Community"] = self.vo
        if self.voGroups:
            tqDict["OwnerGroup"] = self.voGroups

        result = Resources.getCompatiblePlatforms(self.platforms)
        if not result["OK"]:
            return result
        tqDict["Platform"] = result["Value"]
        tqDict["Site"] = self.sites

        self.log.verbose("Checking overall TQ availability with requirements")

        rpcMatcher = RPCClient("WorkloadManagement/Matcher")
        result = rpcMatcher.getMatchingTaskQueues(tqDict)
        if not result["OK"]:
            return result
        if not result["Value"]:
            self.log.verbose("No Waiting jobs suitable for the director")
            return S_OK()

        # Check if the site is allowed in the mask
        result = jobDB.getSiteMask()
        if not result["OK"]:
            return S_ERROR("Can not get the site mask")
        siteMaskList = result["Value"]

        queues = self.queueDict.keys()
        for queue in queues:
            ce = self.queueDict[queue]["CE"]
            ceName = self.queueDict[queue]["CEName"]
            ceType = self.queueDict[queue]["CEType"]
            queueName = self.queueDict[queue]["QueueName"]
            siteName = self.queueDict[queue]["Site"]
            siteMask = siteName in siteMaskList

            # NOTE(review): the warning below presumably belongs to a missing
            # else branch that also skips the queue - confirm.
            if "CPUTime" in self.queueDict[queue]["ParametersDict"]:
                queueCPUTime = int(self.queueDict[queue]["ParametersDict"]["CPUTime"])
                self.log.warn("CPU time limit is not specified for queue %s, skipping..." % queue)
            if queueCPUTime > self.maxQueueLength:
                queueCPUTime = self.maxQueueLength

            # Get the working proxy
            cpuTime = queueCPUTime + 86400

            self.log.verbose("Getting pilot proxy for %s/%s %d long" % (self.pilotDN, self.pilotGroup, cpuTime))
            result = gProxyManager.getPilotProxyFromDIRACGroup(self.pilotDN, self.pilotGroup, cpuTime)
            if not result["OK"]:
                return result
            self.proxy = result["Value"]
            ce.setProxy(self.proxy, cpuTime - 60)

            # Get the number of available slots on the target site/queue
            result = ce.available()
            if not result["OK"]:
                self.log.warn("Failed to check the availability of queue %s: \n%s" % (queue, result["Message"]))
            ceInfoDict = result["CEInfoDict"]
            # NOTE(review): the call wrapping this report string and its
            # arguments are missing from the reviewed source.
                "CE queue report(%s_%s): Wait=%d, Run=%d, Submitted=%d, Max=%d"
                % (

            totalSlots = result["Value"]

            ceDict = ce.getParameterDict()
            ceDict["GridCE"] = ceName
            if not siteMask and "Site" in ceDict:
                self.log.info("Site not in the mask %s" % siteName)
                self.log.info('Removing "Site" from matching Dict')
                del ceDict["Site"]
            if self.vo:
                ceDict["Community"] = self.vo
            if self.voGroups:
                ceDict["OwnerGroup"] = self.voGroups

            # This is a hack to get rid of !
            ceDict["SubmitPool"] = self.defaultSubmitPools

            result = Resources.getCompatiblePlatforms(self.platforms)
            # NOTE(review): the body of this failure branch is missing.
            if not result["OK"]:
            ceDict["Platform"] = result["Value"]

            # Get the number of eligible jobs for the target site/queue
            result = rpcMatcher.getMatchingTaskQueues(ceDict)
            if not result["OK"]:
                self.log.error("Could not retrieve TaskQueues from TaskQueueDB", result["Message"])
                return result
            taskQueueDict = result["Value"]
            if not taskQueueDict:
                self.log.info("No matching TQs found")

            # Sum the jobs of all matching task queues
            totalTQJobs = 0
            tqIDList = taskQueueDict.keys()
            for tq in taskQueueDict:
                totalTQJobs += taskQueueDict[tq]["Jobs"]

            pilotsToSubmit = min(totalSlots, totalTQJobs)

            # Get the number of already waiting pilots for this queue
            totalWaitingPilots = 0
            if self.pilotWaitingFlag:
                lastUpdateTime = dateTime() - self.pilotWaitingTime * second
                # NOTE(review): the closing parenthesis of this call and the
                # else keyword of the check below are missing.
                result = pilotAgentsDB.countPilots(
                    {"TaskQueueID": tqIDList, "Status": WAITING_PILOT_STATUS}, None, lastUpdateTime
                if not result["OK"]:
                    self.log.error("Failed to get Number of Waiting pilots", result["Message"])
                    totalWaitingPilots = 0
                    totalWaitingPilots = result["Value"]
                    self.log.verbose("Waiting Pilots for TaskQueue %s:" % tqIDList, totalWaitingPilots)

            pilotsToSubmit = max(0, min(totalSlots, totalTQJobs - totalWaitingPilots))
            # NOTE(review): the call wrapping this message is missing.
                "Available slots=%d, TQ jobs=%d, Waiting Pilots=%d, Pilots to submit=%d"
                % (totalSlots, totalTQJobs, totalWaitingPilots, pilotsToSubmit)

            # Limit the number of pilots to submit to MAX_PILOTS_TO_SUBMIT
            pilotsToSubmit = min(self.maxPilotsToSubmit, pilotsToSubmit)

            while pilotsToSubmit > 0:
                self.log.info("Going to submit %d pilots to %s queue" % (pilotsToSubmit, queue))

                bundleProxy = self.queueDict[queue].get("BundleProxy", False)
                jobExecDir = ""
                if ceType == "CREAM":
                    jobExecDir = "."
                jobExecDir = self.queueDict[queue].get("JobExecDir", jobExecDir)
                httpProxy = self.queueDict[queue].get("HttpProxy", "")

                result = self.__getExecutable(queue, pilotsToSubmit, bundleProxy, httpProxy, jobExecDir)
                if not result["OK"]:
                    return result

                executable, pilotSubmissionChunk = result["Value"]
                result = ce.submitJob(executable, "", pilotSubmissionChunk)
                if not result["OK"]:
                    self.log.error("Failed submission to queue %s:\n" % queue, result["Message"])
                    pilotsToSubmit = 0

                pilotsToSubmit = pilotsToSubmit - pilotSubmissionChunk
                # Add pilots to the PilotAgentsDB assign pilots to TaskQueue proportionally to the
                # task queue priorities
                pilotList = result["Value"]
                self.log.info("Submitted %d pilots to %s@%s" % (len(pilotList), queueName, ceName))
                stampDict = {}
                if result.has_key("PilotStampDict"):
                    stampDict = result["PilotStampDict"]
                # Cumulative priorities per task queue
                tqPriorityList = []
                sumPriority = 0.0
                for tq in taskQueueDict:
                    sumPriority += taskQueueDict[tq]["Priority"]
                    tqPriorityList.append((tq, sumPriority))
                rndm = random.random() * sumPriority
                tqDict = {}
                for pilotID in pilotList:
                    rndm = random.random() * sumPriority
                    # NOTE(review): without a break the last matching task
                    # queue wins, and no pilot is ever added to tqDict[tqID];
                    # statements are presumably missing here - confirm.
                    for tq, prio in tqPriorityList:
                        if rndm < prio:
                            tqID = tq
                    if not tqDict.has_key(tqID):
                        tqDict[tqID] = []

                for tqID, pilotList in tqDict.items():
                    # NOTE(review): the closing parenthesis of this call and
                    # most arguments of setPilotStatus below are missing.
                    result = pilotAgentsDB.addPilotTQReference(
                        pilotList, tqID, self.pilotDN, self.pilotGroup, self.localhost, ceType, "", stampDict
                    if not result["OK"]:
                        self.log.error("Failed add pilots to the PilotAgentsDB: ", result["Message"])
                    for pilot in pilotList:
                        result = pilotAgentsDB.setPilotStatus(
                            "Successfully submitted by the SiteDirector",
                        if not result["OK"]:
                            self.log.error("Failed to set pilot status: ", result["Message"])

        return S_OK()
예제 #20
    def __getPilotOptions(self, queue, pilotsToSubmit):
        """Prepare the command-line options passed to the pilots of a queue.

        :param queue: key of the target queue in ``self.queueDict``
        :param pilotsToSubmit: number of pilots the caller would like to submit
        :return: ``[pilotOptions, pilotsToSubmit]`` where ``pilotOptions`` is a list
                 of option strings and ``pilotsToSubmit`` is adjusted to what the
                 obtained proxy token allows; ``[None, None]`` when the setup or
                 the pilot version is not configured or the token request fails
        """
        queueDict = self.queueDict[queue]["ParametersDict"]
        pilotOptions = []

        setup = gConfig.getValue("/DIRAC/Setup", "unknown")
        if setup == "unknown":
            self.log.error("Setup is not defined in the configuration")
            return [None, None]
        pilotOptions.append("-S %s" % setup)
        opsHelper = Operations.Operations(group=self.pilotGroup, setup=setup)

        # Installation defined?
        installationName = opsHelper.getValue("Pilot/Installation", "")
        if installationName:
            pilotOptions.append("-V %s" % installationName)

        # Project defined?
        projectName = opsHelper.getValue("Pilot/Project", "")
        if projectName:
            pilotOptions.append("-l %s" % projectName)
            self.log.info("DIRAC project will be installed by pilots")

        # Request a release
        diracVersion = opsHelper.getValue("Pilot/Version", [])
        if not diracVersion:
            self.log.error("Pilot/Version is not defined in the configuration")
            return [None, None]
        # diracVersion is a list of accepted releases. Just take the first one
        pilotOptions.append("-r %s" % diracVersion[0])

        ownerDN = self.pilotDN
        ownerGroup = self.pilotGroup
        # Request token for maximum pilot efficiency
        result = gProxyManager.requestToken(ownerDN, ownerGroup, pilotsToSubmit * self.maxJobsInFillMode)
        if not result["OK"]:
            self.log.error("Invalid proxy token request", result["Message"])
            return [None, None]
        (token, numberOfUses) = result["Value"]
        pilotOptions.append("-o /Security/ProxyToken=%s" % token)

        # Each pilot executes at most this many jobs in filling mode.
        jobsPerPilot = min(numberOfUses, self.maxJobsInFillMode)
        if jobsPerPilot <= 0:
            # A token without any use left would otherwise cause a division by zero below.
            self.log.error("Proxy token does not allow any job execution")
            return [None, None]
        # Use Filling mode
        pilotOptions.append("-M %s" % jobsPerPilot)

        # Since each pilot will execute min( numberOfUses, self.maxJobsInFillMode )
        # with numberOfUses tokens we can submit at most:
        #    numberOfUses / min( numberOfUses, self.maxJobsInFillMode )
        # pilots. Floor division keeps the count an integer on Python 2 and 3.
        newPilotsToSubmit = numberOfUses // jobsPerPilot
        if newPilotsToSubmit != pilotsToSubmit:
            self.log.info(
                "Number of pilots to submit is changed to %d after getting the proxy token" % newPilotsToSubmit
            )
            pilotsToSubmit = newPilotsToSubmit
        # Debug
        if self.pilotLogLevel.lower() == "debug":
            # NOTE(review): the body of this branch was missing; "-d" is assumed to be
            # the pilot debug switch - confirm against the pilot script options.
            pilotOptions.append("-d")
        # CS Servers
        csServers = gConfig.getValue("/DIRAC/Configuration/Servers", [])
        pilotOptions.append("-C %s" % ",".join(csServers))

        # DIRAC Extensions to be used in pilots
        pilotExtensionsList = opsHelper.getValue("Pilot/Extensions", [])
        extensionsList = []
        if pilotExtensionsList:
            # An explicit first entry "None" disables extensions altogether.
            if pilotExtensionsList[0] != "None":
                extensionsList = pilotExtensionsList
        else:
            # Nothing configured for pilots: fall back to the CS extensions.
            extensionsList = CSGlobals.getCSExtensions()
        if extensionsList:
            pilotOptions.append("-e %s" % ",".join(extensionsList))

        # Requested CPU time
        pilotOptions.append("-T %s" % queueDict["CPUTime"])
        # CEName
        pilotOptions.append("-N %s" % self.queueDict[queue]["CEName"])
        # SiteName
        pilotOptions.append("-n %s" % queueDict["Site"])
        if "ClientPlatform" in queueDict:
            pilotOptions.append("-p '%s'" % queueDict["ClientPlatform"])

        if "SharedArea" in queueDict:
            pilotOptions.append("-o '/LocalSite/SharedArea=%s'" % queueDict["SharedArea"])

        if "SI00" in queueDict:
            # Both CPU factors are derived from the SI00 rating of the queue.
            factor = float(queueDict["SI00"]) / 250.0
            pilotOptions.append("-o '/LocalSite/CPUScalingFactor=%s'" % factor)
            pilotOptions.append("-o '/LocalSite/CPUNormalizationFactor=%s'" % factor)
        else:
            # Without SI00 use the explicitly configured factors, if any, so that
            # each option is emitted at most once.
            if "CPUScalingFactor" in queueDict:
                pilotOptions.append("-o '/LocalSite/CPUScalingFactor=%s'" % queueDict["CPUScalingFactor"])
            if "CPUNormalizationFactor" in queueDict:
                pilotOptions.append("-o '/LocalSite/CPUNormalizationFactor=%s'" % queueDict["CPUNormalizationFactor"])

        # Hack
        if self.defaultSubmitPools:
            pilotOptions.append("-o /Resources/Computing/CEDefaults/SubmitPool=%s" % self.defaultSubmitPools)

        if self.group:
            pilotOptions.append("-G %s" % self.group)

        self.log.verbose("pilotOptions: ", " ".join(pilotOptions))

        return [pilotOptions, pilotsToSubmit]
예제 #21
 def __getVOPath( self ):
   """Return the Operations section path to use.

   The path is "/Operations" when CSGlobals.getVO() reports a VO; otherwise the
   VO kept in the thread data is appended to it.
   """
   globalVO = CSGlobals.getVO()
   # The thread-data VO is only read when no global VO is reported.
   return "/Operations" if globalVO else "/Operations/%s" % self.__threadData.vo
예제 #22
 def __init__( self, name = 'Monitoring/MonitoringDB', readOnly = False ):
   """Initialise the base class and the local bookkeeping attributes.

   :param name: forwarded to the base-class constructor as its second argument
   :param readOnly: value stored as the read-only flag of this instance
   """
   # The base class receives the lower-cased value of CSGlobals.getSetup().
   setupLower = CSGlobals.getSetup().lower()
   super( MonitoringDB, self ).__init__( 'MonitoringDB', name, setupLower )
   self.__documents = {}
   self.__readonly = readOnly
예제 #23
    def submitJobs(self):
        """Go through the defined computing elements and submit pilots if necessary.

        The Matcher is first asked whether any task queue matches the overall
        requirements of this director. Then, for every configured queue that is
        usable, the eligible jobs are counted per processor tag, the pilots
        already waiting are subtracted, and the remainder (bounded by the free
        slots and ``self.maxPilotsToSubmit``) is submitted and recorded through
        ``pilotAgentsDB``.

        :return: ``S_OK()`` when the cycle completes (also when there is no work),
                 otherwise the failed result of the call that aborted the cycle
        """
        # Local import: only needed to remove the pilot wrapper script below.
        import os

        queues = self.queueDict.keys()

        # Check that there is some work at all
        setup = CSGlobals.getSetup()
        tqDict = {
            'Setup': setup,
            'CPUTime': 9999999,
            'SubmitPool': self.defaultSubmitPools,
        }
        if self.vo:
            tqDict['Community'] = self.vo
        if self.voGroups:
            tqDict['OwnerGroup'] = self.voGroups

        if self.checkPlatform:
            # NOTE(review): the argument was truncated in this block;
            # self.platforms is assumed - confirm.
            result = self.resourcesModule.getCompatiblePlatforms(self.platforms)
            if not result['OK']:
                return result
            tqDict['Platform'] = result['Value']
        tqDict['Site'] = self.sites
        tags = []
        for queue in queues:
            tags += self.queueDict[queue]['ParametersDict']['Tag']
        tqDict['Tag'] = list(set(tags))

        self.log.verbose('Checking overall TQ availability with requirements')

        matcherClient = MatcherClient()
        result = matcherClient.getMatchingTaskQueues(tqDict)
        if not result['OK']:
            return result
        if not result['Value']:
            self.log.verbose('No Waiting jobs suitable for the director')
            return S_OK()

        # Collect the sites for which work exists: jobSites for any job,
        # testSites for task queues that declare job types.
        jobSites = set()
        anySite = False
        testSites = set()
        totalWaitingJobs = 0
        for tqID in result['Value']:
            tqInfo = result['Value'][tqID]
            if "Sites" in tqInfo:
                for site in tqInfo['Sites']:
                    if site.lower() != 'any':
                        jobSites.add(site)
                    else:
                        anySite = True
            else:
                anySite = True
            if "JobTypes" in tqInfo:
                if "Sites" in tqInfo:
                    for site in tqInfo['Sites']:
                        if site.lower() != 'any':
                            testSites.add(site)
            totalWaitingJobs += tqInfo['Jobs']

        tqIDList = list(result['Value'].keys())
        result = pilotAgentsDB.countPilots(
            {
                'TaskQueueID': tqIDList,
                'Status': WAITING_PILOT_STATUS
            }, None)
        tagWaitingPilots = 0
        if result['OK']:
            tagWaitingPilots = result['Value']
        self.log.info(
            'Total %d jobs in %d task queues with %d waiting pilots' %
            (totalWaitingJobs, len(tqIDList), tagWaitingPilots))
        self.log.info('Queues: ', self.queueDict.keys())
        # if tagWaitingPilots >= totalWaitingJobs:
        #  self.log.info( 'No more pilots to be submitted in this cycle' )
        #  return S_OK()

        result = self.siteClient.getUsableSites()
        if not result['OK']:
            return result
        siteMaskList = result['Value']

        queues = self.queueDict.keys()
        totalSubmittedPilots = 0
        matchedQueues = 0
        for queue in queues:

            # Check if the queue failed previously: a failed queue is skipped
            # until its counter wraps around the cycle factor.
            failedCount = self.failedQueues[queue] % self.failedQueueCycleFactor
            if failedCount != 0:
                self.log.warn("%s queue failed recently, skipping %d cycles" %
                              (queue, self.failedQueueCycleFactor - failedCount))
                self.failedQueues[queue] += 1
                continue

            ce = self.queueDict[queue]['CE']
            ceName = self.queueDict[queue]['CEName']
            ceType = self.queueDict[queue]['CEType']
            queueName = self.queueDict[queue]['QueueName']
            siteName = self.queueDict[queue]['Site']
            queueTags = self.queueDict[queue]['ParametersDict']['Tag']
            siteMask = siteName in siteMaskList
            processorTags = []

            # Check the status of the Site
            result = self.siteClient.getUsableSites(siteName)
            if not result['OK']:
                self.log.error("Can not get the status of site %s: %s" %
                               (siteName, result['Message']))
                continue
            if siteName not in result.get('Value', []):
                self.log.info("site %s is not active" % siteName)
                continue

            if self.rssFlag:
                # Check the status of the ComputingElement
                result = self.rssClient.getElementStatus(
                    ceName, "ComputingElement")
                if not result['OK']:
                    self.log.error(
                        "Can not get the status of computing element",
                        " %s: %s" % (siteName, result['Message']))
                    continue
                # An empty answer is treated like an unusable resource.
                ceStatus = None
                if result['Value']:
                    # get the value of the status
                    ceStatus = result['Value'][ceName]['all']

                if ceStatus not in ('Active', 'Degraded'):
                    self.log.verbose(
                        "Skipping computing element %s at %s: resource not usable"
                        % (ceName, siteName))
                    continue

            # Tags of this queue that describe multi-processor capabilities
            for tag in queueTags:
                if re.match(r'^[0-9]+Processors$', tag):
                    processorTags.append(tag)
            if 'WholeNode' in queueTags:
                processorTags.append('WholeNode')

            if not anySite and siteName not in jobSites:
                self.log.verbose(
                    "Skipping queue %s at %s: no workload expected" %
                    (queueName, siteName))
                continue
            if not siteMask and siteName not in testSites:
                self.log.verbose(
                    "Skipping queue %s at site %s not in the mask" %
                    (queueName, siteName))
                continue

            if 'CPUTime' in self.queueDict[queue]['ParametersDict']:
                queueCPUTime = int(
                    self.queueDict[queue]['ParametersDict']['CPUTime'])
            else:
                self.log.warn(
                    'CPU time limit is not specified for queue %s, skipping...'
                    % queue)
                continue
            if queueCPUTime > self.maxQueueLength:
                queueCPUTime = self.maxQueueLength

            # Prepare the queue description to look for eligible jobs
            ceDict = ce.getParameterDict()
            ceDict['GridCE'] = ceName
            # if not siteMask and 'Site' in ceDict:
            #  self.log.info( 'Site not in the mask %s' % siteName )
            #  self.log.info( 'Removing "Site" from matching Dict' )
            #  del ceDict[ 'Site' ]
            if not siteMask:
                ceDict['JobType'] = "Test"
            if self.vo:
                ceDict['Community'] = self.vo
            if self.voGroups:
                ceDict['OwnerGroup'] = self.voGroups

            # This is a hack to get rid of !
            ceDict['SubmitPool'] = self.defaultSubmitPools

            if self.checkPlatform:
                platform = self.queueDict[queue]['Platform']
                result = self.resourcesModule.getCompatiblePlatforms(platform)
                if not result['OK']:
                    continue
                ceDict['Platform'] = result['Value']

            ceDict['Tag'] = queueTags
            # Get the number of eligible jobs for the target site/queue
            result = matcherClient.getMatchingTaskQueues(ceDict)
            if not result['OK']:
                self.log.error(
                    'Could not retrieve TaskQueues from TaskQueueDB',
                    result['Message'])
                return result
            taskQueueDict = result['Value']
            if not taskQueueDict:
                self.log.verbose('No matching TQs found for %s' % queue)
                continue

            matchedQueues += 1
            totalTQJobs = 0
            totalTQJobsByProcessors = {}
            tqIDList = taskQueueDict.keys()
            tqIDListByProcessors = {}
            for tq in taskQueueDict:
                if 'Tags' not in taskQueueDict[tq]:
                    # skip non multiprocessor tqs
                    continue
                for tag in taskQueueDict[tq]['Tags']:
                    if tag in processorTags:
                        tqIDListByProcessors.setdefault(tag, [])
                        tqIDListByProcessors[tag].append(tq)

                        totalTQJobsByProcessors.setdefault(tag, 0)
                        totalTQJobsByProcessors[tag] += taskQueueDict[tq][
                            'Jobs']

                totalTQJobs += taskQueueDict[tq]['Jobs']

            self.log.verbose(
                '%d job(s) from %d task queue(s) are eligible for %s queue' %
                (totalTQJobs, len(tqIDList), queue))

            queueSubmittedPilots = 0
            for tag in tqIDListByProcessors:

                self.log.verbose("Try to submit pilots for Tag=%s (TQs=%s)" %
                                 (tag, tqIDListByProcessors[tag]))

                # Number of processors requested by the tag; -1 stands for WholeNode
                processors = 1

                m = re.match(r'^(?P<processors>[0-9]+)Processors$', tag)
                if m:
                    processors = int(m.group('processors'))
                if tag == 'WholeNode':
                    processors = -1

                tagTQJobs = totalTQJobsByProcessors[tag]
                tagTqIDList = tqIDListByProcessors[tag]

                # Get the number of already waiting pilots for these task queues
                tagWaitingPilots = 0
                if self.pilotWaitingFlag:
                    result = pilotAgentsDB.countPilots(
                        {
                            'TaskQueueID': tagTqIDList,
                            'Status': WAITING_PILOT_STATUS
                        }, None)
                    if not result['OK']:
                        self.log.error(
                            'Failed to get Number of Waiting pilots',
                            result['Message'])
                        tagWaitingPilots = 0
                    else:
                        tagWaitingPilots = result['Value']
                        self.log.verbose(
                            'Waiting Pilots for TaskQueue %s:' % tagTqIDList,
                            tagWaitingPilots)
                if tagWaitingPilots >= tagTQJobs:
                    self.log.verbose(
                        "%d waiting pilots already for all the available jobs"
                        % tagWaitingPilots)
                    continue

                self.log.verbose(
                    "%d waiting pilots for the total of %d eligible jobs for %s"
                    % (tagWaitingPilots, tagTQJobs, queue))

                # Get the working proxy
                cpuTime = queueCPUTime + 86400
                self.log.verbose("Getting pilot proxy for %s/%s %d long" %
                                 (self.pilotDN, self.pilotGroup, cpuTime))
                result = gProxyManager.getPilotProxyFromDIRACGroup(
                    self.pilotDN, self.pilotGroup, cpuTime)
                if not result['OK']:
                    return result
                self.proxy = result['Value']
                ce.setProxy(self.proxy, cpuTime - 60)

                # Get the number of available slots on the target site/queue
                totalSlots = self.getQueueSlots(queue, False)
                if totalSlots == 0:
                    self.log.debug('%s: No slots available' % queue)
                    continue

                # Note: comparing slots to job numbers is not accurate in multiprocessor case.
                #       This could lead to over submission.
                pilotsToSubmit = max(
                    0, min(totalSlots, tagTQJobs - tagWaitingPilots))
                self.log.info(
                    '%s: Slots=%d, TQ jobs=%d, Pilots: waiting %d, to submit=%d'
                    % (queue, totalSlots, tagTQJobs, tagWaitingPilots,
                       pilotsToSubmit))

                # Limit the number of pilots to submit to MAX_PILOTS_TO_SUBMIT
                pilotsToSubmit = min(
                    self.maxPilotsToSubmit - queueSubmittedPilots,
                    pilotsToSubmit)

                while pilotsToSubmit > 0:
                    self.log.info('Going to submit %d pilots to %s queue' %
                                  (pilotsToSubmit, queue))

                    bundleProxy = self.queueDict[queue].get(
                        'BundleProxy', False)
                    jobExecDir = ''
                    jobExecDir = self.queueDict[queue]['ParametersDict'].get(
                        'JobExecDir', jobExecDir)

                    # NOTE(review): the argument lists of the next two calls were
                    # truncated; the keywords below are assumed - confirm against
                    # getExecutable() and the CE submitJob() signature.
                    executable, pilotSubmissionChunk = self.getExecutable(
                        queue,
                        pilotsToSubmit,
                        bundleProxy=bundleProxy,
                        jobExecDir=jobExecDir,
                        processors=processors)
                    result = ce.submitJob(executable,
                                          '',
                                          pilotSubmissionChunk,
                                          processors=processors)
                    # ## FIXME: The condor thing only transfers the file with some
                    # ## delay, so when we unlink here the script is gone
                    # ## FIXME 2: but at some time we need to clean up the pilot wrapper scripts...
                    if ceType != 'HTCondorCE':
                        os.unlink(executable)
                    if not result['OK']:
                        self.log.error(
                            'Failed submission to queue %s:\n' % queue,
                            result['Message'])
                        # Stop submitting to this queue and mark it as failed.
                        pilotsToSubmit = 0
                        self.failedQueues[queue] += 1
                        continue

                    pilotsToSubmit = pilotsToSubmit - pilotSubmissionChunk
                    queueSubmittedPilots += pilotSubmissionChunk
                    # Add pilots to the PilotAgentsDB assign pilots to TaskQueue proportionally to the
                    # task queue priorities
                    pilotList = result['Value']
                    self.queueSlots[queue]['AvailableSlots'] -= len(pilotList)
                    totalSubmittedPilots += len(pilotList)
                    self.log.info('Submitted %d pilots to %s@%s' %
                                  (len(pilotList), queueName, ceName))
                    stampDict = {}
                    if 'PilotStampDict' in result:
                        stampDict = result['PilotStampDict']
                    # Cumulative priorities: a random point in [0, sumPriority)
                    # selects the first task queue whose threshold exceeds it.
                    tqPriorityList = []
                    sumPriority = 0.
                    for tq in tagTqIDList:
                        sumPriority += taskQueueDict[tq]['Priority']
                        tqPriorityList.append((tq, sumPriority))
                    pilotsByTQ = {}
                    for pilotID in pilotList:
                        rndm = random.random() * sumPriority
                        # Default to the last task queue so that tqID is always
                        # bound, even when all priorities are zero.
                        tqID = tqPriorityList[-1][0]
                        for tq, prio in tqPriorityList:
                            if rndm < prio:
                                tqID = tq
                                break
                        pilotsByTQ.setdefault(tqID, []).append(pilotID)

                    for tqID, tqPilots in pilotsByTQ.items():
                        result = pilotAgentsDB.addPilotTQReference(
                            tqPilots, tqID, self.pilotDN, self.pilotGroup,
                            self.localhost, ceType, stampDict)
                        if not result['OK']:
                            self.log.error(
                                'Failed add pilots to the PilotAgentsDB: ',
                                result['Message'])
                            continue
                        for pilot in tqPilots:
                            result = pilotAgentsDB.setPilotStatus(
                                pilot, 'Submitted', ceName,
                                'Successfully submitted by the SiteDirector',
                                siteName, queueName)
                            if not result['OK']:
                                self.log.error('Failed to set pilot status: ',
                                               result['Message'])

        self.log.info(
            "%d pilots submitted in total in this cycle, %d matched queues" %
            (totalSubmittedPilots, matchedQueues))
        return S_OK()
예제 #24
  def submitJobs(self):
    """Go through the defined computing elements and submit pilots if necessary.

    The Matcher is first asked whether any task queue matches the overall
    requirements of this director. Then, for every configured queue that is
    usable, the eligible jobs are counted per processor tag, the pilots already
    waiting are subtracted, and the remainder (bounded by the free slots and
    ``self.maxPilotsToSubmit``) is submitted and recorded through
    ``pilotAgentsDB``.

    :return: ``S_OK()`` when the cycle completes (also when there is no work),
             otherwise the failed result of the call that aborted the cycle
    """
    # Local import: only needed to remove the pilot wrapper script below.
    import os

    queues = self.queueDict.keys()

    # Check that there is some work at all
    setup = CSGlobals.getSetup()
    tqDict = {'Setup': setup,
              'CPUTime': 9999999,
              'SubmitPool': self.defaultSubmitPools}
    if self.vo:
      tqDict['Community'] = self.vo
    if self.voGroups:
      tqDict['OwnerGroup'] = self.voGroups

    if self.checkPlatform:
      result = self.resourcesModule.getCompatiblePlatforms(self.platforms)
      if not result['OK']:
        return result
      tqDict['Platform'] = result['Value']
    tqDict['Site'] = self.sites
    tags = []
    for queue in queues:
      tags += self.queueDict[queue]['ParametersDict']['Tag']
    tqDict['Tag'] = list(set(tags))

    self.log.verbose('Checking overall TQ availability with requirements')

    matcherClient = MatcherClient()
    result = matcherClient.getMatchingTaskQueues(tqDict)
    if not result['OK']:
      return result
    if not result['Value']:
      self.log.verbose('No Waiting jobs suitable for the director')
      return S_OK()

    # Collect the sites for which work exists: jobSites for any job,
    # testSites for task queues that declare job types.
    jobSites = set()
    anySite = False
    testSites = set()
    totalWaitingJobs = 0
    for tqID in result['Value']:
      tqInfo = result['Value'][tqID]
      if "Sites" in tqInfo:
        for site in tqInfo['Sites']:
          if site.lower() != 'any':
            jobSites.add(site)
          else:
            anySite = True
      else:
        anySite = True
      if "JobTypes" in tqInfo:
        if "Sites" in tqInfo:
          for site in tqInfo['Sites']:
            if site.lower() != 'any':
              testSites.add(site)
      totalWaitingJobs += tqInfo['Jobs']

    tqIDList = list(result['Value'].keys())
    # NOTE(review): the second argument was truncated; None is assumed - confirm
    # against the countPilots() signature.
    result = pilotAgentsDB.countPilots({'TaskQueueID': tqIDList,
                                        'Status': WAITING_PILOT_STATUS},
                                       None)
    tagWaitingPilots = 0
    if result['OK']:
      tagWaitingPilots = result['Value']
    self.log.info('Total %d jobs in %d task queues with %d waiting pilots' %
                  (totalWaitingJobs, len(tqIDList), tagWaitingPilots))
    self.log.info('Queues: ', self.queueDict.keys())
    # if tagWaitingPilots >= totalWaitingJobs:
    #  self.log.info( 'No more pilots to be submitted in this cycle' )
    #  return S_OK()

    result = self.siteClient.getUsableSites()
    if not result['OK']:
      return result
    siteMaskList = result['Value']

    queues = self.queueDict.keys()
    totalSubmittedPilots = 0
    matchedQueues = 0
    for queue in queues:

      # Check if the queue failed previously: a failed queue is skipped
      # until its counter wraps around the cycle factor.
      failedCount = self.failedQueues[queue] % self.failedQueueCycleFactor
      if failedCount != 0:
        self.log.warn("%s queue failed recently, skipping %d cycles" %
                      (queue, self.failedQueueCycleFactor - failedCount))
        self.failedQueues[queue] += 1
        continue

      ce = self.queueDict[queue]['CE']
      ceName = self.queueDict[queue]['CEName']
      ceType = self.queueDict[queue]['CEType']
      queueName = self.queueDict[queue]['QueueName']
      siteName = self.queueDict[queue]['Site']
      queueTags = self.queueDict[queue]['ParametersDict']['Tag']
      siteMask = siteName in siteMaskList
      processorTags = []

      # Check the status of the Site
      result = self.siteClient.getUsableSites(siteName)
      if not result['OK']:
        self.log.error("Can not get the status of site %s: %s" %
                       (siteName, result['Message']))
        continue
      if siteName not in result.get('Value', []):
        self.log.info("site %s is not active" % siteName)
        continue

      if self.rssFlag:
        # Check the status of the ComputingElement
        result = self.rssClient.getElementStatus(ceName, "ComputingElement")
        if not result['OK']:
          self.log.error("Can not get the status of computing element",
                         " %s: %s" % (siteName, result['Message']))
          continue
        # An empty answer is treated like an unusable resource.
        ceStatus = None
        if result['Value']:
          # get the value of the status
          ceStatus = result['Value'][ceName]['all']

        if ceStatus not in ('Active', 'Degraded'):
          self.log.verbose(
              "Skipping computing element %s at %s: resource not usable" % (ceName, siteName))
          continue

      # Tags of this queue that describe multi-processor capabilities
      for tag in queueTags:
        if re.match(r'^[0-9]+Processors$', tag):
          processorTags.append(tag)
      if 'WholeNode' in queueTags:
        processorTags.append('WholeNode')

      if not anySite and siteName not in jobSites:
        self.log.verbose("Skipping queue %s at %s: no workload expected" % (queueName, siteName))
        continue
      if not siteMask and siteName not in testSites:
        self.log.verbose("Skipping queue %s at site %s not in the mask" % (queueName, siteName))
        continue

      if 'CPUTime' in self.queueDict[queue]['ParametersDict']:
        queueCPUTime = int(self.queueDict[queue]['ParametersDict']['CPUTime'])
      else:
        self.log.warn('CPU time limit is not specified for queue %s, skipping...' % queue)
        continue
      if queueCPUTime > self.maxQueueLength:
        queueCPUTime = self.maxQueueLength

      # Prepare the queue description to look for eligible jobs
      ceDict = ce.getParameterDict()
      ceDict['GridCE'] = ceName
      # if not siteMask and 'Site' in ceDict:
      #  self.log.info( 'Site not in the mask %s' % siteName )
      #  self.log.info( 'Removing "Site" from matching Dict' )
      #  del ceDict[ 'Site' ]
      if not siteMask:
        ceDict['JobType'] = "Test"
      if self.vo:
        ceDict['Community'] = self.vo
      if self.voGroups:
        ceDict['OwnerGroup'] = self.voGroups

      # This is a hack to get rid of !
      ceDict['SubmitPool'] = self.defaultSubmitPools

      if self.checkPlatform:
        platform = self.queueDict[queue]['Platform']
        result = self.resourcesModule.getCompatiblePlatforms(platform)
        if not result['OK']:
          continue
        ceDict['Platform'] = result['Value']

      ceDict['Tag'] = queueTags
      # Get the number of eligible jobs for the target site/queue
      result = matcherClient.getMatchingTaskQueues(ceDict)
      if not result['OK']:
        self.log.error('Could not retrieve TaskQueues from TaskQueueDB', result['Message'])
        return result
      taskQueueDict = result['Value']
      if not taskQueueDict:
        self.log.verbose('No matching TQs found for %s' % queue)
        continue

      matchedQueues += 1
      totalTQJobs = 0
      totalTQJobsByProcessors = {}
      tqIDList = taskQueueDict.keys()
      tqIDListByProcessors = {}
      for tq in taskQueueDict:
        if 'Tags' not in taskQueueDict[tq]:
          # skip non multiprocessor tqs
          continue
        for tag in taskQueueDict[tq]['Tags']:
          if tag in processorTags:
            tqIDListByProcessors.setdefault(tag, [])
            tqIDListByProcessors[tag].append(tq)

            totalTQJobsByProcessors.setdefault(tag, 0)
            totalTQJobsByProcessors[tag] += taskQueueDict[tq]['Jobs']

        totalTQJobs += taskQueueDict[tq]['Jobs']

      self.log.verbose('%d job(s) from %d task queue(s) are eligible for %s queue' % (totalTQJobs,
                                                                                      len(tqIDList), queue))

      queueSubmittedPilots = 0
      for tag in tqIDListByProcessors:

        self.log.verbose("Try to submit pilots for Tag=%s (TQs=%s)" % (tag, tqIDListByProcessors[tag]))

        # Number of processors requested by the tag; -1 stands for WholeNode
        processors = 1

        m = re.match(r'^(?P<processors>[0-9]+)Processors$', tag)
        if m:
          processors = int(m.group('processors'))
        if tag == 'WholeNode':
          processors = -1

        tagTQJobs = totalTQJobsByProcessors[tag]
        tagTqIDList = tqIDListByProcessors[tag]

        # Get the number of already waiting pilots for these task queues
        tagWaitingPilots = 0
        if self.pilotWaitingFlag:
          # NOTE(review): second argument assumed to be None, as above - confirm.
          result = pilotAgentsDB.countPilots({'TaskQueueID': tagTqIDList,
                                              'Status': WAITING_PILOT_STATUS},
                                             None)
          if not result['OK']:
            self.log.error('Failed to get Number of Waiting pilots', result['Message'])
            tagWaitingPilots = 0
          else:
            tagWaitingPilots = result['Value']
            self.log.verbose('Waiting Pilots for TaskQueue %s:' % tagTqIDList, tagWaitingPilots)
        if tagWaitingPilots >= tagTQJobs:
          self.log.verbose("%d waiting pilots already for all the available jobs" % tagWaitingPilots)
          continue

        self.log.verbose("%d waiting pilots for the total of %d eligible jobs for %s" % (tagWaitingPilots,
                                                                                         tagTQJobs, queue))

        # Get the working proxy
        cpuTime = queueCPUTime + 86400
        self.log.verbose("Getting pilot proxy for %s/%s %d long" % (self.pilotDN, self.pilotGroup, cpuTime))
        result = gProxyManager.getPilotProxyFromDIRACGroup(self.pilotDN, self.pilotGroup, cpuTime)
        if not result['OK']:
          return result
        self.proxy = result['Value']
        ce.setProxy(self.proxy, cpuTime - 60)

        # Get the number of available slots on the target site/queue
        totalSlots = self.getQueueSlots(queue, False)
        if totalSlots == 0:
          self.log.debug('%s: No slots available' % queue)
          continue

        # Note: comparing slots to job numbers is not accurate in multiprocessor case.
        #       This could lead to over submission.
        pilotsToSubmit = max(0, min(totalSlots, tagTQJobs - tagWaitingPilots))
        self.log.info('%s: Slots=%d, TQ jobs=%d, Pilots: waiting %d, to submit=%d' %
                      (queue, totalSlots, tagTQJobs, tagWaitingPilots, pilotsToSubmit))

        # Limit the number of pilots to submit to MAX_PILOTS_TO_SUBMIT
        pilotsToSubmit = min(self.maxPilotsToSubmit - queueSubmittedPilots, pilotsToSubmit)

        while pilotsToSubmit > 0:
          self.log.info('Going to submit %d pilots to %s queue' % (pilotsToSubmit, queue))

          bundleProxy = self.queueDict[queue].get('BundleProxy', False)
          jobExecDir = ''
          jobExecDir = self.queueDict[queue]['ParametersDict'].get('JobExecDir', jobExecDir)

          # NOTE(review): the trailing arguments of getExecutable() were truncated;
          # the keywords below are assumed - confirm against its signature.
          executable, pilotSubmissionChunk = self.getExecutable(queue, pilotsToSubmit,
                                                                bundleProxy=bundleProxy,
                                                                jobExecDir=jobExecDir,
                                                                processors=processors)
          result = ce.submitJob(executable, '', pilotSubmissionChunk, processors=processors)
          # ## FIXME: The condor thing only transfers the file with some
          # ## delay, so when we unlink here the script is gone
          # ## FIXME 2: but at some time we need to clean up the pilot wrapper scripts...
          if ceType != 'HTCondorCE':
            os.unlink(executable)
          if not result['OK']:
            self.log.error('Failed submission to queue %s:\n' % queue, result['Message'])
            # Stop submitting to this queue and mark it as failed.
            pilotsToSubmit = 0
            self.failedQueues[queue] += 1
            continue

          pilotsToSubmit = pilotsToSubmit - pilotSubmissionChunk
          queueSubmittedPilots += pilotSubmissionChunk
          # Add pilots to the PilotAgentsDB assign pilots to TaskQueue proportionally to the
          # task queue priorities
          pilotList = result['Value']
          self.queueSlots[queue]['AvailableSlots'] -= len(pilotList)
          totalSubmittedPilots += len(pilotList)
          self.log.info('Submitted %d pilots to %s@%s' % (len(pilotList), queueName, ceName))
          stampDict = {}
          if 'PilotStampDict' in result:
            stampDict = result['PilotStampDict']
          # Cumulative priorities: a random point in [0, sumPriority) selects the
          # first task queue whose threshold exceeds it.
          tqPriorityList = []
          sumPriority = 0.
          for tq in tagTqIDList:
            sumPriority += taskQueueDict[tq]['Priority']
            tqPriorityList.append((tq, sumPriority))
          pilotsByTQ = {}
          for pilotID in pilotList:
            rndm = random.random() * sumPriority
            # Default to the last task queue so that tqID is always bound,
            # even when all priorities are zero.
            tqID = tqPriorityList[-1][0]
            for tq, prio in tqPriorityList:
              if rndm < prio:
                tqID = tq
                break
            pilotsByTQ.setdefault(tqID, []).append(pilotID)

          for tqID, tqPilots in pilotsByTQ.items():
            # NOTE(review): the trailing arguments were truncated; the list below
            # is assumed - confirm against addPilotTQReference().
            result = pilotAgentsDB.addPilotTQReference(tqPilots,
                                                       tqID,
                                                       self.pilotDN,
                                                       self.pilotGroup,
                                                       self.localhost,
                                                       ceType,
                                                       stampDict)
            if not result['OK']:
              self.log.error('Failed add pilots to the PilotAgentsDB: ', result['Message'])
              continue
            for pilot in tqPilots:
              result = pilotAgentsDB.setPilotStatus(pilot, 'Submitted', ceName,
                                                    'Successfully submitted by the SiteDirector',
                                                    siteName, queueName)
              if not result['OK']:
                self.log.error('Failed to set pilot status: ', result['Message'])

    self.log.info(
        "%d pilots submitted in total in this cycle, %d matched queues" %
        (totalSubmittedPilots, matchedQueues))
    return S_OK()
예제 #25
  def submitJobs( self ):
    """ Go through the defined computing elements and submit pilots if necessary

    The Matcher is queried first with the overall requirements of this director;
    if no task queue matches, nothing is submitted. Otherwise the queues of
    self.queueDict are visited in random order. For each queue the number of
    pilots to submit is bounded by the slots reported by the CE, by the matching
    jobs not already covered by waiting pilots and by self.maxPilotsToSubmit.
    Submitted pilots are registered in the PilotAgentsDB against task queues
    drawn at random, weighted by the task queue priorities.

    :return: S_OK() when all queues were processed, otherwise the failed result
             of the call that stopped the cycle
    """

    # Check that there is some work at all
    setup = CSGlobals.getSetup()
    tqDict = { 'Setup': setup,
               'CPUTime': 9999999,
               'SubmitPool': self.defaultSubmitPools }
    if self.vo:
      tqDict['Community'] = self.vo
    if self.voGroups:
      tqDict['OwnerGroup'] = self.voGroups

    result = Resources.getCompatiblePlatforms( self.platforms )
    if not result['OK']:
      return result
    tqDict['Platform'] = result['Value']
    tqDict['Site'] = self.sites

    self.log.verbose( 'Checking overall TQ availability with requirements' )
    self.log.verbose( tqDict )

    rpcMatcher = RPCClient( "WorkloadManagement/Matcher" )
    result = rpcMatcher.getMatchingTaskQueues( tqDict )
    if not result[ 'OK' ]:
      return result
    if not result['Value']:
      self.log.verbose( 'No Waiting jobs suitable for the director' )
      return S_OK()

    # list() gives random.shuffle a real, mutable sequence on any Python version
    queues = list( self.queueDict )
    random.shuffle( queues )
    for queue in queues:
      queueInfo = self.queueDict[queue]
      ce = queueInfo['CE']
      ceName = queueInfo['CEName']
      ceType = queueInfo['CEType']
      queueName = queueInfo['QueueName']
      siteName = queueInfo['Site']
      siteMask = self.siteStatus.isUsableSite( siteName, 'ComputingAccess' )
      platform = queueInfo['Platform']

      if 'CPUTime' not in queueInfo['ParametersDict']:
        self.log.warn( 'CPU time limit is not specified for queue %s, skipping...' % queue )
        continue
      queueCPUTime = int( queueInfo['ParametersDict']['CPUTime'] )
      if queueCPUTime > self.maxQueueLength:
        queueCPUTime = self.maxQueueLength

      # Get the working proxy
      cpuTime = queueCPUTime + 86400

      self.log.verbose( "Getting pilot proxy for %s/%s %d long" % ( self.pilotDN, self.pilotGroup, cpuTime ) )
      result = gProxyManager.getPilotProxyFromDIRACGroup( self.pilotDN, self.pilotGroup, cpuTime )
      if not result['OK']:
        return result
      self.proxy = result['Value']
      ce.setProxy( self.proxy, cpuTime - 60 )

      # Get the number of available slots on the target site/queue
      result = ce.available()
      if not result['OK']:
        self.log.warn( 'Failed to check the availability of queue %s: \n%s' % ( queue, result['Message'] ) )
        # A failed result carries no 'CEInfoDict'/'Value': move on to the next queue
        continue
      ceInfoDict = result['CEInfoDict']
      self.log.info( "CE queue report(%s_%s): Wait=%d, Run=%d, Submitted=%d, Max=%d" % \
                     ( ceName, queueName, ceInfoDict['WaitingJobs'], ceInfoDict['RunningJobs'],
                       ceInfoDict['SubmittedJobs'], ceInfoDict['MaxTotalJobs'] ) )

      totalSlots = result['Value']

      ceDict = ce.getParameterDict()
      ceDict[ 'GridCE' ] = ceName
      if not siteMask and 'Site' in ceDict:
        self.log.info( 'Site not in the mask %s' % siteName )
        self.log.info( 'Removing "Site" from matching Dict' )
        del ceDict[ 'Site' ]
      if self.vo:
        ceDict['Community'] = self.vo
      if self.voGroups:
        ceDict['OwnerGroup'] = self.voGroups

      # This is a hack to get rid of !
      ceDict['SubmitPool'] = self.defaultSubmitPools

      result = Resources.getCompatiblePlatforms( platform )
      if not result['OK']:
        self.log.warn( 'Failed to get compatible platforms for queue %s: %s' % ( queue, result['Message'] ) )
        continue
      ceDict['Platform'] = result['Value']

      # Get the number of eligible jobs for the target site/queue
      result = rpcMatcher.getMatchingTaskQueues( ceDict )
      if not result['OK']:
        self.log.error( 'Could not retrieve TaskQueues from TaskQueueDB', result['Message'] )
        return result
      taskQueueDict = result['Value']
      if not taskQueueDict:
        self.log.info( 'No matching TQs found' )
        continue

      tqIDList = list( taskQueueDict )
      totalTQJobs = 0
      for tq in taskQueueDict:
        totalTQJobs += taskQueueDict[tq]['Jobs']

      # Get the number of already waiting pilots for this queue
      totalWaitingPilots = 0
      if self.pilotWaitingFlag:
        lastUpdateTime = dateTime() - self.pilotWaitingTime * second
        result = pilotAgentsDB.countPilots( { 'TaskQueueID': tqIDList,
                                              'Status': WAITING_PILOT_STATUS },
                                            None, lastUpdateTime )
        if not result['OK']:
          # Best effort: carry on as if no pilot was waiting
          self.log.error( 'Failed to get Number of Waiting pilots', result['Message'] )
        else:
          totalWaitingPilots = result['Value']
          self.log.verbose( 'Waiting Pilots for TaskQueue %s:' % tqIDList, totalWaitingPilots )

      pilotsToSubmit = max( 0, min( totalSlots, totalTQJobs - totalWaitingPilots ) )
      self.log.info( 'Available slots=%d, TQ jobs=%d, Waiting Pilots=%d, Pilots to submit=%d' % \
                              ( totalSlots, totalTQJobs, totalWaitingPilots, pilotsToSubmit ) )

      # Limit the number of pilots to submit to MAX_PILOTS_TO_SUBMIT
      pilotsToSubmit = min( self.maxPilotsToSubmit, pilotsToSubmit )

      while pilotsToSubmit > 0:
        self.log.info( 'Going to submit %d pilots to %s queue' % ( pilotsToSubmit, queue ) )

        bundleProxy = queueInfo.get( 'BundleProxy', False )
        jobExecDir = ''
        if ceType == 'CREAM':
          jobExecDir = '.'
        jobExecDir = queueInfo.get( 'JobExecDir', jobExecDir )
        httpProxy = queueInfo.get( 'HttpProxy', '' )

        result = self.__getExecutable( queue, pilotsToSubmit, bundleProxy, httpProxy, jobExecDir )
        if not result['OK']:
          return result

        executable, pilotSubmissionChunk = result['Value']
        result = ce.submitJob( executable, '', pilotSubmissionChunk )
        os.unlink( executable )
        if not result['OK']:
          self.log.error( 'Failed submission to queue %s:\n' % queue, result['Message'] )
          # Give up on this queue: the while condition ends the loop
          pilotsToSubmit = 0
          continue

        pilotsToSubmit = pilotsToSubmit - pilotSubmissionChunk
        # Add pilots to the PilotAgentsDB assign pilots to TaskQueue proportionally to the
        # task queue priorities
        pilotList = result['Value']
        self.log.info( 'Submitted %d pilots to %s@%s' % ( len( pilotList ), queueName, ceName ) )
        stampDict = result.get( 'PilotStampDict', {} )

        # Cumulative priorities: a uniform draw in [0, sumPriority) falls in the
        # interval of a TQ with a probability proportional to its priority
        tqPriorityList = []
        sumPriority = 0.
        for tq in taskQueueDict:
          sumPriority += taskQueueDict[tq]['Priority']
          tqPriorityList.append( ( tq, sumPriority ) )

        pilotsPerTQ = {}
        for pilotID in pilotList:
          rndm = random.random() * sumPriority
          # Fall back on the last TQ when no interval contains the draw
          # (e.g. all priorities are 0), so that tqID is always defined
          tqID = tqPriorityList[-1][0]
          for tq, prio in tqPriorityList:
            if rndm < prio:
              tqID = tq
              break
          pilotsPerTQ.setdefault( tqID, [] ).append( pilotID )

        for tqID, tqPilots in pilotsPerTQ.items():
          # NOTE(review): the arguments between the pilot list and stampDict were
          # reconstructed (TQ id, owner DN, owner group, broker, grid type,
          # requirements) - confirm against PilotAgentsDB.addPilotTQReference
          result = pilotAgentsDB.addPilotTQReference( tqPilots,
                                                     tqID,
                                                     self.pilotDN,
                                                     self.pilotGroup,
                                                     self.localhost,
                                                     ceType,
                                                     '',
                                                     stampDict )
          if not result['OK']:
            self.log.error( 'Failed add pilots to the PilotAgentsDB: ', result['Message'] )
            continue
          for pilot in tqPilots:
            result = pilotAgentsDB.setPilotStatus( pilot, 'Submitted', ceName,
                                                  'Successfully submitted by the SiteDirector',
                                                  siteName, queueName )
            if not result['OK']:
              self.log.error( 'Failed to set pilot status: ', result['Message'] )

    return S_OK()
예제 #26
 def __init__( self, name = 'Monitoring/MonitoringDB', readOnly = False ):
   """ Initialise the base class and the private state of the object

   :param name: passed as second argument to the base class constructor
   :param readOnly: value stored in the private read-only flag
   """
   # The lower-cased setup name is handed over to the base class as third argument
   lowerCaseSetup = CSGlobals.getSetup().lower()
   super( MonitoringDB, self ).__init__( 'MonitoringDB', name, lowerCaseSetup )
   self.__documents = {}
   self.__readonly = readOnly
예제 #27
  def _getPilotOptions( self, queue, pilotsToSubmit ):
    """ Prepare the pilot options for a submission to the given queue

    :param queue: key of the target queue in self.queueDict
    :param pilotsToSubmit: number of pilots the caller would like to submit
    :return: [ pilotOptions, pilotsToSubmit ] where pilotOptions is the list of
             option strings and pilotsToSubmit the number of pilots, possibly
             changed according to the number of uses granted with the proxy token.
             [ None, None ] is returned when the options can not be prepared.
    """

    queueDict = self.queueDict[queue]['ParametersDict']
    pilotOptions = []

    setup = gConfig.getValue( "/DIRAC/Setup", "unknown" )
    if setup == 'unknown':
      self.log.error( 'Setup is not defined in the configuration' )
      return [ None, None ]
    pilotOptions.append( '-S %s' % setup )
    opsHelper = Operations.Operations( group = self.pilotGroup, setup = setup )

    #Installation defined?
    installationName = opsHelper.getValue( "Pilot/Installation", "" )
    if installationName:
      pilotOptions.append( '-V %s' % installationName )

    #Project defined?
    projectName = opsHelper.getValue( "Pilot/Project", "" )
    if projectName:
      pilotOptions.append( '-l %s' % projectName )
      self.log.info( 'DIRAC project will be installed by pilots' )

    #Request a release
    diracVersion = opsHelper.getValue( "Pilot/Version", [] )
    if not diracVersion:
      self.log.error( 'Pilot/Version is not defined in the configuration' )
      return [ None, None ]
    # diracVersion is a list of accepted releases
    pilotOptions.append( '-r %s' % ','.join( str( it ) for it in diracVersion ) )

    ownerDN = self.pilotDN
    ownerGroup = self.pilotGroup
    # Request token for maximum pilot efficiency
    result = gProxyManager.requestToken( ownerDN, ownerGroup, pilotsToSubmit * self.maxJobsInFillMode )
    if not result[ 'OK' ]:
      self.log.error( 'Invalid proxy token request', result['Message'] )
      return [ None, None ]
    ( token, numberOfUses ) = result[ 'Value' ]
    pilotOptions.append( '-o /Security/ProxyToken=%s' % token )

    # Use Filling mode
    jobsPerPilot = min( numberOfUses, self.maxJobsInFillMode )
    if jobsPerPilot <= 0:
      # Nothing can be run with this token; also avoids the division by zero below
      self.log.error( 'Proxy token does not allow to run any job', 'number of uses: %s' % numberOfUses )
      return [ None, None ]
    pilotOptions.append( '-M %s' % jobsPerPilot )

    # Since each pilot will execute min( numberOfUses, self.maxJobsInFillMode )
    # with numberOfUses tokens we can submit at most:
    #    numberOfUses / min( numberOfUses, self.maxJobsInFillMode )
    # pilots. Floor division keeps the result an integer on any Python version.
    newPilotsToSubmit = numberOfUses // jobsPerPilot
    if newPilotsToSubmit != pilotsToSubmit:
      self.log.info( 'Number of pilots to submit is changed to %d after getting the proxy token' % newPilotsToSubmit )
      pilotsToSubmit = newPilotsToSubmit
    # Debug
    if self.pilotLogLevel.lower() == 'debug':
      pilotOptions.append( '-d' )
    # CS Servers
    csServers = gConfig.getValue( "/DIRAC/Configuration/Servers", [] )
    pilotOptions.append( '-C %s' % ",".join( csServers ) )

    # DIRAC Extensions to be used in pilots
    pilotExtensionsList = opsHelper.getValue( "Pilot/Extensions", [] )
    extensionsList = []
    if pilotExtensionsList:
      # A first element 'None' explicitly asks for no extension at all
      if pilotExtensionsList[0] != 'None':
        extensionsList = pilotExtensionsList
    else:
      # Nothing configured for the pilots: use the extensions known to the CS
      extensionsList = CSGlobals.getCSExtensions()
    if extensionsList:
      pilotOptions.append( '-e %s' % ",".join( extensionsList ) )

    # Requested CPU time
    pilotOptions.append( '-T %s' % queueDict['CPUTime'] )
    # CEName
    pilotOptions.append( '-N %s' % self.queueDict[queue]['CEName'] )
    # Queue
    pilotOptions.append( '-Q %s' % self.queueDict[queue]['QueueName'] )
    # SiteName
    pilotOptions.append( '-n %s' % queueDict['Site'] )
    if 'ClientPlatform' in queueDict:
      pilotOptions.append( "-p '%s'" % queueDict['ClientPlatform'] )

    if 'SharedArea' in queueDict:
      pilotOptions.append( "-o '/LocalSite/SharedArea=%s'" % queueDict['SharedArea'] )

    if "ExtraPilotOptions" in queueDict:
      pilotOptions.append( queueDict['ExtraPilotOptions'] )

    # Hack
    if self.defaultSubmitPools:
      pilotOptions.append( '-o /Resources/Computing/CEDefaults/SubmitPool=%s' % self.defaultSubmitPools )

    if "Tag" in queueDict:
      # NOTE(review): assumes queueDict['Tag'] is a list of strings - confirm
      tagString = ','.join( queueDict['Tag'] )
      pilotOptions.append( '-o /Resources/Computing/CEDefaults/Tag=%s' % tagString )

    if self.group:
      pilotOptions.append( '-G %s' % self.group )

    return [ pilotOptions, pilotsToSubmit ]
예제 #28
  def beginExecution( self ):
    """ Read the agent options and prepare the description of the queues to serve

    The community (VO), the user group(s), the pilot credentials and the various
    submission parameters are taken from the agent options; the queues are then
    obtained with Resources.getQueues() and passed to self.getQueues().

    :return: S_OK() or the failed result of the first call that did not succeed
    """

    self.gridEnv = self.am_getOption( "GridEnv", getGridEnv() )
    # The SiteDirector is for a particular user community
    self.vo = self.am_getOption( "VO", '' )
    if not self.vo:
      self.vo = self.am_getOption( "Community", '' )
    if not self.vo:
      self.vo = CSGlobals.getVO()
    # The SiteDirector is for a particular user group
    self.group = self.am_getOption( "Group", '' )
    # self.voGroups contain all the eligible user groups for pilots submitted by this SiteDirector
    self.voGroups = []

    # Choose the group for which pilots will be submitted. This is a hack until
    # we will be able to match pilots to VOs.
    if not self.group:
      if self.vo:
        result = Registry.getGroupsForVO( self.vo )
        if not result['OK']:
          return result
        for group in result['Value']:
          if 'NormalUser' in Registry.getPropertiesForGroup( group ):
            self.voGroups.append( group )
    else:
      # An explicitly configured group is the only eligible one
      self.voGroups = [ self.group ]

    result = findGenericPilotCredentials( vo = self.vo )
    if not result[ 'OK' ]:
      return result
    self.pilotDN, self.pilotGroup = result[ 'Value' ]
    self.pilotDN = self.am_getOption( "PilotDN", self.pilotDN )
    self.pilotGroup = self.am_getOption( "PilotGroup", self.pilotGroup )

    self.platforms = []
    self.sites = []
    self.defaultSubmitPools = ''
    if self.group:
      self.defaultSubmitPools = Registry.getGroupOption( self.group, 'SubmitPools', '' )
    elif self.vo:
      self.defaultSubmitPools = Registry.getVOOption( self.vo, 'SubmitPools', '' )

    self.pilot = self.am_getOption( 'PilotScript', DIRAC_PILOT )
    self.install = DIRAC_INSTALL
    self.extraModules = self.am_getOption( 'ExtraPilotModules', [] ) + DIRAC_MODULES
    self.workingDirectory = self.am_getOption( 'WorkDirectory' )
    self.maxQueueLength = self.am_getOption( 'MaxQueueLength', 86400 * 3 )
    self.pilotLogLevel = self.am_getOption( 'PilotLogLevel', 'INFO' )
    self.maxJobsInFillMode = self.am_getOption( 'MaxJobsInFillMode', self.maxJobsInFillMode )
    self.maxPilotsToSubmit = self.am_getOption( 'MaxPilotsToSubmit', self.maxPilotsToSubmit )
    self.pilotWaitingFlag = self.am_getOption( 'PilotWaitingFlag', True )
    self.pilotWaitingTime = self.am_getOption( 'MaxPilotWaitingTime', 3600 )
    self.failedQueueCycleFactor = self.am_getOption( 'FailedQueueCycleFactor', 10 )
    self.pilotStatusUpdateCycleFactor = self.am_getOption( 'PilotStatusUpdateCycleFactor', 10 )

    # Flags
    self.updateStatus = self.am_getOption( 'UpdatePilotStatus', True )
    self.getOutput = self.am_getOption( 'GetPilotOutput', True )
    self.sendAccounting = self.am_getOption( 'SendPilotAccounting', True )

    # Get the site description dictionary.
    # None stands for "no restriction" in the selections below
    siteNames = None
    if not self.am_getOption( 'Site', 'Any' ).lower() == "any":
      siteNames = self.am_getOption( 'Site', [] )
      if not siteNames:
        siteNames = None
    ceTypes = None
    if not self.am_getOption( 'CETypes', 'Any' ).lower() == "any":
      ceTypes = self.am_getOption( 'CETypes', [] )
    ces = None
    if not self.am_getOption( 'CEs', 'Any' ).lower() == "any":
      ces = self.am_getOption( 'CEs', [] )
      if not ces:
        ces = None
    result = Resources.getQueues( community = self.vo,
                                  siteList = siteNames,
                                  ceList = ces,
                                  ceTypeList = ceTypes,
                                  mode = 'Direct' )
    if not result['OK']:
      return result
    resourceDict = result['Value']
    result = self.getQueues( resourceDict )
    if not result['OK']:
      return result

    if self.updateStatus:
      self.log.always( 'Pilot status update requested' )
    if self.getOutput:
      self.log.always( 'Pilot output retrieval requested' )
    if self.sendAccounting:
      self.log.always( 'Pilot accounting sending requested' )

    self.log.always( 'Sites:', siteNames )
    self.log.always( 'CETypes:', ceTypes )
    self.log.always( 'CEs:', ces )
    self.log.always( 'PilotDN:', self.pilotDN )
    self.log.always( 'PilotGroup:', self.pilotGroup )
    self.log.always( 'MaxPilotsToSubmit:', self.maxPilotsToSubmit )
    self.log.always( 'MaxJobsInFillMode:', self.maxJobsInFillMode )

    self.localhost = socket.getfqdn()
    self.proxy = ''

    # The list of served queues is reported on the first pass only
    if self.firstPass:
      if self.queueDict:
        self.log.always( "Agent will serve queues:" )
        for queue in self.queueDict:
          self.log.always( "Site: %s, CE: %s, Queue: %s" % ( self.queueDict[queue]['Site'],
                                                           self.queueDict[queue]['CEName'],
                                                           queue ) )
    self.firstPass = False
    return S_OK()
예제 #29
    def beginExecution(self):
        """Read the agent options and prepare the description of the queues to serve.

        The community (VO), the user group(s), the pilot credentials and the
        various submission parameters are taken from the agent options; the
        queues are then obtained with Resources.getQueues() and passed to
        self.getQueues().

        :return: S_OK() or the failed result of the first call that did not succeed
        """

        self.gridEnv = self.am_getOption("GridEnv", getGridEnv())
        # The SiteDirector is for a particular user community
        self.vo = self.am_getOption("Community", "")
        if not self.vo:
            self.vo = CSGlobals.getVO()
        # The SiteDirector is for a particular user group
        self.group = self.am_getOption("Group", "")
        # self.voGroups contain all the eligible user groups for pilots submitted by this SiteDirector
        self.voGroups = []

        # Choose the group for which pilots will be submitted. This is a hack until
        # we will be able to match pilots to VOs.
        if not self.group:
            if self.vo:
                result = Registry.getGroupsForVO(self.vo)
                if not result["OK"]:
                    return result
                for group in result["Value"]:
                    if "NormalUser" in Registry.getPropertiesForGroup(group):
                        self.voGroups.append(group)
        else:
            # An explicitly configured group is the only eligible one
            self.voGroups = [self.group]

        result = findGenericPilotCredentials(vo=self.vo)
        if not result["OK"]:
            return result
        self.pilotDN, self.pilotGroup = result["Value"]
        self.pilotDN = self.am_getOption("PilotDN", self.pilotDN)
        self.pilotGroup = self.am_getOption("PilotGroup", self.pilotGroup)

        self.platforms = []
        self.sites = []
        self.defaultSubmitPools = ""
        if self.group:
            self.defaultSubmitPools = Registry.getGroupOption(self.group, "SubmitPools", "")
        elif self.vo:
            self.defaultSubmitPools = Registry.getVOOption(self.vo, "SubmitPools", "")

        self.pilot = self.am_getOption("PilotScript", DIRAC_PILOT)
        self.install = DIRAC_INSTALL
        self.workingDirectory = self.am_getOption("WorkDirectory")
        self.maxQueueLength = self.am_getOption("MaxQueueLength", 86400 * 3)
        self.pilotLogLevel = self.am_getOption("PilotLogLevel", "INFO")
        self.maxJobsInFillMode = self.am_getOption("MaxJobsInFillMode", self.maxJobsInFillMode)
        self.maxPilotsToSubmit = self.am_getOption("MaxPilotsToSubmit", self.maxPilotsToSubmit)
        self.pilotWaitingFlag = self.am_getOption("PilotWaitingFlag", True)
        self.pilotWaitingTime = self.am_getOption("MaxPilotWaitingTime", 7200)

        # Flags
        self.updateStatus = self.am_getOption("UpdatePilotStatus", True)
        self.getOutput = self.am_getOption("GetPilotOutput", True)
        self.sendAccounting = self.am_getOption("SendPilotAccounting", True)

        # Get the site description dictionary.
        # None stands for "no restriction" in the selections below
        siteNames = None
        if not self.am_getOption("Site", "Any").lower() == "any":
            siteNames = self.am_getOption("Site", [])
        ceTypes = None
        if not self.am_getOption("CETypes", "Any").lower() == "any":
            ceTypes = self.am_getOption("CETypes", [])
        ces = None
        if not self.am_getOption("CEs", "Any").lower() == "any":
            ces = self.am_getOption("CEs", [])
        result = Resources.getQueues(
            community=self.vo, siteList=siteNames, ceList=ces, ceTypeList=ceTypes, mode="Direct"
        )
        if not result["OK"]:
            return result
        resourceDict = result["Value"]
        result = self.getQueues(resourceDict)
        if not result["OK"]:
            return result

        if self.updateStatus:
            self.log.always("Pilot status update requested")
        if self.getOutput:
            self.log.always("Pilot output retrieval requested")
        if self.sendAccounting:
            self.log.always("Pilot accounting sending requested")

        self.log.always("Sites:", siteNames)
        self.log.always("CETypes:", ceTypes)
        self.log.always("CEs:", ces)
        self.log.always("PilotDN:", self.pilotDN)
        self.log.always("PilotGroup:", self.pilotGroup)
        self.log.always("MaxPilotsToSubmit:", self.maxPilotsToSubmit)
        self.log.always("MaxJobsInFillMode:", self.maxJobsInFillMode)

        self.localhost = socket.getfqdn()
        self.proxy = ""

        if self.queueDict:
            self.log.always("Agent will serve queues:")
            for queue in self.queueDict:
                self.log.always(
                    "Site: %s, CE: %s, Queue: %s"
                    % (self.queueDict[queue]["Site"], self.queueDict[queue]["CEName"], queue)
                )

        return S_OK()
예제 #30
    def getTicketsList(self, name, startDate=None, endDate=None):
        """ Return tickets of entity in name

        The GGUS web service is queried for the tickets whose affected site is
        ``name`` and whose affected VO is the first CS extension in lower case.

        :param name: should be the name of the site
        :param startDate: starting date (optional), appended to the query as a
                          lower bound on the date of creation
        :param endDate: end date (optional), appended to the query as an upper
                        bound on the date of creation
        :return: S_OK( ( self.statusCount, ggusURL, self.shortDescription ) )
        """
        self.statusCount = {}
        self.shortDescription = {}

        # create client instance using GGUS wsdl:
        gclient = Client("https://prod-ars.ggus.eu/arsys/WSDL/public/prod-ars/GGUS")
        authInfo = gclient.factory.create("AuthenticationInfo")
        # NOTE(review): the credentials below appear masked in this copy of the code
        authInfo.userName = "******"
        authInfo.password = "******"
        # prepare the query string:
        extension = CSGlobals.getCSExtensions()[0].lower()
        query = "'GHD_Affected Site'=\"" + name + '" AND \'GHD_Affected VO\'="%s"' % extension
        if startDate is not None:
            query = query + " AND 'GHD_Date Of Creation'>" + str(startDate)
        if endDate is not None:
            query = query + " AND 'GHD_Date Of Creation'<" + str(endDate)

        # create the URL to get tickets relative to the site:
        # Updated from https://gus.fzk.de to https://ggus.eu
        # NOTE(review): this expression is incomplete as shown - the parenthesis is
        # never closed and none of the visible string pieces holds a placeholder
        # for the two values given to the % operator. Parts of the URL seem to be
        # missing; restore them from the upstream source before using this code.
        ggusURL = (
            "affectedsite=" + name + "&"
            "orderhow=descending" % (extension, extension)

        # The query must be executed inside a try block: empty queries, though formally correct, raise an exception
        # NOTE(review): the `try:` statement matching the `except` below is not
        # present in this copy - to be restored as well.
            self.ticketList = gclient.service.TicketGetList(query)
        except WebFault:
            self.statusCount["terminal"] = 0
            self.statusCount["open"] = 0

        return S_OK((self.statusCount, ggusURL, self.shortDescription))
예제 #31
파일: CSAPI.py 프로젝트: JanEbbing/DIRAC
  def addShifter( self, shifters = None ):
    """
    Adds or modify one or more shifters. Also, adds the shifter section in case this is not present.
    Shifter identities are used in several places, mostly for running agents

    :param shifters: should be in the form {'ShifterRole':{'User':'aUserName', 'Group':'aDIRACGroup'}}.
                     The dictionary is not modified.

    :return: S_OK/S_ERROR
    """

    def getOpsSection():
      """
      Where is the shifters section?

      :return: S_OK( path of the first existing Shifter section ) or
               S_ERROR( "No shifter section" )
      """
      vo = CSGlobals.getVO()
      setup = CSGlobals.getSetup()

      if vo:
        res = gConfig.getSections( '/Operations/%s/%s/Shifter' % (vo, setup) )
        if res['OK']:
          return S_OK( '/Operations/%s/%s/Shifter' % ( vo, setup ) )

        res = gConfig.getSections( '/Operations/%s/Defaults/Shifter' % vo )
        if res['OK']:
          return S_OK( '/Operations/%s/Defaults/Shifter' % vo )

      else:
        res = gConfig.getSections( '/Operations/%s/Shifter' % setup )
        if res['OK']:
          return S_OK( '/Operations/%s/Shifter' % setup )

        res = gConfig.getSections( '/Operations/Defaults/Shifter' )
        if res['OK']:
          return S_OK( '/Operations/Defaults/Shifter' )

      return S_ERROR( "No shifter section" )

    if shifters is None:
      shifters = {}
    if not self.__initialized['OK']:
      return self.__initialized

    # get current shifters
    opsH = Operations( )
    currentShifterRoles = opsH.getSections( 'Shifter' )
    if not currentShifterRoles['OK']:
      # we assume the shifter section is not present
      currentShifterRoles = []
    else:
      currentShifterRoles = currentShifterRoles['Value']
    currentShiftersDict = {}
    for currentShifterRole in currentShifterRoles:
      currentShifter = opsH.getOptionsDict( 'Shifter/%s' % currentShifterRole )
      if not currentShifter['OK']:
        return currentShifter
      currentShifter = currentShifter['Value']
      currentShiftersDict[currentShifterRole] = currentShifter

    # Keeping from shifters only what needs to be changed. A new dictionary is
    # built: removing keys from a dictionary while iterating over it raises a
    # RuntimeError, and the dictionary of the caller must stay untouched.
    shiftersToSet = {}
    for sRole in shifters:
      if sRole in currentShiftersDict and currentShiftersDict[sRole] == shifters[sRole]:
        continue
      shiftersToSet[sRole] = shifters[sRole]

    # get shifters section to modify
    section = getOpsSection()

    # Is this section present?
    if not section['OK']:
      if section['Message'] == "No shifter section":
        gLogger.warn( section['Message'] )
        gLogger.info( "Adding shifter section" )
        vo = CSGlobals.getVO()
        if vo:
          section = '/Operations/%s/Defaults/Shifter' % vo
        else:
          section = '/Operations/Defaults/Shifter'
        res = self.__csMod.createSection( section )
        if not res:
          gLogger.error( "Section %s not created" % section )
          return S_ERROR( "Section %s not created" % section )
      else:
        gLogger.error( section['Message'] )
        return section
    else:
      section = section['Value']

    #add or modify shifters
    for shifter in shiftersToSet:
      shifterPath = section + '/' + shifter
      self.__csMod.removeSection( shifterPath )
      self.__csMod.createSection( shifterPath )
      self.__csMod.createSection( shifterPath + '/' + 'User' )
      self.__csMod.createSection( shifterPath + '/' + 'Group' )
      self.__csMod.setOptionValue( shifterPath + '/' + 'User', shiftersToSet[shifter]['User'] )
      self.__csMod.setOptionValue( shifterPath + '/' + 'Group', shiftersToSet[shifter]['Group'] )

    self.__csModified = True
    return S_OK( True )
예제 #32
 def __getVOPath(self):
     """Return the Operations path to use.

     "/Operations" is returned when CSGlobals.getVO() gives a true value,
     otherwise "/Operations/<vo>" built with the vo of self.__threadData.
     """
     if not CSGlobals.getVO():
         perVOPath = "/Operations/%s" % self.__threadData.vo
         return perVOPath
     return "/Operations"
예제 #33
    def addShifter(self, shifters=None):
        """
        Adds or modify one or more shifters. Also, adds the shifter section in case this is not present.
        Shifter identities are used in several places, mostly for running agents

        :param shifters: should be in the form {'ShifterRole':{'User':'aUserName', 'Group':'aDIRACGroup'}}.
                         The dictionary is not modified.

        :return: S_OK/S_ERROR
        """

        def getOpsSection():
            """
            Where is the shifters section?

            :return: S_OK( path of the first existing Shifter section ) or
                     S_ERROR( "No shifter section" )
            """
            vo = CSGlobals.getVO()
            setup = CSGlobals.getSetup()

            if vo:
                res = gConfig.getSections('/Operations/%s/%s/Shifter' %
                                          (vo, setup))
                if res['OK']:
                    return S_OK('/Operations/%s/%s/Shifter' % (vo, setup))

                res = gConfig.getSections('/Operations/%s/Defaults/Shifter' %
                                          vo)
                if res['OK']:
                    return S_OK('/Operations/%s/Defaults/Shifter' % vo)

            else:
                res = gConfig.getSections('/Operations/%s/Shifter' % setup)
                if res['OK']:
                    return S_OK('/Operations/%s/Shifter' % setup)

                res = gConfig.getSections('/Operations/Defaults/Shifter')
                if res['OK']:
                    return S_OK('/Operations/Defaults/Shifter')

            return S_ERROR("No shifter section")

        if shifters is None:
            shifters = {}
        if not self.__initialized['OK']:
            return self.__initialized

        # get current shifters
        opsH = Operations()
        currentShifterRoles = opsH.getSections('Shifter')
        if not currentShifterRoles['OK']:
            # we assume the shifter section is not present
            currentShifterRoles = []
        else:
            currentShifterRoles = currentShifterRoles['Value']
        currentShiftersDict = {}
        for currentShifterRole in currentShifterRoles:
            currentShifter = opsH.getOptionsDict('Shifter/%s' %
                                                 currentShifterRole)
            if not currentShifter['OK']:
                return currentShifter
            currentShifter = currentShifter['Value']
            currentShiftersDict[currentShifterRole] = currentShifter

        # Keeping from shifters only what needs to be changed. A new dictionary
        # is built: removing keys from a dictionary while iterating over it
        # raises a RuntimeError, and the dictionary of the caller must stay
        # untouched.
        shiftersToSet = {}
        for sRole in shifters:
            if sRole in currentShiftersDict and currentShiftersDict[sRole] == shifters[sRole]:
                continue
            shiftersToSet[sRole] = shifters[sRole]

        # get shifters section to modify
        section = getOpsSection()

        # Is this section present?
        if not section['OK']:
            if section['Message'] == "No shifter section":
                gLogger.warn(section['Message'])
                gLogger.info("Adding shifter section")
                vo = CSGlobals.getVO()
                if vo:
                    section = '/Operations/%s/Defaults/Shifter' % vo
                else:
                    section = '/Operations/Defaults/Shifter'
                res = self.__csMod.createSection(section)
                if not res:
                    gLogger.error("Section %s not created" % section)
                    return S_ERROR("Section %s not created" % section)
            else:
                gLogger.error(section['Message'])
                return section
        else:
            section = section['Value']

        #add or modify shifters
        for shifter in shiftersToSet:
            shifterPath = section + '/' + shifter
            self.__csMod.removeSection(shifterPath)
            self.__csMod.createSection(shifterPath)
            self.__csMod.createSection(shifterPath + '/' + 'User')
            self.__csMod.createSection(shifterPath + '/' + 'Group')
            self.__csMod.setOptionValue(shifterPath + '/' + 'User',
                                        shiftersToSet[shifter]['User'])
            self.__csMod.setOptionValue(shifterPath + '/' + 'Group',
                                        shiftersToSet[shifter]['Group'])

        self.__csModified = True
        return S_OK(True)