Beispiel #1
0
    def initialize(self):
        """Read the options and the pilot version from the configuration.

        Sets ``siteJobLimits`` and ``checkPilotVersion`` through
        ``getCSOption``, then ``setup`` and ``vo``, and finally
        ``pilotVersion`` from
        ``/Operations/<vo>/<setup>/Versions/PilotVersion`` (empty string if the
        option is not defined).
        """
        self.siteJobLimits = self.getCSOption("SiteJobLimits", False)
        self.checkPilotVersion = self.getCSOption("CheckPilotVersion", True)

        self.setup = gConfig.getValue("/DIRAC/Setup", "")
        self.vo = getVO()

        # The version option lives in a section keyed by both VO and setup
        versionOption = "/Operations/%s/%s/Versions/PilotVersion" % (self.vo, self.setup)
        self.pilotVersion = gConfig.getValue(versionOption, "")
Beispiel #2
0
 def __init__(self, useCertificates = False):
   """ Constructor of the RequestClient class

       Reads the local, central and VO-box RequestDB URLs from the CS.

       :param useCertificates: accepted for interface compatibility; not used in this constructor
   """
   baseSection = '/Systems/DataManagement/Development/Services/RequestDB'
   self.localUrl = gConfig.getValue('%s/localURL' % baseSection)
   self.centralUrl = gConfig.getValue('%s/centralURL' % baseSection)
   # A list default makes the CS return the option as a list of URLs
   voBoxUrls = gConfig.getValue('%s/voBoxURLs' % baseSection, [])
   # list.remove() works in place and returns None, so its result must never be stored.
   # Filtering also copes with the local URL not being part of the list.
   self.voBoxUrls = [ url for url in randomize(voBoxUrls) if url != self.localUrl ]
def initializeSecurityLoggingHandler( serviceInfo ):
  """ Prepare the security log data directory and register the file log store.

      The data location is read from <serviceSectionPath>/DataLocation
      (default "data/securityLog"); a relative location is resolved against
      /LocalSite/InstancePath. The directory is created if needed and checked
      for writability by creating and removing a test file.

      :param serviceInfo: dictionary providing the 'serviceSectionPath' key
      :return: S_OK() on success, S_ERROR if the data location is not writable
  """
  global gSecurityFileLog

  serviceCS = serviceInfo [ 'serviceSectionPath' ]
  dataPath = gConfig.getValue( "%s/DataLocation" % serviceCS, "data/securityLog" )
  dataPath = dataPath.strip()
  # startswith() rather than dataPath[0]: an empty option value must not raise IndexError
  if not dataPath.startswith( "/" ):
    dataPath = os.path.realpath( "%s/%s" % ( gConfig.getValue( '/LocalSite/InstancePath', rootPath ), dataPath ) )
  gLogger.info( "Data will be written into %s" % dataPath )
  try:
    os.makedirs( dataPath )
  except OSError:
    # Typically the directory already exists; any real problem is caught by the write test below
    pass
  try:
    testFile = "%s/seclog.jarl.test" % dataPath
    fd = open( testFile, "w" )
    fd.close()
    os.unlink( testFile )
  except ( IOError, OSError ):
    gLogger.fatal( "Can't write to %s" % dataPath )
    return S_ERROR( "Data location is not writable" )
  #Define globals
  gSecurityFileLog = SecurityFileLog( dataPath )
  SecurityLogClient().setLogStore( gSecurityFileLog )
  return S_OK()
Beispiel #4
0
 def _updateSiteList( self, sitesData ):
   """ Fill sitesData with the sites of every grid listed in self.gridsToMap.

       For each site found under /Resources/Sites/<grid> the Coordinates
       ("<float>:<float>"), Name and MoUTierLevel options are read. Sites with
       badly formed coordinates or without a name are skipped with a warning.

       :param sitesData: dictionary updated in place; maps the site name to
                         { 'longlat' : [ float, float ], 'name' : str, 'tier' : tier }
                         where tier is the configured string, or the integer 2
                         when the option is missing or set to "none"
   """
   ceSection = "/Resources/Sites"
   for grid in self.gridsToMap:
     gridSection = "%s/%s" % ( ceSection, grid )
     result = gConfig.getSections( gridSection )
     if not result[ 'OK' ]:
       gLogger.error( "Cannot get a list of sites for grid", "%s :%s" % ( grid, result[ 'Message' ] ) )
       continue
     for site in result[ 'Value' ]:
       rawCoords = gConfig.getValue( "%s/%s/Coordinates" % ( gridSection, site ), "" )
       try:
         # Round every coordinate to 4 decimals, ignoring empty fields
         coords = [ float( "%.4f" % float( c.strip() ) ) for c in rawCoords.split( ":" ) if c.strip() ]
       except ValueError as e:
         # Report the parsing problem through the logger instead of printing to stdout
         gLogger.warn( "Site %s has coordinates incorrectly defined: %s" % ( site, rawCoords ), str( e ) )
         continue
       if len( coords ) != 2:
         gLogger.warn( "Site %s has coordinates incorrectly defined: %s" % ( site, coords ) )
         continue
       name = gConfig.getValue( "%s/%s/Name" % ( gridSection, site ), "" )
       if not name:
         gLogger.warn( "Site %s no name defined" % site )
         continue
       tier = gConfig.getValue( "%s/%s/MoUTierLevel" % ( gridSection, site ), "" )
       if not tier or tier.lower() == "none":
         tier = 2
       sitesData[ site ] = { 'longlat' : coords,
                             'name' : name,
                             'tier' : tier }
Beispiel #5
0
  def export_sendSMS( self, userName, body, fromAddress ):
    """ Send an SMS to a DIRAC user by mailing the body to <mobile>@<SMS switch>.

        :param userName: DIRAC user whose /Registry/Users/<userName>/Mobile option is used
        :param body: text of the message
        :param fromAddress: sender address; the string 'None' leaves the Mail default untouched
        :return: S_ERROR when no mobile number or no SMS switch is configured,
                 otherwise the result of the Mail send operation
    """
    gLogger.verbose( 'Received signal to send the following SMS to %s:\n%s' % ( userName, body ) )

    mobile = gConfig.getValue( '/Registry/Users/%s/Mobile' % userName, '' )
    if not mobile:
      return S_ERROR( 'No registered mobile number for %s' % userName )

    csSection = PathFinder.getServiceSection( 'Framework/Notification' )
    smsSwitch = gConfig.getValue( '%s/SMSSwitch' % csSection, '' )
    if not smsSwitch:
      return S_ERROR( 'No SMS switch is defined in CS path %s/SMSSwitch' % csSection )

    # The SMS switch turns a mail sent to <mobile>@<switch> into a text message
    mailer = Mail()
    mailer._subject = 'DIRAC SMS'
    mailer._message = body
    mailer._mailAddress = '%s@%s' % ( mobile, smsSwitch )
    if fromAddress != 'None':
      mailer._fromAddress = fromAddress

    result = mailer._send()
    if result['OK']:
      gLogger.info( 'SMS sent successfully to %s ' % ( userName ) )
      gLogger.debug( result['Value'] )
    else:
      gLogger.warn( 'Could not send SMS to %s with the following message:\n%s' % ( userName, result['Message'] ) )

    return result
Beispiel #6
0
  def configureFromSection( self, mySection ):
    """
      reload from CS

      Refreshes the mail addresses from mySection and configures every
      RunningPod listed in <mySection>/RunningPods that is not yet known.
      A RunningPod whose definition lacks a required option is reported and
      skipped.

      :param mySection: CS section to read the options from
      :return: the S_ERROR structure from virtualMachineDB.getRunningPodDict if
               that call fails, otherwise None
    """
    self.log.debug( 'Configuring from %s' % mySection )
    self.errorMailAddress = gConfig.getValue( mySection + '/ErrorMailAddress' , self.errorMailAddress )
    self.alarmMailAddress = gConfig.getValue( mySection + '/AlarmMailAddress' , self.alarmMailAddress )
    self.mailFromAddress  = gConfig.getValue( mySection + '/MailFromAddress'  , self.mailFromAddress )

    # following will do something only when call from reload including SubmitPool as mySection
    requestedRunningPods = gConfig.getValue( mySection + '/RunningPods', self.runningPods.keys() )

    requiredOptions = ( 'Image', 'MaxInstances', 'CPUPerInstance', 'Priority',
                        'CloudEndpoints', 'Requirements', 'CampaignStartDate', 'CampaignEndDate' )

    for runningPodName in requestedRunningPods:
      self.log.verbose( 'Trying to configure RunningPod:', runningPodName )
      if runningPodName in self.runningPods:
        continue
      runningPodDict = virtualMachineDB.getRunningPodDict( runningPodName )
      if not runningPodDict['OK']:
        self.log.error('Error in RunningPodDict: %s' % runningPodDict['Message'])
        return runningPodDict
      self.log.verbose( 'Trying to configure RunningPodDict:', runningPodDict )
      runningPodDict = runningPodDict[ 'Value' ]

      missingOptions = [ option for option in requiredOptions if option not in runningPodDict ]
      if missingOptions:
        for option in missingOptions:
          self.log.error( 'Missing option in "%s" RunningPod definition:' % runningPodName, option )
        # Skip the whole RunningPod: going on would fail with KeyError on the lookups below
        continue

      # NOTE(review): 'CampaignStartDate' is required above but was never stored by
      # the original code; confirm whether it should be kept as well.
      self.runningPods[runningPodName] = { 'Image'           : runningPodDict['Image'],
                                           'Requirements'    : runningPodDict['Requirements'],
                                           'MaxInstances'    : int( runningPodDict['MaxInstances'] ),
                                           'CPUPerInstance'  : int( runningPodDict['CPUPerInstance'] ),
                                           'Priority'        : int( runningPodDict['Priority'] ),
                                           'CloudEndpoints'  : runningPodDict['CloudEndpoints'],
                                           'CampaignEndDate' : runningPodDict['CampaignEndDate'] }
Beispiel #7
0
  def generateToken( self, requesterDN, requesterGroup, numUses = 1, lifeTime = 0, retries = 10 ):
    """
    Generate and return a token and the number of uses for the token

    :param requesterDN: DN stored with the token
    :param requesterGroup: group stored with the token
    :param numUses: requested number of uses, clamped to [ 1, TokenMaxUses ]
    :param lifeTime: token life time in seconds; a false value selects
                     /DIRAC/VOPolicy/TokenLifeTime (or the class default)
    :param retries: how many more attempts are made when the generated token
                    collides with an existing one
    :return: S_OK( ( token, numUses ) ), or S_ERROR when the insertion fails or
             the retries are exhausted
    """
    if not lifeTime:
      lifeTime = gConfig.getValue( "/DIRAC/VOPolicy/TokenLifeTime", self.__defaultTokenLifetime )
    maxUses = gConfig.getValue( "/DIRAC/VOPolicy/TokenMaxUses", self.__defaultTokenMaxUses )
    numUses = max( 1, min( numUses, maxUses ) )
    m = md5.md5()
    rndData = "%s.%s.%s.%s" % ( time.time(), random.random(), numUses, lifeTime )
    m.update( rndData )
    token = m.hexdigest()
    fieldsSQL = ", ".join( ( "Token", "RequesterDN", "RequesterGroup", "ExpirationTime", "UsesLeft" ) )
    valuesSQL = ", ".join( ( self._escapeString( token )['Value'],
                              self._escapeString( requesterDN )['Value'],
                              self._escapeString( requesterGroup )['Value'],
                            "TIMESTAMPADD( SECOND, %s, UTC_TIMESTAMP() )" % lifeTime,
                            str( numUses ) ) )

    insertSQL = "INSERT INTO `ProxyDB_Tokens` ( %s ) VALUES ( %s )" % ( fieldsSQL, valuesSQL )
    result = self._update( insertSQL )
    if result[ 'OK' ]:
      return S_OK( ( token, numUses ) )
    if result[ 'Message' ].find( "uplicate entry" ) > -1:
      if retries:
        # The requester identity has to be passed on as well: without it every
        # argument is shifted by two positions and the retry counter is reset
        return self.generateToken( requesterDN, requesterGroup, numUses, lifeTime, retries - 1 )
      return S_ERROR( "Max retries reached for token generation. Aborting" )
    return result
Beispiel #8
0
  def _ex_initialize( cls, exeName, loadName ):
    """ Set up the state stored on cls for an executor and run its initialize() hook.

        :param exeName: executor name; used for the 'fullName' property, the
                        control/work directory defaults and the sub-logger name
        :param loadName: name used for the 'loadName'/'loadSection' properties
                         and in the error message
        :return: S_ERROR if cls.initialize() raises.
                 NOTE(review): the visible code neither uses `result` nor returns
                 anything on success - the definition may continue beyond what is
                 shown here; confirm.
    """
    # Properties describing this executor
    cls.__properties = { 'fullName' : exeName,
                         'loadName' : loadName,
                         'section' : PathFinder.getExecutorSection( exeName ),
                         'loadSection' : PathFinder.getExecutorSection( loadName ),
                         'messagesProcessed' : 0,
                         'reconnects' : 0,
                         'setup' : gConfig.getValue( "/DIRAC/Setup", "Unknown" ) }
    # Root of the control and work directories
    cls.__basePath = gConfig.getValue( '/LocalSite/InstancePath', rootPath )
    # Default values of the executor options
    cls.__defaults = {}
    cls.__defaults[ 'MonitoringEnabled' ] = True
    cls.__defaults[ 'Enabled' ] = True
    # One sub-directory per component of the "/"-separated executor name
    cls.__defaults[ 'ControlDirectory' ] = os.path.join( cls.__basePath,
                                                          'control',
                                                          *exeName.split( "/" ) )
    cls.__defaults[ 'WorkDirectory' ] = os.path.join( cls.__basePath,
                                                       'work',
                                                       *exeName.split( "/" ) )
    cls.__defaults[ 'ReconnectRetries' ] = 10
    cls.__defaults[ 'ReconnectSleep' ] = 5
    cls.__properties[ 'shifterProxy' ] = ''
    # The shifter credentials file is placed inside the work directory
    cls.__properties[ 'shifterProxyLocation' ] = os.path.join( cls.__defaults[ 'WorkDirectory' ],
                                                               '.shifterCred' )
    cls.__mindName = False
    cls.__mindExtraArgs = False
    cls.__freezeTime = 0
    cls.__fastTrackEnabled = True
    cls.log = gLogger.getSubLogger( exeName, child = False )

    # Any exception raised by the initialize() hook is logged and converted into S_ERROR
    try:
      result = cls.initialize()
    except Exception, excp:
      gLogger.exception( "Exception while initializing %s" % loadName )
      return S_ERROR( "Exception while initializing: %s" % str( excp ) )
  def __getUserDetails( self ):
    """ Get details on user account

        Combines the DIRAC identity from the remote credentials with the iRods
        account options found under <serviceSectionPath>/UserCredentials/<user>.

        :return: S_ERROR when the DIRAC user name or group cannot be determined,
                 otherwise S_OK( dict ) with the keys DIRACUser, DIRACGroup,
                 DIRACHome, iRodsUser, iRodsGroup, iRodsHome and iRodsPassword
    """
    # Defaults guarantee both names exist even when no credentials are available
    diracUser = None
    diracGroup = None
    credentials = self.getRemoteCredentials()
    if credentials:
      diracUser = credentials.get( "username" )
      diracGroup = credentials.get( "group" )
    if not ( diracUser and diracGroup ):
      return S_ERROR( 'Failed to get DIRAC user name and/or group' )
    vo = getVOForGroup( diracGroup )

    # Home directory layout: /<vo>/user/<first letter of the user name>/<user name>
    diracHome = ''
    if vo:
      diracHome = '/%s/user/%s/%s' % ( vo, diracUser[0], diracUser )

    cfgPath = self.serviceInfoDict[ 'serviceSectionPath' ]
    gLogger.debug( "cfgPath: %s" % cfgPath )
    userSection = "%s/UserCredentials/%s" % ( cfgPath, diracUser )
    # The iRods user name falls back to the DIRAC one, the other options to ''
    irodsUser = gConfig.getValue( "%s/iRodsUser" % userSection, diracUser )
    irodsHome = gConfig.getValue( "%s/iRodsHome" % userSection, '' )
    irodsGroup = gConfig.getValue( "%s/iRodsGroup" % userSection, '' )
    irodsPassword = gConfig.getValue( "%s/iRodsPassword" % userSection, '' )

    resultDict = { 'DIRACUser' : diracUser,
                   'DIRACGroup' : diracGroup,
                   'DIRACHome' : diracHome,
                   'iRodsUser' : irodsUser,
                   'iRodsGroup' : irodsGroup,
                   'iRodsHome' : irodsHome,
                   'iRodsPassword' : irodsPassword }

    return S_OK( resultDict )
def initializeReportGeneratorHandler( serviceInfo ):
  """ Set up the read-only accounting DB, the graphs directory and the monitoring activities.

      The data location is read from the DataLocation option of the
      Accounting/ReportGenerator service section (default
      "data/accountingGraphs"); a relative location is resolved against
      /LocalSite/InstancePath. The directory is created if needed and checked
      for writability by creating and removing a test file.

      :param serviceInfo: service information (not used in this function)
      :return: S_OK() on success, S_ERROR if the data location is not writable
  """
  global gAccountingDB
  gAccountingDB = AccountingDB( readOnly = True )
  #Get data location
  reportSection = PathFinder.getServiceSection( "Accounting/ReportGenerator" )
  dataPath = gConfig.getValue( "%s/DataLocation" % reportSection, "data/accountingGraphs" )
  dataPath = dataPath.strip()
  # startswith() rather than dataPath[0]: an empty option value must not raise IndexError
  if not dataPath.startswith( "/" ):
    dataPath = os.path.realpath( "%s/%s" % ( gConfig.getValue( '/LocalSite/InstancePath', rootPath ), dataPath ) )
  gLogger.info( "Data will be written into %s" % dataPath )
  try:
    os.makedirs( dataPath )
  except OSError:
    # Typically the directory already exists; any real problem is caught by the write test below
    pass
  try:
    testFile = "%s/acc.jarl.test" % dataPath
    fd = open( testFile, "w" )
    fd.close()
    os.unlink( testFile )
  except ( IOError, OSError ):
    gLogger.fatal( "Can't write to %s" % dataPath )
    return S_ERROR( "Data location is not writable" )
  gDataCache.setGraphsLocation( dataPath )
  gMonitor.registerActivity( "plotsDrawn", "Drawn plot images", "Accounting reports", "plots", gMonitor.OP_SUM )
  gMonitor.registerActivity( "reportsRequested", "Generated reports", "Accounting reports", "reports", gMonitor.OP_SUM )
  return S_OK()
Beispiel #11
0
def initializeStorageElementProxyHandler(serviceInfo):
  """ Read the proxy handler options from the CS and prepare its directories.

      Updates the module globals base_path, httpFlag, httpPath and httpPort.
      The base path directory is created when missing. With HttpAccess
      enabled, the HTTP cache directory is created as well and an HttpThread
      is instantiated for the configured port and path.

      :param serviceInfo: dictionary providing the 'serviceSectionPath' key
      :return: S_OK(), or S_ERROR when no base path is available
  """
  global base_path, httpFlag, httpPort, httpPath
  cfgPath = serviceInfo['serviceSectionPath']

  base_path = gConfig.getValue( "%s/BasePath" % cfgPath, base_path )
  if not base_path:
    errStr = 'Failed to get the base path'
    gLogger.error( errStr )
    return S_ERROR( errStr )

  gLogger.info('The base path obtained is %s. Checking its existence...' % base_path)
  if not os.path.exists(base_path):
    gLogger.info('%s did not exist. Creating....' % base_path)
    os.makedirs(base_path)

  httpFlag = gConfig.getValue( "%s/HttpAccess" % cfgPath, False )
  if not httpFlag:
    # Nothing else to prepare without HTTP access
    return S_OK()

  # The cache lives below the base path unless the CS says otherwise
  httpPath = '%s/httpCache' % base_path
  httpPath = gConfig.getValue( "%s/HttpCache" % cfgPath, httpPath )
  if not os.path.exists( httpPath ):
    gLogger.info('Creating HTTP cache directory %s' % (httpPath) )
    os.makedirs( httpPath )
  httpPort = gConfig.getValue( "%s/HttpPort" % cfgPath, 9180 )
  gLogger.info('Creating HTTP server thread, port:%d, path:%s' % (httpPort,httpPath) )
  serverThread = HttpThread( httpPort,httpPath )

  return S_OK()
Beispiel #12
0
def addUserToEgroup(clip):
  """Add user to e-group

  The e-groups web service credentials are read from /Security/egroupAdmin
  and /Security/egroupPass. Without them, or when the SOAP client cannot be
  created, the problem is logged and nothing is done.

  :param clip: object providing ``external`` and ``email``; it is also passed
               on to getUserInfoFromPhonebook for non-external users
  :return: None
  """
  login = gConfig.getValue("/Security/egroupAdmin","").strip('"')
  pwd = gConfig.getValue("/Security/egroupPass","").strip('"')
  url = 'https://foundservices.cern.ch/ws/egroups/v1/EgroupsWebService/EgroupsWebService.wsdl'
  if not ( login and pwd ):
    gLogger.warn("Missing configuration parameters: username or password for WSDL interactions")
    gLogger.warn("Add options: -o /Security/egroupAdmin=<cernusername> -o /Security/egroupPass=<password>")
    gLogger.error("User registration in e-group must be done manually")
    return
  try:
    client = Client(url=url, username=login, password=pwd)
    #gLogger.notice(client)
  except suds.transport.TransportError as exc:
    gLogger.error("Failed to get the WSDL client:%s" %exc)
    gLogger.error("User registration in e-group must be done manually")
    return
  except Exception as exc:
    # "Exception" rather than a bare except: SystemExit and KeyboardInterrupt must
    # not be swallowed, and the actual problem is worth reporting
    gLogger.error("Something unexpected happened with the suds client, aborting", str(exc))
    return

  if clip.external:
    # External users are identified by their e-mail address only
    sudsUser = client.factory.create("ns0:MemberType")
    sudsUser['Type'] = 'External'
    sudsUser['Email'] = clip.email
    userl = [sudsUser]
  else:
    user = getUserInfoFromPhonebook(client, clip)
    userl = [user]
  res = client.service.AddEgroupMembers('ilc-dirac',False, userl)
  if hasattr(res, 'warnings'):
    gLogger.notice(res.warnings)
Beispiel #13
0
    def getSiteProtocols(self, site, printOutput=False):
        """Allows to check the defined protocols for each site SE.

        For every SE of the site, the ProtocolsList of its SRM2 section is
        collected; an empty list is replaced by
        /Resources/StorageElements/DefaultProtocols.

        :param site: site name; the part before the first "." selects the grid section
        :param printOutput: when True, a summary table is printed to stdout
        :return: S_OK({ se: protocols }), or S_ERROR if the site is not valid,
                 has no SEs or the sections of an SE cannot be listed
        """
        result = self.__checkSiteIsValid(site)
        if not result["OK"]:
            return result

        siteSection = "/Resources/Sites/%s/%s/SE" % (site.split(".")[0], site)
        siteSEs = gConfig.getValue(siteSection, [])
        if not siteSEs:
            return S_ERROR("No SEs found for site %s in section %s" % (site, siteSection))

        defaultProtocols = gConfig.getValue("/Resources/StorageElements/DefaultProtocols", [])
        # The message and the list are separate arguments: adjacent string literals
        # would be concatenated and the message would become the join separator
        self.log.verbose("Default list of protocols are", ", ".join(defaultProtocols))
        seInfo = {}
        siteSEs.sort()
        for se in siteSEs:
            sections = gConfig.getSections("/Resources/StorageElements/%s/" % (se))
            if not sections["OK"]:
                return sections
            for section in sections["Value"]:
                if gConfig.getValue("/Resources/StorageElements/%s/%s/ProtocolName" % (se, section), "") == "SRM2":
                    path = "/Resources/StorageElements/%s/%s/ProtocolsList" % (se, section)
                    seProtocols = gConfig.getValue(path, [])
                    if not seProtocols:
                        seProtocols = defaultProtocols
                    seInfo[se] = seProtocols

        if printOutput:
            # Single-argument print calls behave the same under Python 2 and 3
            print("\nSummary of protocols for StorageElements at site %s" % site)
            print("\nStorageElement".ljust(30) + "ProtocolsList".ljust(30) + "\n")
            for se, protocols in seInfo.items():
                print(se.ljust(30) + ", ".join(protocols).ljust(30))

        return S_OK(seInfo)
  def __getProtocols( self ):
    """ Return the list of protocols to use for this storage.

    A ProtocolsList defined in a section of this SE whose ProtocolName equals
    self.protocol takes priority; otherwise
    /Resources/StorageElements/DefaultProtocols is used.

    :param self: self reference
    :return: S_OK( protocols ), the failed getSections result, or S_ERROR when
             neither a local nor a default list is defined
    """
    storagePath = '/Resources/StorageElements/%s' % self.name
    sections = gConfig.getSections( '%s/' % storagePath )
    self.log.debug( "GFAL2_SRM2Storage.__getProtocols: Trying to get protocols for storage %s." % self.name )
    if not sections['OK']:
      return sections

    protocolsList = []
    for section in sections['Value']:
      # Only the sections describing our own protocol are of interest
      if gConfig.getValue( '%s/%s/ProtocolName' % ( storagePath, section ), '' ) != self.protocol:
        continue
      siteProtocols = gConfig.getValue( '%s/%s/ProtocolsList' % ( storagePath, section ), [] )
      if not siteProtocols:
        continue
      self.log.debug( 'GFAL2_SRM2Storage.__getProtocols: Found SE protocols list to override defaults:', ', '.join( siteProtocols ) )
      protocolsList = siteProtocols

    if not protocolsList:
      self.log.debug( "GFAL2_SRM2Storage.__getProtocols: No protocols provided, using the default protocols." )
      protocolsList = gConfig.getValue( '/Resources/StorageElements/DefaultProtocols', [] )
      self.log.debug( 'GFAL2_SRM2Storage.__getProtocols: protocolList = %s' % protocolsList )

    if protocolsList:
      return S_OK( protocolsList )

    # Not even a default list is available
    return S_ERROR( "GFAL2_SRM2Storage.__getProtocols: No local protocols defined and no defaults found." )
def initializeStorageElementProxyHandler( serviceInfo ):
  """ Handler initialisation: read the options from the CS and prepare the directories.

      Updates the module globals BASE_PATH, HTTP_FLAG, HTTP_PATH and HTTP_PORT.
      The base path directory is created when missing. With HttpAccess
      enabled, the HTTP cache directory is created as well and an HttpThread
      is instantiated for the configured port and path.

      :param serviceInfo: dictionary providing the 'serviceSectionPath' key
      :return: S_OK(), or S_ERROR when no base path is available
  """

  global BASE_PATH, HTTP_FLAG, HTTP_PORT, HTTP_PATH
  cfgPath = serviceInfo['serviceSectionPath']

  BASE_PATH = gConfig.getValue( "%s/BasePath" % cfgPath, BASE_PATH )
  if not BASE_PATH:
    errStr = 'Failed to get the base path'
    gLogger.error( errStr )
    return S_ERROR( errStr )

  gLogger.info('The base path obtained is %s. Checking its existence...' % BASE_PATH)
  if not os.path.exists(BASE_PATH):
    gLogger.info('%s did not exist. Creating....' % BASE_PATH)
    os.makedirs(BASE_PATH)

  HTTP_FLAG = gConfig.getValue( "%s/HttpAccess" % cfgPath, False )
  if not HTTP_FLAG:
    # Nothing else to prepare without HTTP access
    return S_OK()

  # The cache lives below the base path unless the CS says otherwise
  HTTP_PATH = '%s/httpCache' % BASE_PATH
  HTTP_PATH = gConfig.getValue( "%s/HttpCache" % cfgPath, HTTP_PATH )
  if not os.path.exists( HTTP_PATH ):
    gLogger.info('Creating HTTP cache directory %s' % (HTTP_PATH) )
    os.makedirs( HTTP_PATH )
  HTTP_PORT = gConfig.getValue( "%s/HttpPort" % cfgPath, 9180 )
  gLogger.info('Creating HTTP server thread, port:%d, path:%s' % ( HTTP_PORT, HTTP_PATH ) )
  serverThread = HttpThread( HTTP_PORT, HTTP_PATH )

  return S_OK()
Beispiel #16
0
    def __getProtocols(self):
        """Return the list of protocols to use for this storage.

        A ProtocolsList defined in a section of this SE whose PluginName equals
        self.pluginName takes priority; otherwise
        /Resources/StorageElements/DefaultProtocols is used.

        :param self: self reference
        :return: S_OK(protocols), the failed getSections result, or S_ERROR
                 when neither a local nor a default list is defined
        """
        storagePath = "/Resources/StorageElements/%s" % self.name
        sections = gConfig.getSections("%s/" % storagePath)
        self.log.debug("GFAL2_SRM2Storage.__getProtocols: Trying to get protocols for storage %s." % self.name)
        if not sections["OK"]:
            return sections

        protocolsList = []
        for section in sections["Value"]:
            # Only the sections describing our own plugin are of interest
            if gConfig.getValue("%s/%s/PluginName" % (storagePath, section), "") != self.pluginName:
                continue
            siteProtocols = gConfig.getValue("%s/%s/ProtocolsList" % (storagePath, section), [])
            if not siteProtocols:
                continue
            self.log.debug(
                "GFAL2_SRM2Storage.__getProtocols: Found SE protocols list to override defaults:",
                ", ".join(siteProtocols),
            )
            protocolsList = siteProtocols

        if not protocolsList:
            self.log.debug("GFAL2_SRM2Storage.__getProtocols: No protocols provided, using the default protocols.")
            protocolsList = gConfig.getValue("/Resources/StorageElements/DefaultProtocols", [])
            self.log.debug("GFAL2_SRM2Storage.__getProtocols: protocolList = %s" % protocolsList)

        if protocolsList:
            return S_OK(protocolsList)

        # Not even a default list is available
        return S_ERROR("GFAL2_SRM2Storage.__getProtocols: No local protocols defined and no defaults found.")
Beispiel #17
0
 def __getCEName( self ):
   """ Try to get the CE name

       For the LCG, gLite and OSG flavours the CE is taken from the output of
       'glite-brokerinfo getCE', then from the OSG_JOB_CONTACT environment
       variable, then from the LocalSite/GridCE and LocalSite/CEQueue options;
       if none of these works the process exits with status 1. For the CREAM
       flavour the CE_ID environment variable is used.

       Sets self.pp.ceName and, when a queue part is present, self.pp.queueName.
   """
   # FIXME: this should not be part of the standard configuration (flavours discriminations should stay out)
   if self.pp.flavour in ['LCG', 'gLite', 'OSG']:
     retCode, CEName = self.executeAndGetOutput( 'glite-brokerinfo getCE',
                                                  self.pp.installEnv )
     if not retCode:
       self.pp.ceName = CEName.split( ':' )[0]
       ceParts = CEName.split( '/' )
       if len( ceParts ) > 1:
         self.pp.queueName = ceParts[1]
     # "in" instead of the deprecated dict.has_key(), which no longer exists in Python 3
     elif 'OSG_JOB_CONTACT' in os.environ:
       # OSG_JOB_CONTACT String specifying the endpoint to use within the job submission
       #                 for reaching the site (e.g. manager.mycluster.edu/jobmanager-pbs )
       contactParts = os.environ['OSG_JOB_CONTACT'].split( '/' )
       self.pp.ceName = contactParts[0]
       if len( contactParts ) > 1:
         self.pp.queueName = contactParts[1]
     # configureOpts.append( '-N "%s"' % cliParams.ceName )
     else:
       # is it already present?
       from DIRAC import gConfig
       ceName = gConfig.getValue( 'LocalSite/GridCE', '' )
       ceQueue = gConfig.getValue( 'LocalSite/CEQueue', '' )
       if ceName and ceQueue:
         self.pp.ceName = ceName
         self.pp.queueName = ceQueue
       else:
         self.log.error( "Can't find ceName nor queue... have to fail!" )
         sys.exit( 1 )
   elif self.pp.flavour == "CREAM":
     if 'CE_ID' in os.environ:
       ceID = os.environ['CE_ID']
       self.pp.ceName = ceID.split( ':' )[0]
       if ceID.count( "/" ):
         self.pp.queueName = ceID.split( '/' )[1]
Beispiel #18
0
 def __init__( self,
               server = False,
               serverCert = False,
               serverKey = False,
               voName = False,
               timeout = False ):
   """ Store the connection settings; every argument left false gets a default.

       :param server: server name; default from /DIRAC/VOPolicy/MyProxyServer
                      ("myproxy.cern.ch" if that option is not set)
       :param serverCert: certificate location; default is the host certificate,
                          else <rootPath>/etc/grid-security/servercert.pem
       :param serverKey: key location; default is the host key,
                         else <rootPath>/etc/grid-security/serverkey.pem
       :param voName: VO name; default is getVO( "unknown" )
       :param timeout: command timeout; default 30
   """
   self._secCmdTimeout = timeout or 30
   self._secServer = server or gConfig.getValue( "/DIRAC/VOPolicy/MyProxyServer", "myproxy.cern.ch" )
   self._secVO = voName or getVO( "unknown" )

   # Host certificate and key locations serve as fallback for both files
   ckLoc = Locations.getHostCertificateAndKeyLocation()

   if serverCert:
     certLocation = serverCert
   elif ckLoc:
     certLocation = ckLoc[0]
   else:
     certLocation = "%s/etc/grid-security/servercert.pem" % DIRAC.rootPath
   self._secCertLoc = certLocation

   if serverKey:
     keyLocation = serverKey
   elif ckLoc:
     keyLocation = ckLoc[1]
   else:
     keyLocation = "%s/etc/grid-security/serverkey.pem" % DIRAC.rootPath
   self._secKeyLoc = keyLocation

   trustedHost = gConfig.getValue( "/DIRAC/VOPolicy/MyProxyTrustedHost", "True" )
   self._secRunningFromTrustedHost = trustedHost.lower() in ( "y", "yes", "true" )
   self._secMaxProxyHours = gConfig.getValue( "/DIRAC/VOPolicy/MyProxyMaxDelegationTime", 168 )
  def __irodsClient( self , user = None ):
    """ Open an iRods connection and log in with the password found in the CS.

        The module global IRODS_USER is updated: from the remote credentials
        when no user is given, or from the "read" / "write" option of the
        service section when user is "r" / "w".

        :param user: None (use the remote credentials), "r" or "w"
        :return: ( connection, error message ) on success,
                 ( False, reason ) when no user is known or the login fails
    """

    global IRODS_USER

    cfgPath = self.serviceInfoDict[ 'serviceSectionPath' ]
    gLogger.debug( "cfgPath: %s" % cfgPath )

    if not user:
      credentials = self.getRemoteCredentials()
      if credentials and ( "username" in credentials ):
        IRODS_USER = credentials[ "username" ]
        ## TODO: should get user password somehow
    elif user in ( "r", "w" ):
      # "r" and "w" select the account configured for reading or writing
      accessOption = "read" if user == "r" else "write"
      IRODS_USER = gConfig.getValue( "%s/%s" % ( cfgPath , accessOption ) , IRODS_USER )

    if not IRODS_USER:
      return False , "Failed to get iRods user"
    gLogger.debug( "iRods user: %s" % IRODS_USER )

    # The password is stored under the option named after the user
    password = gConfig.getValue( "%s/%s" % ( cfgPath , IRODS_USER ) , None )

    conn , errMsg = rcConnect( IRODS_HOST , IRODS_PORT , IRODS_USER , IRODS_ZONE )

    status = clientLoginWithPassword( conn , password )
    if status != 0:
      return False , "Failed to authenticate user '%s'" % IRODS_USER

    return conn , errMsg
  def getMailDict(self , names=None):

    """
    Convert list of usernames to dict like { e-mail : full name }
    Argument is a list. Return value is a dict

    Users without a registered e-mail address are reported and skipped; the
    user name itself is used when no full name is registered.
    """

    resultDict = dict()
    if not names:
      return resultDict

    for user in names:
      email = gConfig.getValue("/Registry/Users/%s/Email" % user , "")
      gLogger.debug("/Registry/Users/%s/Email - '%s'" % (user , email))
      # The stripped value has to replace the original one, otherwise a blank
      # address passes the check below and padded addresses end up as keys
      email = email.strip()

      if not email:
        gLogger.error("Can't find value for option /Registry/Users/%s/Email" % user)
        continue

      fname = gConfig.getValue("/Registry/Users/%s/FullName" % user , "")
      gLogger.debug("/Registry/Users/%s/FullName - '%s'" % (user , fname))
      fname = fname.strip()

      if not fname:
        fname = user
        gLogger.debug("FullName is absent, name to be used: %s" % fname)

      resultDict[ email ] = fname

    return resultDict
Beispiel #21
0
  def export_getValues( self ):
    """ Collect the 'val' options of the /Resources/NewResources sub-sections.

        The sub-sections to look at are listed in /Resources/NewResources/toUse;
        each value is sorted according to the 'good' flag of its sub-section.
        The first record returned by inspect.getouterframes() for the current
        frame is printed beforehand.

        :return: S_OK( { 'Successful' : [ values ], 'Failed' : [ values ] } )
    """
    frameRecord = inspect.getouterframes( inspect.currentframe() )[0]
    ( frame, filename, line_number,
    function_name, lines, index ) = frameRecord
    print( frame, filename, line_number, function_name, lines, index )

    basePath = '/Resources/NewResources/'
    successful = []
    failed = []
    for num in gConfig.getValue( basePath + 'toUse', [] ):
      path = basePath + 'sub' + num + '/'
      # The 'good' flag decides which list receives the value
      if gConfig.getValue( path + 'good', False ):
        target = successful
      else:
        target = failed
      target.append( gConfig.getValue( path + 'val', 'unknown' ) )

    return S_OK( { 'Successful': successful, 'Failed' : failed } )
  def _getConfigStorageOptions( self, storageName ):
    """ Collect the CS options of a StorageElement together with its status.

        The ReadAccess, WriteAccess, CheckAccess and RemoveAccess options are
        left out; the status entries reported by resourceStatus for the
        storage are merged into the result.

        :param storageName: name of the StorageElement section below self.rootConfigPath
        :return: S_OK( optionsDict ), or S_ERROR when the options or the status
                 cannot be obtained
    """
    storageConfigPath = cfgPath( self.rootConfigPath, storageName )
    res = gConfig.getOptions( storageConfigPath )
    if not res['OK']:
      errStr = "StorageFactory._getStorageOptions: Failed to get storage options."
      gLogger.error( errStr, "%s: %s" % ( storageName, res['Message'] ) )
      return S_ERROR( errStr )

    accessOptions = ( 'ReadAccess', 'WriteAccess', 'CheckAccess', 'RemoveAccess' )
    optionsDict = {}
    for option in res['Value']:
      if option in accessOptions:
        continue
      # 'VO' is read as a list, every other option as a string
      default = [] if option == 'VO' else ''
      optionsDict[option] = gConfig.getValue( cfgPath( storageConfigPath, option ), default )

    res = self.resourceStatus.getStorageElementStatus( storageName )
    if not res[ 'OK' ]:
      errStr = "StorageFactory._getStorageOptions: Failed to get storage status"
      gLogger.error( errStr, "%s: %s" % ( storageName, res['Message'] ) )
      return S_ERROR( errStr )

    # For safety, the ${statusType}Access keys are not added:
    # that would require modifications in the StorageElement class.
    # The dictionary { 'statusType1' : 'status1', 'statusType2' : 'status2' ... } is merged in.
    optionsDict.update( res[ 'Value' ][ storageName ] )

    return S_OK( optionsDict )
Beispiel #23
0
  def execute(self):
    """The PilotAgent execution method.

    Refreshes the PilotStalledDays and GridEnv options, handles the old pilots
    over a dedicated DB connection and asks the WMSAdministrator to clear
    pilots.

    :return: the failed result if no DB connection can be obtained, otherwise S_OK()
    """

    self.pilotStalledDays = self.am_getOption('PilotStalledDays', 3)
    self.gridEnv = self.am_getOption('GridEnv')
    if not self.gridEnv:
      # No specific option found, try a general one
      setup = gConfig.getValue('/DIRAC/Setup', '')
      if setup:
        instance = gConfig.getValue('/DIRAC/Setups/%s/WorkloadManagement' % setup, '')
        if instance:
          self.gridEnv = gConfig.getValue('/Systems/WorkloadManagement/%s/GridEnv' % instance, '')
    result = self.pilotDB._getConnection()
    if not result['OK']:
      return result
    connection = result['Value']

    # Now handle pilots not updated in the last N days (most likely the Broker is no
    # longer available) and declare them Deleted.
    try:
      self.handleOldPilots(connection)
    finally:
      # Give the connection back even when the handling raises
      connection.close()

    result = self.WMSAdministrator.clearPilots(self.clearPilotsDelay, self.clearAbortedDelay)
    if not result['OK']:
      self.log.warn('Failed to clear old pilots in the PilotAgentsDB')

    return S_OK()
Beispiel #24
0
  def execute( self ):
    """ Main execution method

        Does nothing but log when the current setup is not monitored or when
        this host is listed as being in maintenance. Otherwise the overall
        status is requested from the admin client and the reported services
        and agents are processed.

        :return: S_OK() in every case
    """
    component = "Framework/LemonAgent"

    monitoredSetups = gConfig.getValue('/Operations/lhcb/Lemon/MonitoredSetups', ['LHCb-Production'])
    self.monitoringEnabled = self.setup in monitoredSetups
    if not self.monitoringEnabled:
      self._log(component, self.NON_CRITICAL, self.OK, "Monitoring not enabled for this setup: " + self.setup +". Exiting.")
      return S_OK()

    hostsInMaintenance = gConfig.getValue('/Operations/lhcb/Lemon/HostsInMaintenance',[])
    if gethostname() in hostsInMaintenance:
      self._log(component, self.NON_CRITICAL, self.OK, "I am in maintenance mode, exiting.")
      return S_OK()

    result = self.admClient.getOverallStatus()
    if not result or not result['OK']:
      self._log(component, self.CRITICAL, self.FAILURE, "Can not obtain result!!")
      return S_OK()

    overallStatus = result[ 'Value' ]
    # Both entries are looked up before anything gets processed
    for components in ( overallStatus[ 'Services' ], overallStatus[ 'Agents' ] ):
      self._processResults(components)

    return S_OK()
Beispiel #25
0
  def configure( self, csSection, submitPool ):
    """
     Common configuration for all PilotDirectors: load the options of the
     given section, reload the submit pool configuration, pick up the pilot
     version and installation from the Operations section of the VO and setup,
     log the resulting configuration and purge the expired list-match cache
     entries.

     :param csSection: CS section of the director
     :param submitPool: submit pool handed over to reloadConfiguration
    """
    self.configureFromSection( csSection )
    self.reloadConfiguration( csSection, submitPool )

    currentSetup = gConfig.getValue( '/DIRAC/Setup', '' )
    versionsSection = cfgPath( 'Operations', self.virtualOrganization, currentSetup, 'Versions' )
    # The values already set are kept when the options are not defined
    versionOption = cfgPath( versionsSection, 'PilotVersion' )
    self.installVersion = gConfig.getValue( versionOption, self.installVersion )
    installationOption = cfgPath( versionsSection, 'PilotInstallation' )
    self.installInstallation = gConfig.getValue( installationOption, self.installInstallation )

    logInfo = self.log.info
    logInfo( '===============================================' )
    logInfo( 'Configuration:' )
    logInfo( '' )
    logInfo( ' Install script: ', self.install )
    logInfo( ' Pilot script:   ', self.pilot )
    logInfo( ' Install Ver:    ', self.installVersion )
    if self.installInstallation:
      logInfo( ' Installation:        ', self.installInstallation )
    if self.extraPilotOptions:
      logInfo( ' Exta Options:   ', ' '.join( self.extraPilotOptions ) )
    logInfo( ' ListMatch:      ', self.enableListMatch )
    logInfo( ' Private %:      ', self.privatePilotFraction * 100 )
    if self.enableListMatch:
      logInfo( ' ListMatch Delay:', self.listMatchDelay )
    self.listMatchCache.purgeExpired()
Beispiel #26
0
def initializePlottingHandler( serviceInfo ):
  """
  Prepare the data directory of the Framework/Plotting service.

  The location is read from the DataLocation option of the service section
  (default "data/graphs"); a relative value is resolved against
  /LocalSite/InstancePath. The directory is created when missing and probed
  for writability by creating and deleting a test file.

  :param serviceInfo: service description handed in by the caller (not used here)
  :return: S_OK() on success, S_ERROR if the data location is not writable
  """
  #Get data location
  plottingSection = PathFinder.getServiceSection( "Framework/Plotting" )
  dataPath = gConfig.getValue( "%s/DataLocation" % plottingSection, "data/graphs" )
  dataPath = dataPath.strip()
  # startswith() rather than dataPath[0]: a blank option must not raise
  # IndexError, it is simply handled like any other relative path
  if not dataPath.startswith( "/" ):
    dataPath = os.path.realpath( "%s/%s" % ( gConfig.getValue( '/LocalSite/InstancePath', rootPath ), dataPath ) )
  gLogger.info( "Data will be written into %s" % dataPath )
  try:
    os.makedirs( dataPath )
  except OSError:
    # Best effort: the directory usually exists already. Any real problem is
    # caught by the writability probe right below.
    pass
  try:
    testFile = "%s/plot__.test" % dataPath
    # open() instead of the Python 2 only file() builtin
    fd = open( testFile, "w" )
    fd.close()
    os.unlink( testFile )
  except ( IOError, OSError ):
    # os.unlink reports failures as OSError, which is not an IOError on Python 2
    gLogger.fatal( "Can't write to %s" % dataPath )
    return S_ERROR( "Data location is not writable" )

  gPlotCache.setPlotsLocation( dataPath )
  gMonitor.registerActivity( "plotsDrawn", "Drawn plot images", "Plotting requests", "plots", gMonitor.OP_SUM )
  return S_OK()
Beispiel #27
0
  def getSiteProtocols( self, site, printOutput = False ):
    """Allows to check the defined protocols for each site SE.

       For every SE listed for the site, the ProtocolsList option of its SRM2
       protocol section is looked up; when it is empty the default list from
       /Resources/StorageElements/DefaultProtocols is used instead.

       :param site: site name; the part before the first '.' selects the
                    section under /Resources/Sites
       :param printOutput: when True, also print a summary table
       :return: S_OK( { seName : protocolsList } ), or the failing result /
                S_ERROR when the site is invalid, has no SEs or the SE
                sections can not be read
    """
    result = self.__checkSiteIsValid( site )
    if not result['OK']:
      return result

    siteSection = '/Resources/Sites/%s/%s/SE' % ( site.split( '.' )[0], site )
    siteSEs = gConfig.getValue( siteSection, [] )
    if not siteSEs:
      return S_ERROR( 'No SEs found for site %s in section %s' % ( site, siteSection ) )

    defaultProtocols = gConfig.getValue( '/Resources/StorageElements/DefaultProtocols', [] )
    # The comma between message and value was missing: the two adjacent string
    # literals were concatenated and used as the join separator, so the
    # message itself was never logged.
    self.log.verbose( 'Default list of protocols are', ', '.join( defaultProtocols ) )
    seInfo = {}
    siteSEs.sort()
    for se in siteSEs:
      sections = gConfig.getSections( '/Resources/StorageElements/%s/' % ( se ) )
      if not sections['OK']:
        return sections
      for section in sections['Value']:
        # Only SRM2 protocol sections carry the list we are interested in
        if gConfig.getValue( '/Resources/StorageElements/%s/%s/ProtocolName' % ( se, section ), '' ) == 'SRM2':
          path = '/Resources/StorageElements/%s/%s/ProtocolsList' % ( se, section )
          seProtocols = gConfig.getValue( path, [] )
          if not seProtocols:
            seProtocols = defaultProtocols
          seInfo[se] = seProtocols

    if printOutput:
      # Single-argument print( ... ) gives the same output as the print statement
      print( '\nSummary of protocols for StorageElements at site %s' % site )
      print( '\nStorageElement'.ljust( 30 ) + 'ProtocolsList'.ljust( 30 ) + '\n' )
      for se, protocols in seInfo.items():
        print( se.ljust( 30 ) + ', '.join( protocols ).ljust( 30 ) )

    return S_OK( seInfo )
Beispiel #28
0
    def __init__(self):
        """Set up the module with its default state.

        Configuration-driven values (log SE, size limit, logo URL, local root
        and failover SEs) are read here; the job and workflow related
        attributes start out empty.
        """
        super(UploadLogFile, self).__init__()
        self.version = __RCSID__
        self.log = gLogger.getSubLogger("UploadLogFile")

        # Job / workflow context, not known at construction time
        self.PRODUCTION_ID = None
        self.JOB_ID = None
        self.workflow_commons = None
        self.request = None

        # Log locations start empty
        self.logFilePath = ""
        self.logLFNPath = ""
        self.logdir = ""

        # Values taken from the configuration
        # NOTE(review): self.ops is presumably provided by the base class - confirm
        self.logSE = self.ops.getValue("/LogStorage/LogSE", "LogSE")
        self.root = gConfig.getValue("/LocalSite/Root", os.getcwd())
        defaultSizeLimit = 20 * 1024 * 1024
        self.logSizeLimit = self.ops.getValue("/LogFiles/SizeLimit", defaultSizeLimit)
        self.logExtensions = []
        self.failoverSEs = gConfig.getValue("/Resources/StorageElementGroups/Tier1-Failover", [])
        defaultLogo = "https://lhcbweb.pic.es/DIRAC/images/logos/DIRAC-logo-transp.png"
        self.diracLogo = self.ops.getValue("/SAM/LogoURL", defaultLogo)
        self.rm = ReplicaManager()

        self.experiment = "CLIC"
        self.enable = True
        # flag to put log files to failover by default
        self.failoverTest = False
        self.jobID = ""
Beispiel #29
0
  def _getAccessParams( self, element ):
    '''
      Find the host and port to use for the given CE.

      Every site below Resources/Sites is scanned for a CE option listing
      the element. For the first match, the SSHHost option of that CE (cut
      at the first '/') is returned with port 22 when it is set, otherwise
      the element name itself with port 8443.
      Returns S_ERROR when no site lists the element, or the failing result
      when a section listing can not be read.
    '''
    sitesRoot = 'Resources/Sites'

    gridsResult = gConfig.getSections( sitesRoot )
    if not gridsResult[ 'OK' ]:
      return gridsResult

    for grid in gridsResult[ 'Value' ]:
      sitesResult = gConfig.getSections( '%s/%s' % ( sitesRoot, grid ) )
      if not sitesResult[ 'OK' ]:
        return sitesResult

      for siteName in sitesResult[ 'Value' ]:
        ceOption = gConfig.getValue( '%s/%s/%s/CE' % ( sitesRoot, grid, siteName ), '' )
        ceNames = [ ceName.strip() for ceName in ceOption.split( ',' ) ]
        if element not in ceNames:
          continue

        sshHost = gConfig.getValue( '%s/%s/%s/CEs/%s/SSHHost' % ( sitesRoot, grid, siteName, element ) )
        if not sshHost:
          return S_OK( ( element, 8443 ) )
        # Keep only what precedes the first '/'
        return S_OK( ( sshHost.split( '/', 1 )[ 0 ], 22 ) )

    return S_ERROR( '%s is not a vaild CE.' % element )
                  
def initializeMonitoringHandler( serviceInfo ):
  """
  Prepare the data directory and backend of the Framework/Monitoring service.

  The location is read from the DataLocation option of the service section
  (default "data/monitoring"); a relative value is resolved against
  /LocalSite/InstancePath. The directory is created when missing and probed
  for writability before the service interface and its DB are initialized.

  :param serviceInfo: service description handed in by the caller (not used here)
  :return: S_OK() on success, S_ERROR if the data location is not writable
           or the db engine can not be started
  """
  #Check that the path is writable
  monitoringSection = PathFinder.getServiceSection( "Framework/Monitoring" )
  #Get data location
  dataPath = gConfig.getValue( "%s/DataLocation" % monitoringSection, "data/monitoring" )
  dataPath = dataPath.strip()
  # startswith() rather than dataPath[0]: a blank option must not raise
  # IndexError, it is simply handled like any other relative path
  if not dataPath.startswith( "/" ):
    dataPath = os.path.realpath( "%s/%s" % ( gConfig.getValue( '/LocalSite/InstancePath', rootPath ), dataPath ) )
  gLogger.info( "Data will be written into %s" % dataPath )
  try:
    os.makedirs( dataPath )
  except OSError:
    # Best effort: the directory usually exists already. Any real problem is
    # caught by the writability probe right below.
    pass
  try:
    testFile = "%s/mon.jarl.test" % dataPath
    # open() instead of the Python 2 only file() builtin
    fd = open( testFile, "w" )
    fd.close()
    os.unlink( testFile )
  except ( IOError, OSError ):
    # os.unlink reports failures as OSError, which is not an IOError on Python 2
    gLogger.fatal( "Can't write to %s" % dataPath )
    return S_ERROR( "Data location is not writable" )
  #Define globals
  gServiceInterface.initialize( dataPath )
  if not gServiceInterface.initializeDB():
    return S_ERROR( "Can't start db engine" )
  gMonitor.registerActivity( "cachedplots", "Cached plot images", "Monitoring plots", "plots", gMonitor.OP_SUM )
  gMonitor.registerActivity( "drawnplots", "Drawn plot images", "Monitoring plots", "plots", gMonitor.OP_SUM )
  return S_OK()
    def _updateSoftwarePy2(self, version, rootPath, diracOSVersion):
        """Update the local DIRAC software installation to version

        Runs ``dirac-install`` (downloaded to a temporary file when it is not
        found on the PATH) as a server installation, with the configured
        extensions, project and the local ``etc/dirac.cfg``.

        :param version: release to install, passed to the installer with ``-r``
        :param rootPath: optional installation root, passed with ``-P``; must exist
        :param diracOSVersion: not used by this method
        :return: S_OK() when the installer exits with status 0, S_ERROR otherwise
        """
        # Check that we have a sane local configuration
        result = gConfig.getOptionsDict("/LocalInstallation")
        if not result["OK"]:
            return S_ERROR(
                "Invalid installation - missing /LocalInstallation section in the configuration"
            )
        elif not result["Value"]:
            return S_ERROR(
                "Invalid installation - empty /LocalInstallation section in the configuration"
            )

        if rootPath and not os.path.exists(rootPath):
            return S_ERROR('Path "%s" does not exists' % rootPath)

        installer = None
        # try/finally so the temporary installer file is released on every
        # exit path, including the early error returns below
        try:
            if not find_executable("dirac-install"):
                installer = tempfile.NamedTemporaryFile(suffix=".py", mode="wb")
                with requests.get(
                        "https://raw.githubusercontent.com/DIRACGrid/management/master/dirac-install.py",
                        stream=True) as r:
                    if not r.ok:
                        return S_ERROR(
                            "Failed to download dirac-install from management repo"
                        )
                    for chunk in r.iter_content(chunk_size=1024**2):
                        installer.write(chunk)
                # Make sure the whole script is on disk before it is executed
                installer.flush()
                self.log.info("Downloaded dirac-install.py py2 installer to",
                              installer.name)
                cmdList = ["python", installer.name, "-r", version, "-t", "server"]
            else:
                cmdList = ["dirac-install", "-r", version, "-t", "server"]

            if rootPath:
                cmdList.extend(["-P", rootPath])

            # Check if there are extensions. Work on a copy so the list returned
            # by getCSExtensions() is never modified, and tolerate an empty result.
            extensionList = list(getCSExtensions() or [])
            # By default we do not install WebApp
            if "WebApp" in extensionList:
                extensionList.remove("WebApp")

            webPortal = gConfig.getValue("/LocalInstallation/WebApp", False)
            if webPortal and "WebAppDIRAC" not in extensionList:
                extensionList.append("WebAppDIRAC")

            cmdList += ["-e", ",".join(extensionList)]

            project = gConfig.getValue("/LocalInstallation/Project")
            if project:
                cmdList += ["-l", project]

            targetPath = gConfig.getValue(
                "/LocalInstallation/TargetPath",
                gConfig.getValue("/LocalInstallation/RootPath", ""))
            if targetPath and os.path.exists(targetPath + "/etc/dirac.cfg"):
                cmdList.append(targetPath + "/etc/dirac.cfg")
            else:
                return S_ERROR("Local configuration not found")

            result = systemCall(240, cmdList)
        finally:
            if installer:
                installer.close()

        if not result["OK"]:
            return result
        status = result["Value"][0]
        if status == 0:
            return S_OK()
        # Get error messages
        error = [
            line.strip() for line in result["Value"][1].split("\n")
            if "error" in line.lower()
        ]
        # Join first, then fall back: joining the fallback string itself would
        # put a newline between each of its characters
        return S_ERROR("\n".join(error) or
                       "Failed to update software to %s" % version)
Beispiel #32
0
def main():
    """Collect the components installed on all hosts and register them.

    Every host known to the SystemAdministratorIntegrator is queried (except
    those in the global excludedHosts list, which can be extended with the
    -e/--exclude switch). Hosts that can not be contacted are excluded and
    reported by mail. For the remaining hosts belonging to the current setup,
    one record is built per installed component and per installed DIRAC
    database, and each record is passed to
    MonitoringUtilities.monitorInstallation().

    The script exits with -1 when the proxy information or any of the
    host-wide queries can not be obtained.
    """
    global excludedHosts
    Script.registerSwitch(
        "e:", "exclude=",
        "Comma separated list of hosts to be excluded from the scanning process",
        setExcludedHosts)
    Script.parseCommandLine(ignoreErrors=False)

    # An empty string disables the component type filter used further below
    componentType = ""

    # Get my setup
    mySetup = gConfig.getValue("DIRAC/Setup")

    # Retrieve information from all the hosts
    client = SystemAdministratorIntegrator(exclude=excludedHosts)
    resultAll = client.getOverallStatus()
    if not resultAll["OK"]:
        gLogger.error(resultAll["Message"])
        DIRACexit(-1)

    # Retrieve user installing the component
    result = getProxyInfo()
    if result["OK"]:
        user = result["Value"]["username"]
    else:
        DIRACexit(-1)
    if not user:
        user = "******"

    for host in resultAll["Value"]:
        if not resultAll["Value"][host]["OK"]:
            # If the host cannot be contacted, exclude it and send message
            excludedHosts.append(host)

            result = NotificationClient().sendMail(
                Operations().getValue("EMail/Production", []),
                "Unreachable host",
                "\ndirac-populate-component-db: Could not fill the database with the components from unreachable host %s\n"
                % host,
            )
            if not result["OK"]:
                gLogger.error(
                    "Can not send unreachable host notification mail: %s" %
                    result["Message"])

    # Remaining per-host queries; a failure of any of them aborts the script
    resultHosts = client.getHostInfo()
    if not resultHosts["OK"]:
        gLogger.error(resultHosts["Message"])
        DIRACexit(-1)
    resultInfo = client.getInfo()
    if not resultInfo["OK"]:
        gLogger.error(resultInfo["Message"])
        DIRACexit(-1)
    resultMySQL = client.getMySQLStatus()
    if not resultMySQL["OK"]:
        gLogger.error(resultMySQL["Message"])
        DIRACexit(-1)
    resultAllDB = client.getDatabases()
    if not resultAllDB["OK"]:
        gLogger.error(resultAllDB["Message"])
        DIRACexit(-1)
    resultAvailableDB = client.getAvailableDatabases()
    if not resultAvailableDB["OK"]:
        gLogger.error(resultAvailableDB["Message"])
        DIRACexit(-1)

    records = []
    # Only hosts that answered the overall status query are processed
    finalSet = list(set(resultAll["Value"]) - set(excludedHosts))
    for host in finalSet:
        hasMySQL = True
        result = resultAll["Value"][host]
        hostResult = resultHosts["Value"][host]
        infoResult = resultInfo["Value"][host]
        mySQLResult = resultMySQL["Value"][host]
        allDBResult = resultAllDB["Value"][host]
        availableDBResult = resultAvailableDB["Value"][host]

        # Skip the host as soon as one of its individual answers is an error
        if not result["OK"]:
            gLogger.error("Host %s: %s" % (host, result["Message"]))
            continue
        if not hostResult["OK"]:
            gLogger.error("Host %s: %s" % (host, hostResult["Message"]))
            continue
        if not infoResult["OK"]:
            gLogger.error("Host %s: %s" % (host, infoResult["Message"]))
            continue
        # The database answers only matter when the MySQL status query succeeded
        if mySQLResult["OK"]:
            if not allDBResult["OK"]:
                gLogger.error("Host %s: %s" % (host, allDBResult["Message"]))
                continue
            if not availableDBResult["OK"]:
                gLogger.error("Host %s: %s" %
                              (host, availableDBResult["Message"]))
                continue
        else:
            hasMySQL = False

        # Hosts belonging to another setup are ignored
        setup = infoResult["Value"]["Setup"]
        if setup != mySetup:
            continue

        cpu = hostResult["Value"]["CPUModel"].strip()
        rDict = result["Value"]
        # Components other than databases
        for compType in rDict:
            if componentType and componentType != compType:
                continue
            for system in rDict[compType]:
                components = sorted(rDict[compType][system])
                for component in components:
                    record = {"Installation": {}, "Component": {}, "Host": {}}
                    # ComponentMonitoring itself is never recorded
                    if rDict[compType][system][component][
                            "Installed"] and component != "ComponentMonitoring":
                        runitStatus = str(
                            rDict[compType][system][component]["RunitStatus"])
                        if runitStatus != "Unknown":
                            module = str(
                                rDict[compType][system][component]["Module"])
                            record["Component"]["System"] = system
                            record["Component"]["Module"] = module
                            # Transform 'Services' into 'service', 'Agents' into 'agent' ...
                            record["Component"]["Type"] = compType.lower()[:-1]
                            record["Host"]["HostName"] = host
                            record["Host"]["CPU"] = cpu
                            record["Installation"]["Instance"] = component
                            record["Installation"][
                                "InstallationTime"] = datetime.utcnow()
                            record["Installation"]["InstalledBy"] = user
                            records.append(record)

        # Databases
        # NOTE(review): the configuration is fetched again for every host and
        # the returned result is not checked for "OK" before "Value" is read
        csClient = CSAPI()
        cfg = csClient.getCurrentCFG()["Value"]

        if hasMySQL:
            allDB = allDBResult["Value"]
            availableDB = availableDBResult["Value"]

            for db in allDB:
                # Check for DIRAC only databases
                if db in availableDB and db != "InstalledComponentsDB":
                    # Check for 'installed' databases
                    # The section path is Systems/<System>/<instance>/Databases/<db>/,
                    # the instance being the DIRAC/Setups/<setup>/<System> option
                    isSection = cfg.isSection(
                        "Systems/" + availableDB[db]["System"] + "/" +
                        cfg.getOption("DIRAC/Setups/" + setup + "/" +
                                      availableDB[db]["System"]) +
                        "/Databases/" + db + "/")
                    if isSection:
                        record = {
                            "Installation": {},
                            "Component": {},
                            "Host": {}
                        }
                        record["Component"]["System"] = availableDB[db][
                            "System"]
                        record["Component"]["Module"] = db
                        record["Component"]["Type"] = "DB"
                        record["Host"]["HostName"] = host
                        record["Host"]["CPU"] = cpu
                        record["Installation"]["Instance"] = db
                        record["Installation"][
                            "InstallationTime"] = datetime.utcnow()
                        record["Installation"]["InstalledBy"] = user
                        records.append(record)

    # NOTE(review): monitoringClient is created but not used in this function
    monitoringClient = ComponentMonitoringClient()

    # Add the installations to the database
    # Failures are logged and do not stop the processing of the other records
    for record in records:
        result = MonitoringUtilities.monitorInstallation(
            record["Component"]["Type"],
            record["Component"]["System"],
            record["Installation"]["Instance"],
            record["Component"]["Module"],
            record["Host"]["CPU"],
            record["Host"]["HostName"],
        )
        if not result["OK"]:
            gLogger.error(result["Message"])
Beispiel #33
0
    def __getPilotOptions(self, queue, pilotsToSubmit):
        """ Prepare pilot options

        Builds the list of command line options for the pilots of the given
        queue and requests a proxy token for them.

        :param queue: key of self.queueDict describing the target queue
        :param pilotsToSubmit: number of pilots the caller would like to submit
        :return: [pilotOptions, pilotsToSubmit], where pilotsToSubmit may have
                 been adjusted to what the proxy token allows, or [None, None]
                 when the setup, the pilot version or the token is not available
        """

        queueDict = self.queueDict[queue]['ParametersDict']
        pilotOptions = []

        setup = gConfig.getValue("/DIRAC/Setup", "unknown")
        if setup == 'unknown':
            self.log.error('Setup is not defined in the configuration')
            return [None, None]
        pilotOptions.append('-S %s' % setup)
        opsHelper = Operations.Operations(group=self.pilotGroup, setup=setup)

        #Installation defined?
        installationName = opsHelper.getValue("Pilot/Installation", "")
        if installationName:
            pilotOptions.append('-V %s' % installationName)

        #Project defined?
        projectName = opsHelper.getValue("Pilot/Project", "")
        if projectName:
            pilotOptions.append('-l %s' % projectName)
        else:
            self.log.info('DIRAC project will be installed by pilots')

        #Request a release
        diracVersion = opsHelper.getValue("Pilot/Version", [])
        if not diracVersion:
            self.log.error('Pilot/Version is not defined in the configuration')
            return [None, None]
        #diracVersion is a list of accepted releases. Just take the first one
        pilotOptions.append('-r %s' % diracVersion[0])

        ownerDN = self.pilotDN
        ownerGroup = self.pilotGroup
        # Request token for maximum pilot efficiency
        result = gProxyManager.requestToken(
            ownerDN, ownerGroup, pilotsToSubmit * self.maxJobsInFillMode)
        if not result['OK']:
            self.log.error('Invalid proxy token request', result['Message'])
            return [None, None]
        (token, numberOfUses) = result['Value']
        pilotOptions.append('-o /Security/ProxyToken=%s' % token)

        # Number of jobs a single pilot may execute in filling mode
        jobsPerPilot = min(numberOfUses, self.maxJobsInFillMode)
        if jobsPerPilot <= 0:
            # A token without uses (or a zero fill limit) can not drive any
            # pilot; report it instead of dividing by zero below
            self.log.error('Proxy token does not allow any job execution')
            return [None, None]
        # Use Filling mode
        pilotOptions.append('-M %s' % jobsPerPilot)

        # Since each pilot will execute min( numberOfUses, self.maxJobsInFillMode )
        # with numberOfUses tokens we can submit at most:
        #    numberOfUses / min( numberOfUses, self.maxJobsInFillMode )
        # pilots. Floor division keeps the count an integer on Python 3 as well.
        newPilotsToSubmit = numberOfUses // jobsPerPilot
        if newPilotsToSubmit != pilotsToSubmit:
            self.log.info(
                'Number of pilots to submit is changed to %d after getting the proxy token'
                % newPilotsToSubmit)
            pilotsToSubmit = newPilotsToSubmit
        # Debug
        if self.pilotLogLevel.lower() == 'debug':
            pilotOptions.append('-d')
        # CS Servers
        csServers = gConfig.getValue("/DIRAC/Configuration/Servers", [])
        pilotOptions.append('-C %s' % ",".join(csServers))
        # DIRAC Extensions
        extensionsList = CSGlobals.getCSExtensions()
        if extensionsList:
            pilotOptions.append('-e %s' % ",".join(extensionsList))
        # Requested CPU time
        pilotOptions.append('-T %s' % queueDict['CPUTime'])
        # CEName
        pilotOptions.append('-N %s' % self.queueDict[queue]['CEName'])
        # SiteName
        pilotOptions.append('-n %s' % queueDict['Site'])
        if 'ClientPlatform' in queueDict:
            pilotOptions.append("-p '%s'" % queueDict['ClientPlatform'])

        if 'SharedArea' in queueDict:
            pilotOptions.append("-o '/LocalSite/SharedArea=%s'" %
                                queueDict['SharedArea'])

        # An SI00 rating takes precedence over explicitly configured factors
        if 'SI00' in queueDict:
            factor = float(queueDict['SI00']) / 250.
            pilotOptions.append("-o '/LocalSite/CPUScalingFactor=%s'" % factor)
            pilotOptions.append("-o '/LocalSite/CPUNormalizationFactor=%s'" %
                                factor)
        else:
            if 'CPUScalingFactor' in queueDict:
                pilotOptions.append("-o '/LocalSite/CPUScalingFactor=%s'" %
                                    queueDict['CPUScalingFactor'])
            if 'CPUNormalizationFactor' in queueDict:
                pilotOptions.append(
                    "-o '/LocalSite/CPUNormalizationFactor=%s'" %
                    queueDict['CPUNormalizationFactor'])

        # Hack
        if self.defaultSubmitPools:
            pilotOptions.append(
                '-o /Resources/Computing/CEDefaults/SubmitPool=%s' %
                self.defaultSubmitPools)

        if self.group:
            pilotOptions.append('-G %s' % self.group)

        self.log.verbose("pilotOptions: ", ' '.join(pilotOptions))

        return [pilotOptions, pilotsToSubmit]
Beispiel #34
0
    def initialize(self, loops=0):
        """Set the default parameters and create the CE instance.

        :param loops: value stored in the 'MaxCycles' agent option
        :return: S_OK() on success, or the failing result when the CE can not
                 be instantiated or does not provide its description
        """
        # Disable monitoring, logLevel INFO, limited cycles
        self.am_setOption('MonitoringEnabled', False)
        self.am_setOption('MaxCycles', loops)

        # A CE defined in the local configuration overrides the agent option
        ceType = self.am_getOption('CEType', self.ceName)
        configuredLocalCE = gConfig.getValue('/LocalSite/LocalCE', '')
        if configuredLocalCE:
            self.log.info('Defining CE from local configuration',
                          '= %s' % configuredLocalCE)
            ceType = configuredLocalCE

        # Create backend Computing Element
        factory = ComputingElementFactory()
        self.ceName = ceType
        creation = factory.getCE(ceType)
        if not creation['OK']:
            self.log.warn("Can't instantiate a CE", creation['Message'])
            return creation
        self.computingElement = creation['Value']

        description = self.computingElement.getDescription()
        if not description['OK']:
            self.log.warn("Can not get the CE description")
            return description
        # The description may come as a list, in which case the first entry is used
        ceDict = description['Value']
        if isinstance(ceDict, list):
            ceDict = ceDict[0]
        # CE value first, then the CEDefaults option on top of it
        self.timeLeft = ceDict.get('CPUTime', self.timeLeft)
        self.timeLeft = gConfig.getValue(
            '/Resources/Computing/CEDefaults/MaxCPUTime', self.timeLeft)

        self.initTimes = os.times()

        # Localsite options
        self.siteName = gConfig.getValue('/LocalSite/Site', self.siteName)
        self.pilotReference = gConfig.getValue(
            '/LocalSite/PilotReference', self.pilotReference)
        self.defaultProxyLength = gConfig.getValue(
            '/Registry/DefaultProxyLifeTime', self.defaultProxyLength)

        # Agent options
        # This is the factor to convert raw CPU to Normalized units (based on the CPU Model)
        self.cpuFactor = gConfig.getValue(
            '/LocalSite/CPUNormalizationFactor', self.cpuFactor)
        self.jobSubmissionDelay = self.am_getOption(
            'SubmissionDelay', self.jobSubmissionDelay)
        self.fillingMode = self.am_getOption(
            'FillingModeFlag', self.fillingMode)
        self.minimumTimeLeft = self.am_getOption(
            'MinimumTimeLeft', self.minimumTimeLeft)
        self.stopOnApplicationFailure = self.am_getOption(
            'StopOnApplicationFailure', self.stopOnApplicationFailure)
        self.stopAfterFailedMatches = self.am_getOption(
            'StopAfterFailedMatches', self.stopAfterFailedMatches)
        self.extraOptions = gConfig.getValue(
            '/AgentJobRequirements/ExtraOptions', self.extraOptions)

        # Timeleft
        self.timeLeftUtil = TimeLeft()
        return S_OK()
Beispiel #35
0
    def setSiteProtocols(self, site, protocolsList, printOutput=False):
        """
    Set the protocols list of every SRM2 storage element of a site.

    Each requested protocol must belong to the default protocols list. After
    confirmation by the user, the ProtocolsList option of every SRM2 section
    of the site SEs is set and the changes are committed to the CS.
    Returns S_OK() on success or when the user declines, S_ERROR / the
    failing result otherwise.
    """
        validity = self.__checkSiteIsValid(site)
        if not validity['OK']:
            return validity

        siteSection = '/Resources/Sites/%s/%s/SE' % (site.split('.')[0], site)
        siteSEs = gConfig.getValue(siteSection, [])
        if not siteSEs:
            return S_ERROR('No SEs found for site %s in section %s' %
                           (site, siteSection))

        defaultProtocols = gConfig.getValue(
            '/Resources/StorageElements/DefaultProtocols', [])
        knownProtocols = ', '.join(defaultProtocols)
        self.log.verbose('Default list of protocols are', knownProtocols)

        # Refuse the request on the first protocol that is not a default one
        rejected = [name for name in protocolsList
                    if name not in defaultProtocols]
        if rejected:
            return S_ERROR(
                'Requested to set protocol %s in list but %s is not '
                'in default list of protocols:\n%s' %
                (rejected[0], rejected[0], ', '.join(defaultProtocols)))

        answer = promptUser(
            'Do you want to add the following default protocols:'
            ' %s for SE(s):\n%s' %
            (', '.join(protocolsList), ', '.join(siteSEs)))
        if not answer['OK']:
            return answer
        if answer['Value'].lower() != 'y':
            self.log.always('No protocols will be added')
            return S_OK()

        modifiedCS = False
        for se in siteSEs:
            seBase = '/Resources/StorageElements/%s' % se
            sections = gConfig.getSections('%s/' % seBase)
            if not sections['OK']:
                return sections
            for section in sections['Value']:
                protocolName = gConfig.getValue(
                    '%s/%s/ProtocolName' % (seBase, section), '')
                if protocolName != 'SRM2':
                    continue
                path = '%s/%s/ProtocolsList' % (seBase, section)
                newValue = ', '.join(protocolsList)
                self.log.verbose('Setting %s to %s' % (path, newValue))
                setResult = self.csSetOption(path, ', '.join(protocolsList))
                if not setResult['OK']:
                    return setResult
                modifiedCS = True

        if not modifiedCS:
            if printOutput:
                print('No modifications to CS required')
            return S_OK()

        commit = self.csCommitChanges(False)
        if not commit['OK']:
            return S_ERROR('CS Commit failed with message = %s' %
                           (commit['Message']))
        if printOutput:
            print('Successfully committed changes to CS')
        return S_OK()
Beispiel #36
0
 def __disabled(self):
     """Return True when the DisableMonitoring option of the configuration
     section is set to yes, y, true or 1 (case-insensitive)."""
     optionPath = "%s/DisableMonitoring" % self.cfgSection
     flag = gConfig.getValue(optionPath, "false")
     return flag.lower() in ("yes", "y", "true", "1")
Beispiel #37
0
    def execute(self):
        """ Run one cycle of the agent.

        Reads the agent options, derives the stalled and failed thresholds
        from the job wrapper checking time and then runs the four clean-up
        steps in turn. A failing step is logged and does not prevent the
        following ones from running.
    """

        self.log.verbose('Waking up Stalled Job Agent')

        wmsInstance = getSystemInstance('WorkloadManagement')
        if not wmsInstance:
            return S_ERROR(
                'Can not get the WorkloadManagement system instance')
        wrapperSection = cfgPath('Systems', 'WorkloadManagement',
                                 wmsInstance, 'JobWrapper')

        stalledCycles = self.am_getOption('StalledTimeHours', 2)
        failedCycles = self.am_getOption('FailedTimeHours', 6)
        self.stalledJobsTolerantSites = self.am_getOption(
            'StalledJobsTolerantSites', [])
        self.stalledJobsToleranceTime = self.am_getOption(
            'StalledJobsToleranceTime', 0)

        self.submittingTime = self.am_getOption(
            'SubmittingTime', self.submittingTime)
        self.matchedTime = self.am_getOption('MatchedTime', self.matchedTime)
        self.rescheduledTime = self.am_getOption(
            'RescheduledTime', self.rescheduledTime)

        self.log.verbose('StalledTime = %s cycles' % (stalledCycles))
        self.log.verbose('FailedTime = %s cycles' % (failedCycles))

        # The watchdog cycle is the configured checking time, but never less
        # than the configured minimum
        checkingTime = gConfig.getValue(
            cfgPath(wrapperSection, 'CheckingTime'), 30 * 60)
        minCheckingTime = gConfig.getValue(
            cfgPath(wrapperSection, 'MinCheckingTime'), 20 * 60)
        watchdogCycle = max(checkingTime, minCheckingTime)

        # Add half cycle to avoid race conditions
        stalledTime = int(watchdogCycle * (stalledCycles + 0.5))
        failedTime = int(watchdogCycle * (failedCycles + 0.5))

        # Note, jobs will be revived automatically during the heartbeat signal phase and
        # subsequent status changes will result in jobs not being selected by the
        # stalled job agent.
        steps = (
            (self._markStalledJobs, (stalledTime, ),
             'Failed to detect stalled jobs'),
            (self._failStalledJobs, (failedTime, ),
             'Failed to process stalled jobs'),
            (self._failSubmittingJobs, (),
             'Failed to process jobs being submitted'),
            (self._kickStuckJobs, (), 'Failed to kick stuck jobs'),
        )
        for step, stepArgs, failureMessage in steps:
            outcome = step(*stepArgs)
            if not outcome['OK']:
                self.log.error(failureMessage, outcome['Message'])

        return S_OK('Stalled Job Agent cycle complete')
Beispiel #38
0
  def __getSelectionData(self):
    callback = {}
    group = credentials.getSelectedGroup()
    user = str(credentials.getUsername())
    if len(request.params) > 0:
      tmp = {}
      for i in request.params:
        tmp[i] = str(request.params[i])
      callback["extra"] = tmp
      if callback["extra"].has_key("prod"):
        callback["extra"]["prod"] = callback["extra"]["prod"].zfill(8)
        if callback["extra"]["prod"] == "00000000":
          callback["extra"]["prod"] = ""
      gLogger.info(" - ",callback["extra"])
    if user == "Anonymous":
      callback["prod"] = [["Insufficient rights"]]
    else:
      RPC = getRPCClient("WorkloadManagement/JobMonitoring")
      result = RPC.getProductionIds()
      if result["OK"]:
        prod = []
        prods = result["Value"]
        if len(prods)>0:
          prod.append([str("All")])
          tmp = []
          for keys in prods:
            try:
              id = str(int(keys)).zfill(8)
            except:
              id = str(keys)
            tmp.append(str(id))
          tmp.sort(reverse=True)
          for i in tmp:
            prod.append([str(i)])
        else:
          prod = [["Nothing to display"]]
      else:
        gLogger.error("RPC.getProductionIds() return error: %s" % result["Message"])
        prod = [["Error happened on service side"]]
      callback["prod"] = prod
###
    RPC = getRPCClient("WorkloadManagement/JobMonitoring")
    result = RPC.getSites()
    if result["OK"]:
      tier1 = gConfig.getValue("/Website/PreferredSites",[]) # Always return a list
      site = []
      if len(result["Value"])>0:
        s = list(result["Value"])
        site.append([str("All")])
        for i in tier1:
          site.append([str(i)])
        for i in s:
          if i not in tier1:
            site.append([str(i)])    
      else:
        site = [["Nothing to display"]]
    else:
      gLogger.error("RPC.getSites() return error: %s" % result["Message"])
      site = [["Error happened on service side"]]
    callback["site"] = site
###
    result = RPC.getStates()
    if result["OK"]:
      stat = []
      if len(result["Value"])>0:
        stat.append([str("All")])
        for i in result["Value"]:
          stat.append([str(i)])
      else:
        stat = [["Nothing to display"]]
    else:
      gLogger.error("RPC.getStates() return error: %s" % result["Message"])
      stat = [["Error happened on service side"]]
    callback["status"] = stat
###
    result = RPC.getMinorStates()
    if result["OK"]:
      stat = []
      if len(result["Value"])>0:
        stat.append([str("All")])
        for i in result["Value"]:
          i = i.replace(",",";")
          stat.append([i])
      else:
        stat = [["Nothing to display"]]
    else:
      gLogger.error("RPC.getMinorStates() return error: %s" % result["Message"])
      stat = [["Error happened on service side"]]
    callback["minorstat"] = stat
###
    result = RPC.getApplicationStates()
    if result["OK"]:
      app = []
      if len(result["Value"])>0:
        app.append([str("All")])
        for i in result["Value"]:
          i = i.replace(",",";")
          app.append([i])
      else:
        app = [["Nothing to display"]]
    else:
      gLogger.error("RPC.getApplicationstates() return error: %s" % result["Message"])
      app = [["Error happened on service side"]]
    callback["app"] = app
###
    result = RPC.getJobTypes()
    if result["OK"]:
      types = []
      if len(result["Value"])>0:
        types.append([str("All")])
        for i in result["Value"]:
          i = i.replace(",",";")
          types.append([i])
      else:
        types = [["Nothing to display"]]
    else:
      gLogger.error("RPC.getJobTypes() return error: %s" % result["Message"])
      types = [["Error happened on service side"]]
    callback["types"] = types
###
    groupProperty = credentials.getProperties(group)
    if user == "Anonymous":
      callback["owner"] = [["Insufficient rights"]]
    elif ( "JobAdministrator" or "JobSharing" ) not in groupProperty:
      callback["owner"] = [["All"],[str(credentials.getUsername())]]
    else:
      result = RPC.getOwners()
      if result["OK"]:
        owner = []
        if len(result["Value"])>0:
          owner.append([str("All")])
          for i in result["Value"]:
            owner.append([str(i)])
        else:
          owner = [["Nothing to display"]]
      else:
        gLogger.error("RPC.getOwners() return error: %s" % result["Message"])
        owner = [["Error happened on service side"]]
      callback["owner"] = owner
    return callback
    def getCSDict(self, includeMasterCS=True):
        """Build, from the CS, the dictionary of information handed to pilots.

        The result holds a UTC timestamp, whatever ``_getPilotOptionsPerSetup``
        adds for each section found under /Operations, one entry per CE found
        under /Resources/Sites (with its queues' LocalCEType when set), the
        default setup (if any) and the list of configuration servers.

        :param bool includeMasterCS: when False the master configuration server
                                     is removed from "ConfigurationServers"
        :returns: S_OK(pilotDict); when a mandatory CS section cannot be read,
                  the failed gConfig result is returned unchanged
        :rtype: dict
        """

        def _cePath(gridName, siteName, *tail):
            # CS path of the CEs section of a site, or of anything below it
            return cfgPath("/Resources", "Sites", gridName, siteName, "CEs", *tail)

        pilotDict = {
            "timestamp": datetime.datetime.utcnow().isoformat(),
            "Setups": {},
            "CEs": {},
            "GenericPilotDNs": [],
        }

        self.log.info("-- Getting the content of the CS --")

        # The sections of /Operations are not only setups: they may also be
        # "Defaults" sections, or VOs in multi-VO installations
        operationsRes = gConfig.getSections("/Operations/")
        if not operationsRes["OK"]:
            self.log.error("Can't get sections from Operations", operationsRes["Message"])
            return operationsRes
        setupsInOperations = operationsRes["Value"]

        # The setups declared in this CS, to be compared with what /Operations holds
        diracSetupsRes = gConfig.getSections("DIRAC/Setups")
        if not diracSetupsRes["OK"]:
            self.log.error("Can't get sections from DIRAC/Setups", diracSetupsRes["Message"])
            return diracSetupsRes

        # Multi-VO CS: none of the /Operations sections is a known setup, so they
        # are taken to be VOs and their sub-sections are used instead.
        # Each readable VO replaces the list, i.e. the last readable one wins.
        if set(diracSetupsRes["Value"]).isdisjoint(setupsInOperations):
            for vo in list(setupsInOperations):
                voSetupsRes = gConfig.getSections("/Operations/%s" % vo)
                if voSetupsRes["OK"]:
                    setupsInOperations = voSetupsRes["Value"]

        self.log.verbose("From Operations/[Setup]/Pilot")
        for setup in setupsInOperations:
            self._getPilotOptionsPerSetup(setup, pilotDict)

        self.log.verbose("From Resources/Sites")
        gridsRes = gConfig.getSections("/Resources/Sites/")
        if not gridsRes["OK"]:
            self.log.error("Can't get sections from Resources", gridsRes["Message"])
            return gridsRes

        for grid in gridsRes["Value"]:
            sitesRes = gConfig.getSections("/Resources/Sites/" + grid)
            if not sitesRes["OK"]:
                self.log.error("Can't get sections from Resources", sitesRes["Message"])
                return sitesRes

            for site in sitesRes["Value"]:
                cesRes = gConfig.getSections(_cePath(grid, site))
                if not cesRes["OK"]:
                    # A site without CEs is only reported, not fatal
                    self.log.error("Site has no CEs! - skipping", site)
                    continue

                for ce in cesRes["Value"]:
                    ceInfo = {"Site": site}

                    # The CEType is like 'HTCondor' or 'ARC' etc.
                    ceType = gConfig.getValue(_cePath(grid, site, ce, "CEType"))
                    if ceType is None:
                        # Reported, but the CE is still registered (without GridCEType)
                        self.log.error("CE has no option CEType!", ce + " at " + site)
                    else:
                        ceInfo["GridCEType"] = ceType
                    pilotDict["CEs"][ce] = ceInfo

                    # The LocalCEType is like 'InProcess' or 'Pool' or 'Pool/Singularity' etc.
                    # It can be set at the CE level and/or at the queue level
                    ceLocalType = gConfig.getValue(_cePath(grid, site, ce, "LocalCEType"))
                    if ceLocalType is not None:
                        ceInfo.setdefault("LocalCEType", ceLocalType)

                    queuesRes = gConfig.getSections(_cePath(grid, site, ce, "Queues"))
                    if not queuesRes["OK"]:
                        # A CE without queues is only reported, not fatal
                        self.log.error("No queues found for CE", ce + ": " + queuesRes["Message"])
                        continue

                    for queue in queuesRes["Value"]:
                        queueLocalType = gConfig.getValue(
                            _cePath(grid, site, ce, "Queues", queue, "LocalCEType"))
                        if queueLocalType is not None:
                            ceInfo.setdefault(queue, {"LocalCEType": queueLocalType})

        defaultSetup = gConfig.getValue("/DIRAC/DefaultSetup")
        if defaultSetup:
            pilotDict["DefaultSetup"] = defaultSetup

        self.log.debug("From DIRAC/Configuration")
        servers = gConfig.getServersList()
        if not includeMasterCS:
            masterCS = gConfigurationData.getMasterServer()
            servers = list(set(servers) - set([masterCS]))
        pilotDict["ConfigurationServers"] = servers

        self.log.debug("Got pilotDict", str(pilotDict))

        return S_OK(pilotDict)
Beispiel #40
0
def getNumberOfProcessors(siteName=None, gridCE=None, queue=None):
    """Get the number of processors on a certain CE/queue/node (what the pilot administers).

    The siteName/gridCE/queue parameters are normally not necessary: when they
    are not given they are read from /LocalSite/Site, /LocalSite/GridCE and
    /LocalSite/CEQueue respectively.

    Tries to find it in this order:
    1) from the /Resources/Computing/CEDefaults/NumberOfProcessors (which is what the pilot fills up)
    2) if not present from JobFeatures
    3) if not present looks in CS for the "NumberOfProcessors" option of the queue (or VM type),
       of the CE (or cloud endpoint), or of the site
    4) if not present but there's WholeNode tag, look what the WN provides using multiprocessing.cpu_count()
    5) return 1

    :param str siteName: name of the site, in the form <grid>.<name>.<country>
    :param str gridCE: name of the CE (or cloud endpoint)
    :param str queue: name of the queue (or VM type)
    :returns: the number of processors; 1 when nothing could be determined
    """

    # 1) from /Resources/Computing/CEDefaults/NumberOfProcessors
    gLogger.info("Getting numberOfProcessors from /Resources/Computing/CEDefaults/NumberOfProcessors")
    numberOfProcessors = gConfig.getValue("/Resources/Computing/CEDefaults/NumberOfProcessors", 0)
    if numberOfProcessors:
        return numberOfProcessors

    # 2) from MJF
    gLogger.info("Getting numberOfProcessors from MJF")
    numberOfProcessors = getProcessorFromMJF()
    if numberOfProcessors:
        return numberOfProcessors
    gLogger.info("NumberOfProcessors could not be found in MJF")

    # 3) looks in CS for "NumberOfProcessors" Queue or CE or site option
    if not siteName:
        siteName = gConfig.getValue("/LocalSite/Site", "")
    if not gridCE:
        gridCE = gConfig.getValue("/LocalSite/GridCE", "")
    if not queue:
        queue = gConfig.getValue("/LocalSite/CEQueue", "")
    if not (siteName and gridCE and queue):
        gLogger.error("Could not find NumberOfProcessors: missing siteName or gridCE or queue. Returning '1'")
        return 1

    # The grid is the first dot-separated field of the site name
    grid = siteName.split(".")[0]
    sitePath = "/Resources/Sites/%s/%s" % (grid, siteName)
    cePath = "%s/CEs/%s" % (sitePath, gridCE)
    queuePath = "%s/Queues/%s" % (cePath, queue)
    cloudPath = "%s/Cloud/%s" % (sitePath, gridCE)
    vmTypePath = "%s/VMTypes/%s" % (cloudPath, queue)

    # From the most specific location to the least specific one
    for basePath in (queuePath, cePath, vmTypePath, cloudPath, sitePath):
        csPath = "%s/NumberOfProcessors" % basePath
        gLogger.info("Looking in", csPath)
        numberOfProcessors = gConfig.getValue(csPath, 0)
        if numberOfProcessors:
            return numberOfProcessors
    gLogger.info("NumberOfProcessors could not be found in CS")

    # 4) looks in CS for tags
    gLogger.info("Getting tags for %s: %s: %s" % (siteName, gridCE, queue))
    # Tags of the CE (or cloud endpoint) first, then tags of the queue (or VM type)
    tags = []
    for basePath in (cePath, cloudPath, queuePath, vmTypePath):
        tags += fromChar(gConfig.getValue("%s/Tag" % basePath, ""))
    if "WholeNode" in tags:
        gLogger.info("Found WholeNode tag, using multiprocessing.cpu_count()")
        return multiprocessing.cpu_count()

    # 5) return the default
    return 1
Beispiel #41
0
 def getCSOption( self, optionName, defaultValue = None ):
   """ Read an option from the CS section of this database.

       :param optionName: name of the option, relative to the section
                          returned by getDatabaseSection( self.fullname )
       :param defaultValue: value handed to gConfig.getValue as default
       :return: whatever gConfig.getValue returns for that path
   """
   optionPath = "/%s/%s" % ( getDatabaseSection( self.fullname ), optionName )
   return gConfig.getValue( optionPath, defaultValue )
Beispiel #42
0
    Script.showHelp()

# "email" flag: on by default, can be overridden by the "email" command line switch.
# NOTE(review): the flag is not used in the part of the script visible here - confirm it is used further down.
email = True
for switch in Script.getUnprocessedSwitches():
  if switch[0] == "email":
    email = getBoolean( switch[1] )

args = Script.getPositionalArgs()

# Two positional arguments are needed below: the site name and a comment.
# NOTE(review): the code relies on Script.showHelp() not returning; otherwise args[1] below fails - confirm.
if len( args ) < 2:
  Script.showHelp()

diracAdmin = DiracAdmin()
exitCode = 0
errorList = []
# An empty /DIRAC/Setup is taken as a sign that the Configuration Service is unreachable
setup = gConfig.getValue( '/DIRAC/Setup', '' )
if not setup:
  print('ERROR: Could not contact Configuration Service')
  exitCode = 2
  DIRACExit( exitCode )

#result = promptUser( 'All the elements that are associated with this site will be banned, are you sure about this action?' )
#if not result['OK'] or result['Value'] is 'n':
#  print 'Script stopped'
#  DIRACExit( 0 )

# Ban the site; a failure is recorded as a ( site, message ) pair in errorList
site = args[0]
comment = args[1]
result = diracAdmin.banSite( site, comment, printOutput = True )
if not result['OK']:
  errorList.append( ( site, result['Message'] ) )
Beispiel #43
0
    localConfigFile = os.path.expandvars('$WORKSPACE')+'/ServerInstallDIR/etc/dirac.cfg'
  elif os.path.isfile( './etc/dirac.cfg' ):
    localConfigFile = './etc/dirac.cfg'
  else:
    print "Local CFG file not found"
    exit( 2 )

# Load the local configuration file selected above and make sure it has a
# /LocalSite section, then set the CPU time left and disable the use of the
# server certificate.
localCfg.loadFromFile( localConfigFile )
if not localCfg.isSection( '/LocalSite' ):
  localCfg.createNewSection( '/LocalSite' )
localCfg.setOption( '/LocalSite/CPUTimeLeft', 5000 )
localCfg.setOption( '/DIRAC/Security/UseServerCertificate', False )

# NOTE(review): sMod, setup and vo are defined before this fragment (presumably
# from command line switches) - confirm. When sMod is not set, an empty
# SoftwareDistModule is written under /DIRAC/VOPolicy/<vo>/<setup>.
if not sMod:
  # Fall back to the values known to the configuration, then to hard-coded defaults
  if not setup:
    setup = gConfig.getValue('/DIRAC/Setup')
    if not setup:
      setup = 'JenkinsSetup'
  if not vo:
    vo = gConfig.getValue('/DIRAC/VirtualOrganization')
    if not vo:
      vo = 'dirac'

  # Create the nested sections one level at a time, only where missing
  if not localCfg.isSection( '/DIRAC/VOPolicy' ):
    localCfg.createNewSection( '/DIRAC/VOPolicy' )
  if not localCfg.isSection( '/DIRAC/VOPolicy/%s' % vo ):
    localCfg.createNewSection( '/DIRAC/VOPolicy/%s' % vo )
  if not localCfg.isSection( '/DIRAC/VOPolicy/%s/%s' % ( vo, setup ) ):
    localCfg.createNewSection( '/DIRAC/VOPolicy/%s/%s' % ( vo, setup ) )
  localCfg.setOption( '/DIRAC/VOPolicy/%s/%s/SoftwareDistModule' % ( vo, setup ), '' )
Beispiel #44
0
  def __init__( self, *args, **kwargs ):
    """ Constructor: reads the agent options and the operation handlers from the CS.

    :param args: positional arguments forwarded to AgentModule.__init__
    :param kwargs: keyword arguments forwarded to AgentModule.__init__
    :raises AgentConfigError: when the OperationHandlers section cannot be read
    """
    # # base class constructor first
    AgentModule.__init__( self, *args, **kwargs )

    # # ProcessPool related options, each one logged as soon as it is read
    self.__requestsPerCycle = self.am_getOption( "RequestsPerCycle", self.__requestsPerCycle )
    self.log.info( "Requests/cycle = %d" % self.__requestsPerCycle )

    self.__minProcess = self.am_getOption( "MinProcess", self.__minProcess )
    self.log.info( "ProcessPool min process = %d" % self.__minProcess )

    self.__maxProcess = self.am_getOption( "MaxProcess", 4 )
    self.log.info( "ProcessPool max process = %d" % self.__maxProcess )

    self.__queueSize = self.am_getOption( "ProcessPoolQueueSize", self.__queueSize )
    self.log.info( "ProcessPool queue size = %d" % self.__queueSize )

    self.__poolTimeout = int( self.am_getOption( "ProcessPoolTimeout", self.__poolTimeout ) )
    self.log.info( "ProcessPool timeout = %d seconds" % self.__poolTimeout )

    self.__poolSleep = int( self.am_getOption( "ProcessPoolSleep", self.__poolSleep ) )
    self.log.info( "ProcessPool sleep time = %d seconds" % self.__poolSleep )

    self.__bulkRequest = self.am_getOption( "BulkRequest", 0 )
    self.log.info( "Bulk request size = %d" % self.__bulkRequest )

    # # agent name and its configuration section
    self.agentName = self.am_getModuleParam( "fullName" )
    self.__configPath = PathFinder.getAgentSection( self.agentName )

    # # the operation handlers are the sub-sections of <agent section>/OperationHandlers
    opHandlersPath = "%s/%s" % ( self.__configPath, "OperationHandlers" )
    sectionsResult = gConfig.getSections( opHandlersPath )
    if not sectionsResult["OK"]:
      self.log.error( sectionsResult["Message" ] )
      raise AgentConfigError( "OperationHandlers section not found in CS under %s" % self.__configPath )

    # # per handler: timeouts and location
    self.timeOuts = {}
    self.handlersDict = {}
    for opHandler in sectionsResult["Value"]:
      handlerSection = "%s/%s" % ( opHandlersPath, opHandler )
      opHandlerPath = "%s/Location" % handlerSection

      opLocation = gConfig.getValue( opHandlerPath, "" )
      if not opLocation:
        # a handler without Location is reported and left out
        self.log.error( "%s not set for %s operation handler" % ( opHandlerPath, opHandler ) )
        continue

      # start from the agent-wide timeouts, override with the handler ones when set
      handlerTimeOuts = { "PerFile": self.__fileTimeout, "PerOperation": self.__operationTimeout }
      self.timeOuts[opHandler] = handlerTimeOuts

      opTimeout = gConfig.getValue( "%s/TimeOut" % handlerSection, 0 )
      if opTimeout:
        handlerTimeOuts["PerOperation"] = opTimeout

      fileTimeout = gConfig.getValue( "%s/TimeOutPerFile" % handlerSection, 0 )
      if fileTimeout:
        handlerTimeOuts["PerFile"] = fileTimeout

      self.handlersDict[opHandler] = opLocation

    self.log.info( "Operation handlers:" )
    for index, ( opHandler, opLocation ) in enumerate( self.handlersDict.items() ):
      handlerTimeOuts = self.timeOuts[opHandler]
      self.log.info( "[%s] %s: %s (timeout: %d s + %d s per file)" % ( index, opHandler, opLocation,
                                                                       handlerTimeOuts['PerOperation'],
                                                                       handlerTimeOuts['PerFile'] ) )

    # # common monitor activities: ( name, description, unit )
    activities = ( ( "Iteration", "Agent Loops", "Loops/min" ),
                   ( "Processed", "Request Processed", "Requests/min" ),
                   ( "Done", "Request Completed", "Requests/min" ) )
    for activityName, description, unit in activities:
      gMonitor.registerActivity( activityName, description,
                                 "RequestExecutingAgent", unit, gMonitor.OP_SUM )

    # # cache of the requests being handled
    self.__requestCache = {}

    # ?? Probably should be removed
    self.FTSMode = self.am_getOption( "FTSMode", False )
Beispiel #45
0
    def getServicePorts(self, setup='', printOutput=False):
        """Look up the port of every service configured for a setup.

        When no setup is given it is taken from the current installation
        (/DIRAC/Setup). Systems that have no instance defined for the setup,
        and services without a Port option, are reported with a warning and
        left out of the result.

        Example usage:

          >>> diracAdmin.getServicePorts()
          {'OK': True, 'Value': {...}}

        :param str setup: name of the setup to inspect
        :param bool printOutput: when True the result is also pretty-printed
        :return: S_OK( dict '<system>/<service>' -> port ) or S_ERROR
        """
        if not setup:
            setup = gConfig.getValue('/DIRAC/Setup', '')

        # The setup has to be one of those declared in /DIRAC/Setups
        res = gConfig.getSections('/DIRAC/Setups', [])
        if not res['OK']:
            return S_ERROR('Could not get /DIRAC/Setups sections')
        knownSetups = res['Value']
        if setup not in knownSetups:
            return S_ERROR('Setup %s is not in allowed list: %s' %
                           (setup, ', '.join(knownSetups)))

        # system -> instance used by this setup
        res = gConfig.getOptionsDict('/DIRAC/Setups/%s' % setup)
        if not res['OK']:
            return S_ERROR('Could not get /DIRAC/Setups/%s options' % setup)
        instances = res['Value']

        res = gConfig.getSections('/Systems')
        if not res['OK']:
            return S_ERROR('Could not get Systems sections')

        ports = {}
        for system in res['Value']:
            if system not in instances:
                self.log.warn('%s is not defined in /DIRAC/Setups/%s' %
                              (system, setup))
                continue

            path = '/Systems/%s/%s/Services' % (system, instances[system])
            res = gConfig.getSections(path)
            if not res['OK']:
                self.log.warn('Could not get sections in %s' % path)
                continue

            services = res['Value'] or []
            self.log.verbose('System: %s ServicesList: %s' %
                             (system, ', '.join(services)))
            for service in services:
                spath = '%s/%s/Port' % (path, service)
                servicePort = gConfig.getValue(spath, 0)
                if not servicePort:
                    self.log.warn('No port found for %s' % spath)
                    continue
                self.log.verbose('Found port for %s/%s = %s' %
                                 (system, service, servicePort))
                ports['%s/%s' % (system, service)] = servicePort

        if printOutput:
            print(self.pPrint.pformat(ports))

        return S_OK(ports)
  def _getCSDict(self):
    """ Build, from the CS, the dictionary of information handed to pilots.

    The result holds whatever ``_getPilotOptionsPerSetup`` adds for each section
    found under /Operations, one entry per CE found under /Resources/Sites,
    the default setup (if any) and the list of configuration servers.

    :returns: S_OK(pilotDict); when a mandatory CS section cannot be read,
              the failed gConfig result is returned unchanged
    :rtype: dict
    """

    pilotDict = {'Setups': {}, 'CEs': {}, 'GenericPilotDNs': []}

    self.log.info('-- Getting the content of the CS --')

    # The sections of /Operations are not only setups: they may also be
    # "Defaults" sections, or VOs in multi-VO installations
    operationsRes = gConfig.getSections('/Operations/')
    if not operationsRes['OK']:
      self.log.error("Can't get sections from Operations", operationsRes['Message'])
      return operationsRes
    setupsInOperations = operationsRes['Value']

    # The setups declared in this CS, to be compared with what /Operations holds
    diracSetupsRes = gConfig.getSections('DIRAC/Setups')
    if not diracSetupsRes['OK']:
      self.log.error("Can't get sections from DIRAC/Setups", diracSetupsRes['Message'])
      return diracSetupsRes

    # Multi-VO CS: none of the /Operations sections is a known setup, so they
    # are taken to be VOs and their sub-sections are used instead.
    # Each readable VO replaces the list, i.e. the last readable one wins.
    if set(diracSetupsRes['Value']).isdisjoint(setupsInOperations):
      for vo in list(setupsInOperations):
        voSetupsRes = gConfig.getSections('/Operations/%s' % vo)
        if voSetupsRes['OK']:
          setupsInOperations = voSetupsRes['Value']

    self.log.verbose('From Operations/[Setup]/Pilot')
    for setup in setupsInOperations:
      self._getPilotOptionsPerSetup(setup, pilotDict)

    self.log.verbose('From Resources/Sites')
    gridsRes = gConfig.getSections('/Resources/Sites/')
    if not gridsRes['OK']:
      self.log.error("Can't get sections from Resources", gridsRes['Message'])
      return gridsRes

    for grid in gridsRes['Value']:
      sitesRes = gConfig.getSections('/Resources/Sites/' + grid)
      if not sitesRes['OK']:
        self.log.error("Can't get sections from Resources", sitesRes['Message'])
        return sitesRes

      for site in sitesRes['Value']:
        cesPath = '/Resources/Sites/%s/%s/CEs/' % (grid, site)
        cesRes = gConfig.getSections(cesPath)
        if not cesRes['OK']:
          # A site without CEs is only reported, not fatal
          self.log.error('Site has no CEs! - skipping', site)
          continue

        for ce in cesRes['Value']:
          ceType = gConfig.getValue(cesPath + ce + '/CEType')
          localCEType = gConfig.getValue(cesPath + ce + '/LocalCEType')

          ceInfo = {'Site': site}
          if ceType is None:
            # Reported, but the CE is still registered (without GridCEType)
            self.log.error('CE has no option CEType!', ce + ' at ' + site)
          else:
            ceInfo['GridCEType'] = ceType
          pilotDict['CEs'][ce] = ceInfo

          if localCEType is not None:
            ceInfo.setdefault('LocalCEType', localCEType)

    defaultSetup = gConfig.getValue('/DIRAC/DefaultSetup')
    if defaultSetup:
      pilotDict['DefaultSetup'] = defaultSetup

    self.log.debug('From DIRAC/Configuration')
    pilotDict['ConfigurationServers'] = gConfig.getServersList()

    self.log.debug("Got pilotDict", str(pilotDict))

    return S_OK(pilotDict)
    def do_install(self, args):
        """ 
        Install various DIRAC components 
    
        usage:
        
          install mysql
          install db <database>
          install service <system> <service>
          install agent <system> <agent>
    """
        # NOTE: the docstring above doubles as the usage text printed to the user,
        # so it is kept as user documentation only.
        #
        # args is the raw command line following "install"; its first word selects
        # what to install, the remaining words are the arguments of that choice.
        # Nothing is returned: outcomes are printed or reported via self.__errMsg.
        argss = args.split()
        if not argss:
            print(self.do_install.__doc__)
            return

        option = argss.pop(0)
        if option == "mysql":
            print("Installing MySQL database, this can take a while ...")
            client = SystemAdministratorClient(self.host, self.port)
            # 'LocalConfig' is a placeholder: reset it before asking for the passwords
            if InstallTools.mysqlPassword == 'LocalConfig':
                InstallTools.mysqlPassword = ''
            InstallTools.getMySQLPasswords()
            result = client.installMySQL(InstallTools.mysqlRootPwd,
                                         InstallTools.mysqlPassword)
            if not result['OK']:
                self.__errMsg(result['Message'])
            else:
                print("MySQL: %s" % (result['Value'],))
                print("You might need to restart SystemAdministrator service to take new settings into account")
        elif option == "db":
            if not argss:
                print(self.do_install.__doc__)
                return
            database = argss[0]
            client = SystemAdministratorClient(self.host, self.port)

            result = client.getAvailableDatabases()
            if not result['OK']:
                self.__errMsg("Can not get database list: %s" %
                              result['Message'])
                return
            if database not in result['Value']:
                self.__errMsg("Unknown database %s: " % database)
                return
            system = result['Value'][database]['System']

            # The system hosting the database must have an instance in the current setup
            setup = gConfig.getValue('/DIRAC/Setup', '')
            if not setup:
                self.__errMsg("Unknown current setup")
                return
            instance = gConfig.getValue(
                '/DIRAC/Setups/%s/%s' % (setup, system), '')
            if not instance:
                self.__errMsg("No instance defined for system %s" % system)
                self.__errMsg(
                    "\tAdd new instance with 'add instance %s <instance_name>'"
                    % system)
                return

            # NOTE(review): this literal looks redacted; the "mysql" branch above
            # compares against 'LocalConfig' - confirm the intended placeholder.
            if not InstallTools.mysqlPassword:
                InstallTools.mysqlPassword = '******'
            InstallTools.getMySQLPasswords()
            result = client.installDatabase(database,
                                            InstallTools.mysqlRootPwd)
            if not result['OK']:
                self.__errMsg(result['Message'])
                return
            extension, system = result['Value']
            # result = client.addDatabaseOptionsToCS( system, database )
            InstallTools.mysqlHost = self.host
            result = client.getInfo()
            if not result['OK']:
                self.__errMsg(result['Message'])
                # Without the host info there is no setup to register the
                # database options under: stop instead of failing on 'Value'
                return
            hostSetup = result['Value']['Setup']
            result = InstallTools.addDatabaseOptionsToCS(
                gConfig, system, database, hostSetup)
            if not result['OK']:
                self.__errMsg(result['Message'])
                return
            print("Database %s from %s/%s installed successfully" % (
                database, extension, system))
        elif option == "service" or option == "agent":
            if len(argss) < 2:
                print(self.do_install.__doc__)
                return

            system = argss[0]
            component = argss[1]
            client = SystemAdministratorClient(self.host, self.port)
            # First need to update the CS
            # result = client.addDefaultOptionsToCS( option, system, component )
            InstallTools.host = self.host
            result = client.getInfo()
            if not result['OK']:
                self.__errMsg(result['Message'])
                return
            hostSetup = result['Value']['Setup']
            result = InstallTools.addDefaultOptionsToCS(
                gConfig, option, system, component, getCSExtensions(),
                hostSetup)
            if not result['OK']:
                self.__errMsg(result['Message'])
                return
            # Then we can install and start the component
            result = client.setupComponent(option, system, component)
            if not result['OK']:
                self.__errMsg(result['Message'])
                return
            compType = result['Value']['ComponentType']
            runit = result['Value']['RunitStatus']
            print("%s %s_%s is installed, runit status: %s" % (
                compType, system, component, runit))
        else:
            print("Unknown option: %s" % option)
Beispiel #48
0
    def execute(self):
        """The JobAgent execution method.
    """
        if self.jobCount:
            # Temporary mechanism to pass a shutdown message to the agent
            if os.path.exists('/var/lib/dirac_drain'):
                return self.__finish('Node is being drained by an operator')
            # Only call timeLeft utility after a job has been picked up
            self.log.info('Attempting to check CPU time left for filling mode')
            if self.fillingMode:
                if self.timeLeftError:
                    self.log.warn(
                        "Disabling filling mode as errors calculating time left",
                        self.timeLeftError)
                    return self.__finish(self.timeLeftError)
                self.log.info('normalized CPU units remaining in slot',
                              self.timeLeft)
                if self.timeLeft <= self.minimumTimeLeft:
                    return self.__finish('No more time left')
                # Need to update the Configuration so that the new value is published in the next matching request
                result = self.computingElement.setCPUTimeLeft(
                    cpuTimeLeft=self.timeLeft)
                if not result['OK']:
                    return self.__finish(result['Message'])

                # Update local configuration to be used by submitted job wrappers
                localCfg = CFG()
                if self.extraOptions:
                    localConfigFile = os.path.join('.', self.extraOptions)
                else:
                    localConfigFile = os.path.join(rootPath, "etc",
                                                   "dirac.cfg")
                localCfg.loadFromFile(localConfigFile)
                if not localCfg.isSection('/LocalSite'):
                    localCfg.createNewSection('/LocalSite')
                localCfg.setOption('/LocalSite/CPUTimeLeft', self.timeLeft)
                localCfg.writeToFile(localConfigFile)

            else:
                return self.__finish('Filling Mode is Disabled')

        self.log.verbose('Job Agent execution loop')
        result = self.computingElement.available()
        if not result['OK']:
            self.log.info('Resource is not available', result['Message'])
            return self.__finish('CE Not Available')

        ceInfoDict = result['CEInfoDict']
        runningJobs = ceInfoDict.get("RunningJobs")
        availableSlots = result['Value']

        if not availableSlots:
            if runningJobs:
                self.log.info('No available slots',
                              '%d running jobs' % runningJobs)
                return S_OK('Job Agent cycle complete with %d running jobs' %
                            runningJobs)
            else:
                self.log.info('CE is not available')
                return self.__finish('CE Not Available')

        result = self.computingElement.getDescription()
        if not result['OK']:
            return result

        # We can have several prioritized job retrieval strategies
        if isinstance(result['Value'], dict):
            ceDictList = [result['Value']]
        elif isinstance(result['Value'], list):
            # This is the case for Pool ComputingElement, and parameter 'MultiProcessorStrategy'
            ceDictList = result['Value']

        for ceDict in ceDictList:

            # Add pilot information
            gridCE = gConfig.getValue('LocalSite/GridCE', 'Unknown')
            if gridCE != 'Unknown':
                ceDict['GridCE'] = gridCE
            if 'PilotReference' not in ceDict:
                ceDict['PilotReference'] = str(self.pilotReference)
            ceDict['PilotBenchmark'] = self.cpuFactor
            ceDict['PilotInfoReportedFlag'] = self.pilotInfoReportedFlag

            # Add possible job requirements
            result = gConfig.getOptionsDict('/AgentJobRequirements')
            if result['OK']:
                requirementsDict = result['Value']
                ceDict.update(requirementsDict)
                self.log.info('Requirements:', requirementsDict)

            self.log.verbose('CE dict', ceDict)

            # here finally calling the matcher
            start = time.time()
            jobRequest = MatcherClient().requestJob(ceDict)
            matchTime = time.time() - start
            self.log.info('MatcherTime', '= %.2f (s)' % (matchTime))
            if jobRequest['OK']:
                break

        self.stopAfterFailedMatches = self.am_getOption(
            'StopAfterFailedMatches', self.stopAfterFailedMatches)

        if not jobRequest['OK']:
            if re.search('No match found', jobRequest['Message']):
                self.log.notice('Job request OK, but no match found',
                                ': %s' % (jobRequest['Message']))
                self.matchFailedCount += 1
                if self.matchFailedCount > self.stopAfterFailedMatches:
                    return self.__finish(
                        'Nothing to do for more than %d cycles' %
                        self.stopAfterFailedMatches)
                return S_OK(jobRequest['Message'])
            elif jobRequest['Message'].find("seconds timeout") != -1:
                self.log.error('Timeout while requesting job',
                               jobRequest['Message'])
                self.matchFailedCount += 1
                if self.matchFailedCount > self.stopAfterFailedMatches:
                    return self.__finish(
                        'Nothing to do for more than %d cycles' %
                        self.stopAfterFailedMatches)
                return S_OK(jobRequest['Message'])
            elif jobRequest['Message'].find(
                    "Pilot version does not match") != -1:
                errorMsg = 'Pilot version does not match the production version'
                self.log.error(errorMsg,
                               jobRequest['Message'].replace(errorMsg, ''))
                return S_ERROR(jobRequest['Message'])
            else:
                self.log.notice('Failed to get jobs',
                                ': %s' % (jobRequest['Message']))
                self.matchFailedCount += 1
                if self.matchFailedCount > self.stopAfterFailedMatches:
                    return self.__finish(
                        'Nothing to do for more than %d cycles' %
                        self.stopAfterFailedMatches)
                return S_OK(jobRequest['Message'])

        # Reset the Counter
        self.matchFailedCount = 0

        matcherInfo = jobRequest['Value']
        if not self.pilotInfoReportedFlag:
            # Check the flag after the first access to the Matcher
            self.pilotInfoReportedFlag = matcherInfo.get(
                'PilotInfoReportedFlag', False)
        jobID = matcherInfo['JobID']
        matcherParams = ['JDL', 'DN', 'Group']
        for param in matcherParams:
            if param not in matcherInfo:
                self.__report(jobID, 'Failed',
                              'Matcher did not return %s' % (param))
                return self.__finish('Matcher Failed')
            elif not matcherInfo[param]:
                self.__report(jobID, 'Failed',
                              'Matcher returned null %s' % (param))
                return self.__finish('Matcher Failed')
            else:
                self.log.verbose('Matcher returned',
                                 '%s = %s ' % (param, matcherInfo[param]))

        jobJDL = matcherInfo['JDL']
        jobGroup = matcherInfo['Group']
        ownerDN = matcherInfo['DN']

        optimizerParams = {}
        for key in matcherInfo:
            if key not in matcherParams:
                optimizerParams[key] = matcherInfo[key]

        parameters = self._getJDLParameters(jobJDL)
        if not parameters['OK']:
            self.__report(jobID, 'Failed', 'Could Not Extract JDL Parameters')
            self.log.warn('Could Not Extract JDL Parameters',
                          parameters['Message'])
            return self.__finish('JDL Problem')

        params = parameters['Value']
        if 'JobID' not in params:
            msg = 'Job has not JobID defined in JDL parameters'
            self.__report(jobID, 'Failed', msg)
            self.log.warn(msg)
            return self.__finish('JDL Problem')
        else:
            jobID = params['JobID']

        if 'JobType' not in params:
            self.log.warn('Job has no JobType defined in JDL parameters')
            jobType = 'Unknown'
        else:
            jobType = params['JobType']

        if 'CPUTime' not in params:
            self.log.warn(
                'Job has no CPU requirement defined in JDL parameters')

        # Job requirements for determining the number of processors
        # the minimum number of processors requested
        processors = int(
            params.get('NumberOfProcessors',
                       int(params.get('MinNumberOfProcessors', 1))))
        # the maximum number of processors allowed to the payload
        maxNumberOfProcessors = int(params.get('MaxNumberOfProcessors', 0))
        # need or not the whole node for the job
        wholeNode = 'WholeNode' in params
        mpTag = 'MultiProcessor' in params.get('Tags', [])

        if self.extraOptions:
            params['Arguments'] += ' ' + self.extraOptions
            params['ExtraOptions'] = self.extraOptions

        self.log.verbose('Job request successful: \n', jobRequest['Value'])
        self.log.info(
            'Received', 'JobID=%s, JobType=%s, OwnerDN=%s, JobGroup=%s' %
            (jobID, jobType, ownerDN, jobGroup))
        self.jobCount += 1
        try:
            jobReport = JobReport(jobID, 'JobAgent@%s' % self.siteName)
            jobReport.setJobParameter('MatcherServiceTime',
                                      str(matchTime),
                                      sendFlag=False)

            if 'BOINC_JOB_ID' in os.environ:
                # Report BOINC environment
                for thisp in ('BoincUserID', 'BoincHostID',
                              'BoincHostPlatform', 'BoincHostName'):
                    jobReport.setJobParameter(thisp,
                                              gConfig.getValue(
                                                  '/LocalSite/%s' % thisp,
                                                  'Unknown'),
                                              sendFlag=False)

            jobReport.setJobStatus('Matched', 'Job Received by Agent')
            result = self._setupProxy(ownerDN, jobGroup)
            if not result['OK']:
                return self._rescheduleFailedJob(jobID, result['Message'],
                                                 self.stopOnApplicationFailure)
            proxyChain = result.get('Value')

            # Save the job jdl for external monitoring
            self.__saveJobJDLRequest(jobID, jobJDL)

            software = self._checkInstallSoftware(jobID, params, ceDict)
            if not software['OK']:
                self.log.error('Failed to install software for job',
                               '%s' % (jobID))
                errorMsg = software['Message']
                if not errorMsg:
                    errorMsg = 'Failed software installation'
                return self._rescheduleFailedJob(jobID, errorMsg,
                                                 self.stopOnApplicationFailure)

            self.log.debug('Before self._submitJob() (%sCE)' % (self.ceName))
            result = self._submitJob(jobID, params, ceDict, optimizerParams,
                                     proxyChain, processors, wholeNode,
                                     maxNumberOfProcessors, mpTag)
            if not result['OK']:
                self.__report(jobID, 'Failed', result['Message'])
                return self.__finish(result['Message'])
            elif 'PayloadFailed' in result:
                # Do not keep running and do not overwrite the Payload error
                message = 'Payload execution failed with error code %s' % result[
                    'PayloadFailed']
                if self.stopOnApplicationFailure:
                    return self.__finish(message,
                                         self.stopOnApplicationFailure)
                else:
                    self.log.info(message)

            self.log.debug('After %sCE submitJob()' % (self.ceName))
        except Exception as subExcept:  # pylint: disable=broad-except
            self.log.exception("Exception in submission",
                               "",
                               lException=subExcept,
                               lExcInfo=True)
            return self._rescheduleFailedJob(
                jobID, 'Job processing failed with exception',
                self.stopOnApplicationFailure)

        # Sum all times but the last one (elapsed_time) and remove times at init (is this correct?)
        cpuTime = sum(os.times()[:-1]) - sum(self.initTimes[:-1])

        result = self.timeLeftUtil.getTimeLeft(cpuTime, processors)
        if result['OK']:
            self.timeLeft = result['Value']
        else:
            if result['Message'] != 'Current batch system is not supported':
                self.timeLeftError = result['Message']
            else:
                # if the batch system is not defined, use the process time and the CPU normalization defined locally
                self.timeLeft = self._getCPUTimeLeft()

        return S_OK('Job Agent cycle complete')
    def export_updateSoftware(self, version, rootPath="", gridVersion=""):
        """Update the local DIRAC software installation to ``version``.

        Builds and runs a ``dirac-install`` command for a server installation,
        then fixes the MySQL start-up script and installs the Oracle client
        when those are present / requested.

        :param str version: release given to ``dirac-install -r``
        :param str rootPath: optional path given with ``-P``; must exist when set
        :param str gridVersion: optional value given with ``-g``
        :return: S_OK() on success, otherwise S_ERROR (or the failed systemCall result)
        """
        # A non-empty /LocalInstallation section is a precondition
        localInstallation = gConfig.getOptionsDict('/LocalInstallation')
        if not localInstallation['OK']:
            return S_ERROR(
                'Invalid installation - missing /LocalInstallation section in the configuration'
            )
        if not localInstallation['Value']:
            return S_ERROR(
                'Invalid installation - empty /LocalInstallation section in the configuration'
            )

        if rootPath and not os.path.exists(rootPath):
            return S_ERROR('Path "%s" does not exists' % rootPath)

        # For LHCb we need to check Oracle client: either requested explicitly,
        # or (when the option is not set) detected by trying to import cx_Oracle
        oracleSetting = gConfig.getValue(
            '/LocalInstallation/InstallOracleClient', 'unknown').lower()
        installOracleClient = oracleSetting in ('yes', 'true', '1')
        if oracleSetting == "unknown":
            probe = systemCall(0, ['python', '-c', 'import cx_Oracle'])
            if probe['OK'] and probe['Value'][0] == 0:
                installOracleClient = True

        installCmd = ['dirac-install', '-r', version, '-t', 'server']
        if rootPath:
            installCmd += ['-P', rootPath]

        # Extensions, plus the Web portal when it is enabled locally
        extensions = getCSExtensions()
        if gConfig.getValue('/LocalInstallation/WebPortal', False):
            extensions.append('Web')
        if extensions:
            installCmd += ['-e', ','.join(extensions)]

        # Are grid middleware bindings required ?
        if gridVersion:
            installCmd += ['-g', gridVersion]

        # The local dirac.cfg is passed as the last argument; TargetPath wins over RootPath
        fallbackPath = gConfig.getValue('/LocalInstallation/RootPath', '')
        targetPath = gConfig.getValue('/LocalInstallation/TargetPath',
                                      fallbackPath)
        if not targetPath:
            return S_ERROR('Local configuration not found')
        localCfg = targetPath + '/etc/dirac.cfg'
        if not os.path.exists(localCfg):
            return S_ERROR('Local configuration not found')
        installCmd.append(localCfg)

        installResult = systemCall(0, installCmd)
        if not installResult['OK']:
            return installResult
        if installResult['Value'][0] != 0:
            # Report only the output lines that mention an error, if any
            strippedLines = (line.strip()
                             for line in installResult['Value'][1].split('\n'))
            errorLines = [
                line for line in strippedLines if 'error' in line.lower()
            ]
            if errorLines:
                return S_ERROR('\n'.join(errorLines))
            return S_ERROR("Failed to update software to %s" % version)

        # Check if there is a MySQL installation and fix the server scripts if necessary
        if os.path.exists(InstallTools.mysqlDir):
            scriptTail = ('mysql', 'share', 'mysql', 'mysql.server')
            startupScript = os.path.join(InstallTools.instancePath,
                                         *scriptTail)
            if not os.path.exists(startupScript):
                startupScript = os.path.join(InstallTools.instancePath, 'pro',
                                             *scriptTail)
            if os.path.exists(startupScript):
                InstallTools.fixMySQLScripts(startupScript)

        # For LHCb we need to check Oracle client
        if installOracleClient:
            oracleResult = systemCall(0, 'install_oracle-client.sh')
            if not oracleResult['OK']:
                return oracleResult
            if oracleResult['Value'][0] != 0:
                # Return stdout and stderr followed by a summary line
                report = oracleResult['Value'][1].split('\n')
                report += oracleResult['Value'][2].split('\n')
                report.append('Failed to install Oracle client module')
                return S_ERROR('\n'.join(report))
        return S_OK()
Beispiel #50
0
    def web_getSelectionData(self):
        """Build the selector lists for the site summary page.

        Yields the ``getSiteSummarySelectors`` call wrapped in ``self.threadTask``
        and, once resumed with its result, passes a dictionary with the keys
        ``status``, ``gridtype``, ``maskstatus``, ``site`` and ``country`` to
        ``self.finish``. Every value is a list of one-element lists.
        """
        callback = {}

        RPC = RPCClient("WorkloadManagement/WMSAdministrator")
        result = yield self.threadTask(RPC.getSiteSummarySelectors)
        gLogger.info("\033[0;31m ++++++: \033[0m %s" % result)
        if result["OK"]:
            selectors = result["Value"]

            # These three selectors are rendered identically: "All" + the values
            for selectorName, callbackKey in (("Status", "status"),
                                              ("GridType", "gridtype"),
                                              ("MaskStatus", "maskstatus")):
                values = selectors.get(selectorName, [])
                if values:
                    callback[callbackKey] = [["All"]] + [[str(value)]
                                                         for value in values]
                else:
                    callback[callbackKey] = [["Nothing to display"]]

            # Sites: preferred sites first, then the remaining ones
            sites = list(selectors.get("Site", []))
            if sites:
                tier1 = gConfig.getValue("/Website/PreferredSites", [])
                site = [["All"]]
                site.extend([str(name)] for name in tier1)
                site.extend([str(name)] for name in sites
                            if name not in tier1)
            else:
                site = [["Error during RPC call"]]
            callback["site"] = site

            countries = selectors.get("Country", [])
            if countries:
                country = [["All"]]
                countryCode = self.__getCountries()
                for code in countries:
                    # Fall back to the raw code when no name is known for it;
                    # previously this reused the name of the preceding country
                    # (or failed on the first entry)
                    country.append([str(countryCode.get(code, code))])
            else:
                country = [["Nothing to display"]]
            country.sort()
            callback["country"] = country
        else:
            for callbackKey in ("status", "gridtype", "maskstatus", "site",
                                "country"):
                callback[callbackKey] = [["Error during RPC call"]]

        self.finish(callback)
Beispiel #51
0
from DIRAC.Core.Base import Script

# Register the help text for this command, then parse the command line
# (parsing errors are not ignored).
usageMessage = """
Get the currently defined user data volume quotas

Usage:
   %s [options]
""" % Script.scriptName
Script.setUsageMessage(usageMessage)

Script.parseCommandLine(ignoreErrors=False)

import DIRAC
from DIRAC import gLogger, gConfig
from DIRAC.Core.Security.ProxyInfo import getProxyInfo

# The quota is looked up for the user name found in the client proxy
res = getProxyInfo(False, False)
if not res['OK']:
    gLogger.error("Failed to get client proxy information.", res['Message'])
    DIRAC.exit(2)
proxyInfo = res['Value']
username = proxyInfo['username']

try:
    # The user specific quota overrides the registry-wide default
    quota = gConfig.getValue('/Registry/DefaultStorageQuota', 0.)
    quota = gConfig.getValue('/Registry/Users/%s/Quota' % username, quota)
    gLogger.notice('Current quota found to be %.1f GB' % quota)
except Exception as x:  # "except Exception, x" is a syntax error on Python 3
    gLogger.exception("Failed to convert retrieved quota", '', x)
    DIRAC.exit(-1)
else:
    DIRAC.exit(0)
Beispiel #52
0
  def loadModule( self, modName, hideExceptions = False, parentModule = False ):
    """
      Load module name.
      name must take the form [DIRAC System Name]/[DIRAC module]

      If the CS section of the module defines a "Load" list, every module of that
      group is loaded instead. Otherwise the code is located, in this order, from
      the "HandlerPath" CS option, from parentModule, or by looking into the
      installed extensions.

      :param str modName: name of the module, leading slashes are ignored
      :param bool hideExceptions: forwarded to the import helper when importing from
                                  the parent module or from the installed extensions
      :param parentModule: module to import from when no HandlerPath is defined
      :return: S_OK() when the module is (already) loaded, S_ERROR otherwise
    """
    while modName and modName[0] == "/":
      modName = modName[1:]
    if modName in self.__modules:
      return S_OK()
    modList = modName.split( "/" )
    if len( modList ) != 2:
      return S_ERROR( "Can't load %s: Invalid module name" % ( modName ) )
    csSection = self.__sectionFinder( modName )
    loadGroup = gConfig.getValue( "%s/Load" % csSection, [] )
    #Check if it's a load group
    if loadGroup:
      gLogger.info( "Found load group %s. Will load %s" % ( modName, ", ".join( loadGroup ) ) )
      for loadModName in loadGroup:
        #Names without a system are relative to the system of the group
        if loadModName.find( "/" ) == -1:
          loadModName = "%s/%s" % ( modList[0], loadModName )
        result = self.loadModule( loadModName, hideExceptions = hideExceptions, parentModule = False )
        if not result[ 'OK' ]:
          return result
      return S_OK()
    #Normal load
    loadName = gConfig.getValue( "%s/Module" % csSection, "" )
    if not loadName:
      loadName = modName
      gLogger.info( "Loading %s" % ( modName ) )
    else:
      if loadName.find( "/" ) == -1:
        loadName = "%s/%s" % ( modList[0], loadName )
      gLogger.info( "Loading %s (%s)" % ( modName, loadName ) )
    loadList = loadName.split( "/" )
    if len( loadList ) != 2:
      return S_ERROR( "Can't load %s: Invalid module name" % ( loadName ) )
    system, module = loadList
    #Load
    className = module
    if self.__modSuffix:
      className = "%s%s" % ( className, self.__modSuffix )
    #If already loaded, skip
    if loadName not in self.__loadedModules:
      #Imported module object; stays None when nothing could be found
      modObj = None
      #Check if handler is defined
      loadCSSection = self.__sectionFinder( loadName )
      handlerPath = gConfig.getValue( "%s/HandlerPath" % loadCSSection, "" )
      if handlerPath:
        gLogger.info( "Trying to %s from CS defined path %s" % ( loadName, handlerPath ) )
        gLogger.verbose( "Found handler for %s: %s" % ( loadName, handlerPath ) )
        handlerPath = handlerPath.replace( "/", "." )
        if handlerPath.endswith( ".py" ):
          handlerPath = handlerPath[ :-3 ]
        #The class is named after the last component of the handler path
        className = List.fromChar( handlerPath, "." )[-1]
        result = self.__recurseImport( handlerPath )
        if not result[ 'OK' ]:
          return S_ERROR( "Cannot load user defined handler %s: %s" % ( handlerPath, result[ 'Message' ] ) )
        gLogger.verbose( "Loaded %s" % handlerPath )
        modObj = result[ 'Value' ]
      elif parentModule:
        gLogger.info( "Trying to autodiscover %s from parent" % loadName )
        #If we've got a parent module, load from there.
        modImport = module
        if self.__modSuffix:
          modImport = "%s%s" % ( modImport, self.__modSuffix )
        result = self.__recurseImport( modImport, parentModule, hideExceptions = hideExceptions )
        #An error result has no 'Value' to look at, report it as it is
        if not result[ 'OK' ]:
          return result
        modObj = result[ 'Value' ]
      else:
        #Check to see if the module exists in any of the root modules
        gLogger.info( "Trying to autodiscover %s" % loadName )
        rootModulesToLook = getInstalledExtensions()
        for rootModule in rootModulesToLook:
          importString = '%s.%sSystem.%s.%s' % ( rootModule, system, self.__importLocation, module )
          if self.__modSuffix:
            importString = "%s%s" % ( importString, self.__modSuffix )
          gLogger.verbose( "Trying to load %s" % importString )
          result = self.__recurseImport( importString, hideExceptions = hideExceptions )
          #Error while loading
          if not result[ 'OK' ]:
            return result
          #Something has been found! break :)
          if result[ 'Value' ]:
            gLogger.verbose( "Found %s" % importString )
            modObj = result[ 'Value' ]
            break
      #Nothing found (also the case when there are no root modules to look into)
      if not modObj:
        return S_ERROR( "Could not find %s" % loadName )
      try:
        #Try to get the class from the module
        modClass = getattr( modObj, className )
      except AttributeError:
        location = ""
        if '__file__' in dir( modObj ):
          location = modObj.__file__
        else:
          location = modObj.__path__
        #Report the class name that was actually looked up
        gLogger.exception( "%s module does not have a %s class!" % ( location, className ) )
        return S_ERROR( "Cannot load %s" % module )
      #Check if it's subclass
      if not issubclass( modClass, self.__superClass ):
        return S_ERROR( "%s has to inherit from %s" % ( loadName, self.__superClass.__name__ ) )
      self.__loadedModules[ loadName ] = { 'classObj' : modClass, 'moduleObj' : modObj }
      #End of loading of 'loadName' module

    #A-OK :)
    self.__modules[ modName ] = self.__loadedModules[ loadName ].copy()
    #keep the name of the real code module
    self.__modules[ modName ][ 'modName' ] = modName
    self.__modules[ modName ][ 'loadName' ] = loadName
    gLogger.notice( "Loaded module %s" % modName )

    return S_OK()
Beispiel #53
0
records = []

# Unless all VOs are requested, fall back to the VO of the proxy group
if vo is None and not allVOsFlag:
    result = getVOfromProxyGroup()
    if not result['OK']:
        gLogger.error('Failed to determine the user VO')
        DIRAC.exit(-1)
    vo = result['Value']

print(allVOsFlag, noVOFlag, vo)

for se, statusDict in res['Value'].items():

    if not allVOsFlag:
        voList = gConfig.getValue('/Resources/StorageElements/%s/VO' % se, [])
        # An SE restricted to VOs is skipped when only VO-less SEs are wanted
        # or when the user VO is not one of them
        if voList and (noVOFlag or vo not in voList):
            continue

    # One row per SE: its name followed by the value of every remaining field
    records.append(
        [se] + [statusDict.get(status, 'Unknown') for status in fields[1:]])

printTable(fields, records, numbering=False, sortField='SE')

DIRAC.exit(0)
Beispiel #54
0
    def __init__(self, name, protocols=None, vo=None):
        """ c'tor

        Resolves the VO, decides whether the storage has to be accessed through
        a proxy and creates the storage objects with the StorageFactory.
        ``self.valid`` tells whether this succeeded; when it did not,
        ``self.errorReason`` holds the message.

        :param str name: SE name
        :param list protocols: requested protocols, None for no restriction
        :param vo: VO name, taken from the proxy group when not given
        """

        self.methodName = None

        if vo:
            self.vo = vo
        else:
            result = getVOfromProxyGroup()
            if not result['OK']:
                # Flag the instance as invalid, as for a StorageFactory failure,
                # instead of leaving it without these attributes
                self.valid = False
                self.name = name
                self.errorReason = result['Message']
                return
            self.vo = result['Value']
        self.opHelper = Operations(vo=self.vo)

        # A proxy is used if the first access protocol of the SE is one of the
        # locally proxied protocols, or if UseProxy is set for this SE either
        # in the local configuration or in the Operations section
        proxiedProtocols = gConfig.getValue(
            '/LocalSite/StorageElements/ProxyProtocols', "").split(',')
        useProxy = (gConfig.getValue(
            "/Resources/StorageElements/%s/AccessProtocol.1/Protocol" % name,
            "UnknownProtocol") in proxiedProtocols)

        if not useProxy:
            useProxy = gConfig.getValue(
                '/LocalSite/StorageElements/%s/UseProxy' % name, False)
        if not useProxy:
            useProxy = self.opHelper.getValue(
                '/Services/StorageElements/%s/UseProxy' % name, False)

        self.valid = True
        protocolList = [] if protocols is None else protocols
        res = StorageFactory(useProxy=useProxy, vo=self.vo).getStorages(
            name, protocolList=protocolList)
        if not res['OK']:
            self.valid = False
            self.name = name
            self.errorReason = res['Message']
        else:
            factoryDict = res['Value']
            self.name = factoryDict['StorageName']
            self.options = factoryDict['StorageOptions']
            self.localProtocols = factoryDict['LocalProtocols']
            self.remoteProtocols = factoryDict['RemoteProtocols']
            self.storages = factoryDict['StorageObjects']
            self.protocolOptions = factoryDict['ProtocolOptions']
            self.turlProtocols = factoryDict['TurlProtocols']

        self.log = gLogger.getSubLogger("SE[%s]" % self.name)

        # Method names grouped by the kind of access they perform
        self.readMethods = [
            'getFile', 'getAccessUrl', 'getTransportURL', 'prestageFile',
            'prestageFileStatus', 'getDirectory'
        ]

        self.writeMethods = [
            'retransferOnlineFile', 'putFile', 'replicateFile', 'pinFile',
            'releaseFile', 'createDirectory', 'putDirectory'
        ]

        self.removeMethods = ['removeFile', 'removeDirectory']

        self.checkMethods = [
            'exists',
            'getDirectoryMetadata',
            'getDirectorySize',
            'getFileSize',
            'getFileMetadata',
            'listDirectory',
            'isDirectory',
            'isFile',
        ]

        self.okMethods = [
            'getLocalProtocols', 'getPfnForProtocol', 'getPfnForLfn',
            'getPfnPath', 'getProtocols', 'getRemoteProtocols',
            'getStorageElementName', 'getStorageElementOption',
            'getStorageParameters', 'isLocalSE'
        ]
Beispiel #55
0
def getSiteUpdates(vo, bdiiInfo=None, log=None):
    """ Get all the necessary updates for the already defined sites and CEs

    The current CS values of every site, CE and queue are compared with the
    values found in the BDII information; each difference is collected.

    :param str vo: VO name, used to query the BDII when ``bdiiInfo`` is not
                   given and added to the VO list of the queues lacking it
    :param dict bdiiInfo: BDII information keyed by site name; obtained with
                          ``getBdiiCEInfo(vo)`` when None
    :param log: logger for the notices, ``gLogger`` when None
    :return: S_OK with a set of ``(section, option, currentValue, newValue)``
             tuples, or the error returned by ``getBdiiCEInfo``
    """
    def addToChangeSet(entry, changeSet):
        # Only a non-empty new value that differs from the current one is a change
        _section, _option, value, new_value = entry
        if new_value and new_value != value:
            changeSet.add(entry)

    if log is None:
        log = gLogger

    ceBdiiDict = bdiiInfo
    if bdiiInfo is None:
        result = getBdiiCEInfo(vo)
        if not result['OK']:
            return result
        ceBdiiDict = result['Value']

    changeSet = set()
    for site in ceBdiiDict:
        result = getDIRACSiteName(site)
        if not result['OK']:
            continue
        siteNames = result['Value']
        siteInfo = ceBdiiDict[site]
        for siteName in siteNames:
            siteSection = cfgPath('/Resources', 'Sites',
                                  siteName.split('.')[0], siteName)
            result = gConfig.getOptionsDict(siteSection)
            if not result['OK']:
                continue
            siteDict = result['Value']
            # Current CS values
            coor = siteDict.get('Coordinates', 'Unknown')
            mail = siteDict.get('Mail', 'Unknown').replace(' ', '')
            description = siteDict.get('Description', 'Unknown')
            description = description.replace(' ,', ',')

            longitude = siteInfo.get('GlueSiteLongitude', '').strip()
            latitude = siteInfo.get('GlueSiteLatitude', '').strip()

            # Current BDII value
            newcoor = ''
            if longitude and latitude:
                newcoor = "%s:%s" % (longitude, latitude)
            newmail = siteInfo.get('GlueSiteSysAdminContact',
                                   '').replace('mailto:', '').strip()
            newdescription = siteInfo.get('GlueSiteDescription', '').strip()
            # Adding site data to the changes list
            addToChangeSet((siteSection, 'Coordinates', coor, newcoor),
                           changeSet)
            addToChangeSet((siteSection, 'Mail', mail, newmail), changeSet)
            addToChangeSet(
                (siteSection, 'Description', description, newdescription),
                changeSet)

            ces = gConfig.getValue(cfgPath(siteSection, 'CE'), [])
            for ce in ces:
                ceSection = cfgPath(siteSection, 'CEs', ce)
                result = gConfig.getOptionsDict(ceSection)
                ceDict = result['Value'] if result['OK'] else {}
                ceInfo = siteInfo['CEs'].get(ce, None)
                if not result['OK'] and ceInfo:
                    log.notice("Adding new CE %s to site %s/%s" %
                               (ce, siteName, site))
                if ceInfo is None:
                    # The CE is not known to the BDII, nothing to compare with
                    continue

                # Current CS CE info
                arch = ceDict.get('architecture', 'Unknown')
                OS = ceDict.get('OS', 'Unknown')
                si00 = ceDict.get('SI00', 'Unknown')
                ceType = ceDict.get('CEType', 'Unknown')
                ram = ceDict.get('MaxRAM', 'Unknown')
                submissionMode = ceDict.get('SubmissionMode', 'Unknown')

                # Current BDII CE info
                newarch = ceInfo.get('GlueHostArchitecturePlatformType',
                                     '').strip()
                systemName = ceInfo.get('GlueHostOperatingSystemName',
                                        '').strip()
                systemVersion = ceInfo.get('GlueHostOperatingSystemVersion',
                                           '').strip()
                systemRelease = ceInfo.get('GlueHostOperatingSystemRelease',
                                           '').strip()
                newOS = ''
                if systemName and systemVersion and systemRelease:
                    newOS = '_'.join(
                        (systemName, systemVersion, systemRelease))
                newsi00 = ceInfo.get('GlueHostBenchmarkSI00', '').strip()
                # The CE type is the first non-empty implementation name
                # found among the queues of the CE
                newCEType = 'Unknown'
                for bdiiQueue in ceInfo['Queues'].values():
                    newCEType = bdiiQueue.get('GlueCEImplementationName',
                                              '').strip()
                    if newCEType:
                        break
                if newCEType == 'ARC-CE':
                    newCEType = 'ARC'

                newSubmissionMode = None
                if newCEType in ['ARC', 'CREAM']:
                    newSubmissionMode = "Direct"
                newRAM = ceInfo.get('GlueHostMainMemoryRAMSize', '').strip()
                # Protect from unreasonable values
                if newRAM and int(newRAM) > 150000:
                    newRAM = ''

                # Adding CE data to the change list
                addToChangeSet((ceSection, 'architecture', arch, newarch),
                               changeSet)
                addToChangeSet((ceSection, 'OS', OS, newOS), changeSet)
                addToChangeSet((ceSection, 'SI00', si00, newsi00), changeSet)
                addToChangeSet((ceSection, 'CEType', ceType, newCEType),
                               changeSet)
                addToChangeSet((ceSection, 'MaxRAM', ram, newRAM), changeSet)
                if submissionMode == "Unknown" and newSubmissionMode:
                    addToChangeSet((ceSection, 'SubmissionMode',
                                    submissionMode, newSubmissionMode),
                                   changeSet)

                for queue, queueInfo in ceInfo['Queues'].items():
                    queueStatus = queueInfo['GlueCEStateStatus']
                    queueSection = cfgPath(ceSection, 'Queues', queue)
                    queueDict = {}
                    result = gConfig.getOptionsDict(queueSection)
                    if result['OK']:
                        queueDict = result['Value']
                    elif queueStatus.lower() == "production":
                        log.notice("Adding new queue %s to CE %s" %
                                   (queue, ce))
                    else:
                        # Queues unknown to the CS are only added when in production
                        continue

                    # Current CS queue info
                    maxCPUTime = queueDict.get('maxCPUTime', 'Unknown')
                    queueSI00 = queueDict.get('SI00', 'Unknown')
                    maxTotalJobs = queueDict.get('MaxTotalJobs', 'Unknown')

                    # Current BDII queue info
                    newMaxCPUTime = queueInfo.get('GlueCEPolicyMaxCPUTime', '')
                    # Values made only of 4s or only of 9s are discarded
                    if newMaxCPUTime in ("4" * len(newMaxCPUTime),
                                         "9" * len(newMaxCPUTime)):
                        newMaxCPUTime = ''
                    newSI00 = ''
                    caps = queueInfo['GlueCECapability']
                    if isinstance(caps, str):
                        caps = [caps]
                    for cap in caps:
                        if 'CPUScalingReferenceSI00' in cap:
                            newSI00 = cap.split('=')[-1]

                    # Adding queue info to the CS
                    addToChangeSet((queueSection, 'maxCPUTime', maxCPUTime,
                                    newMaxCPUTime), changeSet)
                    addToChangeSet((queueSection, 'SI00', queueSI00, newSI00),
                                   changeSet)
                    if maxTotalJobs == "Unknown":
                        # Default limits: half of the CPUs (at most 1000) in
                        # total, a tenth of that (at least 2) waiting
                        newTotalJobs = min(
                            1000,
                            int(
                                int(queueInfo.get('GlueCEInfoTotalCPUs', 0)) /
                                2))
                        newWaitingJobs = max(2, int(newTotalJobs * 0.1))
                        newTotalJobs = str(newTotalJobs)
                        newWaitingJobs = str(newWaitingJobs)
                        addToChangeSet(
                            (queueSection, 'MaxTotalJobs', '', newTotalJobs),
                            changeSet)
                        addToChangeSet((queueSection, 'MaxWaitingJobs', '',
                                        newWaitingJobs), changeSet)

                    # Updating eligible VO list
                    VOs = set()
                    if queueDict.get('VO', ''):
                        VOs = set(
                            q.strip()
                            for q in queueDict.get('VO', '').split(',') if q)
                    if vo not in VOs:
                        VOs.add(vo)
                        newVOs = ','.join(list(VOs))
                        addToChangeSet((queueSection, 'VO', '', newVOs),
                                       changeSet)

    return S_OK(changeSet)
Beispiel #56
0
def __getGlue2ShareInfo(host, shareInfoLists):
    """Translate GLUE2 ComputingShare records into a per-site CE/queue dictionary.

    All execution environments referenced by the shares are looked up with a
    single call to ``__getGlue2ExecutionEnvironmentInfo``. Each share is then
    turned into a queue description (``GlueCE...`` keys) and attached to the
    CE named by each of its endpoints, or, when the share has no endpoint and
    its ``GLUE2ShareID`` starts with ``urn:ogf``, to an ARC CE derived from
    that ID.

    :param str host: BDII host to query
    :param dict shareInfoLists: site name -> list of GLUE2 ComputingShare dictionaries
    :returns: S_OK with ``{siteName: {'CEs': {ceName: ceInfo}}}``, or the S_ERROR
              returned by the execution environment lookup
    """
    # First pass: collect every execution environment key so they can be
    # resolved in one query.
    executionEnvironments = []
    for _siteName, shareInfoDicts in shareInfoLists.items():
        for shareInfoDict in shareInfoDicts:
            executionEnvironment = shareInfoDict.get(
                'GLUE2ComputingShareExecutionEnvironmentForeignKey', [])
            if not executionEnvironment:
                sLog.error(
                    'No entry for GLUE2ComputingShareExecutionEnvironmentForeignKey',
                    pformat(shareInfoDict))
                continue
            # A single value arrives as a bare string instead of a list
            if isinstance(executionEnvironment, six.string_types):
                executionEnvironment = [executionEnvironment]
            executionEnvironments.extend(executionEnvironment)
    resExeInfo = __getGlue2ExecutionEnvironmentInfo(host,
                                                    executionEnvironments)
    if not resExeInfo['OK']:
        sLog.error(
            "Cannot get execution environment info for:",
            str(executionEnvironments)[:100] + "  " + resExeInfo['Message'])
        return resExeInfo
    exeInfos = resExeInfo['Value']

    # Second pass: build the CE and queue entries site by site.
    siteDict = {}
    for siteName, shareInfoDicts in shareInfoLists.items():
        siteDict[siteName] = {'CEs': {}}
        cesDict = siteDict[siteName]['CEs']
        for shareInfoDict in shareInfoDicts:
            ceInfo = {}
            ceInfo['MaxWaitingJobs'] = shareInfoDict.get(
                'GLUE2ComputingShareMaxWaitingJobs', '-1')  # This is not used
            ceInfo['Queues'] = {}
            queueInfo = {}
            queueInfo['GlueCEStateStatus'] = shareInfoDict[
                'GLUE2ComputingShareServingState']
            # GLUE2 times are divided by 60 before being stored as strings
            # (presumably seconds -> minutes; see the hours fix-up below).
            queueInfo['GlueCEPolicyMaxCPUTime'] = str(
                int(
                    int(
                        shareInfoDict.get('GLUE2ComputingShareMaxCPUTime',
                                          86400)) / 60))
            queueInfo['GlueCEPolicyMaxWallClockTime'] = str(
                int(
                    int(
                        shareInfoDict.get('GLUE2ComputingShareMaxWallTime',
                                          86400)) / 60))
            queueInfo['GlueCEInfoTotalCPUs'] = shareInfoDict.get(
                'GLUE2ComputingShareMaxRunningJobs', '10000')
            queueInfo['GlueCECapability'] = ['CPUScalingReferenceSI00=2552']

            # Cap the number of processors advertised by GLUE2 with the CS limit
            try:
                maxNOPfromCS = gConfig.getValue(
                    '/Resources/Computing/CEDefaults/GLUE2ComputingShareMaxSlotsPerJob_limit',
                    8)
                maxNOPfromGLUE = int(
                    shareInfoDict.get('GLUE2ComputingShareMaxSlotsPerJob', 1))
                numberOfProcs = min(maxNOPfromGLUE, maxNOPfromCS)
                queueInfo['NumberOfProcessors'] = numberOfProcs
                if numberOfProcs != maxNOPfromGLUE:
                    sLog.info(
                        'Limited NumberOfProcessors for', '%s from %s to %s' %
                        (siteName, maxNOPfromGLUE, numberOfProcs))
            except ValueError:
                sLog.error(
                    "Bad content for GLUE2ComputingShareMaxSlotsPerJob:",
                    siteName + ' ' +
                    shareInfoDict.get('GLUE2ComputingShareMaxSlotsPerJob'))
                queueInfo['NumberOfProcessors'] = 1

            executionEnvironment = shareInfoDict.get(
                'GLUE2ComputingShareExecutionEnvironmentForeignKey', [])
            if isinstance(executionEnvironment, six.string_types):
                executionEnvironment = [executionEnvironment]
            resExeInfo = __getGlue2ExecutionEnvironmentInfoForSite(
                siteName, executionEnvironment, exeInfos)
            if not resExeInfo['OK']:
                continue

            exeInfo = resExeInfo.get('Value')
            if not exeInfo:
                sLog.error(
                    'Using dummy values. Did not find information for execution environment',
                    siteName)
                exeInfo = {
                    'GlueHostMainMemoryRAMSize':
                    '1999',  # intentionally identifiably dummy value
                    'GlueHostOperatingSystemVersion': '',
                    'GlueHostOperatingSystemName': '',
                    'GlueHostOperatingSystemRelease': '',
                    'GlueHostArchitecturePlatformType': 'x86_64',
                    'GlueHostBenchmarkSI00':
                    '2500',  # needed for the queue to be used by the sitedirector
                    'MANAGER':
                    'manager:unknownBatchSystem',  # need some value for ARC
                }
            else:
                sLog.info('Found information for execution environment for',
                          siteName)

            # sometimes the time is still in hours
            maxCPUTime = int(queueInfo['GlueCEPolicyMaxCPUTime'])
            if maxCPUTime in [12, 24, 36, 48, 168]:
                queueInfo['GlueCEPolicyMaxCPUTime'] = str(maxCPUTime * 60)
                queueInfo['GlueCEPolicyMaxWallClockTime'] = str(
                    int(queueInfo['GlueCEPolicyMaxWallClockTime']) * 60)

            ceInfo.update(exeInfo)
            shareEndpoints = shareInfoDict.get('GLUE2ShareEndpointForeignKey',
                                               [])
            if isinstance(shareEndpoints, six.string_types):
                shareEndpoints = [shareEndpoints]
            for endpoint in shareEndpoints:
                ceType = endpoint.rsplit('.', 1)[1]
                # Reset for every endpoint: a name found for a previous endpoint
                # or share must never be attached to this one by accident.
                queueName = None
                # get queue Name, in CREAM this is behind GLUE2entityOtherInfo...
                if ceType == 'CREAM':
                    otherInfos = shareInfoDict.get('GLUE2EntityOtherInfo', [])
                    # Like the other multi-valued attributes handled above, a
                    # single value may arrive as a bare string.
                    if isinstance(otherInfos, six.string_types):
                        otherInfos = [otherInfos]
                    for otherInfo in otherInfos:
                        if otherInfo.startswith('CREAMCEId'):
                            queueName = otherInfo.split('/', 1)[1]
                            # creamCEs are EOL soon, ignore any info they have
                            if queueInfo.pop('NumberOfProcessors', 1) != 1:
                                sLog.verbose(
                                    'Ignoring MaxSlotsPerJob option for CreamCE',
                                    endpoint)

                # HTCondorCE, htcondorce
                elif ceType.lower().endswith('htcondorce'):
                    ceType = 'HTCondorCE'
                    queueName = 'condor'

                else:
                    sLog.error(
                        'Unknown CE Type, please check the available information',
                        ceType)
                    continue

                if queueName is None:
                    sLog.error(
                        'Could not determine the queue name for endpoint',
                        endpoint)
                    continue

                queueInfo['GlueCEImplementationName'] = ceType
                ceName = endpoint.split('_', 1)[0]
                cesDict.setdefault(ceName, {})
                # Merge with queues already registered for this CE by other shares
                existingQueues = dict(cesDict[ceName].get('Queues', {}))
                existingQueues[queueName] = queueInfo
                ceInfo['Queues'] = existingQueues
                cesDict[ceName].update(ceInfo)

            # ARC CEs do not have endpoints, we have to try something else to get the information about the queue etc.
            try:
                if not shareEndpoints and shareInfoDict[
                        'GLUE2ShareID'].startswith('urn:ogf'):
                    exeInfo = dict(exeInfo)  # silence pylint about tuples
                    queueInfo['GlueCEImplementationName'] = 'ARC'
                    # Batch system name: text after the last ':' of the first
                    # space-separated token of MANAGER
                    managerName = exeInfo.pop('MANAGER',
                                              '').split(' ',
                                                        1)[0].rsplit(':', 1)[1]
                    managerName = managerName.capitalize(
                    ) if managerName == 'condor' else managerName
                    queueName = 'nordugrid-%s-%s' % (
                        managerName,
                        shareInfoDict['GLUE2ComputingShareMappingQueue'])
                    ceName = shareInfoDict['GLUE2ShareID'].split(
                        'ComputingShare:')[1].split(':')[0]
                    cesDict.setdefault(ceName, {})
                    existingQueues = dict(cesDict[ceName].get('Queues', {}))
                    existingQueues[queueName] = queueInfo
                    ceInfo['Queues'] = existingQueues
                    cesDict[ceName].update(ceInfo)
            except Exception as err:
                # Best effort: one malformed share must not abort the whole scan,
                # but keep the cause visible in the log.
                sLog.error('Exception in ARC part for site:',
                           '%s: %r' % (siteName, err))

    return S_OK(siteDict)
Beispiel #57
0
def getSRMUpdates(vo, bdiiInfo=None):
    """Collect CS changes needed to align storage element options with the BDII.

    For every storage element returned by ``getSEsFromCS`` that is also found
    in the result of ``getGridSRMs``, the ``Description``, ``BackendType`` and
    ``TotalSize`` options are compared with the BDII values. The ``VO`` option
    is compared too, but only when no SRM access protocol of the SE defines a
    space token.

    :param str vo: VO name, passed to ``getGridSRMs`` and used as fallback VO
                   list when ``getVOs`` fails
    :param bdiiInfo: passed through to ``getGridSRMs``
    :returns: S_OK with a set of ``(section, option, currentValue, newValue)``
              tuples, or the S_ERROR of the first failing lookup
    """

    changeSet = set()

    def addToChangeSet(entry, changeSet):
        # Only record non-empty values that actually differ from the CS
        _section, _option, value, new_value = entry
        if new_value and new_value != value:
            changeSet.add(entry)

    result = getGridSRMs(vo, bdiiInfo=bdiiInfo)
    if not result['OK']:
        return result
    srmBdiiDict = result['Value']

    result = getSEsFromCS()
    if not result['OK']:
        return result
    seDict = result['Value']

    result = getVOs()
    if result['OK']:
        csVOs = set(result['Value'])
    else:
        csVOs = set([vo])

    for seHost, diracSE in seDict.items():
        seSection = '/Resources/StorageElements/%s' % diracSE[0]
        # Look up existing values first
        description = gConfig.getValue(cfgPath(seSection, 'Description'),
                                       'Unknown')
        backend = gConfig.getValue(cfgPath(seSection, 'BackendType'),
                                   'Unknown')
        vos = gConfig.getValue(cfgPath(seSection, 'VO'),
                               'Unknown').replace(' ', '')
        size = gConfig.getValue(cfgPath(seSection, 'TotalSize'), 'Unknown')
        # Look up current BDII values
        srmDict = {}
        seBdiiDict = {}
        for site in srmBdiiDict:
            if seHost in srmBdiiDict[site]:
                srmDict = srmBdiiDict[site][seHost]['SRM']
                seBdiiDict = srmBdiiDict[site][seHost]['SE']
                break

        if not srmDict or not seBdiiDict:
            continue

        newDescription = seBdiiDict.get('GlueSEName', 'Unknown')
        newBackend = seBdiiDict.get('GlueSEImplementationName', 'Unknown')
        newSize = seBdiiDict.get('GlueSESizeTotal', 'Unknown')
        addToChangeSet((seSection, 'Description', description, newDescription),
                       changeSet)
        addToChangeSet((seSection, 'BackendType', backend, newBackend),
                       changeSet)
        addToChangeSet((seSection, 'TotalSize', size, newSize), changeSet)

        # Evaluate VOs if no space token defined, otherwise this is VO specific
        spaceToken = ''
        for i in range(1, 10):
            protocol = gConfig.getValue(
                cfgPath(seSection, 'AccessProtocol.%d' % i, 'Protocol'), '')
            if protocol.lower() == 'srm':
                spaceToken = gConfig.getValue(
                    cfgPath(seSection, 'AccessProtocol.%d' % i, 'SpaceToken'),
                    '')
                break
        if not spaceToken:
            bdiiVOs = srmDict.get('GlueServiceAccessControlBaseRule', [])
            bdiiVOs = set([re.sub('^VO:', '', rule) for rule in bdiiVOs])
            seVOs = csVOs.intersection(bdiiVOs)
            # A set has no defined order, so comparing its joined form with
            # the CS string would report a change whenever the same VOs are
            # merely listed in another order. Compare as sets and emit a
            # sorted, reproducible value instead.
            currentVOs = set(item for item in vos.split(',') if item)
            if seVOs != currentVOs:
                newVOs = ','.join(sorted(seVOs))
                addToChangeSet((seSection, 'VO', vos, newVOs), changeSet)

    return S_OK(changeSet)
Beispiel #58
0
def fillVisList(vdict, num):
  """Spread a single output-visibility flag over ``num`` consecutive steps.

  When ``vdict`` holds exactly one entry, its key is read as the integer index
  of the first step and a new dictionary is returned that maps that index and
  the ``num - 1`` following ones (as strings) to the same flag. Dictionaries
  with zero or several entries are returned unchanged.

  :param dict vdict: step index (string or int) -> visibility flag
  :param int num: number of steps that should carry the single flag
  :returns: the expanded dictionary, or ``vdict`` itself when nothing is expanded
  """
  # Assuming that, if there's only one element in the list of output visibility
  # flags, every step will catch that flag
  if len(vdict) == 1:
    # next(iter(...)) works on Python 2 and 3; dict.keys()[0] does not on 3.
    firstKey, flag = next(iter(vdict.items()))
    firstStep = int(firstKey)
    vdict = dict((str(step), flag) for step in range(firstStep, firstStep + num))

  return vdict

# Sub-logger for this script, and the DIRAC setup read from the configuration.
gLogger = gLogger.getSubLogger( 'LaunchingRequest_run.py' )
currentSetup = gConfig.getValue( 'DIRAC/Setup' )

pr = ProductionRequest()

# One entry per step slot. The {{pNStep}} markers are kept as literal text:
# presumably they are substituted by a templating step before the script runs.
stepsList = [
  '{{p1Step}}',
  '{{p2Step}}',
  '{{p3Step}}',
  '{{p4Step}}',
  '{{p5Step}}',
  '{{p6Step}}',
  '{{p7Step}}',
  '{{p8Step}}',
  '{{p9Step}}',
  '{{p10Step}}',
  '{{p11Step}}',
  '{{p12Step}}',
]
Beispiel #59
0
    def sync(self):
        """Register agents and services from ComponentsMonitoringDB in RSS.

        Reads the component status for the current setup from ``self.compoDB``
        and, for every agent and service instance that is not in ``Error``
        status, calls ``addIfNotThereStatusElement`` on ``self.rsClient`` with
        status ``Unknown``. Instances in ``Error`` status are only logged as
        warnings.

        :returns: S_OK() when every instance was processed, otherwise the first
                  failing result from the DB query or from the RSS client
        """

        # TODO: delete from RSS if not anymore on ComponentsMonitoringDB

        setup = gConfig.getValue('DIRAC/Setup')

        components = self.compoDB.getComponentsStatus({'Setup': setup})
        if not components['OK']:
            return components
        components = components['Value'][0][setup]

        # (section in the DB answer, RSS elementType, how to build statusType).
        # Agents are keyed by host only, services by host:port.
        componentKinds = (
            ('agent', 'Agent', lambda info: info['Host']),
            ('service', 'Service', lambda info: '%(Host)s:%(Port)s' % info),
        )

        for section, elementType, buildStatusType in componentKinds:
            # .items() runs on Python 2 and 3 (.iteritems() is Python 2 only).
            # A setup without any component of this kind is simply skipped.
            for componentName, instances in components.get(section, {}).items():

                for instanceDict in instances:

                    if instanceDict['Status'] == 'Error':
                        self.log.warn('%(ComponentName)s %(Message)s' %
                                      instanceDict)
                        continue

                    res = self.rsClient.addIfNotThereStatusElement(
                        'Component',
                        'Status',
                        name=componentName,
                        statusType=buildStatusType(instanceDict),
                        status='Unknown',
                        elementType=elementType,
                        reason='Synchronized',
                    )
                    if not res['OK']:
                        return res

        return S_OK()


#...............................................................................
#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF
Beispiel #60
0
    def getQueues(self, resourceDict):
        """Build ``self.queueDict`` from the site/CE/queue description.

        For every queue of every CE an entry keyed ``<ce>_<queue>`` is created.
        Its ``ParametersDict`` is the queue's own dictionary, completed with
        site, setup, grid environment, working directory and, when it can be
        derived, CPU time and platform. A CE object is then obtained from the
        ``ComputingElementFactory`` and validated. ``self.platforms`` and
        ``self.sites`` are extended along the way. The ``Queues`` key is
        removed from each CE dictionary of ``resourceDict``.

        :param dict resourceDict: site -> CE name -> CE description (with ``Queues``)
        :returns: S_OK(), or the failing result of the CE creation / validation
        """

        self.queueDict = {}
        ceFactory = ComputingElementFactory()

        for site, siteCEs in resourceDict.items():
            for ce, ceDict in siteCEs.items():
                qDict = ceDict.pop('Queues')
                for queue, params in qDict.items():
                    queueName = '%s_%s' % (ce, queue)
                    # "params" is the queue's own dict, shared with the entry
                    queueEntry = {'ParametersDict': params}
                    self.queueDict[queueName] = queueEntry

                    params['Queue'] = queue
                    params['Site'] = site
                    params['GridEnv'] = self.gridEnv
                    params['Setup'] = gConfig.getValue('/DIRAC/Setup', 'unknown')

                    # Evaluate the CPU limit of the queue according to the Glue convention
                    # To Do: should be a utility
                    if "maxCPUTime" in params and "SI00" in params:
                        # For some sites there are crazy values in the CS:
                        # clamp to the range [0, 12.5 days]
                        maxCPUTime = min(max(float(params['maxCPUTime']), 0),
                                         86400 * 12.5)
                        si00 = float(params['SI00'])
                        params['CPUTime'] = int(60. / 250. * maxCPUTime * si00)

                    # Per-queue working directory, created on first use
                    qwDir = os.path.join(self.workingDirectory, queue)
                    if not os.path.exists(qwDir):
                        os.makedirs(qwDir)
                    params['WorkingDirectory'] = qwDir

                    # Platform: queue setting first, then the CE setting, then
                    # a guess from the CE's architecture and OS
                    if "Platform" in params:
                        platform = params['Platform']
                    elif "Platform" in ceDict:
                        platform = ceDict['Platform']
                    elif "OS" in ceDict:
                        platform = '_'.join(
                            [ceDict.get('architecture', 'x86_64'), ceDict['OS']])
                    else:
                        platform = ''
                    if platform and platform not in self.platforms:
                        self.platforms.append(platform)

                    if platform and "Platform" not in params:
                        result = Resources.getDIRACPlatform(platform)
                        if result['OK']:
                            params['Platform'] = result['Value']

                    # Queue parameters take precedence over the CE-level ones
                    ceQueueDict = dict(ceDict)
                    ceQueueDict.update(params)
                    result = ceFactory.getCE(ceName=ce,
                                             ceType=ceDict['CEType'],
                                             ceParametersDict=ceQueueDict)
                    if not result['OK']:
                        return result
                    computingElement = result['Value']
                    queueEntry['CE'] = computingElement
                    queueEntry['CEName'] = ce
                    queueEntry['CEType'] = ceDict['CEType']
                    queueEntry['Site'] = site
                    queueEntry['QueueName'] = queue

                    result = computingElement.isValid()
                    if not result['OK']:
                        self.log.fatal(result['Message'])
                        return result

                    # The mere presence of the option enables proxy bundling
                    if 'BundleProxy' in params or 'BundleProxy' in ceDict:
                        queueEntry['BundleProxy'] = True

                    if site not in self.sites:
                        self.sites.append(site)

        return S_OK()