コード例 #1
0
  def initialize( self ):
    """ Agent initialization: monitoring activity, plugin location, default client and threads.

        The extensions MUST provide in the initialize method the following data members:
        - TransformationClient objects (self.transClient),
        - set the shifterProxy if different from the default one ('ProductionManager')
        - list of transformation types to be looked (self.transType)

        :return: S_OK()
    """
    # Activity used to account for the submitted tasks
    gMonitor.registerActivity( "SubmittedTasks",
                               "Automatically submitted tasks",
                               "Transformation Monitoring",
                               "Tasks",
                               gMonitor.OP_ACUM )

    # Where the plugin is taken from, unless the 'PluginLocation' option says otherwise
    defaultPlugin = 'DIRAC.TransformationSystem.Client.TaskManagerPlugin'
    self.pluginLocation = self.am_getOption( 'PluginLocation', defaultPlugin )

    # Default client
    self.transClient = TransformationClient()

    # Value of the 'BulkSubmission' option (False when not set)
    self.bulkSubmissionFlag = self.am_getOption( 'BulkSubmission', False )

    # Thread pool with as many threads as configured; one _execute job is queued
    # per thread, the loop index being passed as its only argument
    nThreads = self.am_getOption( 'maxNumberOfThreads', 15 )
    workers = ThreadPool( nThreads, nThreads )
    self.log.verbose( "Multithreaded with %d threads" % nThreads )
    for threadID in xrange( nThreads ):
      workers.generateJobAndQueueIt( self._execute, [threadID] )

    return S_OK()
コード例 #2
0
    def initialize(self):
        """ Agent initialization.

        Registers the "SubmittedTasks" monitoring activity, reads the agent
        options and queues one ``_execute`` job per configured thread.

        The extensions MUST provide in the initialize method the following data members:
        - TransformationClient objects (self.transClient),
        - set the shifterProxy if different from the default one ('ProductionManager')
        - list of transformation types to be looked (self.transType)

        :return: S_OK()
        """
        activityDescription = ("SubmittedTasks", "Automatically submitted tasks",
                               "Transformation Monitoring", "Tasks")
        gMonitor.registerActivity(*(activityDescription + (gMonitor.OP_ACUM,)))

        self.pluginLocation = self.am_getOption(
            'PluginLocation', 'DIRAC.TransformationSystem.Client.TaskManagerPlugin')

        # Default clients
        self.transClient = TransformationClient()

        # Bulk submission flag
        self.bulkSubmissionFlag = self.am_getOption('BulkSubmission', False)

        # Threading: the pool gets the same minimum and maximum number of threads
        poolSize = self.am_getOption('maxNumberOfThreads', 15)
        pool = ThreadPool(poolSize, poolSize)
        self.log.verbose("Multithreaded with %d threads" % poolSize)

        # One _execute job per thread, each receiving its index
        for index in xrange(poolSize):
            pool.generateJobAndQueueIt(self._execute, [index])

        return S_OK()
コード例 #3
0
class MyProxyRenewalAgent(AgentModule):
  """ Agent that purges expired entries from the ProxyDB and asks it to renew,
      from MyProxy, the credentials that are about to expire.
  """

  def initialize(self):
    """ Create the ProxyDB handle and the thread pool, and log the settings in use.

        :return: S_OK()
    """
    requiredLifeTime = self.am_getOption( "MinimumLifeTime", 3600 )
    renewedLifeTime = self.am_getOption( "RenewedLifeTime", 54000 )
    self.proxyDB = ProxyDB( requireVoms = True,
                            useMyProxy = True
                          )

    gLogger.info( "Minimum Life time      : %s" % requiredLifeTime )
    gLogger.info( "Life time on renew     : %s" % renewedLifeTime )
    gLogger.info( "MyProxy server         : %s" % self.proxyDB.getMyProxyServer() )
    gLogger.info( "MyProxy max proxy time : %s" % self.proxyDB.getMyProxyMaxLifeTime() )

    self.__threadPool = ThreadPool( 1, 10 )
    return S_OK()

  def __renewProxyForCredentials( self, userDN, userGroup ):
    """ Renew from MyProxy the proxy of one user DN / group pair and log the outcome.

        :param userDN: DN of the credentials to renew
        :param userGroup: group of the credentials to renew
    """
    lifeTime = self.am_getOption( "RenewedLifeTime", 54000 )
    gLogger.info( "Renewing for %s@%s %s secs" % ( userDN, userGroup, lifeTime ) )
    retVal = self.proxyDB.renewFromMyProxy( userDN,
                                            userGroup,
                                            lifeTime = lifeTime )
    if not retVal[ 'OK' ]:
      gLogger.error( "Failed to renew for %s@%s : %s" %( userDN, userGroup, retVal[ 'Message' ] ) )
    else:
      gLogger.info( "Renewed proxy for %s@%s" % ( userDN, userGroup ) )

  def __treatRenewalCallback( self, oTJ, exceptionList ):
    """ Exception callback of the renewal jobs: log the exception of the failed job.

        :param oTJ: first callback argument, not used here
        :param exceptionList: exception information handed to gLogger.exception
    """
    gLogger.exception( lException = exceptionList )

  def execute(self):
    """ The main agent execution method

        Purges the logs, the expired requests and the expired proxies, then queues
        one renewal job for each credential about to expire and waits for the results.

        :return: S_OK(), or the error structure of getCredentialsAboutToExpire
    """
    self.proxyDB.purgeLogs()
    gLogger.info( "Purging expired requests" )
    retVal = self.proxyDB.purgeExpiredRequests()
    if retVal[ 'OK' ]:
      gLogger.info( " purged %s requests" % retVal[ 'Value' ] )
    else:
      # A failed purge does not stop the cycle, but it must not go unnoticed
      gLogger.error( "Failed to purge expired requests : %s" % retVal[ 'Message' ] )
    gLogger.info( "Purging expired proxies" )
    retVal = self.proxyDB.purgeExpiredProxies()
    if retVal[ 'OK' ]:
      gLogger.info( " purged %s proxies" % retVal[ 'Value' ] )
    else:
      gLogger.error( "Failed to purge expired proxies : %s" % retVal[ 'Message' ] )
    retVal = self.proxyDB.getCredentialsAboutToExpire( self.am_getOption( "MinimumLifeTime" , 3600 ) )
    if not retVal[ 'OK' ]:
      return retVal
    data = retVal[ 'Value' ]
    gLogger.info( "Renewing %s proxies..." % len( data ) )
    for record in data:
      # The first two fields of each record are used as user DN and user group
      userDN = record[0]
      userGroup = record[1]
      self.__threadPool.generateJobAndQueueIt( self.__renewProxyForCredentials,
                                               args = ( userDN, userGroup ),
                                               oExceptionCallback = self.__treatRenewalCallback )
    self.__threadPool.processAllResults()
    return S_OK()
コード例 #4
0
class MyProxyRenewalAgent(AgentModule):
  """ Agent that purges expired entries from the ProxyDB and asks it to renew,
      from MyProxy, the credentials that are about to expire.
  """

  def initialize(self):
    """ Create the ProxyDB handle and the thread pool, and log the settings in use.

        :return: S_OK()
    """
    requiredLifeTime = self.am_getOption( "MinimumLifeTime", 3600 )
    renewedLifeTime = self.am_getOption( "RenewedLifeTime", 54000 )
    self.proxyDB = ProxyDB( useMyProxy = True )

    gLogger.info( "Minimum Life time      : %s" % requiredLifeTime )
    gLogger.info( "Life time on renew     : %s" % renewedLifeTime )
    gLogger.info( "MyProxy server         : %s" % self.proxyDB.getMyProxyServer() )
    gLogger.info( "MyProxy max proxy time : %s" % self.proxyDB.getMyProxyMaxLifeTime() )

    self.__threadPool = ThreadPool( 1, 10 )
    return S_OK()

  def __renewProxyForCredentials( self, userDN, userGroup ):
    """ Renew from MyProxy the proxy of one user DN / group pair and log the outcome.

        :param userDN: DN of the credentials to renew
        :param userGroup: group of the credentials to renew
    """
    lifeTime = self.am_getOption( "RenewedLifeTime", 54000 )
    gLogger.info( "Renewing for %s@%s %s secs" % ( userDN, userGroup, lifeTime ) )
    retVal = self.proxyDB.renewFromMyProxy( userDN,
                                            userGroup,
                                            lifeTime = lifeTime )
    if not retVal[ 'OK' ]:
      gLogger.error( "Failed to renew proxy", "for %s@%s : %s" %( userDN, userGroup, retVal[ 'Message' ] ) )
    else:
      gLogger.info( "Renewed proxy for %s@%s" % ( userDN, userGroup ) )

  def __treatRenewalCallback( self, oTJ, exceptionList ):
    """ Exception callback of the renewal jobs: log the exception of the failed job.

        :param oTJ: first callback argument, not used here
        :param exceptionList: exception information handed to gLogger.exception
    """
    gLogger.exception( lException = exceptionList )

  def execute(self):
    """ The main agent execution method

        Purges the logs, the expired requests and the expired proxies, then queues
        one renewal job for each credential about to expire and waits for the results.

        :return: S_OK(), or the error structure of getCredentialsAboutToExpire
    """
    self.proxyDB.purgeLogs()
    gLogger.info( "Purging expired requests" )
    retVal = self.proxyDB.purgeExpiredRequests()
    if retVal[ 'OK' ]:
      gLogger.info( " purged %s requests" % retVal[ 'Value' ] )
    else:
      # A failed purge does not stop the cycle, but it must not go unnoticed
      gLogger.error( "Failed to purge expired requests", retVal[ 'Message' ] )
    gLogger.info( "Purging expired proxies" )
    retVal = self.proxyDB.purgeExpiredProxies()
    if retVal[ 'OK' ]:
      gLogger.info( " purged %s proxies" % retVal[ 'Value' ] )
    else:
      gLogger.error( "Failed to purge expired proxies", retVal[ 'Message' ] )
    retVal = self.proxyDB.getCredentialsAboutToExpire( self.am_getOption( "MinimumLifeTime" , 3600 ) )
    if not retVal[ 'OK' ]:
      return retVal
    data = retVal[ 'Value' ]
    gLogger.info( "Renewing %s proxies..." % len( data ) )
    for record in data:
      # The first two fields of each record are used as user DN and user group
      userDN = record[0]
      userGroup = record[1]
      self.__threadPool.generateJobAndQueueIt( self.__renewProxyForCredentials,
                                               args = ( userDN, userGroup ),
                                               oExceptionCallback = self.__treatRenewalCallback )
    self.__threadPool.processAllResults()
    return S_OK()
コード例 #5
0
    def initialize(self):
        """ Standard initialize: read the agent options, create the client,
        prepare the cache-related members and queue the worker jobs.

        :return: S_OK()
        """
        # Agent options
        defaultPlugin = 'DIRAC.TransformationSystem.Agent.TransformationPlugin'
        self.pluginLocation = self.am_getOption('PluginLocation', defaultPlugin)
        self.transformationStatus = self.am_getOption(
            'transformationStatus', ['Active', 'Completing', 'Flush'])
        # MaxFiles is ambiguous as a CS option name: MaxFilesToProcess is read
        # first, with the value of MaxFiles (5000 if unset) as its default
        legacyMaxFiles = self.am_getOption('MaxFiles', 5000)
        self.maxFiles = self.am_getOption('MaxFilesToProcess', legacyMaxFiles)

        # Transformation types: the agent option, or else what Operations defines
        selectedTypes = self.am_getOption('TransformationTypes', [])
        if not selectedTypes:
            processingTypes = Operations().getValue(
                'Transformations/DataProcessing', ['MCSimulation', 'Merge'])
            manipulationTypes = Operations().getValue(
                'Transformations/DataManipulation', ['Replication', 'Removal'])
            selectedTypes = processingTypes + manipulationTypes
        self.transformationTypes = sorted(selectedTypes)

        # Client
        self.transfClient = TransformationClient()

        # Locations used for the pickle cache file and for the control directory
        self.workDirectory = self.am_getWorkDirectory()
        self.cacheFile = os.path.join(self.workDirectory, 'ReplicaCache.pkl')
        self.controlDirectory = self.am_getControlDirectory()

        # Offset in the TS, if any, to be remembered
        self.lastFileOffset = {}

        # Replica cache and its validity
        self.replicaCache = {}
        self.replicaCacheValidity = self.am_getOption('ReplicaCacheValidity', 2)

        self.noUnusedDelay = self.am_getOption('NoUnusedDelay', 6)

        # Threads: one _execute job per thread, each given its index
        poolSize = self.am_getOption('maxThreadsInPool', 1)
        pool = ThreadPool(poolSize, poolSize)
        self.log.info("Multithreaded with %d threads" % poolSize)
        for threadID in xrange(poolSize):
            pool.generateJobAndQueueIt(self._execute, [threadID])

        typesAsString = str(self.transformationTypes)
        self.log.info("Will treat the following transformation types: %s" % typesAsString)

        return S_OK()
コード例 #6
0
class SystemAdministratorIntegrator:
    """ Run one SystemAdministratorClient call on a list of hosts through a thread pool
    and collect the per-host results.
    """

    def __init__(self, **kwargs):
        """ Constructor

        :param kwargs: an optional 'hosts' entry gives the hosts to address and is taken
                       out of the keyword arguments; without it the hosts come from
                       Registry.getHosts() (empty list if that call fails). The remaining
                       keyword arguments are kept for the SystemAdministratorClient objects.
        """
        if 'hosts' in kwargs:
            self.__hosts = kwargs.pop('hosts')
        else:
            lookup = Registry.getHosts()
            self.__hosts = lookup['Value'] if lookup['OK'] else []

        self.__kwargs = dict(kwargs)
        self.__pool = ThreadPool(len(self.__hosts))
        self.__resultDict = {}

    def __getattr__(self, name):
        """ Treat any unknown attribute as the name of the call to make: the name is
        stored in self.call and the execute method is handed back.
        """
        self.call = name
        return self.execute

    def __executeClient(self, host, method, *parms, **kwargs):
        """ Execute RPC method on a given host

        :param host: host entry; its 'Host' option (default: the entry itself) is the client target
        :param method: name of the client method to call
        :return: the result of the call, with the host added under the 'Host' key
        """
        target = Registry.getHostOption(host, 'Host', host)
        client = SystemAdministratorClient(target, **self.__kwargs)
        remoteCall = getattr(client, method)
        outcome = remoteCall(*parms, **kwargs)
        outcome['Host'] = host
        return outcome

    def __processResult(self, id_, result):
        """ Collect results in the final structure, keyed by the host found under 'Host'
        (that key is removed from the stored result)
        """
        host = result.pop('Host')
        self.__resultDict[host] = result

    def execute(self, *args, **kwargs):
        """ Main execution method

        Queues the call named by self.call for every host, waits for all the results.

        :return: S_OK( dictionary of results, one entry per host that was processed )
        """
        self.__resultDict = {}
        extraArgs = list(args)
        for host in self.__hosts:
            jobArgs = [host, self.call] + extraArgs
            self.__pool.generateJobAndQueueIt(self.__executeClient,
                                              args=jobArgs,
                                              kwargs=kwargs,
                                              oCallback=self.__processResult)

        self.__pool.processAllResults()
        return S_OK(self.__resultDict)
コード例 #7
0
class SystemAdministratorIntegrator:
  """ Helper calling the same SystemAdministratorClient method on several hosts
      through a thread pool, gathering one result per host.
  """

  def __init__( self, **kwargs ):
    """ Constructor

        :param kwargs: 'hosts', when given, is the list of hosts to use and is removed from
                       the keyword arguments; otherwise Registry.getHosts() provides it
                       (empty list on failure). What is left is stored for the clients.
    """
    if 'hosts' in kwargs:
      hostList = kwargs.pop( 'hosts' )
    else:
      registryResult = Registry.getHosts()
      if registryResult['OK']:
        hostList = registryResult['Value']
      else:
        hostList = []
    self.__hosts = hostList

    self.__kwargs = dict( kwargs )
    self.__pool = ThreadPool( len( self.__hosts ) )
    self.__resultDict = {}

  def __getattr__( self, name ):
    """ Unknown attributes are interpreted as the name of the call to make:
        remember it in self.call and return the execute method
    """
    self.call = name
    return self.execute

  def __executeClient( self, host, method, *parms, **kwargs ):
    """ Execute RPC method on a given host

        :param host: host entry, also stored in the result under 'Host'
        :param method: name of the client method to call
        :return: result of the call
    """
    hostName = Registry.getHostOption( host, 'Host', host )
    adminClient = SystemAdministratorClient( hostName, **self.__kwargs )
    callResult = getattr( adminClient, method )( *parms, **kwargs )
    callResult['Host'] = host
    return callResult

  def __processResult( self, id_, result ):
    """ Collect results in the final structure: the 'Host' entry is removed
        from the result and used as the key
    """
    hostKey = result.pop( 'Host' )
    self.__resultDict[hostKey] = result

  def execute(self, *args, **kwargs ):
    """ Main execution method

        :return: S_OK( results collected for the call named in self.call )
    """
    self.__resultDict = {}
    positional = list( args )
    for host in self.__hosts:
      self.__pool.generateJobAndQueueIt( self.__executeClient,
                                         args = [ host, self.call ] + positional,
                                         kwargs = kwargs,
                                         oCallback = self.__processResult )

    self.__pool.processAllResults()
    return S_OK( self.__resultDict )
コード例 #8
0
ファイル: ServiceInterface.py プロジェクト: pmusset/DIRAC
    def _updateServiceConfiguration(self, urlSet, fromMaster=False):
        """
        Update configuration in a set of services in parallel: one
        ``_forceServiceUpdate`` job is queued per URL and all results are awaited.

        :param set urlSet: a set of service URLs
        :param fromMaster: flag to force updating from the master CS
        :return: Nothing
        """
        workers = ThreadPool(len(urlSet))
        for serviceURL in urlSet:
            jobArguments = [serviceURL, fromMaster]
            workers.generateJobAndQueueIt(self._forceServiceUpdate,
                                          args=jobArguments,
                                          kwargs={},
                                          oCallback=self.__processResults)
        workers.processAllResults()
コード例 #9
0
ファイル: ServiceInterface.py プロジェクト: sparsh35/DIRAC
  def __updateServiceConfiguration(self, urlSet, fromMaster=False):
    """
    Update configuration in a set of services in parallel: one forced update
    job is queued per URL and all results are awaited.

    :param set urlSet: a set of service URLs
    :param fromMaster: flag to force updating from the master CS
    :return: S_OK wrapping self.__updateResultDict
             (presumably the Successful/Failed dict with service URLs filled by the callbacks)
    """
    workers = ThreadPool(len(urlSet))
    for serviceURL in urlSet:
      jobArguments = [serviceURL, fromMaster]
      workers.generateJobAndQueueIt(self.__forceServiceUpdate,
                                    args=jobArguments,
                                    kwargs={},
                                    oCallback=self.__processResults)
    workers.processAllResults()
    return S_OK(self.__updateResultDict)
コード例 #10
0
  def initialize(self):
    """ Standard initialize: options, client, cache-related members and worker jobs.

        :return: S_OK()
    """
    # Options of the agent
    self.pluginLocation = self.am_getOption(
        'PluginLocation', 'DIRAC.TransformationSystem.Agent.TransformationPlugin')
    statusDefaults = ['Active', 'Completing', 'Flush']
    self.transformationStatus = self.am_getOption('transformationStatus', statusDefaults)
    # The CS option is being renamed as MaxFiles is ambiguous: the old option
    # (default 5000) only provides the default of the new one
    oldMaxFiles = self.am_getOption('MaxFiles', 5000)
    self.maxFiles = self.am_getOption('MaxFilesToProcess', oldMaxFiles)

    # Types to treat: from the agent options, else from the Operations section
    typesToTreat = self.am_getOption('TransformationTypes', [])
    if not typesToTreat:
      typesToTreat = Operations().getValue('Transformations/DataProcessing', ['MCSimulation', 'Merge']) + \
          Operations().getValue('Transformations/DataManipulation', ['Replication', 'Removal'])
    self.transformationTypes = sorted(typesToTreat)

    # Client
    self.transfClient = TransformationClient()

    # Work directory, pickle cache file inside it, and control directory
    self.workDirectory = self.am_getWorkDirectory()
    self.cacheFile = os.path.join(self.workDirectory, 'ReplicaCache.pkl')
    self.controlDirectory = self.am_getControlDirectory()

    # Offset in the TS, if any
    self.lastFileOffset = {}

    # Cache and its validity
    self.replicaCache = {}
    self.replicaCacheValidity = self.am_getOption('ReplicaCacheValidity', 2)

    self.noUnusedDelay = self.am_getOption('NoUnusedDelay', 6)

    # Thread pool with one queued _execute job per thread
    threadCount = self.am_getOption('maxThreadsInPool', 1)
    executors = ThreadPool(threadCount, threadCount)
    self.log.info("Multithreaded with %d threads" % threadCount)
    for index in xrange(threadCount):
      executors.generateJobAndQueueIt(self._execute, [index])

    self.log.info("Will treat the following transformation types: %s" % str(self.transformationTypes))

    return S_OK()
コード例 #11
0
ファイル: TransformationAgent.py プロジェクト: corionma/DIRAC
  def initialize( self ):
    """ Standard initialize: read the agent options, create the client, set the
        shifter proxy, set up the cache-related members and queue the worker jobs.

        :return: S_OK()
    """
    # few parameters
    self.pluginLocation = self.am_getOption( 'PluginLocation',
                                             'DIRAC.TransformationSystem.Agent.TransformationPlugin' )
    self.transformationStatus = self.am_getOption( 'transformationStatus', ['Active', 'Completing', 'Flush'] )
    self.maxFiles = self.am_getOption( 'MaxFiles', 5000 )

    # Transformation types: the agent option if set, otherwise the union of the
    # DataProcessing and DataManipulation types known to Operations
    agentTSTypes = self.am_getOption( 'TransformationTypes', [] )
    if agentTSTypes:
      self.transformationTypes = sorted( agentTSTypes )
    else:
      dataProc = Operations().getValue( 'Transformations/DataProcessing', ['MCSimulation', 'Merge'] )
      dataManip = Operations().getValue( 'Transformations/DataManipulation', ['Replication', 'Removal'] )
      self.transformationTypes = sorted( dataProc + dataManip )



    # clients
    self.transfClient = TransformationClient()

    # shifter
    self.am_setOption( 'shifterProxy', 'ProductionManager' )

    # for caching using a pickle file
    # NOTE(review): __readCache() is called before self.workDirectory and self.cacheFile
    # are assigned below - confirm it does not rely on them (or that they are set elsewhere)
    self.__readCache()
    self.workDirectory = self.am_getWorkDirectory()
    self.cacheFile = os.path.join( self.workDirectory, 'ReplicaCache.pkl' )
    self.controlDirectory = self.am_getControlDirectory()
    self.replicaCacheValidity = self.am_getOption( 'ReplicaCacheValidity', 2 )
    self.noUnusedDelay = self.am_getOption( 'NoUnusedDelay', 6 )
    self.dateWriteCache = datetime.datetime.utcnow()

    # Get it threaded
    maxNumberOfThreads = self.am_getOption( 'maxThreadsInPool', 1 )
    threadPool = ThreadPool( maxNumberOfThreads, maxNumberOfThreads )
    self.log.info( "Multithreaded with %d threads" % maxNumberOfThreads )

    # One _execute job per thread, the loop index being its only argument
    for i in xrange( maxNumberOfThreads ):
      threadPool.generateJobAndQueueIt( self._execute, [i] )

    self.log.info( "Will treat the following transformation types: %s" % str( self.transformationTypes ) )

    return S_OK()
コード例 #12
0
ファイル: TransformationAgent.py プロジェクト: hanyl/DIRAC
  def initialize( self ):
    """ Standard initialize: call the cache reader, set the shifter proxy
        and queue one _execute job per thread.

        :return: S_OK()
    """
    self.__readCache()
    self.dateWriteCache = datetime.datetime.utcnow()

    self.am_setOption( 'shifterProxy', 'ProductionManager' )

    # Thread pool with identical minimum and maximum size
    poolSize = self.am_getOption( 'maxThreadsInPool', 1 )
    workers = ThreadPool( poolSize, poolSize )
    self.log.info( "Multithreaded with %d threads" % poolSize )

    # Each job receives its index as only argument
    for threadID in xrange( poolSize ):
      workers.generateJobAndQueueIt( self._execute, [threadID] )

    return S_OK()
コード例 #13
0
  def initialize(self):
    """ Set the default parameters: database handles, polling time and worker threads.

        :return: S_OK()
    """
    self.jobDB = JobDB()
    self.logDB = JobLoggingDB()
    self.am_setOption('PollingTime', 3600)

    agentEnabled = self.am_getOption('Enable', True)
    if not agentEnabled:
      self.log.info('Stalled Job Agent running in disabled mode')

    # Threading: as many queued _execute jobs as threads in the pool
    poolSize = self.am_getOption('MaxNumberOfThreads', 15)
    workers = ThreadPool(poolSize, poolSize)
    self.log.verbose("Multithreaded with %d threads" % poolSize)

    for _unused in range(poolSize):
      workers.generateJobAndQueueIt(self._execute)

    return S_OK()
コード例 #14
0
  def initialize(self):
    """ Make the necessary initializations.
        The ThreadPool is created here, the _execute() method is what each thread will execute.

        Reads the agent options, creates the clients, reloads the time logs and
        BK queries from the pickle file (starting from empty dictionaries when it
        cannot be read) and registers the "Iteration" monitoring activity.

        :return: S_OK()
    """

    self.fullUpdatePeriod = self.am_getOption('FullUpdatePeriod', self.fullUpdatePeriod)
    self.bkUpdateLatency = self.am_getOption('BKUpdateLatency', self.bkUpdateLatency)
    self.debug = self.am_getOption('verbose', self.debug)

    # The pickle file lives in the agent work directory
    self.pickleFile = os.path.join(self.am_getWorkDirectory(), self.pickleFile)
    self.chunkSize = self.am_getOption('maxFilesPerChunk', self.chunkSize)

    self.pluginsWithNoRunInfo = Operations().getValue('TransformationPlugins/PluginsWithNoRunInfo',
                                                      self.pluginsWithNoRunInfo)

    self._logInfo('Full Update Period: %d seconds' % self.fullUpdatePeriod)
    self._logInfo('BK update latency : %d seconds' % self.bkUpdateLatency)
    self._logInfo('Plugins with no run info: %s' % ', '.join(self.pluginsWithNoRunInfo))

    self.transClient = TransformationClient()
    self.bkClient = BookkeepingClient()

    try:
      # Pickle streams are binary data: text mode fails on Python 3 and on
      # platforms translating line endings, binary mode works everywhere
      with open(self.pickleFile, 'rb') as pf:
        # The three objects are read back in this fixed order
        self.timeLog = pickle.load(pf)
        self.fullTimeLog = pickle.load(pf)
        self.bkQueries = pickle.load(pf)
      self._logInfo("successfully loaded Log from", self.pickleFile, "initialize")
    except (EOFError, IOError):
      # Missing or truncated file: start from scratch
      self._logInfo("failed loading Log from", self.pickleFile, "initialize")
      self.timeLog = {}
      self.fullTimeLog = {}
      self.bkQueries = {}

    maxNumberOfThreads = self.am_getOption('maxThreadsInPool', 1)
    threadPool = ThreadPool(maxNumberOfThreads, maxNumberOfThreads)

    # One _execute job per thread, the loop index being its only argument
    for i in xrange(maxNumberOfThreads):
      threadPool.generateJobAndQueueIt(self._execute, [i])

    gMonitor.registerActivity("Iteration", "Agent Loops", AGENT_NAME, "Loops/min", gMonitor.OP_SUM)
    return S_OK()
コード例 #15
0
ファイル: TransformationAgent.py プロジェクト: sposs/DIRAC
  def initialize( self ):
    """ Standard initialize: call the cache reader, set the shifter proxy,
        queue one _execute job per thread and log the transformation types.

        :return: S_OK()
    """
    self.__readCache()
    self.dateWriteCache = datetime.datetime.utcnow()

    self.am_setOption( 'shifterProxy', 'ProductionManager' )

    # Thread pool with identical minimum and maximum size
    threadCount = self.am_getOption( 'maxThreadsInPool', 1 )
    executors = ThreadPool( threadCount, threadCount )
    self.log.info( "Multithreaded with %d threads" % threadCount )

    # Each job receives its index as only argument
    for index in xrange( threadCount ):
      executors.generateJobAndQueueIt( self._execute, [index] )

    typesAsString = str( self.transformationTypes )
    self.log.info( "Will treat the following transformation types: %s" % typesAsString )

    return S_OK()
コード例 #16
0
  def __init__( self, **kwargs ):
    """ Constructor

        The hosts come from the 'hosts' keyword argument (removed from kwargs) or,
        without it, from Registry.getHosts() minus the optional 'exclude' list.
        All of them are pinged in parallel: hosts with an OK answer are kept,
        hosts with a not-OK answer are listed in self.silentHosts.
    """
    if 'hosts' in kwargs:
      self.__hosts = kwargs.pop( 'hosts' )
    else:
      registryHosts = Registry.getHosts()
      self.__hosts = registryHosts['Value'] if registryHosts['OK'] else []
      # Excluded hosts
      if 'exclude' in kwargs:
        self.__hosts = list ( set( self.__hosts ) - set( kwargs[ 'exclude' ] ) )

    # Ping the hosts to remove those that don't have a SystemAdministrator service.
    # self.__kwargs stays empty while pinging and is only set afterwards.
    self.silentHosts = []
    self.__resultDict = {}
    self.__kwargs = {}
    pingPool = ThreadPool( len( self.__hosts ) )
    for host in self.__hosts:
      pingPool.generateJobAndQueueIt( self.__executeClient,
                                      args = [ host, "ping" ],
                                      kwargs = {},
                                      oCallback = self.__processResult )
    pingPool.processAllResults()

    # Sort the hosts that gave a result according to its 'OK' flag
    respondingHosts = []
    for host, pingResult in self.__resultDict.items():
      destination = respondingHosts if pingResult['OK'] else self.silentHosts
      destination.append( host )
    del pingPool

    self.__hosts = respondingHosts

    # Final state: client keyword arguments, a pool sized on the kept hosts, no results
    self.__kwargs = dict( kwargs )
    self.__pool = ThreadPool( len( self.__hosts ) )
    self.__resultDict = {}
コード例 #17
0
    def __init__(self, **kwargs):
        """ Constructor

        Takes the hosts from the 'hosts' keyword argument (which is removed from
        kwargs) or from Registry.getHosts() minus the optional 'exclude' list,
        pings them in parallel and keeps only those whose ping result is OK;
        the ones with a not-OK result are stored in self.silentHosts.
        """
        if 'hosts' in kwargs:
            candidates = kwargs.pop('hosts')
        else:
            lookup = Registry.getHosts()
            if lookup['OK']:
                candidates = lookup['Value']
            else:
                candidates = []
            # Excluded hosts
            if 'exclude' in kwargs:
                candidates = list(set(candidates) - set(kwargs['exclude']))
        self.__hosts = candidates

        # Ping the hosts to remove those that don't have a SystemAdministrator service;
        # no client keyword arguments are set during this phase
        self.silentHosts = []
        self.__resultDict = {}
        self.__kwargs = {}
        pingers = ThreadPool(len(self.__hosts))
        for host in self.__hosts:
            pingers.generateJobAndQueueIt(self.__executeClient,
                                          args=[host, "ping"],
                                          kwargs={},
                                          oCallback=self.__processResult)
        pingers.processAllResults()

        alive = []
        for host, answer in self.__resultDict.items():
            if not answer['OK']:
                self.silentHosts.append(host)
            else:
                alive.append(host)
        del pingers

        self.__hosts = alive

        # State used by the subsequent calls
        self.__kwargs = dict(kwargs)
        self.__pool = ThreadPool(len(self.__hosts))
        self.__resultDict = {}
コード例 #18
0
class SeSInspectorAgent(AgentModule):
  """ Class SeSInspectorAgent is in charge of going through Services
      table, and pass Service and Status to the PEP
  """

#############################################################################

  def initialize(self):
    """ Standard initialization: database handles, queue of services to check,
        thread pool, configuration module and helper clients.

        :return: S_OK(), or S_ERROR if anything fails during the initialization
    """

    try:
      self.rsDB = ResourceStatusDB()
      self.rmDB = ResourceManagementDB()

      self.ServicesToBeChecked = Queue.Queue()
      self.ServiceNamesInCheck = []

      self.maxNumberOfThreads = self.am_getOption( 'maxThreadsInPool', 1 )
      self.threadPool = ThreadPool( self.maxNumberOfThreads,
                                    self.maxNumberOfThreads )

      if not self.threadPool:
        self.log.error('Can not create Thread Pool')
        return S_ERROR('Can not create Thread Pool')

      self.setup = getSetup()['Value']

      self.VOExtension = getExt()

      # The checking frequencies come from the (possibly VO-extended) Configurations module
      configModule = __import__(self.VOExtension+"DIRAC.ResourceStatusSystem.Policy.Configurations",
                                globals(), locals(), ['*'])

      # Work on a private copy of the frequencies
      self.Services_check_freq = copy.deepcopy(configModule.Services_check_freq)

      self.nc = NotificationClient()

      self.diracAdmin = DiracAdmin()

      self.csAPI = CSAPI()

      # One _executeCheck job per thread
      for _ in xrange(self.maxNumberOfThreads):
        self.threadPool.generateJobAndQueueIt(self._executeCheck, args = (None, ) )

      return S_OK()

    except Exception:
      errorStr = "SeSInspectorAgent initialization"
      gLogger.exception(errorStr)
      return S_ERROR(errorStr)


#############################################################################

  def execute(self):
    """
    The main SSInspectorAgent execution method.
    Calls :meth:`DIRAC.ResourceStatusSystem.DB.ResourceStatusDB.getStuffToCheck` and
    put result in self.ServicesToBeChecked (a Queue) and in self.ServiceNamesInCheck (a list)

    :return: S_OK(), or S_ERROR if an exception is raised
    """

    try:

      res = self.rsDB.getStuffToCheck('Services', self.Services_check_freq)

      for resourceTuple in res:
        if resourceTuple[0] in self.ServiceNamesInCheck:
          # NOTE(review): the loop stops at the first service already in check,
          # the remaining candidates are left for a later cycle - confirm that
          # `break` (rather than `continue`) is intended
          break
        # Prefix the record with its granularity
        resourceL = ['Service'] + list(resourceTuple)
        self.ServiceNamesInCheck.insert(0, resourceL[1])
        self.ServicesToBeChecked.put(resourceL)

      return S_OK()

    except Exception as x:
      errorStr = where(self, self.execute)
      gLogger.exception(errorStr,lException=x)
      return S_ERROR(errorStr)
コード例 #19
0
ファイル: SeSInspectorAgent.py プロジェクト: zenglzh/DIRAC
class SeSInspectorAgent( AgentModule ):
  '''
    The SeSInspector agent ( ServiceInspectorAgent ) is one of the four
    InspectorAgents of the RSS.

    This Agent takes care of the Service. In order to do so, it gathers
    the eligible ones and then evaluates their statuses with the PEP.

    If you want to know more about the SeSInspectorAgent, scroll down to the
    end of the file.
  '''

  # Too many public methods
  # pylint: disable-msg=R0904

  def initialize( self ):
    '''
      Create the client, the queue of services to check and the thread pool,
      then queue one _executeCheck job per thread.

      :return: S_OK(), or S_ERROR if anything fails during the initialization
    '''

    # Attribute defined outside __init__ 
    # pylint: disable-msg=W0201

    try:
      self.rsClient      = ResourceStatusClient()
      self.servicesFreqs = CS.getTypedDictRootedAtOperations( 'CheckingFreqs/ServicesFreqs' )
      self.queue         = Queue.Queue()

      self.maxNumberOfThreads = self.am_getOption( 'maxThreadsInPool', 1 )
      self.threadPool         = ThreadPool( self.maxNumberOfThreads,
                                            self.maxNumberOfThreads )
      if not self.threadPool:
        self.log.error( 'Can not create Thread Pool' )
        return S_ERROR( 'Can not create Thread Pool' )

      for _i in xrange( self.maxNumberOfThreads ):
        self.threadPool.generateJobAndQueueIt( self._executeCheck )

      return S_OK()

    except Exception:
      errorStr = "SeSInspectorAgent initialization"
      self.log.exception( errorStr )
      return S_ERROR( errorStr )

  def execute( self ):
    '''
      Ask the ResourceStatusClient for the services to check and put the ones
      not yet present in the queue.

      :return: S_OK(), the failed query result, or S_ERROR if an exception is raised
    '''

    try:

      # Columns requested for each candidate, restricted to the 'RS_SVC' token owner
      kwargs = { 'meta' : {} }
      kwargs['meta']['columns'] = [ 'ServiceName', 'StatusType', 'Status',
                                    'FormerStatus', 'SiteType',
                                    'ServiceType', 'TokenOwner' ]
      kwargs[ 'tokenOwner' ]    = 'RS_SVC'

      resQuery = self.rsClient.getStuffToCheck( 'Service', self.servicesFreqs, **kwargs )
      if not resQuery[ 'OK' ]:
        self.log.error( resQuery[ 'Message' ] )
        return resQuery

      resQuery = resQuery[ 'Value' ]
      self.log.info( 'Found %d candidates to be checked.' % len( resQuery ) )

      for service in resQuery:
        resourceL = [ 'Service' ] + service
        # Here we peek INSIDE the Queue to know if the item is already
        # here. It's ok _here_ since (i.e. I know what I'm doing):
        # - It is a read only operation.
        # - We do not need exact accuracy, it's ok to have 2 times the same item in the queue sometimes.
        if resourceL not in self.queue.queue:
          self.queue.put( resourceL )

      return S_OK()

    except Exception as x:
      errorStr = where( self, self.execute )
      self.log.exception( errorStr, lException = x )
      return S_ERROR( errorStr )
コード例 #20
0
ファイル: Service.py プロジェクト: sparsh35/DIRAC
class Service(object):
    """Server-side wrapper around one service handler class.

    It loads the handler, discovers its exported methods and their
    authorization rules, and processes each incoming client connection in a
    thread pool: handshake, proposal check, authorization, handler
    instantiation and execution of the requested action.
    """

    # Action types a client may request, mapped to the prefix of the handler
    # methods that implement them (e.g. 'RPC' -> 'export_<name>'). An entry
    # whose value is itself a key ('Connection' -> 'Message') is treated by
    # _isMetaAction as a "meta action" referring to that other action.
    SVC_VALID_ACTIONS = {
        'RPC': 'export',
        'FileTransfer': 'transfer',
        'Message': 'msg',
        'Connection': 'Message'
    }
    # Class-level security log client, shared by all instances; used by
    # _authorizeProposal to record the outcome of authorization queries.
    SVC_SECLOG_CLIENT = SecurityLogClient()

    def __init__(self, serviceData):
        """
      Init the variables for the service

      :param serviceData: dict with modName, standalone, loadName, moduleObj, classObj. e.g.:
        {'modName': 'Framework/serviceName',
        'standalone': True,
        'loadName': 'Framework/serviceName',
        'moduleObj': <module 'serviceNameHandler' from '/home/DIRAC/FrameworkSystem/Service/serviceNameHandler.pyo'>,
        'classObj': <class 'serviceNameHandler.serviceHandler'>}

        Standalone is true if there is only one service started
        If it's false, every service is linked to a different MonitoringClient
    """
        self._svcData = serviceData
        self._name = serviceData['modName']
        self._startTime = Time.dateTime()
        self._validNames = [serviceData['modName']]
        if serviceData['loadName'] not in self._validNames:
            self._validNames.append(serviceData['loadName'])
        self._cfg = ServiceConfiguration(list(self._validNames))
        if serviceData['standalone']:
            self._monitor = gMonitor
        else:
            self._monitor = MonitoringClient()
        self.__monitorLastStatsUpdate = time.time()
        self._stats = {'queries': 0, 'connections': 0}
        self._authMgr = AuthManager(
            "%s/Authorization" %
            PathFinder.getServiceSection(serviceData['loadName']))
        self._transportPool = getGlobalTransportPool()
        self.__cloneId = 0
        self.__maxFD = 0

    def setCloneProcessId(self, cloneId):
        self.__cloneId = cloneId
        self._monitor.setComponentName("%s-Clone:%s" % (self._name, cloneId))

    def _isMetaAction(self, action):
        referedAction = Service.SVC_VALID_ACTIONS[action]
        if referedAction in Service.SVC_VALID_ACTIONS:
            return referedAction
        return False

    def initialize(self):
        """
        Bring the service to a state where it can process connections.

        In order: builds the service URL, loads the handler description,
        creates the lock manager, sets up monitoring, creates the thread pool
        and the message broker, runs the handler's class-level initialization
        followed by every discovered initialization function, loads the
        handler's actions and schedules the periodic thread pool report.

        :return: S_OK() on success; S_ERROR (or the failed result of a helper)
                 as soon as one of the steps fails
        """
        # Build the URLs
        self._url = self._cfg.getURL()
        if not self._url:
            return S_ERROR("Could not build service URL for %s" % self._name)
        gLogger.verbose("Service URL is %s" % self._url)
        # Load handler
        result = self._loadHandlerInit()
        if not result['OK']:
            return result
        self._handler = result['Value']
        # Initialize lock manager
        self._lockManager = LockManager(self._cfg.getMaxWaitingPetitions())
        # Must come after the handler is loaded: it reads self._handler['module']
        self._initMonitoring()
        # TODO: remove ThreadPool
        # useThreadPoolExecutor is a flag defined outside this class
        if useThreadPoolExecutor:
            self._threadPool = ThreadPoolExecutor(
                max(0, self._cfg.getMaxThreads()))
        else:
            self._threadPool = ThreadPool(max(1, self._cfg.getMinThreads()),
                                          max(0, self._cfg.getMaxThreads()),
                                          self._cfg.getMaxWaitingPetitions())
            self._threadPool.daemonize()
        # The message broker shares the service's thread pool
        self._msgBroker = MessageBroker("%sMSB" % self._name,
                                        threadPool=self._threadPool)
        # Create static dict
        self._serviceInfoDict = {
            'serviceName':
            self._name,
            'serviceSectionPath':
            PathFinder.getServiceSection(self._name),
            'URL':
            self._cfg.getURL(),
            'messageSender':
            MessageSender(self._name, self._msgBroker),
            'validNames':
            self._validNames,
            'csPaths': [
                PathFinder.getServiceSection(svcName)
                for svcName in self._validNames
            ]
        }
        # Call static initialization function
        try:
            # Every callee gets its own shallow copy of the info dict
            self._handler['class']._rh__initializeClass(
                dict(self._serviceInfoDict), self._lockManager,
                self._msgBroker, self._monitor)
            if self._handler['init']:
                for initFunc in self._handler['init']:
                    gLogger.verbose("Executing initialization function")
                    try:
                        result = initFunc(dict(self._serviceInfoDict))
                    except Exception as excp:
                        gLogger.exception(
                            "Exception while calling initialization function",
                            lException=excp)
                        return S_ERROR(
                            "Exception while calling initialization function: %s"
                            % str(excp))
                    # Initialization functions must follow the S_OK/S_ERROR convention
                    if not isReturnStructure(result):
                        return S_ERROR(
                            "Service initialization function %s must return S_OK/S_ERROR"
                            % initFunc)
                    if not result['OK']:
                        return S_ERROR("Error while initializing %s: %s" %
                                       (self._name, result['Message']))
        except Exception as e:
            # Reached for failures outside the per-function try above,
            # e.g. in _rh__initializeClass
            errMsg = "Exception while initializing %s" % self._name
            gLogger.exception(e)
            gLogger.exception(errMsg)
            return S_ERROR(errMsg)

        # Load actions after the handler has initialized itself
        result = self._loadActions()
        if not result['OK']:
            return result
        self._actions = result['Value']

        # Report thread pool usage periodically (period 30, presumably seconds)
        gThreadScheduler.addPeriodicTask(30, self.__reportThreadPoolContents)

        return S_OK()

    def __searchInitFunctions(self, handlerClass, currentClass=None):
        if not currentClass:
            currentClass = handlerClass
        initFuncs = []
        ancestorHasInit = False
        for ancestor in currentClass.__bases__:
            initFuncs += self.__searchInitFunctions(handlerClass, ancestor)
            if 'initializeHandler' in dir(ancestor):
                ancestorHasInit = True
        if ancestorHasInit:
            initFuncs.append(
                super(currentClass, handlerClass).initializeHandler)
        if currentClass == handlerClass and 'initializeHandler' in dir(
                handlerClass):
            initFuncs.append(handlerClass.initializeHandler)
        return initFuncs

    def _loadHandlerInit(self):
        handlerClass = self._svcData['classObj']
        handlerName = handlerClass.__name__
        handlerInitMethods = self.__searchInitFunctions(handlerClass)
        try:
            handlerInitMethods.append(
                getattr(self._svcData['moduleObj'],
                        "initialize%s" % handlerName))
        except AttributeError:
            gLogger.verbose(
                "Not found global initialization function for service")

        if handlerInitMethods:
            gLogger.info("Found %s initialization methods" %
                         len(handlerInitMethods))

        handlerInfo = {}
        handlerInfo["name"] = handlerName
        handlerInfo["module"] = self._svcData['moduleObj']
        handlerInfo["class"] = handlerClass
        handlerInfo["init"] = handlerInitMethods

        return S_OK(handlerInfo)

    def _loadActions(self):

        handlerClass = self._handler['class']

        authRules = {}
        typeCheck = {}
        methodsList = {}
        for actionType in Service.SVC_VALID_ACTIONS:
            if self._isMetaAction(actionType):
                continue
            authRules[actionType] = {}
            typeCheck[actionType] = {}
            methodsList[actionType] = []
        handlerAttributeList = dir(handlerClass)
        for actionType in Service.SVC_VALID_ACTIONS:
            if self._isMetaAction(actionType):
                continue
            methodPrefix = '%s_' % Service.SVC_VALID_ACTIONS[actionType]
            for attribute in handlerAttributeList:
                if attribute.find(methodPrefix) != 0:
                    continue
                exportedName = attribute[len(methodPrefix):]
                methodsList[actionType].append(exportedName)
                gLogger.verbose("+ Found %s method %s" %
                                (actionType, exportedName))
                # Create lock for method
                self._lockManager.createLock(
                    "%s/%s" % (actionType, exportedName),
                    self._cfg.getMaxThreadsForMethod(actionType, exportedName))
                # Look for type and auth rules
                if actionType == 'RPC':
                    typeAttr = "types_%s" % exportedName
                    authAttr = "auth_%s" % exportedName
                else:
                    typeAttr = "types_%s_%s" % (
                        Service.SVC_VALID_ACTIONS[actionType], exportedName)
                    authAttr = "auth_%s_%s" % (
                        Service.SVC_VALID_ACTIONS[actionType], exportedName)
                if typeAttr in handlerAttributeList:
                    obj = getattr(handlerClass, typeAttr)
                    gLogger.verbose("|- Found type definition %s: %s" %
                                    (typeAttr, str(obj)))
                    typeCheck[actionType][exportedName] = obj
                if authAttr in handlerAttributeList:
                    obj = getattr(handlerClass, authAttr)
                    gLogger.verbose("|- Found auth rules %s: %s" %
                                    (authAttr, str(obj)))
                    authRules[actionType][exportedName] = obj

        for actionType in Service.SVC_VALID_ACTIONS:
            referedAction = self._isMetaAction(actionType)
            if not referedAction:
                continue
            gLogger.verbose("Action %s is a meta action for %s" %
                            (actionType, referedAction))
            authRules[actionType] = []
            for method in authRules[referedAction]:
                for prop in authRules[referedAction][method]:
                    if prop not in authRules[actionType]:
                        authRules[actionType].append(prop)
            gLogger.verbose("Meta action %s props are %s" %
                            (actionType, authRules[actionType]))

        return S_OK({
            'methods': methodsList,
            'auth': authRules,
            'types': typeCheck
        })

    def _initMonitoring(self):
        # Init extra bits of monitoring
        self._monitor.setComponentType(MonitoringClient.COMPONENT_SERVICE)
        self._monitor.setComponentName(self._name)
        self._monitor.setComponentLocation(self._cfg.getURL())
        self._monitor.initialize()
        self._monitor.registerActivity("Connections", "Connections received",
                                       "Framework", "connections",
                                       MonitoringClient.OP_RATE)
        self._monitor.registerActivity("Queries", "Queries served",
                                       "Framework", "queries",
                                       MonitoringClient.OP_RATE)
        self._monitor.registerActivity('CPU', "CPU Usage", 'Framework',
                                       "CPU,%", MonitoringClient.OP_MEAN, 600)
        self._monitor.registerActivity('MEM', "Memory Usage", 'Framework',
                                       'Memory,MB', MonitoringClient.OP_MEAN,
                                       600)
        self._monitor.registerActivity('PendingQueries', "Pending queries",
                                       'Framework', 'queries',
                                       MonitoringClient.OP_MEAN)
        self._monitor.registerActivity('ActiveQueries', "Active queries",
                                       'Framework', 'threads',
                                       MonitoringClient.OP_MEAN)
        self._monitor.registerActivity('RunningThreads', "Running threads",
                                       'Framework', 'threads',
                                       MonitoringClient.OP_MEAN)
        self._monitor.registerActivity('MaxFD', "Max File Descriptors",
                                       'Framework', 'fd',
                                       MonitoringClient.OP_MEAN)

        self._monitor.setComponentExtraParam('DIRACVersion', DIRAC.version)
        self._monitor.setComponentExtraParam('platform', DIRAC.getPlatform())
        self._monitor.setComponentExtraParam('startTime', Time.dateTime())
        for prop in (("__RCSID__", "version"), ("__doc__", "description")):
            try:
                value = getattr(self._handler['module'], prop[0])
            except Exception as e:
                gLogger.exception(e)
                gLogger.error("Missing property", prop[0])
                value = 'unset'
            self._monitor.setComponentExtraParam(prop[1], value)
        for secondaryName in self._cfg.registerAlsoAs():
            gLogger.info("Registering %s also as %s" %
                         (self._name, secondaryName))
            self._validNames.append(secondaryName)

        return S_OK()

    def __reportThreadPoolContents(self):
        # TODO: remove later
        if useThreadPoolExecutor:
            pendingQueries = self._threadPool._work_queue.qsize()
            activeQuereies = len(self._threadPool._threads)
        else:
            pendingQueries = self._threadPool.pendingJobs()
            activeQuereies = self._threadPool.numWorkingThreads()

        self._monitor.addMark('PendingQueries', pendingQueries)
        self._monitor.addMark('ActiveQueries', activeQuereies)
        self._monitor.addMark('RunningThreads', threading.activeCount())
        self._monitor.addMark('MaxFD', self.__maxFD)
        self.__maxFD = 0

    def getConfig(self):
        return self._cfg

    # End of initialization functions

    def handleConnection(self, clientTransport):
        """
      This method may be called by ServiceReactor.
      The method stacks openened connection in a queue, another thread
      read this queue and handle connection.

      :param clientTransport: Object wich describe opened connection (PlainTransport or SSLTransport)
    """
        self._stats['connections'] += 1
        self._monitor.setComponentExtraParam('queries',
                                             self._stats['connections'])
        # TODO: remove later
        if useThreadPoolExecutor:
            self._threadPool.submit(self._processInThread, clientTransport)
        else:
            self._threadPool.generateJobAndQueueIt(self._processInThread,
                                                   args=(clientTransport, ))

    # Threaded process function
    def _processInThread(self, clientTransport):
        """
        Serve one client connection; runs in a thread of the pool.

        Steps performed here:

        - Do the handshake on the transport
        - Register the transport in the transport pool
        - Receive the action proposed by the client and check that it is
          valid and authorized (_receiveAndCheckProposal)
          - If not, the error is sent back and the connection is closed
        - Instantiate the request handler
        - Execute the action (_processProposal), which first notifies the
          client that the service is ready
        - Close the transport when the result asks for it or reports an error

        The global lock is held for the whole processing and always released.

        :param clientTransport: object which describes the opened connection
                                (SSLTransport or PlainTransport)

        :return: the result of _processProposal, whose "closeTransport" entry
                 tells whether the connection had to be closed; None when the
                 processing stopped before the action was executed
        """
        # Remember the highest file descriptor seen; reported and reset by
        # __reportThreadPoolContents
        self.__maxFD = max(self.__maxFD, clientTransport.oSocket.fileno())
        self._lockManager.lockGlobal()
        try:
            monReport = self.__startReportToMonitoring()
        except Exception:
            # Monitoring is best effort: carry on without the CPU report
            monReport = False
        try:
            # Handshake
            try:
                result = clientTransport.handshake()
                if not result['OK']:
                    clientTransport.close()
                    return
            except BaseException:
                # NOTE(review): any handshake exception ends the processing
                # silently, without closing the transport here
                return
            # Add to the transport pool
            trid = self._transportPool.add(clientTransport)
            if not trid:
                return
            # Receive and check proposal
            result = self._receiveAndCheckProposal(trid)
            if not result['OK']:
                self._transportPool.sendAndClose(trid, result)
                return
            proposalTuple = result['Value']
            # Instantiate handler
            result = self._instantiateHandler(trid, proposalTuple)
            if not result['OK']:
                self._transportPool.sendAndClose(trid, result)
                return
            handlerObj = result['Value']
            # Execute the action
            result = self._processProposal(trid, proposalTuple, handlerObj)
            # Close the connection if required
            if result['closeTransport'] or not result['OK']:
                if not result['OK']:
                    gLogger.error("Error processing proposal",
                                  result['Message'])
                self._transportPool.close(trid)
            return result
        finally:
            # Runs on every exit path, including the early returns above
            self._lockManager.unlockGlobal()
            if monReport:
                self.__endReportToMonitoring(*monReport)

    def _createIdentityString(self, credDict, clientTransport=None):
        if 'username' in credDict:
            if 'group' in credDict:
                identity = "[%s:%s]" % (credDict['username'],
                                        credDict['group'])
            else:
                identity = "[%s:unknown]" % credDict['username']
        else:
            identity = 'unknown'
        if clientTransport:
            addr = clientTransport.getRemoteAddress()
            if addr:
                addr = "{%s:%s}" % (addr[0], addr[1])
        if 'DN' in credDict:
            identity += "(%s)" % credDict['DN']
        return identity

    @staticmethod
    def _deserializeProposalTuple(serializedProposal):
        """ We receive the proposalTuple as a list.
        Turn it into a tuple again
    """
        proposalTuple = tuple(
            tuple(x) if isinstance(x, list) else x for x in serializedProposal)
        return proposalTuple

    def _receiveAndCheckProposal(self, trid):
        clientTransport = self._transportPool.get(trid)
        # Get the peer credentials
        credDict = clientTransport.getConnectingCredentials()
        # Receive the action proposal
        retVal = clientTransport.receiveData(1024)
        if not retVal['OK']:
            gLogger.error(
                "Invalid action proposal",
                "%s %s" % (self._createIdentityString(
                    credDict, clientTransport), retVal['Message']))
            return S_ERROR("Invalid action proposal")
        proposalTuple = Service._deserializeProposalTuple(retVal['Value'])
        gLogger.debug("Received action from client",
                      "/".join(list(proposalTuple[1])))
        # Check if there are extra credentials
        if proposalTuple[2]:
            clientTransport.setExtraCredentials(proposalTuple[2])
        # Check if this is the requested service
        requestedService = proposalTuple[0][0]
        if requestedService not in self._validNames:
            return S_ERROR("%s is not up in this server" % requestedService)
        # Check if the action is valid
        requestedActionType = proposalTuple[1][0]
        if requestedActionType not in Service.SVC_VALID_ACTIONS:
            return S_ERROR("%s is not a known action type" %
                           requestedActionType)
        # Check if it's authorized
        result = self._authorizeProposal(proposalTuple[1], trid, credDict)
        if not result['OK']:
            return result
        # Proposal is OK
        return S_OK(proposalTuple)

    def _authorizeProposal(self, actionTuple, trid, credDict):
        """
        Check whether the given credentials may perform the proposed action.

        The authorization manager is queried with a configuration path built
        from the action and with the rules hardcoded in the handler, if any.
        The outcome is then recorded in the security log.

        :param tuple actionTuple: (action type, action name)
        :param trid: transport ID of the client connection
        :param dict credDict: credentials of the client
        :return: S_OK() if authorized, S_ERROR(ENOAUTH, ...) if not, or
                 S_ERROR("Client disconnected") when the transport is no
                 longer in the pool once the query has been answered
        """
        # Find CS path for the Auth rules
        referedAction = self._isMetaAction(actionTuple[0])
        if referedAction:
            # Meta actions use '<action type>/Default' and the property list
            # gathered for them by _loadActions
            csAuthPath = "%s/Default" % actionTuple[0]
            hardcodedMethodAuth = self._actions['auth'][actionTuple[0]]
        else:
            if actionTuple[0] == 'RPC':
                csAuthPath = actionTuple[1]
            else:
                csAuthPath = "/".join(actionTuple)
            # Find if there are hardcoded auth rules in the code
            hardcodedMethodAuth = False
            if actionTuple[0] in self._actions['auth']:
                hardcodedRulesByType = self._actions['auth'][actionTuple[0]]
                if actionTuple[0] == "FileTransfer":
                    # The rules are looked up with the first letter of the
                    # requested name lowercased
                    methodName = actionTuple[1][0].lower() + actionTuple[1][1:]
                else:
                    methodName = actionTuple[1]

                if methodName in hardcodedRulesByType:
                    hardcodedMethodAuth = hardcodedRulesByType[methodName]
        # Auth time!
        if not self._authMgr.authQuery(csAuthPath, credDict,
                                       hardcodedMethodAuth):
            # Get the identity string
            identity = self._createIdentityString(credDict)
            fromHost = "unknown host"
            tr = self._transportPool.get(trid)
            if tr:
                fromHost = '/'.join(
                    [str(item) for item in tr.getRemoteAddress()])
            gLogger.warn(
                "Unauthorized query", "to %s:%s by %s from %s" %
                (self._name, "/".join(actionTuple), identity, fromHost))
            result = S_ERROR(ENOAUTH, "Unauthorized query")
        else:
            result = S_OK()

        # Security log
        # The transport is fetched again; if it is gone, nothing is logged
        # and the error below replaces the authorization result
        tr = self._transportPool.get(trid)
        if not tr:
            return S_ERROR("Client disconnected")
        sourceAddress = tr.getRemoteAddress()
        identity = self._createIdentityString(credDict)
        Service.SVC_SECLOG_CLIENT.addMessage(result['OK'], sourceAddress[0],
                                             sourceAddress[1], identity,
                                             self._cfg.getHostname(),
                                             self._cfg.getPort(), self._name,
                                             "/".join(actionTuple))
        return result

    def _instantiateHandler(self, trid, proposalTuple=None):
        """
    Generate an instance of the handler for a given service

    :param int trid: transport ID
    :param tuple proposalTuple: tuple describing the proposed action

    :return: S_OK/S_ERROR, Value is the handler object
    """
        # Generate the client params
        clientParams = {'serviceStartTime': self._startTime}
        if proposalTuple:
            # The 4th element is the client version
            clientParams['clientVersion'] = proposalTuple[3] if len(
                proposalTuple) > 3 else None
            clientParams['clientSetup'] = proposalTuple[0][1]
            if len(proposalTuple[0]) < 3:
                clientParams['clientVO'] = gConfig.getValue(
                    "/DIRAC/VirtualOrganization", "unknown")
            else:
                clientParams['clientVO'] = proposalTuple[0][2]
        clientTransport = self._transportPool.get(trid)
        if clientTransport:
            clientParams['clientAddress'] = clientTransport.getRemoteAddress()
        # Generate handler dict with per client info
        handlerInitDict = dict(self._serviceInfoDict)
        for key in clientParams:
            handlerInitDict[key] = clientParams[key]
        # Instantiate and initialize
        try:
            handlerInstance = self._handler['class'](handlerInitDict, trid)
            handlerInstance.initialize()
        except Exception as e:
            gLogger.exception("Server error while loading handler: %s" %
                              str(e))
            return S_ERROR("Server error while loading handler")
        return S_OK(handlerInstance)

    def _processProposal(self, trid, proposalTuple, handlerObj):
        # Notify the client we're ready to execute the action
        retVal = self._transportPool.send(trid, S_OK())
        if not retVal['OK']:
            return retVal

        messageConnection = False
        if proposalTuple[1] == ('Connection', 'new'):
            messageConnection = True

        if messageConnection:

            if self._msgBroker.getNumConnections(
            ) > self._cfg.getMaxMessagingConnections():
                result = S_ERROR(
                    "Maximum number of connections reached. Try later")
                result['closeTransport'] = True
                return result

            # This is a stable connection
            self._msgBroker.addTransportId(
                trid,
                self._name,
                receiveMessageCallback=self._mbReceivedMsg,
                disconnectCallback=self._mbDisconnect,
                listenToConnection=False)

        result = self._executeAction(trid, proposalTuple, handlerObj)
        if result['OK'] and messageConnection:
            self._msgBroker.listenToTransport(trid)
            result = self._mbConnect(trid, handlerObj)
            if not result['OK']:
                self._msgBroker.removeTransport(trid)

        result['closeTransport'] = not messageConnection or not result['OK']
        return result

    def _mbConnect(self, trid, handlerObj=None):
        if not handlerObj:
            result = self._instantiateHandler(trid)
            if not result['OK']:
                return result
            handlerObj = result['Value']
        return handlerObj._rh_executeConnectionCallback('connected')

    def _executeAction(self, trid, proposalTuple, handlerObj):
        try:
            return handlerObj._rh_executeAction(proposalTuple)
        except Exception as e:
            gLogger.exception("Exception while executing handler action")
            return S_ERROR("Server error while executing action: %s" % str(e))

    def _mbReceivedMsg(self, trid, msgObj):
        result = self._authorizeProposal(
            ('Message', msgObj.getName()), trid,
            self._transportPool.get(trid).getConnectingCredentials())
        if not result['OK']:
            return result
        result = self._instantiateHandler(trid)
        if not result['OK']:
            return result
        handlerObj = result['Value']
        return handlerObj._rh_executeMessageCallback(msgObj)

    def _mbDisconnect(self, trid):
        result = self._instantiateHandler(trid)
        if not result['OK']:
            return result
        handlerObj = result['Value']
        return handlerObj._rh_executeConnectionCallback('drop')

    def __startReportToMonitoring(self):
        self._monitor.addMark("Queries")
        now = time.time()
        stats = os.times()
        cpuTime = stats[0] + stats[2]
        if now - self.__monitorLastStatsUpdate < 0:
            return (now, cpuTime)
        # Send CPU consumption mark
        wallClock = now - self.__monitorLastStatsUpdate
        self.__monitorLastStatsUpdate = now
        # Send Memory consumption mark
        membytes = MemStat.VmB('VmRSS:')
        if membytes:
            mem = membytes / (1024. * 1024.)
            self._monitor.addMark('MEM', mem)
        return (now, cpuTime)

    def __endReportToMonitoring(self, initialWallTime, initialCPUTime):
        wallTime = time.time() - initialWallTime
        stats = os.times()
        cpuTime = stats[0] + stats[2] - initialCPUTime
        percentage = cpuTime / wallTime * 100.
        if percentage > 0:
            self._monitor.addMark('CPU', percentage)
コード例 #21
0
class ElementInspectorAgent(AgentModule):
    '''
    The ElementInspector agent is a generic agent used to check the elements
    of one of the elementTypes ( e.g. Site, Resource, Node ).

    On every cycle it selects the elements whose last check is older than the
    frequency configured for their status, puts them on a queue, and starts
    worker threads that evaluate each queued element with the PEP.
    '''

    # Max number of worker threads by default
    __maxNumberOfThreads = 5
    # ElementType, to be defined among Site, Resource or Node
    __elementType = None
    # Inspection freqs ( minutes ), defaults; the lower, the higher priority to
    # be checked. Error state usually means there is a glitch somewhere, so it
    # has the highest priority.
    __checkingFreqs = {
        'Default': {
            'Active': 60,
            'Degraded': 30,
            'Probing': 30,
            'Banned': 30,
            'Unknown': 15,
            'Error': 15
        }
    }
    # queue size limit ( number of elements ) to stop feeding
    __limitQueueFeeder = 15

    def __init__(self, *args, **kwargs):
        ''' c'tor: sets the defaults; the real objects are built in initialize.
        '''

        AgentModule.__init__(self, *args, **kwargs)

        # members initialization

        self.maxNumberOfThreads = self.__maxNumberOfThreads
        self.elementType = self.__elementType
        self.checkingFreqs = self.__checkingFreqs
        self.limitQueueFeeder = self.__limitQueueFeeder

        self.elementsToBeChecked = None
        self.threadPool = None
        self.rsClient = None
        self.clients = {}

    def initialize(self):
        ''' Standard initialize.

        Reads the agent options ( maxNumberOfThreads, elementType,
        checkingFreqs, limitQueueFeeder ), and creates the work queue, the
        thread pool and the clients handed to the PEP.

        NOTE(review): the previous docstring mentioned the ProductionManager
        shifterProxy, but nothing in this method sets it - presumably it is
        configured elsewhere; confirm.

        :return: S_OK()
        '''

        self.maxNumberOfThreads = self.am_getOption('maxNumberOfThreads',
                                                    self.maxNumberOfThreads)
        self.elementType = self.am_getOption('elementType', self.elementType)
        self.checkingFreqs = self.am_getOption('checkingFreqs',
                                               self.checkingFreqs)
        self.limitQueueFeeder = self.am_getOption('limitQueueFeeder',
                                                  self.limitQueueFeeder)

        self.elementsToBeChecked = Queue.Queue()
        self.threadPool = ThreadPool(self.maxNumberOfThreads,
                                     self.maxNumberOfThreads)

        self.rsClient = ResourceStatusClient()

        self.clients['ResourceStatusClient'] = self.rsClient
        self.clients['ResourceManagementClient'] = ResourceManagementClient()
        self.clients['PilotsDB'] = PilotAgentsDB()

        return S_OK()

    def execute(self):
        ''' Feed the queue with the elements due for a check and start workers.

        :return: S_OK(), or the failed result of the element selection
        '''

        # If there are too many elements still waiting on the queue ( more than
        # limitQueueFeeder of them ), we do not add anything on this cycle; the
        # threads are running and processing items from the queue on background.

        qsize = self.elementsToBeChecked.qsize()
        if qsize > self.limitQueueFeeder:
            self.log.warn(
                'Queue not empty ( %s > %s ), skipping feeding loop' %
                (qsize, self.limitQueueFeeder))
            return S_OK()

        # We get all the elements, then we filter.
        elements = self.rsClient.selectStatusElement(self.elementType,
                                                     'Status')
        if not elements['OK']:
            self.log.error(elements['Message'])
            return elements

        utcnow = datetime.datetime.utcnow().replace(microsecond=0)

        # filter elements by Type
        for element in elements['Value']:

            # Map column names to values, so we never depend on the order of
            # the columns returned by the DB
            elemDict = dict(zip(elements['Columns'], element))

            # We skip the elements with token different than "rs_svc"
            if elemDict['TokenOwner'] != 'rs_svc':
                self.log.info('Skipping %s ( %s ) with token %s' %
                              (elemDict['Name'], elemDict['StatusType'],
                               elemDict['TokenOwner']))
                continue

            # Element types without their own frequencies use the defaults
            if elemDict['ElementType'] not in self.checkingFreqs:
                timeToNextCheck = self.checkingFreqs['Default'][
                    elemDict['Status']]
            else:
                timeToNextCheck = self.checkingFreqs[elemDict['ElementType']][
                    elemDict['Status']]

            if utcnow - datetime.timedelta(
                    minutes=timeToNextCheck) > elemDict['LastCheckTime']:

                # We are not checking if the item is already on the queue or not. It may
                # be there, but in any case, it is not a big problem.

                # Same keys, with the first letter lower-cased
                lowerElementDict = {'element': self.elementType}
                for key, value in elemDict.items():
                    lowerElementDict[key[0].lower() + key[1:]] = value

                # We add lowerElementDict to the queue
                self.elementsToBeChecked.put(lowerElementDict)
                self.log.verbose('%s # "%s" # "%s" # %s # %s' %
                                 (elemDict['Name'], elemDict['ElementType'],
                                  elemDict['StatusType'], elemDict['Status'],
                                  elemDict['LastCheckTime']))

        # Measure size of the queue, more or less, to know how many threads should
        # we start !
        queueSize = self.elementsToBeChecked.qsize()
        # One thread per 30 queued elements ( 30 could have been other number,
        # but it works reasonably well ), +1 to round up. Floor division keeps
        # the count an integer, as xrange below requires.
        threadsToStart = max(
            min(self.maxNumberOfThreads, (queueSize // 30) + 1), 1)
        threadsRunning = self.threadPool.numWorkingThreads()

        self.log.info('Needed %d threads to process %d elements' %
                      (threadsToStart, queueSize))
        if threadsRunning:
            self.log.info('Already %d threads running' % threadsRunning)
            threadsToStart = max(0, threadsToStart - threadsRunning)
            self.log.info('Starting %d threads to process %d elements' %
                          (threadsToStart, queueSize))

        # It may happen that we start two threads, 0 and 1. 1 goes DOWN, but 0 keeps
        # running. In next loop we will start a new thread, and will be called 0
        # again. To have a mechanism to see which thread is where, we append the
        # cycle number before the threadId.
        cycle = self._AgentModule__moduleProperties['cyclesDone']

        for _x in xrange(threadsToStart):
            threadId = '%s_%s' % (cycle, _x)
            jobUp = self.threadPool.generateJobAndQueueIt(self._execute,
                                                          args=(threadId, ))
            if not jobUp['OK']:
                self.log.error(jobUp['Message'])

        return S_OK()

    def finalize(self):
        ''' Block until every queued element has been processed.

        :return: S_OK()
        '''

        self.log.info('draining queue... blocking until empty')
        # block until all tasks are done
        self.elementsToBeChecked.join()

        return S_OK()

## Private methods #############################################################

    def _execute(self, threadNumber):
        '''
        Method run by the thread pool. It enters a loop until there are no elements
        on the queue. On each iteration, it evaluates the policies for such element
        and enforces the necessary actions. If there are no more elements in the
        queue, the loop is finished.

        :param threadNumber: identifier used only to tag the log messages
        :return: S_OK() once the queue is empty
        '''

        tHeader = '%sJob%s' % ('* ' * 30, threadNumber)

        self.log.info('%s UP' % tHeader)

        pep = PEP(clients=self.clients)

        while True:

            try:
                element = self.elementsToBeChecked.get_nowait()
            except Queue.Empty:
                self.log.info('%s DOWN' % tHeader)
                return S_OK()

            try:
                self.log.info(
                    '%s ( %s / %s ) being processed' %
                    (element['name'], element['status'],
                     element['statusType']))

                resEnforce = pep.enforce(element)
                if not resEnforce['OK']:
                    self.log.error(resEnforce['Message'])
                    continue

                resEnforce = resEnforce['Value']

                oldStatus = resEnforce['decissionParams']['status']
                statusType = resEnforce['decissionParams']['statusType']
                newStatus = resEnforce['policyCombinedResult']['Status']
                reason = resEnforce['policyCombinedResult']['Reason']

                if oldStatus != newStatus:
                    self.log.info('%s (%s) is now %s ( %s ), before %s' %
                                  (element['name'], statusType, newStatus,
                                   reason, oldStatus))
            finally:
                # Used together with join ! Every element taken from the queue
                # must be acknowledged, even if its processing raised;
                # otherwise finalize would block forever on join.
                self.elementsToBeChecked.task_done()
コード例 #22
0
ファイル: Service.py プロジェクト: DIRACGrid-test/DIRAC
class Service( object ):
  """
  Server side container of a service handler.

  It loads the handler class described by the service data, discovers the
  actions the handler exports and serves every incoming connection from a
  thread pool: handshake, proposal check, authorization, handler instantiation
  and execution of the requested action.
  """

  # Action type -> prefix of the handler methods implementing it. An action
  # whose value is itself an action type ( 'Connection' ) is a "meta action".
  SVC_VALID_ACTIONS = { 'RPC' : 'export',
                        'FileTransfer': 'transfer',
                        'Message' : 'msg',
                        'Connection' : 'Message' }
  # Client shared by all services, used to record each authorization decision
  SVC_SECLOG_CLIENT = SecurityLogClient()

  def __init__( self, serviceData ):
    """
    :param dict serviceData: description of the service. The keys read by this
                             class are 'modName', 'loadName', 'standalone',
                             'classObj' and 'moduleObj'
    """
    self._svcData = serviceData
    self._name = serviceData[ 'modName' ]
    self._startTime = Time.dateTime()
    self._validNames = [ serviceData[ 'modName' ]  ]
    if serviceData[ 'loadName' ] not in self._validNames:
      self._validNames.append( serviceData[ 'loadName' ] )
    self._cfg = ServiceConfiguration( list( self._validNames ) )
    if serviceData[ 'standalone' ]:
      self._monitor = gMonitor
    else:
      self._monitor = MonitoringClient()
    self.__monitorLastStatsUpdate = time.time()
    # NOTE(review): 'queries' is never incremented in this class
    self._stats = { 'queries' : 0, 'connections' : 0 }
    self._authMgr = AuthManager( "%s/Authorization" % PathFinder.getServiceSection( serviceData[ 'loadName' ] ) )
    self._transportPool = getGlobalTransportPool()
    self.__cloneId = 0
    self.__maxFD = 0

  def setCloneProcessId( self, cloneId ):
    """ Store the clone id and rename the monitored component accordingly """
    self.__cloneId = cloneId
    self._monitor.setComponentName( "%s-Clone:%s" % ( self._name, cloneId ) )

  def _isMetaAction( self, action ):
    """
    Tell whether an action type is an alias of another action type.

    :param str action: a key of SVC_VALID_ACTIONS
    :return: the referred action type if action is a meta action, else False
    """
    referedAction = Service.SVC_VALID_ACTIONS[ action ]
    if referedAction in Service.SVC_VALID_ACTIONS:
      return referedAction
    return False

  def initialize( self ):
    """
    Build the service URL, load and initialize the handler, create the lock
    manager, thread pool and message broker, and load the exported actions.

    :return: S_OK() or S_ERROR describing the step that failed
    """
    #Build the URLs
    self._url = self._cfg.getURL()
    if not self._url:
      return S_ERROR( "Could not build service URL for %s" % self._name )
    gLogger.verbose( "Service URL is %s" % self._url )
    #Load handler
    result = self._loadHandlerInit()
    if not result[ 'OK' ]:
      return result
    self._handler = result[ 'Value' ]
    #Initialize lock manager
    self._lockManager = LockManager( self._cfg.getMaxWaitingPetitions() )
    self._initMonitoring()
    self._threadPool = ThreadPool( max( 1, self._cfg.getMinThreads() ),
                                   max( 0, self._cfg.getMaxThreads() ),
                                   self._cfg.getMaxWaitingPetitions() )
    self._threadPool.daemonize()
    self._msgBroker = MessageBroker( "%sMSB" % self._name, threadPool = self._threadPool )
    #Create static dict
    self._serviceInfoDict = { 'serviceName' : self._name,
                              'serviceSectionPath' : PathFinder.getServiceSection( self._name ),
                              'URL' : self._cfg.getURL(),
                              'messageSender' : MessageSender( self._name, self._msgBroker ),
                              'validNames' : self._validNames,
                              'csPaths' : [ PathFinder.getServiceSection( svcName ) for svcName in self._validNames ]
                            }
    #Call static initialization function
    try:
      self._handler[ 'class' ]._rh__initializeClass( dict( self._serviceInfoDict ),
                                                     self._lockManager,
                                                     self._msgBroker,
                                                     self._monitor )
      if self._handler[ 'init' ]:
        for initFunc in self._handler[ 'init' ]:
          gLogger.verbose( "Executing initialization function" )
          try:
            result = initFunc( dict( self._serviceInfoDict ) )
          except Exception as excp:
            gLogger.exception( "Exception while calling initialization function", lException = excp )
            return S_ERROR( "Exception while calling initialization function: %s" % str( excp ) )
          if not isReturnStructure( result ):
            return S_ERROR( "Service initialization function %s must return S_OK/S_ERROR" % initFunc )
          if not result[ 'OK' ]:
            return S_ERROR( "Error while initializing %s: %s" % ( self._name, result[ 'Message' ] ) )
    except Exception as e:
      errMsg = "Exception while initializing %s" % self._name
      gLogger.exception( e )
      gLogger.exception( errMsg )
      return S_ERROR( errMsg )

    #Load actions after the handler has initialized itself
    result = self._loadActions()
    if not result[ 'OK' ]:
      return result
    self._actions = result[ 'Value' ]

    gThreadScheduler.addPeriodicTask( 30, self.__reportThreadPoolContents )

    return S_OK()

  def __searchInitFunctions( self, handlerClass, currentClass = None ):
    """
    Collect the initializeHandler functions of the handler class hierarchy,
    walking the base classes recursively so that ancestors come first.

    :param handlerClass: the handler class being loaded
    :param currentClass: class whose bases are inspected ( defaults to handlerClass )
    :return: list of initialization functions
    """
    if not currentClass:
      currentClass = handlerClass
    initFuncs = []
    ancestorHasInit = False
    for ancestor in currentClass.__bases__:
      initFuncs += self.__searchInitFunctions( handlerClass, ancestor )
      if 'initializeHandler' in dir( ancestor ):
        ancestorHasInit = True
    if ancestorHasInit:
      initFuncs.append( super( currentClass, handlerClass ).initializeHandler )
    if currentClass == handlerClass and 'initializeHandler' in dir( handlerClass ):
      initFuncs.append( handlerClass.initializeHandler )
    return initFuncs

  def _loadHandlerInit( self ):
    """
    Gather the handler class, its module and all its initialization functions
    ( class level ones plus an optional module level initialize<HandlerName> ).

    :return: S_OK( dict ) with keys 'name', 'module', 'class' and 'init'
    """
    handlerClass = self._svcData[ 'classObj' ]
    handlerName = handlerClass.__name__
    handlerInitMethods = self.__searchInitFunctions( handlerClass )
    try:
      handlerInitMethods.append( getattr( self._svcData[ 'moduleObj' ], "initialize%s" % handlerName ) )
    except AttributeError:
      gLogger.verbose( "Not found global initialization function for service" )

    if handlerInitMethods:
      gLogger.info( "Found %s initialization methods" % len( handlerInitMethods ) )

    handlerInfo = {}
    handlerInfo[ "name" ] = handlerName
    handlerInfo[ "module" ] = self._svcData[ 'moduleObj' ]
    handlerInfo[ "class" ] = handlerClass
    handlerInfo[ "init" ] = handlerInitMethods

    return S_OK( handlerInfo )

  def _loadActions( self ):
    """
    Discover the methods exported by the handler class for every action type,
    create a lock per method and collect the type and auth definitions.

    :return: S_OK( dict ) with keys 'methods', 'auth' and 'types'
    """

    handlerClass = self._handler[ 'class' ]

    authRules = {}
    typeCheck = {}
    methodsList = {}
    for actionType in Service.SVC_VALID_ACTIONS:
      if self._isMetaAction( actionType ):
        continue
      authRules[ actionType ] = {}
      typeCheck[ actionType ] = {}
      methodsList[ actionType ] = []
    handlerAttributeList = dir( handlerClass )
    for actionType in Service.SVC_VALID_ACTIONS:
      if self._isMetaAction( actionType ):
        continue
      methodPrefix = '%s_' % Service.SVC_VALID_ACTIONS[ actionType ]
      for attribute in handlerAttributeList:
        if not attribute.startswith( methodPrefix ):
          continue
        exportedName = attribute[ len( methodPrefix ) : ]
        methodsList[ actionType ].append( exportedName )
        gLogger.verbose( "+ Found %s method %s" % ( actionType, exportedName ) )
        #Create lock for method
        self._lockManager.createLock( "%s/%s" % ( actionType, exportedName ),
                                       self._cfg.getMaxThreadsForMethod( actionType, exportedName ) )
        #Look for type and auth rules
        if actionType == 'RPC':
          typeAttr = "types_%s" % exportedName
          authAttr = "auth_%s" % exportedName
        else:
          typeAttr = "types_%s_%s" % ( Service.SVC_VALID_ACTIONS[ actionType ], exportedName )
          authAttr = "auth_%s_%s" % ( Service.SVC_VALID_ACTIONS[ actionType ], exportedName )
        if typeAttr in handlerAttributeList:
          obj = getattr( handlerClass, typeAttr )
          gLogger.verbose( "|- Found type definition %s: %s" % ( typeAttr, str( obj ) ) )
          typeCheck[ actionType ][ exportedName ] = obj
        if authAttr in handlerAttributeList:
          obj = getattr( handlerClass, authAttr )
          gLogger.verbose( "|- Found auth rules %s: %s" % ( authAttr, str( obj ) ) )
          authRules[ actionType ][ exportedName ] = obj

    #A meta action gets the union of the auth properties of the referred action
    for actionType in Service.SVC_VALID_ACTIONS:
      referedAction = self._isMetaAction( actionType )
      if not referedAction:
        continue
      gLogger.verbose( "Action %s is a meta action for %s" % ( actionType, referedAction ) )
      authRules[ actionType ] = []
      for method in authRules[ referedAction ]:
        for prop in authRules[ referedAction ][ method ]:
          if prop not in authRules[ actionType ]:
            authRules[ actionType ].append( prop )
      gLogger.verbose( "Meta action %s props are %s" % ( actionType, authRules[ actionType ] ) )

    return S_OK( { 'methods' : methodsList, 'auth' : authRules, 'types' : typeCheck } )

  def _initMonitoring( self ):
    """
    Configure the monitoring client, register the activities reported by the
    service and register the secondary names of the service.

    :return: S_OK()
    """
    #Init extra bits of monitoring
    self._monitor.setComponentType( MonitoringClient.COMPONENT_SERVICE )
    self._monitor.setComponentName( self._name )
    self._monitor.setComponentLocation( self._cfg.getURL() )
    self._monitor.initialize()
    self._monitor.registerActivity( "Connections", "Connections received", "Framework", "connections", MonitoringClient.OP_RATE )
    self._monitor.registerActivity( "Queries", "Queries served", "Framework", "queries", MonitoringClient.OP_RATE )
    self._monitor.registerActivity( 'CPU', "CPU Usage", 'Framework', "CPU,%", MonitoringClient.OP_MEAN, 600 )
    self._monitor.registerActivity( 'MEM', "Memory Usage", 'Framework', 'Memory,MB', MonitoringClient.OP_MEAN, 600 )
    self._monitor.registerActivity( 'PendingQueries', "Pending queries", 'Framework', 'queries', MonitoringClient.OP_MEAN )
    self._monitor.registerActivity( 'ActiveQueries', "Active queries", 'Framework', 'threads', MonitoringClient.OP_MEAN )
    self._monitor.registerActivity( 'RunningThreads', "Running threads", 'Framework', 'threads', MonitoringClient.OP_MEAN )
    self._monitor.registerActivity( 'MaxFD', "Max File Descriptors", 'Framework', 'fd', MonitoringClient.OP_MEAN )

    self._monitor.setComponentExtraParam( 'DIRACVersion', DIRAC.version )
    self._monitor.setComponentExtraParam( 'platform', DIRAC.getPlatform() )
    self._monitor.setComponentExtraParam( 'startTime', Time.dateTime() )
    #Publish the handler module version and description, 'unset' when missing
    for prop in ( ( "__RCSID__", "version" ), ( "__doc__", "description" ) ):
      try:
        value = getattr( self._handler[ 'module' ], prop[0] )
      except Exception as e:
        gLogger.exception( e )
        gLogger.error( "Missing property", prop[0] )
        value = 'unset'
      self._monitor.setComponentExtraParam( prop[1], value )
    for secondaryName in self._cfg.registerAlsoAs():
      gLogger.info( "Registering %s also as %s" % ( self._name, secondaryName ) )
      self._validNames.append( secondaryName )
    return S_OK()

  def __reportThreadPoolContents( self ):
    """ Periodic task: send the thread pool and file descriptor marks """
    self._monitor.addMark( 'PendingQueries', self._threadPool.pendingJobs() )
    self._monitor.addMark( 'ActiveQueries', self._threadPool.numWorkingThreads() )
    self._monitor.addMark( 'RunningThreads', threading.activeCount() )
    self._monitor.addMark( 'MaxFD', self.__maxFD )
    #Restart the maximum for the next reporting period
    self.__maxFD = 0


  def getConfig( self ):
    """ Return the ServiceConfiguration object of this service """
    return self._cfg

  #End of initialization functions

  def handleConnection( self, clientTransport ):
    """
    Count a new connection and queue its processing in the thread pool.

    :param clientTransport: transport object of the accepted connection
    """
    self._stats[ 'connections' ] += 1
    #NOTE(review): the 'queries' extra param is fed with the connections counter - confirm intended
    self._monitor.setComponentExtraParam( 'queries', self._stats[ 'connections' ] )
    self._threadPool.generateJobAndQueueIt( self._processInThread,
                                             args = ( clientTransport, ) )

  #Threaded process function
  def _processInThread( self, clientTransport ):
    """
    Serve one connection: handshake, proposal check, handler instantiation and
    execution of the action. Runs inside a thread of the pool.

    :param clientTransport: transport object of the accepted connection
    :return: result of processing the proposal, or None when the connection
             was dropped or rejected before reaching that point
    """
    self.__maxFD = max( self.__maxFD, clientTransport.oSocket.fileno() )
    self._lockManager.lockGlobal()
    #Monitoring is best effort: a failure here must not prevent serving the query
    try:
      monReport = self.__startReportToMonitoring()
    except Exception:
      monReport = False
    try:
      #Handshake
      try:
        result = clientTransport.handshake()
        if not result[ 'OK' ]:
          clientTransport.close()
          return
      except:
        #Any failure during the handshake silently drops the connection
        return
      #Add to the transport pool
      trid = self._transportPool.add( clientTransport )
      if not trid:
        return
      #Receive and check proposal
      result = self._receiveAndCheckProposal( trid )
      if not result[ 'OK' ]:
        self._transportPool.sendAndClose( trid, result )
        return
      proposalTuple = result[ 'Value' ]
      #Instantiate handler
      result = self._instantiateHandler( trid, proposalTuple )
      if not result[ 'OK' ]:
        self._transportPool.sendAndClose( trid, result )
        return
      handlerObj = result[ 'Value' ]
      #Execute the action
      result = self._processProposal( trid, proposalTuple, handlerObj )
      #Close the connection if required. 'OK' is checked first and 'closeTransport'
      #is read defensively, because an error structure may not carry that key
      if not result[ 'OK' ] or result.get( 'closeTransport', True ):
        if not result[ 'OK' ]:
          gLogger.error( "Error processing proposal", result[ 'Message' ] )
        self._transportPool.close( trid )
      return result
    finally:
      self._lockManager.unlockGlobal()
      if monReport:
        self.__endReportToMonitoring( *monReport )


  def _createIdentityString( self, credDict, clientTransport = None ):
    """
    Build a printable identity for log messages, of the form
    [username:group]{host:port}(DN), where each part is present only if known.

    :param dict credDict: credentials; 'username', 'group' and 'DN' are used if present
    :param clientTransport: optional transport, used to add the remote address
    :return: identity string
    """
    if 'username' in credDict:
      if 'group' in credDict:
        identity = "[%s:%s]" % ( credDict[ 'username' ], credDict[ 'group' ] )
      else:
        identity = "[%s:unknown]" % credDict[ 'username' ]
    else:
      identity = 'unknown'
    if clientTransport:
      addr = clientTransport.getRemoteAddress()
      if addr:
        #The formatted address used to be computed and then thrown away
        identity += "{%s:%s}" % ( addr[0], addr[1] )
    if 'DN' in credDict:
      identity += "(%s)" % credDict[ 'DN' ]
    return identity

  def _receiveAndCheckProposal( self, trid ):
    """
    Receive the action proposal from the client and validate it: requested
    service name, action type and authorization.

    :param trid: transport id of the connection
    :return: S_OK( proposalTuple ) or S_ERROR
    """
    clientTransport = self._transportPool.get( trid )
    #Get the peer credentials
    credDict = clientTransport.getConnectingCredentials()
    #Receive the action proposal
    retVal = clientTransport.receiveData( 1024 )
    if not retVal[ 'OK' ]:
      gLogger.error( "Invalid action proposal", "%s %s" % ( self._createIdentityString( credDict,
                                                                                        clientTransport ),
                                                            retVal[ 'Message' ] ) )
      return S_ERROR( "Invalid action proposal" )
    proposalTuple = retVal[ 'Value' ]
    gLogger.debug( "Received action from client", "/".join( list( proposalTuple[1] ) ) )
    #Check if there are extra credentials
    if proposalTuple[2]:
      clientTransport.setExtraCredentials( proposalTuple[2] )
    #Check if this is the requested service
    requestedService = proposalTuple[0][0]
    if requestedService not in self._validNames:
      return S_ERROR( "%s is not up in this server" % requestedService )
    #Check if the action is valid
    requestedActionType = proposalTuple[1][0]
    if requestedActionType not in Service.SVC_VALID_ACTIONS:
      return S_ERROR( "%s is not a known action type" % requestedActionType )
    #Check if it's authorized
    result = self._authorizeProposal( proposalTuple[1], trid, credDict )
    if not result[ 'OK' ]:
      return result
    #Proposal is OK
    return S_OK( proposalTuple )

  def _authorizeProposal( self, actionTuple, trid, credDict ):
    """
    Check the credentials against the auth rules of the requested action and
    record the decision in the security log.

    :param tuple actionTuple: ( action type, action name )
    :param trid: transport id of the connection
    :param dict credDict: credentials of the client
    :return: S_OK() if authorized, S_ERROR otherwise ( also when the client is gone )
    """
    #Find CS path for the Auth rules
    referedAction = self._isMetaAction( actionTuple[0] )
    if referedAction:
      csAuthPath = "%s/Default" % actionTuple[0]
      hardcodedMethodAuth = self._actions[ 'auth' ][ actionTuple[0] ]
    else:
      if actionTuple[0] == 'RPC':
        csAuthPath = actionTuple[1]
      else:
        csAuthPath = "/".join( actionTuple )
      #Find if there are hardcoded auth rules in the code
      hardcodedMethodAuth = False
      if actionTuple[0] in self._actions[ 'auth' ]:
        hardcodedRulesByType = self._actions[ 'auth' ][ actionTuple[0] ]
        if actionTuple[0] == "FileTransfer":
          #The rules are looked up with the first letter of the name lower-cased
          methodName = actionTuple[1][0].lower() + actionTuple[1][1:]
        else:
          methodName = actionTuple[1]

        if methodName in hardcodedRulesByType:
          hardcodedMethodAuth = hardcodedRulesByType[ methodName ]
    #Auth time!
    if not self._authMgr.authQuery( csAuthPath, credDict, hardcodedMethodAuth ):
      #Get the identity string
      identity = self._createIdentityString( credDict )
      fromHost = "unknown host"
      tr = self._transportPool.get( trid )
      if tr:
        fromHost = '/'.join( [ str( item ) for item in tr.getRemoteAddress() ] )
      gLogger.warn( "Unauthorized query", "to %s:%s by %s from %s" % ( self._name,
                                                               "/".join( actionTuple ),
                                                               identity, fromHost ) )
      result = S_ERROR( "Unauthorized query" )
    else:
      result = S_OK()

    #Security log
    tr = self._transportPool.get( trid )
    if not tr:
      return S_ERROR( "Client disconnected" )
    sourceAddress = tr.getRemoteAddress()
    identity = self._createIdentityString( credDict )
    Service.SVC_SECLOG_CLIENT.addMessage( result[ 'OK' ], sourceAddress[0], sourceAddress[1], identity,
                                      self._cfg.getHostname(),
                                      self._cfg.getPort(),
                                      self._name, "/".join( actionTuple ) )
    return result

  def _instantiateHandler( self, trid, proposalTuple = None ):
    """
    Generate an instance of the handler for a given service

    :param trid: transport id of the connection
    :param proposalTuple: optional action proposal; when given, the client
                          setup and VO are taken from its first element
    :return: S_OK( handlerInstance ) or S_ERROR
    """
    #Generate the client params
    clientParams = { 'serviceStartTime' : self._startTime }
    if proposalTuple:
      clientParams[ 'clientSetup' ] = proposalTuple[0][1]
      if len( proposalTuple[0] ) < 3:
        clientParams[ 'clientVO' ] = gConfig.getValue( "/DIRAC/VirtualOrganization", "unknown" )
      else:
        clientParams[ 'clientVO' ] = proposalTuple[0][2]
    clientTransport = self._transportPool.get( trid )
    if clientTransport:
      clientParams[ 'clientAddress' ] = clientTransport.getRemoteAddress()
    #Generate handler dict with per client info
    handlerInitDict = dict( self._serviceInfoDict )
    handlerInitDict.update( clientParams )
    #Instantiate and initialize
    try:
      handlerInstance = self._handler[ 'class' ]( handlerInitDict, trid )
      handlerInstance.initialize()
    except Exception as e:
      gLogger.exception( "Server error while loading handler: %s" % str( e ) )
      return S_ERROR( "Server error while loading handler" )
    return S_OK( handlerInstance )

  def _processProposal( self, trid, proposalTuple, handlerObj ):
    """
    Tell the client the proposal was accepted and execute the action. A
    ( 'Connection', 'new' ) proposal turns the transport into a stable
    messaging connection handled by the message broker.

    :param trid: transport id of the connection
    :param proposalTuple: validated action proposal
    :param handlerObj: handler instance that executes the action
    :return: result structure, always carrying a 'closeTransport' flag
    """
    #Notify the client we're ready to execute the action
    retVal = self._transportPool.send( trid, S_OK() )
    if not retVal[ 'OK' ]:
      #The caller reads 'closeTransport' from whatever is returned here
      retVal[ 'closeTransport' ] = True
      return retVal

    messageConnection = False
    if proposalTuple[1] == ( 'Connection', 'new' ):
      messageConnection = True

    if messageConnection:

      if self._msgBroker.getNumConnections() > self._cfg.getMaxMessagingConnections():
        result = S_ERROR( "Maximum number of connections reached. Try later" )
        result[ 'closeTransport' ] = True
        return result

      #This is a stable connection
      self._msgBroker.addTransportId( trid, self._name,
                                       receiveMessageCallback = self._mbReceivedMsg,
                                       disconnectCallback = self._mbDisconnect,
                                       listenToConnection = False )

    result = self._executeAction( trid, proposalTuple, handlerObj )
    if result[ 'OK' ] and messageConnection:
      self._msgBroker.listenToTransport( trid )
      result = self._mbConnect( trid, handlerObj )
      if not result[ 'OK' ]:
        self._msgBroker.removeTransport( trid )

    #Only a successfully established messaging connection stays open
    result[ 'closeTransport' ] = not messageConnection or not result[ 'OK' ]
    return result

  def _mbConnect( self, trid, handlerObj = None ):
    """
    Run the 'connected' connection callback of the handler.

    :param trid: transport id of the connection
    :param handlerObj: handler instance; a new one is created if not given
    :return: result of the callback, or the failed instantiation result
    """
    if not handlerObj:
      result = self._instantiateHandler( trid )
      if not result[ 'OK' ]:
        return result
      handlerObj = result[ 'Value' ]
    return handlerObj._rh_executeConnectionCallback( 'connected' )

  def _executeAction( self, trid, proposalTuple, handlerObj ):
    """
    Execute the proposed action in the handler; exceptions are logged and
    converted into S_ERROR.

    :param trid: transport id of the connection ( not used here )
    :param proposalTuple: action proposal forwarded to the handler
    :param handlerObj: handler instance
    :return: result of the handler, or S_ERROR on exception
    """
    try:
      return handlerObj._rh_executeAction( proposalTuple )
    except Exception as e:
      gLogger.exception( "Exception while executing handler action" )
      return S_ERROR( "Server error while executing action: %s" % str( e ) )

  def _mbReceivedMsg( self, trid, msgObj ):
    """
    Message broker callback: authorize the received message and pass it to a
    new handler instance.

    :param trid: transport id the message arrived on
    :param msgObj: received message; its name is the action to authorize
    :return: result of the handler message callback, or S_ERROR
    """
    #The transport may be gone by the time the callback runs
    clientTransport = self._transportPool.get( trid )
    if not clientTransport:
      return S_ERROR( "Client disconnected" )
    result = self._authorizeProposal( ( 'Message', msgObj.getName() ),
                                      trid,
                                      clientTransport.getConnectingCredentials() )
    if not result[ 'OK' ]:
      return result
    result = self._instantiateHandler( trid )
    if not result[ 'OK' ]:
      return result
    handlerObj = result[ 'Value' ]
    return handlerObj._rh_executeMessageCallback( msgObj )

  def _mbDisconnect( self, trid ):
    """
    Message broker callback: run the 'drop' connection callback of a new
    handler instance.

    :param trid: transport id of the dropped connection
    :return: result of the callback, or the failed instantiation result
    """
    result = self._instantiateHandler( trid )
    if not result[ 'OK' ]:
      return result
    handlerObj = result[ 'Value' ]
    return handlerObj._rh_executeConnectionCallback( 'drop' )


  def __startReportToMonitoring( self ):
    """
    Mark the start of a query and sample the memory usage.

    :return: tuple ( wall clock time, CPU time ) at the start of the query, to
             be passed to __endReportToMonitoring
    """
    self._monitor.addMark( "Queries" )
    now = time.time()
    stats = os.times()
    #User time of this process plus user time of its children
    cpuTime = stats[0] + stats[2]
    #Last update lies in the future ( clock stepped back ): skip the memory sample
    if now - self.__monitorLastStatsUpdate < 0:
      return ( now, cpuTime )
    self.__monitorLastStatsUpdate = now
    # Send Memory consumption mark
    membytes = MemStat.VmB( 'VmRSS:' )
    if membytes:
      mem = membytes / ( 1024. * 1024. )
      self._monitor.addMark( 'MEM', mem )
    return ( now, cpuTime )

  def __endReportToMonitoring( self, initialWallTime, initialCPUTime ):
    """
    Send the CPU usage ( percentage ) of the query that just finished.

    :param initialWallTime: wall clock time when the query started
    :param initialCPUTime: CPU time when the query started
    """
    wallTime = time.time() - initialWallTime
    #No measurable elapsed time: the ratio is undefined and dividing would raise
    if wallTime <= 0:
      return
    stats = os.times()
    cpuTime = stats[0] + stats[2] - initialCPUTime
    percentage = cpuTime / wallTime * 100.
    if percentage > 0:
      self._monitor.addMark( 'CPU', percentage )
コード例 #23
0
class FTSMonitorAgent( AgentModule ):
  """
  .. class:: FTSMonitorAgent

  Monitor submitted FTS jobs.

  The class attributes below are defaults; initialize() replaces transferDB and
  threadPool with real objects and re-reads the thread limits from the agent options.
  """
  # # transfer DB handle, set in initialize()
  transferDB = None
  # # thread pool, created in initialize()
  threadPool = None
  # # default for the 'MinThreads' option
  minThreads = 1
  # # default for the 'MaxThreads' option
  maxThreads = 10

  # # compiled patterns searched in the FTS failure reason by missingSource()
  # # to recognise a missing source file
  missingSourceErrors = [
    re.compile( r"SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] Failed" ),
    re.compile( r"SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] No such file or directory" ),
    re.compile( r"SOURCE error during PREPARATION phase: \[INVALID_PATH\] Failed" ),
    re.compile( r"SOURCE error during PREPARATION phase: \[INVALID_PATH\] The requested file either does not exist" ),
    re.compile( r"TRANSFER error during TRANSFER phase: \[INVALID_PATH\] the server sent an error response: 500 500"\
               " Command failed. : open error: No such file or directory" ),
    re.compile( r"SOURCE error during TRANSFER_PREPARATION phase: \[USER_ERROR\] source file doesnt exist" ) ]

  def initialize( self ):
    """ Set up the TransferDB handle, the shifter proxy option and the thread pool.

    The thread limits come from the 'MinThreads' and 'MaxThreads' options; their
    absolute values are taken and ordered so that min <= max.

    :return: S_OK()
    """
    self.transferDB = TransferDB()
    self.am_setOption( "shifterProxy", "DataManager" )
    self.minThreads = self.am_getOption( "MinThreads", self.minThreads )
    self.maxThreads = self.am_getOption( "MaxThreads", self.maxThreads )
    # # sanitise: no negative values, and the smaller one is the minimum
    lowerLimit, upperLimit = sorted( ( abs( self.minThreads ), abs( self.maxThreads ) ) )
    self.minThreads = lowerLimit
    self.maxThreads = upperLimit
    self.log.info( "ThreadPool min threads = %s" % lowerLimit )
    self.log.info( "ThreadPool max threads = %s" % upperLimit )
    pool = ThreadPool( lowerLimit, upperLimit )
    self.threadPool = pool
    pool.daemonize()
    return S_OK()

  def execute( self ):
    """ push jobs to the thread pool """
    self.log.info( "Obtaining requests to monitor" )
    res = self.transferDB.getFTSReq()
    if not res["OK"]:
      self.log.error( "Failed to get FTS requests", res['Message'] )
      return res
    if not res["Value"]:
      self.log.info( "No FTS requests found to monitor." )
      return S_OK()
    ftsReqs = res["Value"]
    self.log.info( "Found %s FTS jobs" % len( ftsReqs ) )
    i = 1
    for ftsJob in ftsReqs:
      while True:
        self.log.debug( "submitting FTS Job %s FTSReqID=%s to monitor" % ( i, ftsJob["FTSReqID"] ) )
        ret = self.threadPool.generateJobAndQueueIt( self.monitorTransfer, args = ( ftsJob, ), )
        if ret["OK"]:
          i += 1
          break
        # # sleep 1 second to proceed
        time.sleep( 1 )

    self.threadPool.processAllResults()
    return S_OK()

  def ftsJobExpired( self, ftsReqID, channelID ):
    """ Clean up TransferDB after an FTS job has expired on the server side.

    The files of the request are flagged 'Failed' in FileToFTS (best effort,
    failures are only logged), their Channel entries are reset and the FTS
    request is set to 'Finished'.

    :param int ftsReqID: FTSReq.FTSReqID
    :param int channelID: FTSReq.ChannelID
    :return: S_OK(), or the first failing TransferDB result for the FileIDs lookup,
             the Channel reset or the FTSReq status update
    """
    log = gLogger.getSubLogger( "@%s" % str( ftsReqID ) )
    res = self.transferDB.getFTSReqFileIDs( ftsReqID )
    if not res["OK"]:
      log.error( "Unable to retrieve FileIDs associated to %s request" % ftsReqID )
      return res
    fileIDs = res["Value"]

    # # ( attribute, value, error template ) applied to every file, in this order;
    # # this is just a clean up, so a failure is logged and the loop carries on
    attributeUpdates = ( ( "Status", "Failed",
                           "Unable to set FileToFTS status to 'Failed' for FileID %s: %s" ),
                         ( "Reason", "FTS job expired on server",
                           "Unable to set FileToFTS reason for FileID %s: %s" ) )
    for fileID in fileIDs:
      for attrName, attrValue, errorTemplate in attributeUpdates:
        res = self.transferDB.setFileToFTSFileAttribute( ftsReqID, fileID, attrName, attrValue )
        if not res["OK"]:
          log.error( errorTemplate % ( fileID, res["Message"] ) )

    # # update Channel table
    res = self.transferDB.resetFileChannelStatus( channelID, fileIDs )
    if not res["OK"]:
      log.error( "Failed to reset Channel table for files to retry" )
      return res

    # # update FTSReq table
    log.info( "Setting FTS request status to 'Finished'" )
    res = self.transferDB.setFTSReqStatus( ftsReqID, "Finished" )
    if not res["OK"]:
      log.error( "Failed update FTS Request status", res["Message"] )
      return res

    return S_OK()

  def monitorTransfer( self, ftsReqDict ):
    """ Monitor one FTS job taken from TransferDB.

    Refreshes the summary of the FTS request, stores its percentage of completion
    and, when the request is terminal, hands it over to terminalRequest(). A
    request the FTS server does not know any more is cleaned up with ftsJobExpired().

    :param dict ftsReqDict: FTS job dictionary; the keys FTSReqID, FTSGuid,
                            FTSServer, ChannelID, SourceSE and TargetSE are read
    :return: S_OK() for a non terminal request, otherwise the result of the
             failing step, of the clean up or of terminalRequest()
    """
    wantedKeys = ( "FTSReqID", "FTSGuid", "FTSServer", "ChannelID", "SourceSE", "TargetSE" )
    ftsReqID, ftsGUID, ftsServer, channelID, sourceSE, targetSE = [ ftsReqDict.get( key ) for key in wantedKeys ]

    oFTSRequest = FTSRequest()
    oFTSRequest.setFTSServer( ftsServer )
    oFTSRequest.setFTSGUID( ftsGUID )
    oFTSRequest.setSourceSE( sourceSE )
    oFTSRequest.setTargetSE( targetSE )

    log = gLogger.getSubLogger( "@%s" % str( ftsReqID ) )

    # # summary update of the FTS request and of the FTSReq entry
    log.info( "Perform summary update of the FTS Request" )
    log.info( "\n".join( [ "glite-transfer-status -s %s -l %s" % ( ftsServer, ftsGUID ),
                           "FTS GUID:   %s" % ftsGUID,
                           "FTS Server: %s" % ftsServer ] ) )
    summary = oFTSRequest.summary()
    self.transferDB.setFTSReqLastMonitor( ftsReqID )
    if not summary["OK"]:
      log.error( "Failed to update the FTS request summary", summary["Message"] )
      if "getTransferJobSummary2: Not authorised to query request" not in summary["Message"]:
        return summary
      log.error( "FTS job is not existing at the FTS server anymore, will clean it up on TransferDB side" )
      cleanUp = self.ftsJobExpired( ftsReqID, channelID )
      if not cleanUp["OK"]:
        log.error( cleanUp["Message"] )
      return cleanUp

    dumped = oFTSRequest.dumpSummary()
    if not dumped['OK']:
      log.error( "Failed to get FTS request summary", dumped["Message"] )
      return dumped
    log.info( dumped['Value'] )

    completion = oFTSRequest.getPercentageComplete()
    if not completion['OK']:
      log.error( "Failed to get FTS percentage complete", completion["Message"] )
      return completion
    percentage = completion["Value"]
    log.info( 'FTS Request found to be %.1f percent complete' % percentage )
    self.transferDB.setFTSReqAttribute( ftsReqID, "PercentageComplete", percentage )
    self.transferDB.addLoggingEvent( ftsReqID, percentage )

    # # only a terminal request needs its files updated in the TransferDB
    terminal = oFTSRequest.isRequestTerminal()
    if not terminal["OK"]:
      log.error( "Failed to determine whether FTS request terminal", terminal["Message"] )
      return terminal
    if terminal["Value"]:
      return self.terminalRequest( oFTSRequest, ftsReqID, channelID, sourceSE )
    return S_OK()

  def terminalRequest( self, oFTSRequest, ftsReqID, channelID, sourceSE ):
    """ process terminal FTS job

    Runs the detailed monitoring of the request, sorts its files into completed,
    to-retry and to-fail, and pushes the outcome to TransferDB through
    updateFileToFTS() and, when that reported full success, updateFileToCat().

    :param FTSRequest oFTSRequest: FTSRequest instance
    :param int ftsReqID: FTSReq.FTSReqID
    :param int channelID: FTSReq.ChannelID
    :param str sourceSE: FTSReq.SourceSE
    :return: the failing result of a lookup / monitoring / finalize step, S_ERROR when
             no files are attached to the request, S_OK() otherwise
             NOTE(review): S_OK() is also returned when updateFileToFTS() reported
             failed updates; the request is then left as it is (not set to 'Finished').
    """
    log = gLogger.getSubLogger( "@%s" % ftsReqID )

    log.info( "FTS Request found to be terminal, updating file states" )
    #########################################################################
    # Get the LFNS associated to the FTS request
    log.info( "Obtaining the LFNs associated to this request" )
    res = self.transferDB.getFTSReqLFNs( ftsReqID, channelID, sourceSE )
    if not res["OK"]:
      log.error( "Failed to obtain FTS request LFNs", res['Message'] )
      return res
    files = res["Value"]
    if not files:
      log.error( "No files present for transfer" )
      return S_ERROR( "No files were found in the DB" )

    # register every LFN with the request object before the detailed monitoring
    lfns = files.keys()
    log.debug( "Obtained %s files" % len( lfns ) )
    for lfn in lfns:
      oFTSRequest.setLFN( lfn )

    res = oFTSRequest.monitor()
    if not res["OK"]:
      log.error( "Failed to perform detailed monitoring of FTS request", res["Message"] )
      return res
    res = oFTSRequest.getFailed()
    if not res["OK"]:
      log.error( "Failed to obtained failed files for FTS request", res["Message"] )
      return res
    failedFiles = res["Value"]
    res = oFTSRequest.getDone()
    if not res["OK"]:
      log.error( "Failed to obtained successful files for FTS request", res["Message"] )
      return res
    completedFiles = res["Value"]

    # An LFN can be included more than once if it was entered into more than one Request.
    # FTS will only do the transfer once. We need to identify all FileIDs
    res = self.transferDB.getFTSReqFileIDs( ftsReqID )
    if not res["OK"]:
      log.error( "Failed to get FileIDs associated to FTS Request", res["Message"] )
      return res
    fileIDs = res["Value"]
    res = self.transferDB.getAttributesForFilesList( fileIDs, ["LFN"] )
    if not res["OK"]:
      log.error( "Failed to get LFNs associated to FTS Request", res["Message"] )
      return res
    fileIDDict = res["Value"]

    # tuples ( fileID, status, reason, 0, transferTime ) for the FileToFTS table
    fileToFTSUpdates = []
    completedFileIDs = []
    filesToRetry = []
    filesToFail = []

    for fileID, fileDict in fileIDDict.items():
      lfn = fileDict['LFN']
      if lfn in completedFiles:
        completedFileIDs.append( fileID )
        # transfer time stays 0 when it cannot be obtained
        transferTime = 0
        res = oFTSRequest.getTransferTime( lfn )
        if res["OK"]:
          transferTime = res["Value"]
        fileToFTSUpdates.append( ( fileID, "Completed", "", 0, transferTime ) )

      if lfn in failedFiles:
        # fail reason stays empty when it cannot be obtained
        failReason = ""
        res = oFTSRequest.getFailReason( lfn )
        if res["OK"]:
          failReason = res["Value"]
        if "Source file/user checksum mismatch" in failReason:
          # NOTE(review): this branch skips the log message and the "Failed"
          # FileToFTS update done below for the other failures - confirm it is intended
          filesToFail.append( fileID )
          continue
        if self.missingSource( failReason ):
          # a missing source cannot succeed on retry, so the file is failed
          log.error( "The source SURL does not exist.", "%s %s" % ( lfn, oFTSRequest.getSourceSURL( lfn ) ) )
          filesToFail.append( fileID )
        else:
          filesToRetry.append( fileID )
        log.error( "Failed to replicate file on channel.", "%s %s" % ( channelID, failReason ) )
        fileToFTSUpdates.append( ( fileID, "Failed", failReason, 0, 0 ) )

    # # update TransferDB.FileToFTS table
    updateFileToFTS = self.updateFileToFTS( ftsReqID, channelID,
                                            filesToRetry, filesToFail,
                                            completedFileIDs, fileToFTSUpdates )

    # finalize only when every FileToFTS / Channel update went through
    if updateFileToFTS["OK"] and updateFileToFTS["Value"]:
      res = oFTSRequest.finalize()
      if not res["OK"]:
        log.error( "Failed to perform the finalization for the FTS request", res["Message"] )
        return res

      log.info( 'Adding logging event for FTS request' )
      # failure to add the logging event is logged but does not stop the processing
      res = self.transferDB.addLoggingEvent( ftsReqID, 'Finished' )
      if not res['OK']:
        log.error( 'Failed to add logging event for FTS Request', res['Message'] )

      # update TransferDB.FileToCat table
      updateFileToCat = self.updateFileToCat( oFTSRequest, channelID, fileIDDict, completedFiles, filesToFail )
      if not updateFileToCat["OK"]:
        log.error( updateFileToCat["Message"] )

      # Now set the FTSReq status to terminal so that it is not monitored again
      log.debug( "Updating FTS request status" )
      res = self.transferDB.setFTSReqStatus( ftsReqID, 'Finished' )
      if not res['OK']:
        log.error( 'Failed update FTS Request status', res['Message'] )
    return S_OK()


  def updateFileToFTS( self, ftsReqID, channelID, filesToRetry, filesToFail, completedFileIDs, fileToFTSUpdates ):
    """ update TransferDB.FileToFTS and Channel tables for finished request

    Every update is attempted even if a previous one failed; failures are logged.

    :param int ftsReqID: FTSReq.FTSReqID
    :param int channelID: FTSReq.ChannelID
    :param list filesToRetry: FileIDs to retry
    :param list filesToFail: FileIDs for failed files
    :param list completedFileIDs: files completed
    :param list fileToFTSUpdates: update records handed over unchanged to
                                  TransferDB.setFileToFTSFileAttributes
    :return: S_OK( bool ), True only when all the updates succeeded
    """
    log = gLogger.getSubLogger( "@%s" % ftsReqID )

    allUpdated = True

    res = self.transferDB.resetFileChannelStatus( channelID, filesToRetry ) if filesToRetry else S_OK()
    if not res["OK"]:
      log.error( "Failed to update the Channel table for file to retry.", res["Message"] )
      allUpdated = False

    for fileID in filesToFail:
      log.info( "Updating the Channel table for files to reschedule" )
      res = self.transferDB.setFileToReschedule( fileID )
      if not res["OK"]:
        log.error( "Failed to update Channel table for failed files.", res["Message"] )
        allUpdated = False
      elif res["Value"] == "max reschedule attempt reached":
        # the format string needs its %s: without it the % operator raises
        # TypeError and the Channel status below would never be set
        log.error( "setting Channel status to 'Failed' : %s" % res["Value"] )
        res = self.transferDB.setFileChannelStatus( channelID, fileID, 'Failed' )
        if not res["OK"]:
          log.error( "Failed to update Channel table for failed files.", res["Message"] )
          allUpdated = False

    if completedFileIDs:
      res = self.transferDB.updateCompletedChannelStatus( channelID, completedFileIDs )
      if not res["OK"]:
        log.error( "Failed to update the Channel table for successful files.", res["Message"] )
        allUpdated = False
      res = self.transferDB.updateAncestorChannelStatus( channelID, completedFileIDs )
      if not res["OK"]:
        log.error( 'Failed to update the Channel table for ancestors of successful files.', res['Message'] )
        allUpdated = False

    if fileToFTSUpdates:
      res = self.transferDB.setFileToFTSFileAttributes( ftsReqID, channelID, fileToFTSUpdates )
      if not res["OK"]:
        log.error( "Failed to update the FileToFTS table for files.", res["Message"] )
        allUpdated = False

    return S_OK( allUpdated )

  def updateFileToCat( self, oFTSRequest, channelID, fileIDDict, completedFiles, filesToFail ):
    """ update TransferDB.FileToCat table for finished request

    Files whose registration failed are set back to waiting; completed files and
    files to fail are set to done.

    :param FTSRequest oFTSRequest: FTSRequest instance
    :param int channelID: FTSReq.ChannelID
    :param dict fileIDDict: fileIDs dictionary, { fileID : { 'LFN' : lfn } }
    :param completedFiles: LFNs successfully transferred
    :param list filesToFail: FileIDs for failed files
    :return: S_OK(), or the failing result with a prefixed 'Message'
    """
    res = oFTSRequest.getFailedRegistrations()
    if not res["OK"]:
      # without this check a failure surfaced as a KeyError on res["Value"]
      res["Message"] = "Failed to get failed registrations: %s" % res["Message"]
      return res
    failedRegistrations = res["Value"]
    regFailedFileIDs = []
    regDoneFileIDs = []
    regForgetFileIDs = []
    for fileID, fileDict in fileIDDict.items():
      lfn = fileDict['LFN']

      if lfn in failedRegistrations:
        regFailedFileIDs.append( fileID )
        # if the LFN appears more than once, FileToCat needs to be reset only once
        del failedRegistrations[lfn]
      elif lfn in completedFiles:
        regDoneFileIDs.append( fileID )
      elif fileID in filesToFail:
        regForgetFileIDs.append( fileID )

    res = self.transferDB.setRegistrationWaiting( channelID, regFailedFileIDs ) if regFailedFileIDs else S_OK()
    if not res["OK"]:
      res["Message"] = "Failed to reset entries in FileToCat: %s" % res["Message"]
      return res

    res = self.transferDB.setRegistrationDone( channelID, regDoneFileIDs ) if regDoneFileIDs else S_OK()
    if not res["OK"]:
      res["Message"] = "Failed to set entries Done in FileToCat: %s" % res["Message"]
      return res

    # This entries could also be set to Failed, but currently there is no method to do so.
    res = self.transferDB.setRegistrationDone( channelID, regForgetFileIDs ) if regForgetFileIDs else S_OK()
    if not res["OK"]:
      res["Message"] = "Failed to set entries Done in FileToCat: %s" % res["Message"]
      return res

    return S_OK()

  @classmethod
  def missingSource( cls, failReason ):
    """ check if message sent by FTS server is concering missing source file

    :param str failReason: message sent by FTS server
    """
    for error in cls.missingSourceErrors:
      if error.search( failReason ):
        return 1
    return 0
コード例 #24
0
ファイル: Publisher.py プロジェクト: NathalieRauschmayr/DIRAC
class Publisher:
  """
  Class Publisher is in charge of gathering information that is spread over
  several sources (RSS status and policy results, command results, DIRAC status),
  so that it can be published on the web.
  """

#############################################################################

  def __init__(self, VOExtension, rsDBIn = None, commandCallerIn = None, infoGetterIn = None,
               WMSAdminIn = None):
    """
    Standard constructor. Every collaborator that is not passed in is created
    here; its module is imported only in that case.

    :params:
      :attr:`VOExtension`: string, VO Extension (e.g. 'LHCb')

      :attr:`rsDBIn`: optional ResourceStatusDB object
      (see :class: `DIRAC.ResourceStatusSystem.DB.ResourceStatusDB.ResourceStatusDB`)

      :attr:`commandCallerIn`: optional CommandCaller object
      (see :class: `DIRAC.ResourceStatusSystem.Command.CommandCaller.CommandCaller`)

      :attr:`infoGetterIn`: optional InfoGetter object
      (see :class: `DIRAC.ResourceStatusSystem.Utilities.InfoGetter.InfoGetter`)

      :attr:`WMSAdminIn`: optional RPCClient object for WMSAdmin
      (see :class: `DIRAC.Core.DISET.RPCClient.RPCClient`)
    """

    self.configModule = Utils.voimport("DIRAC.ResourceStatusSystem.Policy.Configurations", VOExtension)

    if rsDBIn is None:
      from DIRAC.ResourceStatusSystem.DB.ResourceStatusDB import ResourceStatusDB
      rsDBIn = ResourceStatusDB()
    self.rsDB = rsDBIn

    # the management DB cannot be injected, it is always created here
    from DIRAC.ResourceStatusSystem.DB.ResourceManagementDB import ResourceManagementDB
    self.rmDB = ResourceManagementDB()

    if commandCallerIn is None:
      from DIRAC.ResourceStatusSystem.Command.CommandCaller import CommandCaller
      commandCallerIn = CommandCaller()
    self.cc = commandCallerIn

    if infoGetterIn is None:
      from DIRAC.ResourceStatusSystem.Utilities.InfoGetter import InfoGetter
      infoGetterIn = InfoGetter(VOExtension)
    self.ig = infoGetterIn

    if WMSAdminIn is None:
      from DIRAC.Core.DISET.RPCClient import RPCClient
      WMSAdminIn = RPCClient("WorkloadManagement/WMSAdministrator")
    self.WMSAdmin = WMSAdminIn

    # pool running getInfoForPanel, and the lock guarding the results it fills in
    self.threadPool = ThreadPool( 2, 5 )
    self.lockObj = threading.RLock()
    self.infoForPanel_res = {}

#############################################################################

  def getInfo(self, granularity, name, useNewRes = False):
    """
    Standard method to get all the info to be published

    This method uses a ThreadPool (:class:`DIRAC.Core.Utilities.ThreadPool.ThreadPool`)
    with 2-5 threads. The threaded method is
    :meth:`DIRAC.ResourceStatusSystem.Utilities.Publisher.Publisher.getInfoForPanel`

    :params:
      :attr:`granularity`: string - a ValidRes

      :attr:`name`: string - name of the Validres

      :attr:`useNewRes`: boolean. When set to true, will get new results,
      otherwise it will get cached results (where available).

    :return: dictionary with the keys 'TotalRecords', 'ParameterNames', 'Records'
      and 'Extras'; or the string "<name> does not exist!" when the Resource /
      StorageElement is not found in the RSS DB

    :raises: InvalidRes if granularity is not in ValidRes
    """

    if granularity not in ValidRes:
      # call form of raise: the "raise X, Y" statement is Python 2 only
      raise InvalidRes(Utils.where(self, self.getInfo))

    # results filled in by the getInfoForPanel threads, reset at every call
    self.infoForPanel_res = {}

    status = None
    formerStatus = None
    siteType = None
    serviceType = None
    resourceType = None

    if granularity in ('Resource', 'Resources'):
      try:
        resourceType = self.rsDB.getMonitoredsList('Resource', ['ResourceType'],
                                              resourceName = name)[0][0]
      except IndexError:
        return "%s does not exist!" %name

    if granularity in ('StorageElement', 'StorageElements'):
      try:
        siteType = self.rsDB.getMonitoredsList('StorageElement', ['SiteType'],
                                              storageElementName = name)[0][0]
      except IndexError:
        return "%s does not exist!" %name

    paramNames = ['Type', 'Group', 'Name', 'Policy', 'DIRAC Status',
                  'RSS Status', 'Reason', 'Description']

    infoToGet = self.ig.getInfoToApply(('view_info', ), granularity, status = status,
                                       formerStatus = formerStatus, siteType = siteType,
                                       serviceType = serviceType, resourceType = resourceType,
                                       useNewRes = useNewRes)[0]['Panels']
    infoToGet_res = {}

    recordsList = []

    infosForPolicy = {}

    for panel in infoToGet:

      (granularityForPanel, nameForPanel) = self.__getNameForPanel(granularity, name, panel)

      if not self._resExist(granularityForPanel, nameForPanel):
        continue

      #take composite RSS result for name
      nameStatus_res = self._getStatus(nameForPanel, panel)
      panelStatus = nameStatus_res[nameForPanel]

      #one slot per entry of paramNames
      recordBase = [None, None, None, None, None, None, None, None]

      recordBase[1] = panel.replace('_Panel', '')
      recordBase[2] = nameForPanel #nameForPanel
      #DIRAC Status is only there for some panels: None otherwise
      recordBase[4] = panelStatus.get('DIRACStatus')
      recordBase[5] = panelStatus['RSSStatus'] #RSS Status

      record = copy.deepcopy(recordBase)
      record[0] = 'ResultsForResource'

      recordsList.append(record)

      #take info that goes into the panel
      infoForPanel = infoToGet[panel]

      for info in infoForPanel:

        self.threadPool.generateJobAndQueueIt(self.getInfoForPanel,
                                              args = (info, granularityForPanel, nameForPanel) )

      self.threadPool.processAllResults()

      #each info is a dictionary whose first key is the policy name
      for policy in [list(x)[0] for x in infoForPanel]:
        policyRes = self.infoForPanel_res[policy]
        record = copy.deepcopy(recordBase)
        record[0] = 'SpecificInformation'
        record[3] = policy #policyName
        record[4] = None #DIRAC Status
        record[5] = policyRes['Status'] #RSS status for the policy
        record[6] = policyRes['Reason'] #Reason
        record[7] = policyRes['desc'] #Description
        recordsList.append(record)

        infosForPolicy[policy] = policyRes['infos']

    infoToGet_res['TotalRecords'] = len(recordsList)
    infoToGet_res['ParameterNames'] = paramNames
    infoToGet_res['Records'] = recordsList

    infoToGet_res['Extras'] = infosForPolicy

    return infoToGet_res

#############################################################################

  def getInfoForPanel(self, info, granularityForPanel, nameForPanel):
    """
    Collect the results of one policy and store them in self.infoForPanel_res
    under the policy name, with the keys 'Status', 'Reason', 'infos' and 'desc'.
    Writes to self.infoForPanel_res are done holding self.lockObj.

    :params:
      :attr:`info`: dictionary, its first item is { policy name : other info },
      where other info is a { format : what } dictionary or a list of them

      :attr:`granularityForPanel`: string, granularity used for the lookups

      :attr:`nameForPanel`: string, name used for the lookups
    """

    #get single RSS policy results
    policyResToGet, othersInfo = list(info.items())[0]
    pol_res = self.rmDB.getPolicyRes(nameForPanel, policyResToGet)
    if pol_res != []:
      pol_res_dict = {'Status' : pol_res[0], 'Reason' : pol_res[1]}
    else:
      pol_res_dict = {'Status' : 'Unknown', 'Reason' : 'Unknown'}
    with self.lockObj:
      self.infoForPanel_res[policyResToGet] = pol_res_dict

    #get policy description
    desc = self._getPolicyDesc(policyResToGet)

    #get other info
    if not isinstance(othersInfo, list):
      othersInfo = [othersInfo]

    info_res = {}

    for oi in othersInfo:
      format_, what = list(oi.items())[0]
      info_res[format_] = self._getInfo(granularityForPanel, nameForPanel, format_, what)

    with self.lockObj:
      self.infoForPanel_res[policyResToGet]['infos'] = info_res
      self.infoForPanel_res[policyResToGet]['desc'] = desc

#############################################################################

  def _getStatus(self, name, panel):

    #get RSS status
    RSSStatus = self._getInfoFromRSSDB(name, panel)[0][1]

    #get DIRAC status
    if panel in ('Site_Panel', 'SE_Panel'):

      if panel == 'Site_Panel':
        DIRACStatus = self.WMSAdmin.getSiteMaskLogging(name)
        if DIRACStatus['OK']:
          DIRACStatus = DIRACStatus['Value'][name].pop()[0]
        else:
          raise RSSException, Utils.where(self, self._getStatus)

      elif panel == 'SE_Panel':
        ra = getStorageElementStatus(name, 'ReadAccess')['Value']
        wa = getStorageElementStatus(name, 'WriteAccess')['Value']
        DIRACStatus = {'ReadAccess': ra, 'WriteAccess': wa}

      status = { name : { 'RSSStatus': RSSStatus, 'DIRACStatus': DIRACStatus } }

    else:
      status = { name : { 'RSSStatus': RSSStatus} }


    return status

#############################################################################

  def _getInfo(self, granularity, name, format_, what):

    if format_ == 'RSS':
      info_bit_got = self._getInfoFromRSSDB(name, what)
    else:
      if isinstance(what, dict):
        command = what['CommandIn']
        extraArgs = what['args']
      else:
        command = what
        extraArgs = None

      info_bit_got = self.cc.commandInvocation(granularity, name, None,
                                               None, command, extraArgs)

      try:
        info_bit_got = info_bit_got['Result']
      except:
        pass

    return info_bit_got

#############################################################################

  def _getInfoFromRSSDB(self, name, what):

    paramsL = ['Status']

    siteName = None
    serviceName = None
    resourceName = None
    storageElementName = None
    serviceType = None
    gridSiteName = None

    if what == 'ServiceOfSite':
      gran = 'Service'
      paramsL.insert(0, 'ServiceName')
      paramsL.append('Reason')
      siteName = name
    elif what == 'ResOfCompService':
      gran = 'Resources'
      paramsL.insert(0, 'ResourceName')
      paramsL.append('Reason')
      serviceType = name.split('@')[0]
      gridSiteName = getGOCSiteName(name.split('@')[1])
      if not gridSiteName['OK']:
        raise RSSException, gridSiteName['Message']
      gridSiteName = gridSiteName['Value']
    elif what == 'ResOfStorService':
      gran = 'Resources'
      paramsL.insert(0, 'ResourceName')
      paramsL.append('Reason')
      serviceType = name.split('@')[0]
      gridSiteName = getGOCSiteName(name.split('@')[1])
      if not gridSiteName['OK']:
        raise RSSException, gridSiteName['Message']
      gridSiteName = gridSiteName['Value']
    elif what == 'ResOfStorEl':
      gran = 'StorageElements'
      paramsL.insert(0, 'ResourceName')
      paramsL.append('Reason')
      storageElementName = name
    elif what == 'StorageElementsOfSite':
      gran = 'StorageElements'
      paramsL.insert(0, 'StorageElementName')
      paramsL.append('Reason')
      if '@' in name:
        DIRACsiteName = name.split('@').pop()
      else:
        DIRACsiteName = name
      gridSiteName = getGOCSiteName(DIRACsiteName)
      if not gridSiteName['OK']:
        raise RSSException, gridSiteName['Message']
      gridSiteName = gridSiteName['Value']
    elif what == 'Site_Panel':
      gran = 'Site'
      paramsL.insert(0, 'SiteName')
      siteName = name
    elif what == 'Service_Computing_Panel':
      gran = 'Service'
      paramsL.insert(0, 'ServiceName')
      serviceName = name
    elif what == 'Service_Storage_Panel':
      gran = 'Service'
      paramsL.insert(0, 'ServiceName')
      serviceName = name
    elif what == 'Service_VO-BOX_Panel':
      gran = 'Services'
      paramsL.insert(0, 'ServiceName')
      serviceName = name
    elif what == 'Service_VOMS_Panel':
      gran = 'Services'
      paramsL.insert(0, 'ServiceName')
      serviceName = name
    elif what == 'Resource_Panel':
      gran = 'Resource'
      paramsL.insert(0, 'ResourceName')
      resourceName = name
    elif what == 'SE_Panel':
      gran = 'StorageElement'
      paramsL.insert(0, 'StorageElementName')
      storageElementName = name

    info_bit_got = self.rsDB.getMonitoredsList(gran, paramsList = paramsL, siteName = siteName,
                                               serviceName = serviceName, serviceType = serviceType,
                                               resourceName = resourceName,
                                               storageElementName = storageElementName,
                                               gridSiteName = gridSiteName)

    return info_bit_got

#############################################################################

  def _getPolicyDesc(self, policyName):

    return self.configModule.Policies[policyName]['Description']

#############################################################################

  def __getNameForPanel(self, granularity, name, panel):

    if granularity in ('Site', 'Sites'):
      if panel == 'Service_Computing_Panel':
        granularity = 'Service'
        name = 'Computing@' + name
      elif panel == 'Service_Storage_Panel':
        granularity = 'Service'
        name = 'Storage@' + name
      elif panel == 'OtherServices_Panel':
        granularity = 'Service'
        name = 'OtherS@' + name
      elif panel == 'Service_VOMS_Panel':
        granularity = 'Service'
        name = 'VOMS@' + name
      elif panel == 'Service_VO-BOX_Panel':
        granularity = 'Service'
        name = 'VO-BOX@' + name
#      else:
#        granularity = granularity
#        name = name
#    else:
#      granularity = granularity
#      name = name

    return (granularity, name)

#############################################################################

  def _resExist(self, granularity, name):

    siteName = None
    serviceName = None
    resourceName = None
    storageElementName = None

    if granularity in ('Site', 'Sites'):
      siteName = name
    elif granularity in ('Service', 'Services'):
      serviceName = name
    elif granularity in ('Resource', 'Resources'):
      resourceName = name
    elif granularity in ('StorageElement', 'StorageElements'):
      storageElementName = name

    res = self.rsDB.getMonitoredsList(granularity, siteName = siteName,
                                      serviceName = serviceName, resourceName = resourceName,
                                      storageElementName = storageElementName)

    if res == []:
      return False
    else:
      return True
コード例 #25
0
ファイル: RSInspectorAgent.py プロジェクト: vfalbor/DIRAC
class RSInspectorAgent(AgentModule):
    """ Class RSInspectorAgent is in charge of going through Resources
    table, and pass Resource and Status to the PEP
    """

    #############################################################################

    def initialize(self):
        """ Standard agent initialization.

        Creates the DB objects, the queue and list of resources to be checked,
        the thread pool (size given by the 'maxThreadsInPool' option, 1 by
        default) and the clients, then queues one self._executeCheck job per thread.

        :return: S_OK(), or S_ERROR if the thread pool is not usable or if
                 anything raised during the initialization
        """

        try:
            self.rsDB = ResourceStatusDB()
            self.rmDB = ResourceManagementDB()

            self.ResourcesToBeChecked = Queue.Queue()
            self.ResourceNamesInCheck = []

            self.maxNumberOfThreads = self.am_getOption('maxThreadsInPool', 1)
            self.threadPool = ThreadPool(self.maxNumberOfThreads,
                                         self.maxNumberOfThreads)

            # NOTE(review): relies on the truth value of the ThreadPool object
            if not self.threadPool:
                self.log.error('Can not create Thread Pool')
                return S_ERROR('Can not create Thread Pool')

            self.setup = getSetup()['Value']

            self.VOExtension = getExt()

            # the policy configuration comes from the VO extension, if any
            configModule = __import__(
                self.VOExtension +
                "DIRAC.ResourceStatusSystem.Policy.Configurations", globals(),
                locals(), ['*'])

            self.Resources_check_freq = copy.deepcopy(
                configModule.Resources_check_freq)

            self.nc = NotificationClient()

            self.diracAdmin = DiracAdmin()

            self.csAPI = CSAPI()

            for _ in range(self.maxNumberOfThreads):
                self.threadPool.generateJobAndQueueIt(self._executeCheck,
                                                      args=(None, ))

            return S_OK()

        except Exception:
            errorStr = "RSInspectorAgent initialization"
            gLogger.exception(errorStr)
            return S_ERROR(errorStr)

    #############################################################################

    def execute(self):
        """ The main RSInspectorAgent execution method.

        Calls self.rsDB.getStuffToCheck for the 'Resources' and puts each result,
        prefixed with 'Resource', in self.ResourcesToBeChecked (a Queue); the
        resource name goes in self.ResourceNamesInCheck (a list).

        :return: S_OK(), or S_ERROR if anything raised
        """

        try:

            res = self.rsDB.getStuffToCheck('Resources',
                                            self.Resources_check_freq)

            for resourceTuple in res:
                if resourceTuple[0] in self.ResourceNamesInCheck:
                    # NOTE(review): this stops at the first resource already in
                    # check, the following ones are not queued in this cycle -
                    # confirm that `continue` was not intended
                    break
                resourceL = ['Resource'] + list(resourceTuple)
                self.ResourceNamesInCheck.insert(0, resourceL[1])
                self.ResourcesToBeChecked.put(resourceL)

            return S_OK()

        except Exception as x:
            errorStr = where(self, self.execute)
            gLogger.exception(errorStr, lException=x)
            return S_ERROR(errorStr)
コード例 #26
0
class SiteInspectorAgent( AgentModule ):
  """ SiteInspectorAgent

  Agent that collects all the site names together with their current status
  and calls PEP.enforce on each of them from a pool of worker threads.

  """

  # Max number of worker threads by default
  __maxNumberOfThreads = 15

  # Inspection freqs, defaults, the lower, the higher priority to be checked.
  # Error state usually means there is a glitch somewhere, so it has the highest
  # priority.
  # NOTE(review): not referenced by any method of this class.
  __checkingFreqs = {'Active'   : 20,
                     'Degraded' : 20,
                     'Probing'  : 20,
                     'Banned'   : 15,
                     'Unknown'  : 10,
                     'Error'    : 5}


  def __init__( self, *args, **kwargs ):
    """ Forwards all arguments to AgentModule and declares the data members,
    which are filled in by initialize() and execute().
    """

    AgentModule.__init__( self, *args, **kwargs )

    # Queue of site dictionaries for the current cycle ( set in execute )
    self.sitesToBeChecked    = None
    # ThreadPool running the _execute workers ( set in initialize )
    self.threadPool          = None
    # SiteStatus client ( set in initialize )
    self.siteClient          = None
    # Clients handed over to the PEP
    self.clients             = {}


  def initialize( self ):
    """ Standard initialize.

    Creates the thread pool ( size taken from the 'maxNumberOfThreads' option )
    and the clients passed to the PEP.

    :return: S_OK()
    """

    maxNumberOfThreads = self.am_getOption( 'maxNumberOfThreads', self.__maxNumberOfThreads )
    self.threadPool    = ThreadPool( maxNumberOfThreads, maxNumberOfThreads )

    self.siteClient  = SiteStatus()

    self.clients['SiteStatus']               = self.siteClient
    self.clients['ResourceManagementClient'] = ResourceManagementClient()

    return S_OK()

  def execute( self ):
    """ execute

    This is the main method of the agent. It gets the sites to be checked,
    calculates how many threads should be started and queues that many
    _execute jobs. Each job takes sites from the queue until it is empty.
    At the end, the method joins the queue such that the agent will not
    terminate a cycle until all sites have been processed.

    :return: S_OK(), or the error returned by getSitesToBeChecked
    """

    # Gets sites to be checked ( returns a Queue )
    sitesToBeChecked = self.getSitesToBeChecked()
    if not sitesToBeChecked['OK']:
      self.log.error( sitesToBeChecked['Message'] )
      return sitesToBeChecked
    self.sitesToBeChecked = sitesToBeChecked['Value']

    queueSize = self.sitesToBeChecked.qsize()
    pollingTime = self.am_getPollingTime()

    # Assigns number of threads on the fly such that we exhaust the PollingTime
    # without having to spawn too many threads. We assume 10 seconds per element
    # to be processed ( actually, it takes something like 1 sec per element ):
    # numberOfThreads = elements * 10(s/element) / pollingTime
    numberOfThreads = int( math.ceil( queueSize * 10. / pollingTime ) )

    self.log.info( 'Needed %d threads to process %d elements' % ( numberOfThreads, queueSize ) )

    for _x in xrange( numberOfThreads ):
      jobUp = self.threadPool.generateJobAndQueueIt( self._execute )
      if not jobUp['OK']:
        self.log.error( jobUp['Message'] )

    self.log.info( 'blocking until all sites have been processed' )
    # block until all tasks are done
    self.sitesToBeChecked.join()
    self.log.info( 'done')

    return S_OK()


  def getSitesToBeChecked( self ):
    """ getSitesToBeChecked

    Gets all the site names from the SiteStatus client, then their statuses,
    and puts one dictionary per site on a new queue.

    :return: S_OK( Queue.Queue ) or the error returned by the SiteStatus client
    """

    toBeChecked = Queue.Queue()

    res = self.siteClient.getSites('All')
    if not res['OK']:
      return res

    # get the current status
    res = self.siteClient.getSiteStatuses( res['Value'] )
    if not res['OK']:
      return res
    siteStatuses = res['Value']

    # one queue entry per site
    for site in siteStatuses:
      status = siteStatuses.get(site, 'Unknown')

      toBeChecked.put( { 'status': status,
                         'name': site,
                         'site' : site,
                         'element' : 'Site',
                         'statusType': 'all',
                         'elementType': 'Site' } )

    return S_OK( toBeChecked )


  # Private methods ............................................................

  def _execute( self ):
    """
      Method run by each of the thread that is in the ThreadPool.
      It enters a loop until there are no sites on the queue.

      On each iteration, it takes one site from the queue and calls
      PEP.enforce on it. If there are no more sites in the queue,
      the loop is finished.

      :return: S_OK() once the queue is empty
    """

    pep = PEP( clients = self.clients )

    while True:

      try:
        site = self.sitesToBeChecked.get_nowait()
      except Queue.Empty:
        return S_OK()

      try:
        resEnforce = pep.enforce( site )
        if not resEnforce['OK']:
          self.log.error( 'Failed policy enforcement', resEnforce['Message'] )
      except Exception as excp:
        # Keep the worker alive: log and go on with the next site.
        self.log.error( 'Exception during policy enforcement', repr( excp ) )
      finally:
        # Used together with join ! Must run for every item taken from the
        # queue, otherwise execute() would block forever in join().
        self.sitesToBeChecked.task_done()
コード例 #27
0
ファイル: FTSMonitorAgent.py プロジェクト: ptakha/DIRAC-1
class FTSMonitorAgent(AgentModule):
    """
    .. class:: FTSMonitorAgent

    Monitor submitted FTS jobs.

    Each cycle reads the FTS requests from TransferDB and monitors every one
    of them in a thread pool job (:meth:`monitorTransfer`).
    """
    # # transfer DB handle
    transferDB = None
    # # thread pool
    threadPool = None
    # # min threads
    minThreads = 1
    # # max threads
    maxThreads = 10

    # # missing source regexp patterns
    missingSourceErrors = [
        re.compile(r"SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] Failed"),
        re.compile(r"SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] No such file or directory"),
        re.compile(r"SOURCE error during PREPARATION phase: \[INVALID_PATH\] Failed"),
        re.compile(r"SOURCE error during PREPARATION phase: \[INVALID_PATH\] The requested file either does not exist"),
        re.compile(r"TRANSFER error during TRANSFER phase: \[INVALID_PATH\] the server sent an error response: 500 500"
                   " Command failed. : open error: No such file or directory"),
        re.compile(r"SOURCE error during TRANSFER_PREPARATION phase: \[USER_ERROR\] source file doesnt exist"),
    ]

    def initialize(self):
        """ agent's initialisation

        Creates the TransferDB handle, sets the shifter proxy option and builds
        a daemonized thread pool sized from the MinThreads/MaxThreads options.

        :return: S_OK()
        """
        self.transferDB = TransferDB()
        self.am_setOption("shifterProxy", "DataManager")
        self.minThreads = self.am_getOption("MinThreads", self.minThreads)
        self.maxThreads = self.am_getOption("MaxThreads", self.maxThreads)
        # take absolute values and make sure min <= max whatever was configured
        minmax = (abs(self.minThreads), abs(self.maxThreads))
        self.minThreads, self.maxThreads = min(minmax), max(minmax)
        self.log.info("ThreadPool min threads = %s" % self.minThreads)
        self.log.info("ThreadPool max threads = %s" % self.maxThreads)
        self.threadPool = ThreadPool(self.minThreads, self.maxThreads)
        self.threadPool.daemonize()
        return S_OK()

    def execute(self):
        """ push jobs to the thread pool

        :return: S_OK(), or the error returned by TransferDB.getFTSReq
        """
        self.log.info("Obtaining requests to monitor")
        res = self.transferDB.getFTSReq()
        if not res["OK"]:
            self.log.error("Failed to get FTS requests", res['Message'])
            return res
        if not res["Value"]:
            self.log.info("No FTS requests found to monitor.")
            return S_OK()
        ftsReqs = res["Value"]
        self.log.info("Found %s FTS jobs" % len(ftsReqs))
        i = 1
        for ftsJob in ftsReqs:
            # retry once per second until the pool accepts the job
            while True:
                self.log.debug("submitting FTS Job %s FTSReqID=%s to monitor" %
                               (i, ftsJob["FTSReqID"]))
                ret = self.threadPool.generateJobAndQueueIt(
                    self.monitorTransfer,
                    args=(ftsJob, ),
                )
                if ret["OK"]:
                    i += 1
                    break
                # # sleep 1 second to proceed
                time.sleep(1)

        self.threadPool.processAllResults()
        return S_OK()

    def ftsJobExpired(self, ftsReqID, channelID):
        """ clean up when FTS job had expired on the server side

        :param int ftsReqID: FTSReq.FTSReqID
        :param int channelID: FTSReq.ChannelID
        :return: S_OK(), or the first error returned by TransferDB when reading
                 the FileIDs, resetting the channel or updating the request
        """
        log = gLogger.getSubLogger("@%s" % str(ftsReqID))
        fileIDs = self.transferDB.getFTSReqFileIDs(ftsReqID)
        if not fileIDs["OK"]:
            log.error("Unable to retrieve FileIDs associated to %s request" %
                      ftsReqID)
            return fileIDs
        fileIDs = fileIDs["Value"]

        # # update FileToFTS table, this is just a clean up, no worry if somethings goes wrong
        for fileID in fileIDs:
            fileStatus = self.transferDB.setFileToFTSFileAttribute(
                ftsReqID, fileID, "Status", "Failed")
            if not fileStatus["OK"]:
                log.error(
                    "Unable to set FileToFTS status to 'Failed' for FileID %s: %s"
                    % (fileID, fileStatus["Message"]))

            failReason = self.transferDB.setFileToFTSFileAttribute(
                ftsReqID, fileID, "Reason", "FTS job expired on server")
            if not failReason["OK"]:
                log.error("Unable to set FileToFTS reason for FileID %s: %s" %
                          (fileID, failReason["Message"]))
        # # update Channel table
        resetChannels = self.transferDB.resetFileChannelStatus(
            channelID, fileIDs)
        if not resetChannels["OK"]:
            log.error("Failed to reset Channel table for files to retry")
            return resetChannels

        # # update FTSReq table
        log.info("Setting FTS request status to 'Finished'")
        ftsReqStatus = self.transferDB.setFTSReqStatus(ftsReqID, "Finished")
        if not ftsReqStatus["OK"]:
            log.error("Failed update FTS Request status",
                      ftsReqStatus["Message"])
            return ftsReqStatus

        # # if we land here, everything should be OK
        return S_OK()

    def monitorTransfer(self, ftsReqDict):
        """ monitors transfer obtained from TransferDB

        Updates the summary of one FTS request, stores its percentage complete
        and, when the request is terminal, hands over to :meth:`terminalRequest`.

        :param dict ftsReqDict: FTS job dictionary; the keys read here are
                                FTSReqID, FTSGuid, FTSServer, ChannelID,
                                SourceSE and TargetSE
        :return: S_OK() or the first error met
        """
        ftsReqID = ftsReqDict.get("FTSReqID")
        ftsGUID = ftsReqDict.get("FTSGuid")
        ftsServer = ftsReqDict.get("FTSServer")
        channelID = ftsReqDict.get("ChannelID")
        sourceSE = ftsReqDict.get("SourceSE")
        targetSE = ftsReqDict.get("TargetSE")

        oFTSRequest = FTSRequest()
        oFTSRequest.setFTSServer(ftsServer)
        oFTSRequest.setFTSGUID(ftsGUID)
        oFTSRequest.setSourceSE(sourceSE)
        oFTSRequest.setTargetSE(targetSE)

        log = gLogger.getSubLogger("@%s" % str(ftsReqID))

        #########################################################################
        # Perform summary update of the FTS Request and update FTSReq entries.
        log.info("Perform summary update of the FTS Request")
        infoStr = ["glite-transfer-status -s %s -l %s" % (ftsServer, ftsGUID)]
        infoStr.append("FTS GUID:   %s" % ftsGUID)
        infoStr.append("FTS Server: %s" % ftsServer)
        log.info("\n".join(infoStr))
        res = oFTSRequest.summary()
        # the last-monitor stamp is set whether or not the summary succeeded
        self.transferDB.setFTSReqLastMonitor(ftsReqID)
        if not res["OK"]:
            log.error("Failed to update the FTS request summary",
                      res["Message"])
            if "getTransferJobSummary2: Not authorised to query request" in res[
                    "Message"]:
                log.error(
                    "FTS job is not existing at the FTS server anymore, will clean it up on TransferDB side"
                )
                cleanUp = self.ftsJobExpired(ftsReqID, channelID)
                if not cleanUp["OK"]:
                    log.error(cleanUp["Message"])
                return cleanUp
            return res

        res = oFTSRequest.dumpSummary()
        if not res['OK']:
            log.error("Failed to get FTS request summary", res["Message"])
            return res
        log.info(res['Value'])
        res = oFTSRequest.getPercentageComplete()
        if not res['OK']:
            log.error("Failed to get FTS percentage complete", res["Message"])
            return res
        log.info('FTS Request found to be %.1f percent complete' %
                 res["Value"])
        self.transferDB.setFTSReqAttribute(ftsReqID, "PercentageComplete",
                                           res["Value"])
        self.transferDB.addLoggingEvent(ftsReqID, res["Value"])

        #########################################################################
        # Update the information in the TransferDB if the transfer is terminal.
        res = oFTSRequest.isRequestTerminal()
        if not res["OK"]:
            log.error("Failed to determine whether FTS request terminal",
                      res["Message"])
            return res
        if not res["Value"]:
            return S_OK()
        # # request is terminal
        return self.terminalRequest(oFTSRequest, ftsReqID, channelID, sourceSE)

    def terminalRequest(self, oFTSRequest, ftsReqID, channelID, sourceSE):
        """ process terminal FTS job

        :param FTSRequest oFTSRequest: FTSRequest instance
        :param int ftsReqID: FTSReq.FTSReqID
        :param int channelID: FTSReq.ChannelID
        :param str sourceSE: FTSReq.SourceSE
        :return: S_OK() once the bookkeeping was attempted, or the first error
                 met while collecting the file information
        """
        log = gLogger.getSubLogger("@%s" % ftsReqID)

        log.info("FTS Request found to be terminal, updating file states")
        #########################################################################
        # Get the LFNS associated to the FTS request
        log.info("Obtaining the LFNs associated to this request")
        res = self.transferDB.getFTSReqLFNs(ftsReqID, channelID, sourceSE)
        if not res["OK"]:
            log.error("Failed to obtain FTS request LFNs", res['Message'])
            return res
        files = res["Value"]
        if not files:
            log.error("No files present for transfer")
            return S_ERROR("No files were found in the DB")

        lfns = files.keys()
        log.debug("Obtained %s files" % len(lfns))
        for lfn in lfns:
            oFTSRequest.setLFN(lfn)

        res = oFTSRequest.monitor()
        if not res["OK"]:
            log.error("Failed to perform detailed monitoring of FTS request",
                      res["Message"])
            return res
        res = oFTSRequest.getFailed()
        if not res["OK"]:
            log.error("Failed to obtained failed files for FTS request",
                      res["Message"])
            return res
        failedFiles = res["Value"]
        res = oFTSRequest.getDone()
        if not res["OK"]:
            log.error("Failed to obtained successful files for FTS request",
                      res["Message"])
            return res
        completedFiles = res["Value"]

        # An LFN can be included more than once if it was entered into more than one Request.
        # FTS will only do the transfer once. We need to identify all FileIDs
        res = self.transferDB.getFTSReqFileIDs(ftsReqID)
        if not res["OK"]:
            log.error("Failed to get FileIDs associated to FTS Request",
                      res["Message"])
            return res
        fileIDs = res["Value"]
        res = self.transferDB.getAttributesForFilesList(fileIDs, ["LFN"])
        if not res["OK"]:
            log.error("Failed to get LFNs associated to FTS Request",
                      res["Message"])
            return res
        fileIDDict = res["Value"]

        fileToFTSUpdates = []
        completedFileIDs = []
        filesToRetry = []
        filesToFail = []

        for fileID, fileDict in fileIDDict.items():
            lfn = fileDict['LFN']
            if lfn in completedFiles:
                completedFileIDs.append(fileID)
                transferTime = 0
                res = oFTSRequest.getTransferTime(lfn)
                if res["OK"]:
                    transferTime = res["Value"]
                fileToFTSUpdates.append(
                    (fileID, "Completed", "", 0, transferTime))

            if lfn in failedFiles:
                failReason = ""
                res = oFTSRequest.getFailReason(lfn)
                if res["OK"]:
                    failReason = res["Value"]
                # checksum mismatch: fail the file, no FileToFTS update is recorded
                if "Source file/user checksum mismatch" in failReason:
                    filesToFail.append(fileID)
                    continue
                if self.missingSource(failReason):
                    log.error("The source SURL does not exist.",
                              "%s %s" % (lfn, oFTSRequest.getSourceSURL(lfn)))
                    filesToFail.append(fileID)
                else:
                    filesToRetry.append(fileID)
                log.error("Failed to replicate file on channel.",
                          "%s %s" % (channelID, failReason))
                fileToFTSUpdates.append((fileID, "Failed", failReason, 0, 0))

        # # update TransferDB.FileToFTS table
        updateFileToFTS = self.updateFileToFTS(ftsReqID, channelID,
                                               filesToRetry, filesToFail,
                                               completedFileIDs,
                                               fileToFTSUpdates)

        # finalize only when every FileToFTS/Channel update went through
        if updateFileToFTS["OK"] and updateFileToFTS["Value"]:
            res = oFTSRequest.finalize()
            if not res["OK"]:
                log.error(
                    "Failed to perform the finalization for the FTS request",
                    res["Message"])
                return res

            log.info('Adding logging event for FTS request')
            # Now set the FTSReq status to terminal so that it is not monitored again
            res = self.transferDB.addLoggingEvent(ftsReqID, 'Finished')
            if not res['OK']:
                log.error('Failed to add logging event for FTS Request',
                          res['Message'])

            # update TransferDB.FileToCat table
            updateFileToCat = self.updateFileToCat(oFTSRequest, channelID,
                                                   fileIDDict, completedFiles,
                                                   filesToFail)
            if not updateFileToCat["OK"]:
                log.error(updateFileToCat["Message"])

            log.debug("Updating FTS request status")
            res = self.transferDB.setFTSReqStatus(ftsReqID, 'Finished')
            if not res['OK']:
                log.error('Failed update FTS Request status', res['Message'])
        return S_OK()

    def updateFileToFTS(self, ftsReqID, channelID, filesToRetry, filesToFail,
                        completedFileIDs, fileToFTSUpdates):
        """ update TransferDB.FileToFTS table for finished request

        Every TransferDB failure is logged and the remaining updates are still
        attempted.

        :param int ftsReqID: FTSReq.FTSReqID
        :param int channelID: FTSReq.ChannelID
        :param list filesToRetry: FileIDs to retry
        :param list filesToFail: FileIDs for failed files
        :param list completedFileIDs: files completed
        :param list fileToFTSUpdates: tuples passed as they are to
                                      TransferDB.setFileToFTSFileAttributes;
                                      terminalRequest builds them as
                                      ( fileID, status, reason, 0, transferTime )
        :return: S_OK( bool ), True only when all the updates succeeded
        """
        log = gLogger.getSubLogger("@%s" % ftsReqID)

        allUpdated = True

        res = self.transferDB.resetFileChannelStatus(
            channelID, filesToRetry) if filesToRetry else S_OK()
        if not res["OK"]:
            log.error("Failed to update the Channel table for file to retry.",
                      res["Message"])
            allUpdated = False

        for fileID in filesToFail:
            log.info("Updating the Channel table for files to reschedule")
            res = self.transferDB.setFileToReschedule(fileID)
            if not res["OK"]:
                log.error("Failed to update Channel table for failed files.",
                          res["Message"])
                allUpdated = False
            elif res["Value"] == "max reschedule attempt reached":
                # the message needs a %s, otherwise the % operator raises TypeError
                log.error("setting Channel status to 'Failed' : %s" %
                          res["Value"])
                res = self.transferDB.setFileChannelStatus(
                    channelID, fileID, 'Failed')
                if not res["OK"]:
                    log.error(
                        "Failed to update Channel table for failed files.",
                        res["Message"])
                    allUpdated = False

        if completedFileIDs:
            res = self.transferDB.updateCompletedChannelStatus(
                channelID, completedFileIDs)
            if not res["OK"]:
                log.error(
                    "Failed to update the Channel table for successful files.",
                    res["Message"])
                allUpdated = False
            res = self.transferDB.updateAncestorChannelStatus(
                channelID, completedFileIDs)
            if not res["OK"]:
                log.error(
                    'Failed to update the Channel table for ancestors of successful files.',
                    res['Message'])
                allUpdated = False

        if fileToFTSUpdates:
            res = self.transferDB.setFileToFTSFileAttributes(
                ftsReqID, channelID, fileToFTSUpdates)
            if not res["OK"]:
                log.error("Failed to update the FileToFTS table for files.",
                          res["Message"])
                allUpdated = False

        return S_OK(allUpdated)

    def updateFileToCat(self, oFTSRequest, channelID, fileIDDict,
                        completedFiles, filesToFail):
        """ update TransferDB.FileToCat table for finished request

        :param FTSRequest oFTSRequest: FTSRequest instance
        :param int channelID: FTSReq.ChannelID
        :param dict fileIDDict: fileIDs dictionary ( FileID -> { 'LFN' : lfn } )
        :param completedFiles: LFNs reported as done by the FTS request
        :param list filesToFail: FileIDs for failed files
        :return: S_OK(), or the failing TransferDB result with its Message
                 prefixed by the step that failed
        """
        # NOTE(review): the result is used without checking res["OK"].
        res = oFTSRequest.getFailedRegistrations()
        failedRegistrations = res["Value"]
        regFailedFileIDs = []
        regDoneFileIDs = []
        regForgetFileIDs = []
        for fileID, fileDict in fileIDDict.items():
            lfn = fileDict['LFN']

            if lfn in failedRegistrations:
                regFailedFileIDs.append(fileID)
                # if the LFN appears more than once, FileToCat needs to be reset only once
                del failedRegistrations[lfn]
            elif lfn in completedFiles:
                regDoneFileIDs.append(fileID)
            elif fileID in filesToFail:
                regForgetFileIDs.append(fileID)

        res = self.transferDB.setRegistrationWaiting(
            channelID, regFailedFileIDs) if regFailedFileIDs else S_OK()
        if not res["OK"]:
            res["Message"] = "Failed to reset entries in FileToCat: %s" % res[
                "Message"]
            return res

        res = self.transferDB.setRegistrationDone(
            channelID, regDoneFileIDs) if regDoneFileIDs else S_OK()
        if not res["OK"]:
            res["Message"] = "Failed to set entries Done in FileToCat: %s" % res[
                "Message"]
            return res

        # This entries could also be set to Failed, but currently there is no method to do so.
        res = self.transferDB.setRegistrationDone(
            channelID, regForgetFileIDs) if regForgetFileIDs else S_OK()
        if not res["OK"]:
            res["Message"] = "Failed to set entries Done in FileToCat: %s" % res[
                "Message"]
            return res

        return S_OK()

    @classmethod
    def missingSource(cls, failReason):
        """ check if message sent by FTS server is concerning missing source file

        :param str failReason: message sent by FTS server
        :return: 1 if any pattern of missingSourceErrors is found in
                 failReason, 0 otherwise
        """
        for error in cls.missingSourceErrors:
            if error.search(failReason):
                return 1
        return 0
コード例 #28
0
class Publisher:
    """
  Class Publisher is in charge of getting dispersed information, to be published on the web.
  """

    #############################################################################

    def __init__(self,
                 VOExtension,
                 rsDBIn=None,
                 commandCallerIn=None,
                 infoGetterIn=None,
                 WMSAdminIn=None):
        """
        Standard constructor.

        Every optional collaborator that is not passed in is imported and
        created here with its default constructor.

        :params:
          :attr:`VOExtension`: string, VO Extension (e.g. 'LHCb'), prepended to
          the name of the Configurations module to import

          :attr:`rsDBIn`: optional ResourceStatusDB object
          (see :class: `DIRAC.ResourceStatusSystem.DB.ResourceStatusDB.ResourceStatusDB`)

          :attr:`commandCallerIn`: optional CommandCaller object
          (see :class: `DIRAC.ResourceStatusSystem.Command.CommandCaller.CommandCaller`)

          :attr:`infoGetterIn`: optional InfoGetter object
          (see :class: `DIRAC.ResourceStatusSystem.Utilities.InfoGetter.InfoGetter`)

          :attr:`WMSAdminIn`: optional RPCClient object for WMSAdmin
          (see :class: `DIRAC.Core.DISET.RPCClient.RPCClient`)
        """

        configModuleName = (VOExtension +
                            "DIRAC.ResourceStatusSystem.Policy.Configurations")
        self.configModule = __import__(configModuleName, globals(), locals(),
                                       ['*'])

        # Resource status database
        if rsDBIn is None:
            from DIRAC.ResourceStatusSystem.DB.ResourceStatusDB import ResourceStatusDB
            rsDBIn = ResourceStatusDB()
        self.rsDB = rsDBIn

        # Command caller
        if commandCallerIn is None:
            from DIRAC.ResourceStatusSystem.Command.CommandCaller import CommandCaller
            commandCallerIn = CommandCaller()
        self.cc = commandCallerIn

        # Info getter
        if infoGetterIn is None:
            from DIRAC.ResourceStatusSystem.Utilities.InfoGetter import InfoGetter
            infoGetterIn = InfoGetter(VOExtension)
        self.ig = infoGetterIn

        # WMS administrator client
        if WMSAdminIn is None:
            from DIRAC.Core.DISET.RPCClient import RPCClient
            WMSAdminIn = RPCClient("WorkloadManagement/WMSAdministrator")
        self.WMSAdmin = WMSAdminIn

        # pool running getInfoForPanel, and the lock guarding its shared results
        self.threadPool = ThreadPool(2, 5)
        self.lockObj = threading.RLock()
        self.infoForPanel_res = {}

#############################################################################

    def getInfo(self, granularity, name, useNewRes=False):
        """
        Standard method to get all the info to be published

        This method uses a ThreadPool (:class:`DIRAC.Core.Utilities.ThreadPool.ThreadPool`)
        with 2-5 threads. The threaded method is
        :meth:`DIRAC.ResourceStatusSystem.Utilities.Publisher.Publisher.getInfoForPanel`

        :params:
          :attr:`granularity`: string - a ValidRes

          :attr:`name`: string - name of the Validres

          :attr:`useNewRes`: boolean, passed on to InfoGetter.getInfoToApply.
          When set to true, will get new results, otherwise it will get
          cached results (where available).

        :return: a dictionary with the keys 'TotalRecords', 'ParameterNames',
          'Records' and 'Extras'; or the string "<name> does not exist!" when
          the resource / storage element is not found in the DB

        :raise InvalidRes: if `granularity` is not in ValidRes
        """

        if granularity not in ValidRes:
            raise InvalidRes(where(self, self.getInfo))

        # results written by the getInfoForPanel threads, reset on every call
        self.infoForPanel_res = {}

        siteType = None
        resourceType = None

        if granularity in ('Resource', 'Resources'):
            try:
                resourceType = self.rsDB.getMonitoredsList(
                    'Resource', ['ResourceType'], resourceName=name)[0][0]
            except IndexError:
                return "%s does not exist!" % name

        if granularity in ('StorageElement', 'StorageElements'):
            try:
                siteType = self.rsDB.getMonitoredsList(
                    'StorageElement', ['SiteType'],
                    storageElementName=name)[0][0]
            except IndexError:
                return "%s does not exist!" % name

        paramNames = [
            'Type', 'Group', 'Name', 'Policy', 'DIRAC Status', 'RSS Status',
            'Reason', 'Description'
        ]

        infoToGet = self.ig.getInfoToApply(('view_info', ),
                                           granularity,
                                           status=None,
                                           formerStatus=None,
                                           siteType=siteType,
                                           serviceType=None,
                                           resourceType=resourceType,
                                           useNewRes=useNewRes)[0]['Panels']
        infoToGet_res = {}

        recordsList = []

        infosForPolicy = {}

        for panel in infoToGet.keys():

            (granularityForPanel,
             nameForPanel) = self.__getNameForPanel(granularity, name, panel)

            if not self._resExist(granularityForPanel, nameForPanel):
                continue

            #take composite RSS result for name
            nameStatus_res = self._getStatus(nameForPanel, panel)

            # one slot per entry of paramNames
            recordBase = [None, None, None, None, None, None, None, None]

            recordBase[1] = panel.replace('_Panel', '')
            recordBase[2] = nameForPanel  #nameForPanel
            try:
                recordBase[4] = nameStatus_res[nameForPanel][
                    'DIRACStatus']  #DIRAC Status
            except KeyError:
                # _getStatus only adds 'DIRACStatus' for Site_Panel and SE_Panel
                pass
            recordBase[5] = nameStatus_res[nameForPanel][
                'RSSStatus']  #RSS Status

            record = copy.deepcopy(recordBase)
            record[0] = 'ResultsForResource'

            recordsList.append(record)

            #take info that goes into the panel
            infoForPanel = infoToGet[panel]

            for info in infoForPanel:

                self.threadPool.generateJobAndQueueIt(
                    self.getInfoForPanel,
                    args=(info, granularityForPanel, nameForPanel))

            # the records below read what the queued jobs stored
            self.threadPool.processAllResults()

            for policy in [x.keys()[0] for x in infoForPanel]:
                record = copy.deepcopy(recordBase)
                record[0] = 'SpecificInformation'
                record[3] = policy  #policyName
                record[4] = None  #DIRAC Status
                record[5] = self.infoForPanel_res[policy][
                    'Status']  #RSS status for the policy
                record[6] = self.infoForPanel_res[policy]['Reason']  #Reason
                record[7] = self.infoForPanel_res[policy]['desc']  #Description
                recordsList.append(record)

                infosForPolicy[policy] = self.infoForPanel_res[policy]['infos']

        infoToGet_res['TotalRecords'] = len(recordsList)
        infoToGet_res['ParameterNames'] = paramNames
        infoToGet_res['Records'] = recordsList

        infoToGet_res['Extras'] = infosForPolicy

        return infoToGet_res

#############################################################################

    def getInfoForPanel(self, info, granularityForPanel, nameForPanel):
        """
        Collects, for one policy of a panel, the policy result, its description
        and the additional information, and stores them in
        ``self.infoForPanel_res[<policy name>]`` under ``self.lockObj``.

        :params:
          :attr:`info`: dictionary; its first key is the policy name and its
          first value is one dictionary (or a list of dictionaries) whose first
          key/value pair is passed to :meth:`_getInfo` as format and what

          :attr:`granularityForPanel`: passed on to :meth:`_getInfo`

          :attr:`nameForPanel`: name used for the policy result and info lookups
        """

        #get single RSS policy results
        policyName = info.keys()[0]
        policyResult = self.rsDB.getPolicyRes(nameForPanel, policyName)
        if policyResult == []:
            policySummary = {'Status': 'Unknown', 'Reason': 'Unknown'}
        else:
            policySummary = {
                'Status': policyResult[0],
                'Reason': policyResult[1]
            }
        with self.lockObj:
            self.infoForPanel_res[policyName] = policySummary

        #get policy description
        description = self._getPolicyDesc(policyName)

        #get other info, always handled as a list
        extraInfos = info.values()[0]
        if not isinstance(extraInfos, list):
            extraInfos = [extraInfos]

        collected = {}
        for extraInfo in extraInfos:
            infoFormat = extraInfo.keys()[0]
            infoWhat = extraInfo.values()[0]
            collected[infoFormat] = self._getInfo(granularityForPanel,
                                                  nameForPanel, infoFormat,
                                                  infoWhat)

        with self.lockObj:
            self.infoForPanel_res[policyName]['infos'] = collected
            self.infoForPanel_res[policyName]['desc'] = description

#############################################################################

    def _getStatus(self, name, panel):
        """
        Returns ``{name: {'RSSStatus': ...}}``; for 'Site_Panel' and 'SE_Panel'
        the inner dictionary also carries a 'DIRACStatus' entry.

        :params:
          :attr:`name`: string, name to look up

          :attr:`panel`: string, panel name

        :raise RSSException: for 'Site_Panel', when getSiteMaskLogging fails
        """

        #get RSS status
        statusOfName = {
            'RSSStatus': self._getInfoFromRSSDB(name, panel)[0][1]
        }

        #get DIRAC status
        if panel == 'Site_Panel':
            maskLogging = self.WMSAdmin.getSiteMaskLogging(name)
            if not maskLogging['OK']:
                raise RSSException(where(self, self._getStatus))
            # first field of the last entry logged for this name
            statusOfName['DIRACStatus'] = maskLogging['Value'][name].pop()[0]

        elif panel == 'SE_Panel':
            readAccess = getStorageElementStatus(name, 'ReadAccess')['Value']
            writeAccess = getStorageElementStatus(name, 'WriteAccess')['Value']
            statusOfName['DIRACStatus'] = {
                'ReadAccess': readAccess,
                'WriteAccess': writeAccess
            }

        return {name: statusOfName}

#############################################################################

    def _getInfo(self, granularity, name, format, what):

        if format == 'RSS':
            info_bit_got = self._getInfoFromRSSDB(name, what)
        else:
            if isinstance(what, dict):
                command = what['CommandIn']
                extraArgs = what['args']
            else:
                command = what
                extraArgs = None

            info_bit_got = self.cc.commandInvocation(granularity, name, None,
                                                     None, command, extraArgs)

            try:
                info_bit_got = info_bit_got['Result']
            except:
                pass

        return info_bit_got

#############################################################################

    def _getInfoFromRSSDB(self, name, what):

        paramsL = ['Status']

        siteName = None
        serviceName = None
        resourceName = None
        storageElementName = None
        serviceType = None
        gridSiteName = None

        if what == 'ServiceOfSite':
            gran = 'Service'
            paramsL.insert(0, 'ServiceName')
            paramsL.append('Reason')
            siteName = name
        elif what == 'ResOfCompService':
            gran = 'Resources'
            paramsL.insert(0, 'ResourceName')
            paramsL.append('Reason')
            serviceType = name.split('@')[0]
            gridSiteName = getGOCSiteName(name.split('@')[1])
            if not gridSiteName['OK']:
                raise RSSException, gridSiteName['Message']
            gridSiteName = gridSiteName['Value']
        elif what == 'ResOfStorService':
            gran = 'Resources'
            paramsL.insert(0, 'ResourceName')
            paramsL.append('Reason')
            serviceType = name.split('@')[0]
            gridSiteName = getGOCSiteName(name.split('@')[1])
            if not gridSiteName['OK']:
                raise RSSException, gridSiteName['Message']
            gridSiteName = gridSiteName['Value']
        elif what == 'ResOfStorEl':
            gran = 'StorageElements'
            paramsL.insert(0, 'ResourceName')
            paramsL.append('Reason')
            storageElementName = name
        elif what == 'StorageElementsOfSite':
            gran = 'StorageElements'
            paramsL.insert(0, 'StorageElementName')
            paramsL.append('Reason')
            if '@' in name:
                DIRACsiteName = name.split('@').pop()
            else:
                DIRACsiteName = name
            gridSiteName = getGOCSiteName(DIRACsiteName)
            if not gridSiteName['OK']:
                raise RSSException, gridSiteName['Message']
            gridSiteName = gridSiteName['Value']
        elif what == 'Site_Panel':
            gran = 'Site'
            paramsL.insert(0, 'SiteName')
            siteName = name
        elif what == 'Service_Computing_Panel':
            gran = 'Service'
            paramsL.insert(0, 'ServiceName')
            serviceName = name
        elif what == 'Service_Storage_Panel':
            gran = 'Service'
            paramsL.insert(0, 'ServiceName')
            serviceName = name
        elif what == 'Service_VO-BOX_Panel':
            gran = 'Services'
            paramsL.insert(0, 'ServiceName')
            serviceName = name
        elif what == 'Service_VOMS_Panel':
            gran = 'Services'
            paramsL.insert(0, 'ServiceName')
            serviceName = name
        elif what == 'Resource_Panel':
            gran = 'Resource'
            paramsL.insert(0, 'ResourceName')
            resourceName = name
        elif what == 'SE_Panel':
            gran = 'StorageElement'
            paramsL.insert(0, 'StorageElementName')
            storageElementName = name

        info_bit_got = self.rsDB.getMonitoredsList(
            gran,
            paramsList=paramsL,
            siteName=siteName,
            serviceName=serviceName,
            serviceType=serviceType,
            resourceName=resourceName,
            storageElementName=storageElementName,
            gridSiteName=gridSiteName)

        return info_bit_got

#############################################################################

    def _getPolicyDesc(self, policyName):

        return self.configModule.Policies[policyName]['Description']

#############################################################################

    def __getNameForPanel(self, granularity, name, panel):

        if granularity in ('Site', 'Sites'):
            if panel == 'Service_Computing_Panel':
                granularity = 'Service'
                name = 'Computing@' + name
            elif panel == 'Service_Storage_Panel':
                granularity = 'Service'
                name = 'Storage@' + name
            elif panel == 'OtherServices_Panel':
                granularity = 'Service'
                name = 'OtherS@' + name
            elif panel == 'Service_VOMS_Panel':
                granularity = 'Service'
                name = 'VOMS@' + name
            elif panel == 'Service_VO-BOX_Panel':
                granularity = 'Service'
                name = 'VO-BOX@' + name
#      else:
#        granularity = granularity
#        name = name
#    else:
#      granularity = granularity
#      name = name

        return (granularity, name)

#############################################################################

    def _resExist(self, granularity, name):

        siteName = None
        serviceName = None
        resourceName = None
        storageElementName = None

        if granularity in ('Site', 'Sites'):
            siteName = name
        elif granularity in ('Service', 'Services'):
            serviceName = name
        elif granularity in ('Resource', 'Resources'):
            resourceName = name
        elif granularity in ('StorageElement', 'StorageElements'):
            storageElementName = name

        res = self.rsDB.getMonitoredsList(
            granularity,
            siteName=siteName,
            serviceName=serviceName,
            resourceName=resourceName,
            storageElementName=storageElementName)

        if res == []:
            return False
        else:
            return True
コード例 #29
0
class StElWriteInspectorAgent( AgentModule ):
  """ Class StElWriteInspectorAgent is in charge of going through StorageElements
      table, and pass StorageElement and Status to the PEP

      NOTE(review): the worker entry point self._executeCheck is queued in
      initialize() but is not defined in this class body - confirm where it
      is provided.
  """

#############################################################################

  def initialize( self ):
    """ Set up the DB handles, the work queue, the thread pool and the
        helper clients, then queue one self._executeCheck job per thread.

        :return: S_OK() on success, S_ERROR( message ) if the thread pool can
                 not be created or anything raises during the set-up.
    """

    try:
      self.rsDB = ResourceStatusDB()
      self.rmDB = ResourceManagementDB()

      # Elements waiting for a worker, and the names of the elements that
      # have already been queued.
      self.StorageElementToBeChecked = Queue.Queue()
      self.StorageElementInCheck     = []

      self.maxNumberOfThreads = self.am_getOption( 'maxThreadsInPool', 1 )
      self.threadPool         = ThreadPool( self.maxNumberOfThreads,
                                            self.maxNumberOfThreads )

      if not self.threadPool:
        self.log.error( 'Can not create Thread Pool' )
        return S_ERROR( 'Can not create Thread Pool' )

      self.setup                = getSetup()[ 'Value' ]
      self.VOExtension          = getExt()
      self.StorageElsWriteFreqs = CheckingFreqs[ 'StorageElsWriteFreqs' ]
      self.nc                   = NotificationClient()
      self.diracAdmin           = DiracAdmin()
      self.csAPI                = CSAPI()

      for _i in xrange( self.maxNumberOfThreads ):
        self.threadPool.generateJobAndQueueIt( self._executeCheck, args = ( None, ) )

      return S_OK()

    except Exception:
      errorStr = "StElWriteInspectorAgent initialization"
      gLogger.exception( errorStr )
      return S_ERROR( errorStr )

#############################################################################

  def execute( self ):
    """
    The main StElWriteInspectorAgent execution method.
    Calls self.rsDB.getStuffToCheck( 'StorageElementsWrite', ... ) and puts
    every returned element in self.StorageElementToBeChecked (a Queue) as
    [ 'StorageElementWrite', <fields of the element>... ], recording its first
    field in self.StorageElementInCheck (a list).

    :return: S_OK(), or S_ERROR( message ) if anything raises.
    """

    try:

      res = self.rsDB.getStuffToCheck( 'StorageElementsWrite', self.StorageElsWriteFreqs )

      for resourceTuple in res:
        if resourceTuple[ 0 ] in self.StorageElementInCheck:
          # NOTE(review): this stops at the first element that is already
          # queued, so the elements after it wait for a later cycle -
          # confirm that 'continue' is not what was meant.
          break
        resourceL = [ 'StorageElementWrite' ] + list( resourceTuple )
        self.StorageElementInCheck.insert( 0, resourceL[ 1 ] )
        self.StorageElementToBeChecked.put( resourceL )

      return S_OK()

    except Exception as x:
      errorStr = where( self, self.execute )
      gLogger.exception( errorStr, lException = x )
      return S_ERROR( errorStr )
コード例 #30
0
class OutputDataExecutor:
  """ Moves the files found in the input locations defined in the CS to the
      configured output storage elements and catalog, using a thread pool.

      Each transfer path is a CS section providing the options InputPath,
      InputFC, OutputPath, OutputFC and OutputSE. The InputFC value
      'LocalDisk' means that InputPath is a local directory.
  """

  def __init__( self, csPath = "" ):
    """ c'tor

        :param csPath: CS section holding the transfer paths; when empty,
                       '/Operations/<VirtualOrganization>/OutputData' is used.
    """
    self.log = gLogger.getSubLogger( "OutputDataExecutor" )
    if not csPath:
      vo = gConfig.getValue( "/DIRAC/VirtualOrganization", "" )
      self.__transfersCSPath = '/Operations/%s/OutputData' % vo
    else:
      self.__transfersCSPath = csPath
    self.log.verbose( "Reading transfer paths from %s" % self.__transfersCSPath )
    self.__requiredCSOptions = ['InputPath', 'InputFC', 'OutputPath', 'OutputFC', 'OutputSE']

    self.__threadPool = ThreadPool( gConfig.getValue( "%s/MinTransfers" % self.__transfersCSPath, 1 ),
                                    gConfig.getValue( "%s/MaxTransfers" % self.__transfersCSPath, 4 ),
                                    gConfig.getValue( "%s/MaxQueuedTransfers" % self.__transfersCSPath, 100 ) )
    self.__threadPool.daemonize()
    # Names of the files that have been handed to the thread pool
    self.__processingFiles = set()
    self.__okTransferredFiles = 0
    self.__okTransferredBytes = 0
    # file name -> number of failed attempts
    self.__failedFiles = {}

  def getNumOKTransferredFiles( self ):
    """ Number of files transferred successfully so far
    """
    return self.__okTransferredFiles

  def getNumOKTransferredBytes( self ):
    """ Number of bytes transferred successfully so far
    """
    return self.__okTransferredBytes

  def transfersPending( self ):
    """ Whatever the thread pool reports through isWorking()
    """
    return self.__threadPool.isWorking()

  def getDefinedTransferPaths( self ):
    """ Read the transfer paths from the CS.

        Sections that can not be read, or that lack one of the required
        options, are skipped.

        :return: S_OK( { name : optionsDict } ). The dictionary is empty when
                 the sections can not be listed, so callers can always
                 iterate over the value.
    """
    result = gConfig.getSections( self.__transfersCSPath )
    if not result['OK']:
      self.log.info( 'No Input/Output Pair defined in CS' )
      return S_OK( {} )

    tPaths = {}
    for name in result['Value']:
      csPath = self.__transfersCSPath + '/%s' % name
      result = gConfig.getOptionsDict( csPath )
      if not result['OK']:
        continue
      transferDict = result['Value']
      missing = [ opt for opt in self.__requiredCSOptions if opt not in transferDict ]
      if missing:
        # Only the first missing option is reported
        self.log.error( 'Missing Option %s in %s' % ( missing[0], csPath ) )
        continue
      tPaths[ name ] = transferDict

    return S_OK( tPaths )

  def getNumLocalOutgoingFiles( self ):
    """ Count the files waiting in the 'LocalDisk' transfer paths
    """
    result = self.getDefinedTransferPaths()
    if not result[ 'OK' ]:
      return 0
    localOutgoing = 0
    for transferDict in result[ 'Value' ].values():
      if transferDict['InputFC'] == 'LocalDisk':
        localOutgoing += len( self.getOutgoingFiles( transferDict ) )
    return localOutgoing

  def getOutgoingFiles( self, transferDict ):
    """
    Get list of files to be processed from InputPath

    For 'LocalDisk' the regular files of the directory are returned (by
    name); otherwise the files listed by the input catalog. Subdirectories
    are ignored. An empty list is returned on error.
    """
    inputFCName = transferDict['InputFC']
    inputPath = transferDict['InputPath']

    if inputFCName == 'LocalDisk':
      files = []
      try:
        for fileName in os.listdir( inputPath ):
          if os.path.isfile( os.path.join( inputPath, fileName ) ):
            files.append( fileName )
      except OSError as excp:
        # Best effort: a directory that can not be listed has no files
        self.log.verbose( "Can not list %s:" % inputPath, str( excp ) )
      return files

    inputFC = FileCatalog( [inputFCName] )
    result = inputFC.listDirectory( inputPath, True )

    if not result['OK']:
      self.log.error( result['Message'] )
      return []
    if not inputPath in result['Value']['Successful']:
      self.log.error( result['Value']['Failed'][inputPath] )
      return []

    listing = result['Value']['Successful'][inputPath]
    for subDir in listing['SubDirs']:
      self.log.info( 'Ignoring subdirectory:', subDir )
    return list( listing['Files'] )

  def checkForTransfers( self ):
    """
    Check for transfers to do and start them

    :return: S_OK(), or the error from getDefinedTransferPaths()
    """
    result = self.getDefinedTransferPaths()
    if not result[ 'OK' ]:
      return result
    tPaths = result[ 'Value' ]
    for name in tPaths:
      transferPath = tPaths[ name ]
      self.log.verbose( "Checking %s transfer path" % name )
      filesToTransfer = self.getOutgoingFiles( transferPath )
      self.log.info( "Transfer path %s has %d files" % ( name, len( filesToTransfer ) ) )
      ret = self.__addFilesToThreadPool( filesToTransfer, transferPath )
      if not ret['OK']:
        # The thread pool got full
        break
    return S_OK()

  def processAllPendingTransfers( self ):
    """ Ask the thread pool to process all the results
    """
    self.__threadPool.processAllResults()

  @transferSync
  def __addFilesToThreadPool( self, files, transferDict ):
    """ Queue one transfer job per file that is not being processed yet.

        :return: S_OK(), or the error of the thread pool as soon as a job is
                 not accepted.
    """
    for fileName in files:
      fileName = os.path.basename( fileName )
      if fileName in self.__processingFiles:
        continue
      self.__processingFiles.add( fileName )
      time.sleep( 1 )
      ret = self.__threadPool.generateJobAndQueueIt( self.__transferIfNotRegistered,
                                            args = ( fileName, transferDict ),
                                            oCallback = self.transferCallback,
                                            blocking = False )
      if not ret['OK']:
        # The thread pool got full. The job was not queued, so no callback
        # will ever unmark the file: unmark it here to retry it later.
        self.__processingFiles.discard( fileName )
        return ret
    return S_OK()

  def __transferIfNotRegistered( self, fileName, transferDict ):
    """ Transfer a file, unless it is already registered in the output
        catalog on one of the output SEs; in that case the input copy is
        deleted.

        :return: S_OK( fileName ) or S_ERROR( fileName ); transferCallback
                 identifies the file through that value.
    """
    result = self.isRegisteredInOutputCatalog( fileName, transferDict )
    if not result[ 'OK' ]:
      self.log.error( result[ 'Message' ] )
      # Report the file name, not the catalog message: the callback uses it
      # to take the file out of the processing set.
      return S_ERROR( fileName )
    if not result[ 'Value' ]:
      #Do the transfer
      return self.__retrieveAndUploadFile( fileName, transferDict )

    #Already registered. Need to delete
    self.log.info( "Transfer file %s is already registered in the output catalog" % fileName )
    filePath = os.path.join( transferDict[ 'InputPath' ], fileName )
    if transferDict[ 'InputFC' ] == 'LocalDisk':
      os.unlink( filePath )
    else:
      # filePath is the LFN of the file in the input catalog, built the same
      # way as in __retrieveAndUploadFile.
      inputFC = FileCatalog( [ transferDict['InputFC'] ] )
      replicaDict = inputFC.getReplicas( filePath )
      if not replicaDict['OK']:
        self.log.error( "Error deleting file", replicaDict['Message'] )
      elif not filePath in replicaDict['Value']['Successful']:
        self.log.error( "Error deleting file", replicaDict['Value']['Failed'][filePath] )
      else:
        for seName in list( replicaDict['Value']['Successful'][filePath] ):
          se = StorageElement( seName )
          self.log.info( 'Removing from %s:' % se.name, filePath )
          se.removeFile( filePath )
        inputFC.removeFile( filePath )
    self.log.info( "File %s deleted from %s" % ( fileName, transferDict[ 'InputFC' ] ) )
    self.__processingFiles.discard( fileName )
    return S_OK( fileName )

  def isRegisteredInOutputCatalog( self, file, transferDict ):
    """ Check whether the file has a replica in the output catalog on one of
        the output SEs.

        :return: S_OK( True / False ), or the error from the catalog
    """
    fc = FileCatalog( [ transferDict[ 'OutputFC' ] ] )
    lfn = os.path.join( transferDict['OutputPath'], os.path.basename( file ) )
    result = fc.getReplicas( lfn )
    if not result[ 'OK' ]:
      return result
    if lfn not in result[ 'Value' ][ 'Successful' ]:
      return S_OK( False )
    replicas = result[ 'Value' ][ 'Successful' ][ lfn ]
    for seName in List.fromChar( transferDict[ 'OutputSE' ], "," ):
      if seName in replicas:
        self.log.verbose( "Transfer file %s is already registered in %s SE" % ( file, seName ) )
        return S_OK( True )
    return S_OK( False )

  def __retrieveAndUploadFile( self, fileName, outputDict ):
    """
    Retrieve, Upload, and remove

    The output SEs are tried in order until one upload succeeds. Then the
    local copy is deleted and, for catalog inputs, so are the input replicas
    and the input catalog entry.

    :return: S_OK( fileName ) or S_ERROR( fileName )
    """
    inputPath = outputDict['InputPath']
    inputFCName = outputDict['InputFC']
    isLocal = inputFCName == 'LocalDisk'

    if isLocal:
      inFile = fileName
      localPath = os.path.join( inputPath, fileName )
    else:
      inputFC = FileCatalog( [inputFCName] )

      inFile = os.path.join( inputPath, fileName )
      replicaDict = inputFC.getReplicas( inFile )
      if not replicaDict['OK']:
        self.log.error( replicaDict['Message'] )
        return S_ERROR( fileName )
      if not inFile in replicaDict['Value']['Successful']:
        self.log.error( replicaDict['Value']['Failed'][inFile] )
        return S_ERROR( fileName )
      seList = list( replicaDict['Value']['Successful'][inFile] )

      inputSE = StorageElement( seList[0] )
      self.log.info( 'Retrieving from %s:' % inputSE.name, inFile )
      # lcg_util binding prevent multithreading, use subprocess instead
      res = pythonCall( 2 * 3600, inputSE.getFile, inFile )
      if not res['OK']:
        self.log.error( res['Message'] )
        return S_ERROR( fileName )
      ret = res['Value']
      if not ret['OK']:
        self.log.error( ret['Message'] )
        return S_ERROR( fileName )
      if not inFile in ret['Value']['Successful']:
        self.log.error( ret['Value']['Failed'][inFile] )
        return S_ERROR( fileName )
      # The retrieved copy is looked for under the bare file name,
      # relative to the current directory.
      localPath = fileName

    inBytes = 0
    if os.path.isfile( localPath ):
      inBytes = os.stat( localPath )[6]

    outputPath = outputDict['OutputPath']
    outputFCName = outputDict['OutputFC']
    replicaManager = ReplicaManager()
    outFile = os.path.join( outputPath, os.path.basename( localPath ) )
    transferOK = False
    for outputSEName in List.fromChar( outputDict['OutputSE'], "," ):
      outputSE = StorageElement( outputSEName )
      self.log.info( 'Trying to upload to %s:' % outputSE.name, outFile )
      # lcg_util binding prevent multithreading, use subprocess instead
      result = pythonCall( 2 * 3600, replicaManager.putAndRegister, outFile, os.path.realpath( localPath ), outputSE.name, catalog = outputFCName )
      if result['OK'] and result['Value']['OK']:
        if outFile in result['Value']['Value']['Successful']:
          transferOK = True
          break
        self.log.error( result['Value']['Value']['Failed'][outFile] )
      elif result['OK']:
        self.log.error( result['Value']['Message'] )
      else:
        self.log.error( result['Message'] )

    if not transferOK:
      return S_ERROR( fileName )

    # The upload succeeded, the local copy is not needed any more
    os.unlink( localPath )

    self.log.info( "Finished transferring %s [%s bytes]" % ( inFile, inBytes ) )
    self.__okTransferredFiles += 1
    self.__okTransferredBytes += inBytes

    if isLocal:
      return S_OK( fileName )

    # Now the file is on final SE/FC, remove from input SE/FC
    for seName in seList:
      se = StorageElement( seName )
      self.log.info( 'Removing from %s:' % se.name, inFile )
      se.removeFile( inFile )

    inputFC.removeFile( inFile )

    return S_OK( fileName )

  @transferSync
  def transferCallback( self, threadedJob, submitResult ):
    """ Callback of the transfer jobs: keeps the count of failed attempts
        per file and takes the file out of the processing set.

        The file name is carried in 'Message' on error, in 'Value' otherwise.
    """
    if not submitResult['OK']:
      fileName = submitResult['Message']
      self.__failedFiles[fileName] = self.__failedFiles.get( fileName, 0 ) + 1
    else:
      fileName = submitResult['Value']
      self.__failedFiles.pop( fileName, None )
    #Take out from processing files
    self.__processingFiles.discard( fileName )
コード例 #31
0
class ElementInspectorAgent( AgentModule ):
  """ ElementInspectorAgent

  The ElementInspector agent is a generic agent used to check the elements
  of one of the elementTypes ( e.g. Site, Resource, Node ).

  This Agent takes care of the Elements. In order to do so, it gathers
  the eligible ones and then evaluates their statuses with the PEP.

  """

  # Max number of worker threads by default
  __maxNumberOfThreads = 15

  # Inspection freqs, defaults, the lower, the higher priority to be checked.
  # Error state usually means there is a glitch somewhere, so it has the highest
  # priority. The values are used as minutes between two checks.
  __checkingFreqs = {
                     'Active'   : 20,
                     'Degraded' : 20,
                     'Probing'  : 20,
                     'Banned'   : 15,
                     'Unknown'  : 10,
                     'Error'    : 5
                     }


  def __init__( self, *args, **kwargs ):
    """ c'tor
    """

    AgentModule.__init__( self, *args, **kwargs )

    # ElementType, to be defined among Site, Resource or Node
    self.elementType         = ''
    self.elementsToBeChecked = None
    self.threadPool          = None
    self.rsClient            = None
    self.clients             = {}


  def initialize( self ):
    """ Standard initialize.

    Reads the options maxNumberOfThreads and elementType, creates the thread
    pool and the clients handed to the PEP.

    :return: S_OK(), or S_ERROR if no elementType is configured
    """

    maxNumberOfThreads = self.am_getOption( 'maxNumberOfThreads', self.__maxNumberOfThreads )
    self.threadPool    = ThreadPool( maxNumberOfThreads, maxNumberOfThreads )

    self.elementType = self.am_getOption( 'elementType', self.elementType )
    self.rsClient    = ResourceStatusClient()

    self.clients[ 'ResourceStatusClient' ]     = self.rsClient
    self.clients[ 'ResourceManagementClient' ] = ResourceManagementClient()

    if not self.elementType:
      return S_ERROR( 'Missing elementType' )

    return S_OK()

  def execute( self ):
    """ execute

    This is the main method of the agent. It gets the elements from the Database
    which are eligible to be re-checked, calculates how many threads should be
    started and spawns them. Each thread will get an element from the queue until
    it is empty. At the end, the method will join the queue such that the agent
    will not terminate a cycle until all elements have been processed.

    :return: S_OK(), or the error from getElementsToBeChecked()
    """

    # Gets elements to be checked ( returns a Queue )
    elementsToBeChecked = self.getElementsToBeChecked()
    if not elementsToBeChecked[ 'OK' ]:
      self.log.error( elementsToBeChecked[ 'Message' ] )
      return elementsToBeChecked
    self.elementsToBeChecked = elementsToBeChecked[ 'Value' ]

    queueSize   = self.elementsToBeChecked.qsize()
    pollingTime = self.am_getPollingTime()

    # Assigns number of threads on the fly such that we exhaust the PollingTime
    # without having to spawn too many threads. We assume 10 seconds per element
    # to be processed ( actually, it takes something like 1 sec per element ):
    # numberOfThreads = elements * 10(s/element) / pollingTime
    numberOfThreads = int( math.ceil( queueSize * 10. / pollingTime ) )

    self.log.info( 'Needed %d threads to process %d elements' % ( numberOfThreads, queueSize ) )

    for _x in xrange( numberOfThreads ):
      jobUp = self.threadPool.generateJobAndQueueIt( self._execute )
      if not jobUp[ 'OK' ]:
        self.log.error( jobUp[ 'Message' ] )

    self.log.info( 'blocking until all elements have been processed' )
    # block until all tasks are done
    self.elementsToBeChecked.join()
    self.log.info( 'done')

    return S_OK()


  def getElementsToBeChecked( self ):
    """ getElementsToBeChecked

    This method gets all the rows in the <self.elementType>Status table, and then
    discards entries with TokenOwner != rs_svc. On top of that, there are check
    frequencies that are applied: depending on the current status of the element,
    they will be checked more or less often.

    :return: S_OK( Queue ) of dictionaries holding the row of each element
             ( keys with the first letter lowered ) plus the key 'element',
             or the error from the client
    """

    toBeChecked = Queue.Queue()

    # We get all the elements, then we filter.
    elements = self.rsClient.selectStatusElement( self.elementType, 'Status' )
    if not elements[ 'OK' ]:
      return elements

    utcnow = datetime.datetime.utcnow().replace( microsecond = 0 )

    # filter elements by Type
    for element in elements[ 'Value' ]:

      # Maybe an overkill, but this way I have NEVER again to worry about order
      # of elements returned by mySQL on tuples
      elemDict = dict( zip( elements[ 'Columns' ], element ) )

      # This if-clause skips all the elements that should not be checked yet
      timeToNextCheck = self.__checkingFreqs[ elemDict[ 'Status' ] ]
      if utcnow <= elemDict[ 'LastCheckTime' ] + datetime.timedelta( minutes = timeToNextCheck ):
        continue

      # We skip the elements with token different than "rs_svc"
      if elemDict[ 'TokenOwner' ] != 'rs_svc':
        self.log.verbose( 'Skipping %s ( %s ) with token %s' % ( elemDict[ 'Name' ],
                                                                 elemDict[ 'StatusType' ],
                                                                 elemDict[ 'TokenOwner' ]
                                                               ))
        continue

      # We are not checking if the item is already on the queue or not. It may
      # be there, but in any case, it is not a big problem.

      lowerElementDict = { 'element' : self.elementType }
      for key, value in elemDict.items():
        lowerElementDict[ key[0].lower() + key[1:] ] = value

      # We add lowerElementDict to the queue
      toBeChecked.put( lowerElementDict )
      self.log.verbose( '%s # "%s" # "%s" # %s # %s' % ( elemDict[ 'Name' ],
                                                         elemDict[ 'ElementType' ],
                                                         elemDict[ 'StatusType' ],
                                                         elemDict[ 'Status' ],
                                                         elemDict[ 'LastCheckTime' ]) )
    return S_OK( toBeChecked )


  # Private methods ............................................................

  def _execute( self ):
    """
      Method run by the thread pool. It enters a loop until there are no elements
      on the queue. On each iteration, it evaluates the policies for such element
      and enforces the necessary actions. If there are no more elements in the
      queue, the loop is finished.

      Every element taken from the queue is marked as done, even if its
      processing raises: execute() joins the queue and would otherwise block
      forever. A failing element is logged and the loop goes on.
    """

    pep = PEP( clients = self.clients )

    while True:

      try:
        element = self.elementsToBeChecked.get_nowait()
      except Queue.Empty:
        return S_OK()

      try:

        self.log.verbose( '%s ( %s / %s ) being processed' % ( element[ 'name' ],
                                                               element[ 'status' ],
                                                               element[ 'statusType' ] ) )

        resEnforce = pep.enforce( element )
        if not resEnforce[ 'OK' ]:
          self.log.error( 'Failed policy enforcement', resEnforce[ 'Message' ] )
          continue

        resEnforce = resEnforce[ 'Value' ]

        oldStatus  = resEnforce[ 'decissionParams' ][ 'status' ]
        statusType = resEnforce[ 'decissionParams' ][ 'statusType' ]
        newStatus  = resEnforce[ 'policyCombinedResult' ][ 'Status' ]
        reason     = resEnforce[ 'policyCombinedResult' ][ 'Reason' ]

        if oldStatus != newStatus:
          self.log.info( '%s (%s) is now %s ( %s ), before %s' % ( element[ 'name' ],
                                                                   statusType,
                                                                   newStatus,
                                                                   reason,
                                                                   oldStatus ) )

      except Exception as excp:
        # One broken element must not stop this worker
        self.log.exception( 'Exception while processing element', lException = excp )

      finally:
        # Used together with join !
        self.elementsToBeChecked.task_done()
コード例 #32
0
class ElementInspectorAgent( AgentModule ):
  '''
    The ElementInspector agent is a generic agent used to check the elements
    of one of the elementTypes ( e.g. Site, Resource, Node ).

    This Agent takes care of the Elements. In order to do so, it gathers
    the eligible ones and then evaluates their statuses with the PEP.

    execute() feeds a queue with the elements that are due for a check and
    queues worker jobs ( _execute ) on a thread pool. The workers drain the
    queue in the background; finalize() blocks until the queue is drained.
  '''

  # Max number of worker threads by default
  __maxNumberOfThreads = 5
  # ElementType, to be defined among Site, Resource or Node
  __elementType = None
  # Inspection freqs ( in minutes ), defaults, the lower, the higher priority
  # to be checked. Error state usually means there is a glitch somewhere, so it
  # has the highest priority.
  __checkingFreqs = { 'Default' :
                       {
                         'Active' : 60, 'Degraded' : 30,  'Probing' : 30,
                         'Banned' : 30, 'Unknown'  : 15,  'Error'   : 15
                         }
                     }
  # queue size limit to stop feeding
  __limitQueueFeeder = 15

  def __init__( self, *args, **kwargs ):
    ''' c'tor

        Sets the configurable members to the class defaults; the clients, the
        queue and the thread pool are created later, in initialize().
    '''

    AgentModule.__init__( self, *args, **kwargs )

    # members initialization

    self.maxNumberOfThreads = self.__maxNumberOfThreads
    self.elementType        = self.__elementType
    self.checkingFreqs      = self.__checkingFreqs
    self.limitQueueFeeder   = self.__limitQueueFeeder

    self.elementsToBeChecked = None
    self.threadPool          = None
    self.rsClient            = None
    self.clients             = {}

  def initialize( self ):
    ''' Standard initialize.

        Reads the agent options ( maxNumberOfThreads, elementType,
        checkingFreqs, limitQueueFeeder ), falling back to the current member
        values, and creates the queue, the thread pool and the clients that
        are handed over to the PEP.

        :return: S_OK()
    '''

    self.maxNumberOfThreads = self.am_getOption( 'maxNumberOfThreads', self.maxNumberOfThreads )
    self.elementType        = self.am_getOption( 'elementType',        self.elementType )
    self.checkingFreqs      = self.am_getOption( 'checkingFreqs',      self.checkingFreqs )
    self.limitQueueFeeder   = self.am_getOption( 'limitQueueFeeder',   self.limitQueueFeeder )

    self.elementsToBeChecked = Queue.Queue()
    self.threadPool          = ThreadPool( self.maxNumberOfThreads,
                                           self.maxNumberOfThreads )

    self.rsClient = ResourceStatusClient()

    self.clients[ 'ResourceStatusClient' ]     = self.rsClient
    self.clients[ 'ResourceManagementClient' ] = ResourceManagementClient()

    return S_OK()

  def execute( self ):
    ''' One agent cycle.

        Unless the queue still holds more than limitQueueFeeder elements, all
        the status elements of self.elementType are selected, the ones owned
        by the 'rs_svc' token whose last check is older than their checking
        frequency are put on the queue, and worker jobs are queued on the
        thread pool to process them.

        :return: S_OK(), or the failed result of selectStatusElement
    '''

    # If there are still elements in the queue to be processed, we wait: if
    # there are more than limitQueueFeeder of them on the queue, we do not add
    # anything. The threads are running and processing items from the queue on
    # background.

    qsize = self.elementsToBeChecked.qsize()
    if qsize > self.limitQueueFeeder:
      self.log.warn( 'Queue not empty ( %s > %s ), skipping feeding loop' % ( qsize, self.limitQueueFeeder ) )
      return S_OK()

    # We get all the elements, then we filter.
    elements = self.rsClient.selectStatusElement( self.elementType, 'Status' )
    if not elements[ 'OK' ]:
      self.log.error( elements[ 'Message' ] )
      return elements

    utcnow = datetime.datetime.utcnow().replace( microsecond = 0 )

    # filter elements by Type
    for element in elements[ 'Value' ]:

      # Maybe an overkill, but this way I have NEVER again to worry about order
      # of elements returned by mySQL on tuples
      elemDict = dict( zip( elements[ 'Columns' ], element ) )

      # We skip the elements with token different than "rs_svc"
      if elemDict[ 'TokenOwner' ] != 'rs_svc':
        self.log.info( 'Skipping %s ( %s ) with token %s' % ( elemDict[ 'Name' ],
                                                              elemDict[ 'StatusType' ],
                                                              elemDict[ 'TokenOwner' ]
                                                             ))
        continue

      # Element types without their own entry use the 'Default' frequencies
      if elemDict[ 'ElementType' ] not in self.checkingFreqs:
        timeToNextCheck = self.checkingFreqs[ 'Default' ][ elemDict[ 'Status' ] ]
      else:
        timeToNextCheck = self.checkingFreqs[ elemDict[ 'ElementType' ] ][ elemDict[ 'Status' ] ]

      if utcnow - datetime.timedelta( minutes = timeToNextCheck ) > elemDict[ 'LastCheckTime' ]:

        # We are not checking if the item is already on the queue or not. It may
        # be there, but in any case, it is not a big problem.

        # Same content as elemDict, with the first letter of every key lowered
        lowerElementDict = { 'element' : self.elementType }
        for key, value in elemDict.items():
          lowerElementDict[ key[0].lower() + key[1:] ] = value

        # We add lowerElementDict to the queue
        self.elementsToBeChecked.put( lowerElementDict )
        self.log.verbose( '%s # "%s" # "%s" # %s # %s' % ( elemDict[ 'Name' ],
                                                           elemDict[ 'ElementType' ],
                                                           elemDict[ 'StatusType' ],
                                                           elemDict[ 'Status' ],
                                                           elemDict[ 'LastCheckTime' ]) )

    # Measure size of the queue, more or less, to know how many threads should
    # we start !
    queueSize      = self.elementsToBeChecked.qsize()
    # 30, could have been other number.. but it works reasonably well. ( +1 to get ceil )
    # Floor division keeps the thread count an integer whatever the division
    # semantics in force are.
    threadsToStart = max( min( self.maxNumberOfThreads, ( queueSize // 30 ) + 1 ), 1 )
    threadsRunning = self.threadPool.numWorkingThreads()

    self.log.info( 'Needed %d threads to process %d elements' % ( threadsToStart, queueSize ) )
    if threadsRunning:
      self.log.info( 'Already %d threads running' % threadsRunning )
      threadsToStart = max( 0, threadsToStart - threadsRunning )
      self.log.info( 'Starting %d threads to process %d elements' % ( threadsToStart, queueSize ) )

    # It may happen that we start two threads, 0 and 1. 1 goes DOWN, but 0 keeps
    # running. In next loop we will start a new thread, and will be called 0
    # again. To have a mechanism to see which thread is where, we append the
    # cycle number before the threadId.
    cycle = self._AgentModule__moduleProperties[ 'cyclesDone' ]

    for _x in xrange( threadsToStart ):
      threadId = '%s_%s' % ( cycle, _x )
      jobUp = self.threadPool.generateJobAndQueueIt( self._execute, args = ( threadId, ) )
      if not jobUp[ 'OK' ]:
        self.log.error( jobUp[ 'Message' ] )

    return S_OK()

  def finalize( self ):
    ''' Blocks until every element put on the queue has been acknowledged.

        :return: S_OK()
    '''

    self.log.info( 'draining queue... blocking until empty' )
    # block until all tasks are done
    self.elementsToBeChecked.join()

    return S_OK()

## Private methods #############################################################

  def _execute( self, threadNumber ):
    '''
      Method run by the thread pool. It enters a loop until there are no elements
      on the queue. On each iteration, it evaluates the policies for such element
      and enforces the necessary actions. If there are no more elements in the
      queue, the loop is finished.

      Every element taken from the queue is acknowledged with task_done(), also
      when the enforcement fails or raises, so that the join() done in
      finalize() is not left waiting for an element nobody is working on.

      :param threadNumber: identifier used only to tag the UP / DOWN log lines
      :return: S_OK() as soon as the queue is found empty
    '''

    tHeader = '%sJob%s' % ( '* '*30, threadNumber )

    self.log.info( '%s UP' % tHeader )

    pep = PEP( clients = self.clients )

    while True:

      try:
        element = self.elementsToBeChecked.get_nowait()
      except Queue.Empty:
        # The only regular way out of the loop
        self.log.info( '%s DOWN' % tHeader )
        return S_OK()

      try:

        self.log.info( '%s ( %s / %s ) being processed' % ( element[ 'name' ],
                                                            element[ 'status' ],
                                                            element[ 'statusType' ] ) )

        resEnforce = pep.enforce( element )
        if not resEnforce[ 'OK' ]:
          self.log.error( resEnforce[ 'Message' ] )
        else:
          resEnforce = resEnforce[ 'Value' ]

          oldStatus  = resEnforce[ 'decissionParams' ][ 'status' ]
          statusType = resEnforce[ 'decissionParams' ][ 'statusType' ]
          newStatus  = resEnforce[ 'policyCombinedResult' ][ 'Status' ]
          reason     = resEnforce[ 'policyCombinedResult' ][ 'Reason' ]

          if oldStatus != newStatus:
            self.log.info( '%s (%s) is now %s ( %s ), before %s' % ( element[ 'name' ],
                                                                     statusType,
                                                                     newStatus,
                                                                     reason,
                                                                     oldStatus ) )

      finally:
        # Used together with join ! Runs on every exit path of the iteration,
        # including an exception, which is still propagated afterwards.
        self.elementsToBeChecked.task_done()
コード例 #33
0
class SystemAdministratorIntegrator(object):
    """ Runs the same SystemAdministratorClient call on a set of hosts.

    Any public attribute that is not defined on the object is treated as the
    name of a remote method: ``integrator.someMethod(args)`` runs ``someMethod``
    on every responding host through a thread pool and returns
    ``S_OK({host: result})``.
    """

    def __init__(self, **kwargs):
        """ Constructor

        :param kwargs: ``hosts`` gives the hosts to contact; without it the hosts
                       come from Registry.getHosts(), minus the ones listed in
                       ``exclude``. What is left in kwargs after removing
                       ``hosts`` is passed on to every SystemAdministratorClient
                       created after construction.
        """
        if 'hosts' in kwargs:
            self.__hosts = kwargs['hosts']
            del kwargs['hosts']
        else:
            result = Registry.getHosts()
            if result['OK']:
                self.__hosts = result['Value']
            else:
                self.__hosts = []
            # Excluded hosts
            if 'exclude' in kwargs:
                self.__hosts = list(set(self.__hosts) - set(kwargs['exclude']))

        # Ping the hosts to remove those that don't have a SystemAdministrator service
        sysAdminHosts = []
        self.silentHosts = []
        self.__resultDict = {}
        # The ping clients are created without any extra client argument
        self.__kwargs = {}
        pool = ThreadPool(len(self.__hosts))
        for host in self.__hosts:
            pool.generateJobAndQueueIt(self.__executeClient,
                                       args=[host, "ping"],
                                       kwargs={},
                                       oCallback=self.__processResult)

        pool.processAllResults()
        for host, result in self.__resultDict.items():
            if result['OK']:
                sysAdminHosts.append(host)
            else:
                self.silentHosts.append(host)
        del pool

        self.__hosts = sysAdminHosts

        self.__kwargs = dict(kwargs)
        self.__pool = ThreadPool(len(self.__hosts))
        self.__resultDict = {}

    def getSilentHosts(self):
        """ Get a list of non-responding hosts

        :return: list of hosts
        """
        return self.silentHosts

    def getRespondingHosts(self):
        """ Get a list of responding hosts

        :return: list of hosts
        """
        return self.__hosts

    def __getattr__(self, name):
        """ Treat an unknown public attribute as the remote method to call.

        Only invoked when normal attribute lookup fails. The name is remembered
        in self.call and execute is returned, so the call that follows runs that
        method on all hosts.

        Names starting with an underscore are refused: they are private members
        that have not been set yet or protocol hooks probed by hasattr() and
        similar helpers, never remote methods.

        :raise AttributeError: for names starting with an underscore
        """
        if name.startswith('_'):
            raise AttributeError(name)
        self.call = name
        return self.execute

    def __executeClient(self, host, method, *parms, **kwargs):
        """ Execute RPC method on a given host

        The result is tagged with the host so that the callback can file it.
        """
        hostName = Registry.getHostOption(host, 'Host', host)
        client = SystemAdministratorClient(hostName, **self.__kwargs)
        result = getattr(client, method)(*parms, **kwargs)
        result['Host'] = host
        return result

    def __processResult(self, id_, result):
        """ Collect results in the final structure

        :param id_: unused
        :param result: result of __executeClient; its 'Host' entry is removed
                       and used as the key under which the result is stored
        """
        host = result['Host']
        del result['Host']
        self.__resultDict[host] = result

    def execute(self, *args, **kwargs):
        """ Main execution method

        Runs the method named by self.call, with the given arguments, on every
        responding host and waits for all the results.

        :return: S_OK( { host : result } )
        """
        self.__resultDict = {}
        for host in self.__hosts:
            self.__pool.generateJobAndQueueIt(self.__executeClient,
                                              args=[host, self.call] +
                                              list(args),
                                              kwargs=kwargs,
                                              oCallback=self.__processResult)

        self.__pool.processAllResults()
        return S_OK(self.__resultDict)
コード例 #34
0
class SystemAdministratorIntegrator( object ):
  """ Runs the same SystemAdministratorClient call on a set of hosts.

      Any public attribute that is not defined on the object is treated as the
      name of a remote method: integrator.someMethod( args ) runs someMethod on
      every responding host through a thread pool and returns
      S_OK( { host : result } ).
  """

  def __init__( self, **kwargs ):
    """ Constructor

    :param kwargs: 'hosts' gives the hosts to contact; without it the hosts come
                   from Registry.getHosts(), minus the ones listed in 'exclude'.
                   What is left in kwargs after removing 'hosts' is passed on to
                   every SystemAdministratorClient created after construction.
    """
    if 'hosts' in kwargs:
      self.__hosts = kwargs['hosts']
      del kwargs['hosts']
    else:
      result = Registry.getHosts()
      if result['OK']:
        self.__hosts = result['Value']
      else:
        self.__hosts = []
      # Excluded hosts
      if 'exclude' in kwargs:
        self.__hosts = list ( set( self.__hosts ) - set( kwargs[ 'exclude' ] ) )

    # Ping the hosts to remove those that don't have a SystemAdministrator service
    sysAdminHosts = []
    self.silentHosts = []
    self.__resultDict = {}
    # The ping clients are created without any extra client argument
    self.__kwargs = {}
    pool = ThreadPool( len( self.__hosts ) )
    for host in self.__hosts:
      pool.generateJobAndQueueIt( self.__executeClient,
                                  args = [ host, "ping" ],
                                  kwargs = {},
                                  oCallback = self.__processResult )

    pool.processAllResults()
    for host, result in self.__resultDict.items():
      if result['OK']:
        sysAdminHosts.append( host )
      else:
        self.silentHosts.append( host )
    del pool

    self.__hosts = sysAdminHosts

    self.__kwargs = dict( kwargs )
    self.__pool = ThreadPool( len( self.__hosts ) )
    self.__resultDict = {}

  def getSilentHosts( self ):
    """ Get a list of non-responding hosts
    :return: list of hosts
    """
    return self.silentHosts

  def getRespondingHosts( self ):
    """ Get a list of responding hosts
    :return: list of hosts
    """
    return self.__hosts

  def __getattr__( self, name ):
    """ Treat an unknown public attribute as the remote method to call.

    Only invoked when normal attribute lookup fails. The name is remembered in
    self.call and execute is returned, so the call that follows runs that
    method on all hosts.

    Names starting with an underscore are refused: they are private members
    that have not been set yet or protocol hooks probed by hasattr() and
    similar helpers, never remote methods.

    :raise AttributeError: for names starting with an underscore
    """
    if name.startswith( '_' ):
      raise AttributeError( name )
    self.call = name
    return self.execute

  def __executeClient( self, host, method, *parms, **kwargs ):
    """ Execute RPC method on a given host

    The result is tagged with the host so that the callback can file it.
    """
    hostName = Registry.getHostOption( host, 'Host', host)
    client = SystemAdministratorClient( hostName, **self.__kwargs )
    result = getattr( client, method )( *parms, **kwargs )
    result['Host'] = host
    return result

  def __processResult( self, id_, result ):
    """ Collect results in the final structure

    :param id_: unused
    :param result: result of __executeClient; its 'Host' entry is removed and
                   used as the key under which the result is stored
    """
    host = result['Host']
    del result['Host']
    self.__resultDict[host] = result

  def execute(self, *args, **kwargs ):
    """ Main execution method

    Runs the method named by self.call, with the given arguments, on every
    responding host and waits for all the results.

    :return: S_OK( { host : result } )
    """
    self.__resultDict = {}
    for host in self.__hosts:
      self.__pool.generateJobAndQueueIt( self.__executeClient,
                                         args = [ host, self.call ] + list(args),
                                         kwargs = kwargs,
                                         oCallback = self.__processResult )

    self.__pool.processAllResults()
    return S_OK( self.__resultDict )
コード例 #35
0
ファイル: SiteInspectorAgent.py プロジェクト: thom991/DIRAC
class SiteInspectorAgent(AgentModule):
    """ SiteInspectorAgent

    The SiteInspectorAgent agent is an agent that is used to get all the site
    names and trigger PEP to evaluate their status.
    """

    # Max number of worker threads by default
    __maxNumberOfThreads = 15

    # Inspection freqs, defaults, the lower, the higher priority to be checked.
    # Error state usually means there is a glitch somewhere, so it has the highest
    # priority.
    # NOTE: not referenced by any method of this class.
    __checkingFreqs = {
        'Active': 20,
        'Degraded': 20,
        'Probing': 20,
        'Banned': 15,
        'Unknown': 10,
        'Error': 5
    }

    def __init__(self, *args, **kwargs):
        """ c'tor: members are created empty and filled in initialize / execute
        """

        AgentModule.__init__(self, *args, **kwargs)

        # Queue of sites to process, rebuilt on every cycle by execute()
        self.sitesToBeChecked = None
        self.threadPool = None
        self.siteClient = None
        # Clients handed over to the PEP
        self.clients = {}

    def initialize(self):
        """ Standard initialize.

        Creates the thread pool ( sized by the maxNumberOfThreads option ) and
        the clients used by the PEP.

        :return: S_OK()
        """

        maxNumberOfThreads = self.am_getOption('maxNumberOfThreads',
                                               self.__maxNumberOfThreads)
        self.threadPool = ThreadPool(maxNumberOfThreads, maxNumberOfThreads)

        self.siteClient = SiteStatus()

        self.clients['SiteStatus'] = self.siteClient
        self.clients['ResourceManagementClient'] = ResourceManagementClient()

        return S_OK()

    def execute(self):
        """ execute

        This is the main method of the agent. It gets the sites from the Database,
        calculates how many threads should be started and spawns them. Each thread
        will get a site from the queue until it is empty. At the end, the method
        will join the queue such that the agent will not terminate a cycle until
        all sites have been processed.

        :return: S_OK(), or the failed result of getSitesToBeChecked
        """

        # Gets sites to be checked ( returns a Queue )
        sitesToBeChecked = self.getSitesToBeChecked()
        if not sitesToBeChecked['OK']:
            self.log.error(sitesToBeChecked['Message'])
            return sitesToBeChecked
        self.sitesToBeChecked = sitesToBeChecked['Value']

        queueSize = self.sitesToBeChecked.qsize()
        pollingTime = self.am_getPollingTime()

        # Assigns number of threads on the fly such that we exhaust the PollingTime
        # without having to spawn too many threads. We assume 10 seconds per element
        # to be processed ( actually, it takes something like 1 sec per element ):
        # numberOfThreads = elements * 10(s/element) / pollingTime
        numberOfThreads = int(math.ceil(queueSize * 10. / pollingTime))

        self.log.info('Needed %d threads to process %d elements' %
                      (numberOfThreads, queueSize))

        for _x in xrange(numberOfThreads):
            jobUp = self.threadPool.generateJobAndQueueIt(self._execute)
            if not jobUp['OK']:
                self.log.error(jobUp['Message'])

        self.log.info('blocking until all sites have been processed')
        # block until all tasks are done
        self.sitesToBeChecked.join()
        self.log.info('done')

        return S_OK()

    def getSitesToBeChecked(self):
        """ getSitesToBeChecked

        Asks the SiteStatus client for all the site names and for their current
        statuses, and puts one dictionary per site on a new queue.

        :return: S_OK( Queue ), or the failed result of the SiteStatus client
        """

        toBeChecked = Queue.Queue()

        res = self.siteClient.getSites('All')
        if not res['OK']:
            return res

        # get the current status
        res = self.siteClient.getSiteStatuses(res['Value'])
        if not res['OK']:
            return res

        # one entry per site
        for site in res['Value']:
            status = res['Value'].get(site, 'Unknown')

            toBeChecked.put({
                'status': status,
                'name': site,
                'site': site,
                'element': 'Site',
                'statusType': 'all',
                'elementType': 'Site'
            })

        return S_OK(toBeChecked)

    # Private methods ............................................................

    def _execute(self):
        """
        Method run by each of the thread that is in the ThreadPool.
        It enters a loop until there are no sites on the queue.

        On each iteration, it evaluates the policies for such site
        and enforces the necessary actions. If there are no more sites in the
        queue, the loop is finished.

        Every site taken from the queue is acknowledged with task_done(), also
        when the enforcement fails or raises: execute() joins the queue, and a
        site that is never acknowledged would block the agent cycle forever.

        :return: S_OK() as soon as the queue is found empty
        """

        pep = PEP(clients=self.clients)

        while True:

            try:
                site = self.sitesToBeChecked.get_nowait()
            except Queue.Empty:
                return S_OK()

            try:
                resEnforce = pep.enforce(site)
                if not resEnforce['OK']:
                    self.log.error('Failed policy enforcement',
                                   resEnforce['Message'])
            finally:
                # Used together with join ! An exception raised by the
                # enforcement is still propagated after the acknowledgement.
                self.sitesToBeChecked.task_done()
コード例 #36
0
class SSInspectorAgent(AgentModule):
    '''
    The SSInspector agent ( SiteInspectorAgent ) is one of the four
    InspectorAgents of the RSS.

    This Agent takes care of the Sites. In order to do so, it gathers
    the eligible ones and then evaluates their statuses with the PEP.

    If you want to know more about the SSInspectorAgent, scroll down to the
    end of the file.
    '''

    # Too many public methods
    # pylint: disable-msg=R0904

    def initialize(self):
        ''' Creates the client, the queue and the thread pool, and queues one
            self._executeCheck job per thread of the pool.

            :return: S_OK(), or S_ERROR if the pool cannot be created or any
                     exception is raised
        '''

        # Attribute defined outside __init__
        # pylint: disable-msg=W0201

        try:
            self.rsClient = ResourceStatusClient()
            self.sitesFreqs = CS.getTypedDictRootedAtOperations(
                'CheckingFreqs/SitesFreqs')
            self.sitesToBeChecked = Queue.Queue()
            # ( name, statusType ) pairs that have been put on the queue
            self.siteNamesInCheck = []

            self.maxNumberOfThreads = self.am_getOption('maxThreadsInPool', 1)
            self.threadPool = ThreadPool(self.maxNumberOfThreads,
                                         self.maxNumberOfThreads)
            if not self.threadPool:
                self.log.error('Can not create Thread Pool')
                return S_ERROR('Can not create Thread Pool')

            for _i in xrange(self.maxNumberOfThreads):
                self.threadPool.generateJobAndQueueIt(self._executeCheck,
                                                      args=(None, ))

            return S_OK()

        except Exception:
            errorStr = "SSInspectorAgent initialization"
            self.log.exception(errorStr)
            return S_ERROR(errorStr)

    def execute(self):
        ''' One agent cycle: asks the ResourceStatusClient for the Sites to be
            checked and puts on the queue the ones that are not there yet.

            :return: S_OK(), the failed result of getStuffToCheck, or S_ERROR
                     if any exception is raised
        '''

        try:

            kwargs = {
                'meta': {
                    'columns': [
                        'SiteName', 'StatusType', 'Status', 'FormerStatus',
                        'SiteType', 'TokenOwner'
                    ]
                },
                'tokenOwner': 'RS_SVC'
            }

            resQuery = self.rsClient.getStuffToCheck('Site', self.sitesFreqs,
                                                     **kwargs)
            if not resQuery['OK']:
                self.log.error(resQuery['Message'])
                return resQuery

            resQuery = resQuery['Value']
            self.log.info('Found %d candidates to be checked.' % len(resQuery))

            for siteTuple in resQuery:

                # The first two columns requested are SiteName and StatusType
                nameAndType = (siteTuple[0], siteTuple[1])

                if nameAndType in self.siteNamesInCheck:
                    self.log.info('%s(%s) discarded, already on the queue' %
                                  nameAndType)
                    continue

                resourceL = ['Site'] + siteTuple

                self.siteNamesInCheck.insert(0, nameAndType)
                self.sitesToBeChecked.put(resourceL)

            return S_OK()

        # "as" form: accepted from Python 2.6 on, unlike the comma form which
        # is a syntax error for Python 3
        except Exception as x:
            errorStr = where(self, self.execute)
            self.log.exception(errorStr, lException=x)
            return S_ERROR(errorStr)
コード例 #37
0
ファイル: RSInspectorAgent.py プロジェクト: bmb/DIRAC
class RSInspectorAgent( AgentModule ):
  """
    The RSInspector agent ( ResourceInspectorAgent ) is one of the four
    InspectorAgents of the RSS.

    This Agent takes care of the Resources. In order to do so, it gathers
    the eligible ones and then evaluates their statuses with the PEP.

    If you want to know more about the RSInspectorAgent, scroll down to the
    end of the file.
  """

  # Too many public methods
  # pylint: disable-msg=R0904

  def initialize( self ):
    """ Creates the client, the queue and the thread pool, and queues one
        self._executeCheck job per thread of the pool.

        :return: S_OK(), or S_ERROR if the pool cannot be created or any
                 exception is raised
    """

    # Attribute defined outside __init__
    # pylint: disable-msg=W0201

    try:
      self.rsClient             = ResourceStatusClient()
      self.resourcesFreqs       = CS.getTypedDictRootedAtOperations( 'CheckingFreqs/ResourcesFreqs' )
      self.resourcesToBeChecked = Queue.Queue()
      # ( name, statusType ) pairs that have been put on the queue
      self.resourceNamesInCheck = []

      self.maxNumberOfThreads = self.am_getOption( 'maxThreadsInPool', 1 )
      self.threadPool         = ThreadPool( self.maxNumberOfThreads,
                                            self.maxNumberOfThreads )
      if not self.threadPool:
        self.log.error( 'Can not create Thread Pool' )
        return S_ERROR( 'Can not create Thread Pool' )

      for _i in xrange( self.maxNumberOfThreads ):
        self.threadPool.generateJobAndQueueIt( self._executeCheck, args = ( None, ) )

      return S_OK()

    except Exception:
      errorStr = "RSInspectorAgent initialization"
      self.log.exception( errorStr )
      return S_ERROR( errorStr )

  def execute( self ):
    """ One agent cycle: asks the ResourceStatusClient for the Resources to be
        checked and puts on the queue the ones that are not there yet.

        :return: S_OK(), the failed result of getStuffToCheck, or S_ERROR if
                 any exception is raised
    """

    try:

      kwargs = { 'meta' : { 'columns' : [ 'ResourceName', 'StatusType', 'Status',
                                          'FormerStatus', 'SiteType', 'ResourceType',
                                          'TokenOwner' ] },
                 'tokenOwner' : 'RS_SVC' }

      resQuery = self.rsClient.getStuffToCheck( 'Resource', self.resourcesFreqs, **kwargs )
      if not resQuery[ 'OK' ]:
        self.log.error( resQuery[ 'Message' ] )
        return resQuery

      resQuery = resQuery[ 'Value' ]
      self.log.info( 'Found %d candidates to be checked.' % len( resQuery ) )

      for resourceTuple in resQuery:

        # The first two columns requested are ResourceName and StatusType
        nameAndType = ( resourceTuple[ 0 ], resourceTuple[ 1 ] )

        if nameAndType in self.resourceNamesInCheck:
          self.log.info( '%s(%s) discarded, already on the queue' % nameAndType )
          continue

        resourceL = [ 'Resource' ] + resourceTuple

        self.resourceNamesInCheck.insert( 0, nameAndType )
        self.resourcesToBeChecked.put( resourceL )

      return S_OK()

    # "as" form: accepted from Python 2.6 on, unlike the comma form which is a
    # syntax error for Python 3
    except Exception as x:
      errorStr = where( self, self.execute )
      self.log.exception( errorStr, lException = x )
      return S_ERROR( errorStr )
コード例 #38
0
class OutputDataExecutor:
    def __init__(self, csPath=""):
        """ Set up the logger, the CS path of the transfers and the thread pool.

        :param csPath: CS section holding the transfer paths; when empty,
                       '/Operations/<vo>/OutputData' is used, with <vo> read
                       from /DIRAC/VirtualOrganization
        """
        self.log = gLogger.getSubLogger("OutputDataExecutor")

        if csPath:
            self.__transfersCSPath = csPath
        else:
            vo = gConfig.getValue("/DIRAC/VirtualOrganization", "")
            self.__transfersCSPath = '/Operations/%s/OutputData' % vo
        basePath = self.__transfersCSPath
        self.log.verbose("Reading transfer paths from %s" % basePath)

        # Options every transfer path section must define
        self.__requiredCSOptions = [
            'InputPath', 'InputFC', 'OutputPath', 'OutputFC', 'OutputSE'
        ]

        # The three pool arguments are read from the CS, with defaults
        minTransfers = gConfig.getValue("%s/MinTransfers" % basePath, 1)
        maxTransfers = gConfig.getValue("%s/MaxTransfers" % basePath, 4)
        maxQueuedTransfers = gConfig.getValue(
            "%s/MaxQueuedTransfers" % basePath, 100)
        self.__threadPool = ThreadPool(minTransfers, maxTransfers,
                                       maxQueuedTransfers)
        self.__threadPool.daemonize()

        # Bookkeeping
        self.__processingFiles = set()
        self.__okTransferredFiles = 0
        self.__okTransferredBytes = 0
        self.__failedFiles = {}

    def getNumOKTransferredFiles(self):
        """ Return the current value of the OK-transferred-files counter. """
        okFiles = self.__okTransferredFiles
        return okFiles

    def getNumOKTransferredBytes(self):
        """ Return the current value of the OK-transferred-bytes counter. """
        okBytes = self.__okTransferredBytes
        return okBytes

    def transfersPending(self):
        """ Return what the thread pool's isWorking() reports. """
        pool = self.__threadPool
        return pool.isWorking()

    def getDefinedTransferPaths(self):
        """ Read the transfer paths defined in the CS.

        Every subsection of the transfers CS path is one transfer path. A
        section whose options cannot be read is skipped silently; a section
        missing one of the required options is skipped after logging the first
        missing option.

        :return: S_OK( { name : options dict } ); the dict is empty when the
                 sections cannot be listed
        """
        result = gConfig.getSections(self.__transfersCSPath)
        if not result['OK']:
            self.log.info('No Input/Output Pair defined in CS')
            # An empty dict, not a bare S_OK(): the callers in this class
            # iterate over the returned value.
            return S_OK({})

        tPaths = {}
        for name in result['Value']:
            csPath = self.__transfersCSPath + '/%s' % name
            result = gConfig.getOptionsDict(csPath)
            if not result['OK']:
                continue
            transferDict = result['Value']
            for option in self.__requiredCSOptions:
                if option not in transferDict:
                    self.log.error('Missing Option %s in %s' % (option, csPath))
                    break
            else:
                # Reached only when no required option is missing
                tPaths[name] = transferDict

        return S_OK(tPaths)

    def getNumLocalOutgoingFiles(self):
        """ Count the outgoing files of the transfer paths whose InputFC is
        'LocalDisk'.

        :return: number of files; 0 when the transfer paths cannot be read
        """
        result = self.getDefinedTransferPaths()
        if not result['OK']:
            return 0
        # The value can be empty ( None ) when no transfer path is defined
        tPaths = result['Value'] or {}
        localOutgoing = 0
        for name in tPaths:
            transferDict = tPaths[name]
            if transferDict['InputFC'] != 'LocalDisk':
                continue
            localOutgoing += len(self.getOutgoingFiles(transferDict))
        return localOutgoing

    def getOutgoingFiles(self, transferDict):
        """ Get list of files to be processed from InputPath

        For InputFC 'LocalDisk' the regular files found directly in InputPath
        are returned ( names only ). Otherwise InputFC is used as the name of a
        file catalog, InputPath is listed there and the keys of its 'Files'
        entry are returned; subdirectories are only logged.

        :param transferDict: options of a transfer path; 'InputFC' and
                             'InputPath' are read
        :return: list of files; empty when the listing fails
        """
        inputFCName = transferDict['InputFC']
        inputPath = transferDict['InputPath']

        if inputFCName == 'LocalDisk':
            files = []
            try:
                for fileName in os.listdir(inputPath):
                    if os.path.isfile(os.path.join(inputPath, fileName)):
                        files.append(fileName)
            except OSError as excp:
                # Best effort: a missing or unreadable directory simply means
                # there is nothing to transfer from it right now.
                self.log.verbose('Can not list %s:' % inputPath, str(excp))
            return files

        inputFC = FileCatalog([inputFCName])
        result = inputFC.listDirectory(inputPath, True)

        if not result['OK']:
            self.log.error(result['Message'])
            return []
        if inputPath not in result['Value']['Successful']:
            self.log.error(result['Value']['Failed'][inputPath])
            return []

        subDirs = result['Value']['Successful'][inputPath]['SubDirs']
        files = result['Value']['Successful'][inputPath]['Files']
        for subDir in subDirs:
            self.log.info('Ignoring subdirectory:', subDir)
        return list(files.keys())

    def checkForTransfers(self):
        """ Check for transfers to do and start them

        For every defined transfer path the outgoing files are listed and
        handed to the thread pool. The loop stops at the first transfer path
        whose files cannot all be queued.

        :return: the failed result of getDefinedTransferPaths when the paths
                 cannot be read; nothing otherwise
        """
        result = self.getDefinedTransferPaths()
        if not result['OK']:
            return result
        # The value can be empty ( None ) when no transfer path is defined
        tPaths = result['Value'] or {}
        for name in tPaths:
            transferPath = tPaths[name]
            self.log.verbose("Checking %s transfer path" % name)
            filesToTransfer = self.getOutgoingFiles(transferPath)
            self.log.info("Transfer path %s has %d files" %
                          (name, len(filesToTransfer)))
            ret = self.__addFilesToThreadPool(filesToTransfer, transferPath)
            if not ret['OK']:
                # The thread pool got full
                break

    def processAllPendingTransfers(self):
        """ Ask the thread pool to process all its results. """
        pool = self.__threadPool
        pool.processAllResults()

    @transferSync
    def __addFilesToThreadPool(self, files, transferDict):
        """ Queue one transfer job per file that is not being processed yet.

        NOTE(review): transferSync is defined elsewhere; presumably it
        serialises the access to the bookkeeping members - confirm.

        :param files: file names or paths; only the base name is used
        :param transferDict: options of the transfer path the files belong to
        :return: S_OK(), or the failed result of the thread pool when a job
                 cannot be queued
        """
        for filePath in files:
            fileName = os.path.basename(filePath)
            if fileName in self.__processingFiles:
                continue
            self.__processingFiles.add(fileName)
            time.sleep(1)
            ret = self.__threadPool.generateJobAndQueueIt(
                self.__transferIfNotRegistered,
                args=(fileName, transferDict),
                oCallback=self.transferCallback,
                blocking=False)
            if not ret['OK']:
                # The thread pool got full. No job exists for this file, so it
                # must not stay flagged as being processed: it would be skipped
                # by every later call and never transferred.
                self.__processingFiles.discard(fileName)
                return ret
        return S_OK()

    def __transferIfNotRegistered(self, file, transferDict):
        """ Transfer a file unless it is already registered in the output
        catalog; in that case remove it from the input side instead.

        :param file: name of the file inside the InputPath of the transfer
        :param transferDict: options of the transfer path
        :return: the failed result of the registration check, S_OK( file )
                 after the removal branch, or the result of
                 __retrieveAndUploadFile
        """
        result = self.isRegisteredInOutputCatalog(file, transferDict)
        if not result['OK']:
            self.log.error(result['Message'])
            return result

        if not result['Value']:
            # Do the transfer
            return self.__retrieveAndUploadFile(file, transferDict)

        # Already registered. Need to delete
        self.log.info(
            "Transfer file %s is already registered in the output catalog"
            % file)
        filePath = os.path.join(transferDict['InputPath'], file)
        if transferDict['InputFC'] == 'LocalDisk':
            os.unlink(filePath)
        else:
            inputFC = FileCatalog([transferDict['InputFC']])
            replicaDict = inputFC.getReplicas(filePath)
            # The replicas were asked for filePath, so the answer is read back
            # with filePath as well.
            if not replicaDict['OK']:
                self.log.error("Error deleting file",
                               replicaDict['Message'])
            elif filePath not in replicaDict['Value']['Successful']:
                self.log.error("Error deleting file",
                               replicaDict['Value']['Failed'][filePath])
            else:
                seList = replicaDict['Value']['Successful'][filePath].keys()
                for seName in seList:
                    se = StorageElement(seName)
                    self.log.info('Removing from %s:' % se.name, filePath)
                    se.removeFile(filePath)
                # NOTE(review): the catalog entry is removed by bare file name
                # while the lookup above used the full path - confirm which one
                # the catalog expects.
                inputFC.removeFile(file)
        self.log.info("File %s deleted from %s" %
                      (file, transferDict['InputFC']))
        self.__processingFiles.discard(file)
        return S_OK(file)

    def isRegisteredInOutputCatalog(self, file, transferDict):
        """Tell whether the file already has a replica on an output SE.

        The name looked up is the base name of ``file`` joined to
        transferDict['OutputPath'], queried in the catalog named by
        transferDict['OutputFC'].

        :param file: file name or path; only its base name is used
        :param transferDict: transfer description; 'OutputFC', 'OutputPath'
                             and the comma separated 'OutputSE' are read
        :return: the failed catalog result as is, S_OK(True) when one of the
                 output SEs holds a replica, S_OK(False) otherwise
        """
        catalog = FileCatalog([transferDict['OutputFC']])
        outputLFN = os.path.join(transferDict['OutputPath'],
                                 os.path.basename(file))
        lookup = catalog.getReplicas(outputLFN)
        if not lookup['OK']:
            return lookup

        found = lookup['Value']['Successful']
        if outputLFN in found:
            seReplicas = found[outputLFN]
            for candidateSE in List.fromChar(transferDict['OutputSE'], ","):
                if candidateSE not in seReplicas:
                    continue
                self.log.verbose(
                    "Transfer file %s is already registered in %s SE" %
                    (file, candidateSE))
                return S_OK(True)
        return S_OK(False)

    def __retrieveAndUploadFile(self, file, outputDict):
        """Retrieve a file, upload it to an output SE and remove the input.

        For a 'LocalDisk' input the file is taken from
        outputDict['InputPath']. Otherwise it is first fetched from the first
        storage element the input catalog reports (the code then expects the
        copy under the bare file name -- presumably in the working directory,
        confirm). The file is uploaded and registered on the first SE of the
        comma separated outputDict['OutputSE'] that accepts it. On success the
        local file is unlinked and, for catalog inputs, the input replicas and
        the input catalog entry are removed.

        :param file: file name, relative to outputDict['InputPath']
        :param outputDict: transfer description; 'InputPath', 'InputFC',
                           'OutputPath', 'OutputFC' and 'OutputSE' are read
        :return: S_OK(file name) on success, S_ERROR(file name) on failure;
                 the name travels in the result so the transfer callback can
                 tell which file it belongs to
        """
        fileName = file
        inputPath = outputDict['InputPath']
        inputFCName = outputDict['InputFC']
        isLocalInput = inputFCName == 'LocalDisk'
        inBytes = 0
        if isLocalInput:
            inFile = file
            file = os.path.join(inputPath, file)
        else:
            inputFC = FileCatalog([inputFCName])

            inFile = os.path.join(inputPath, file)
            replicaDict = inputFC.getReplicas(inFile)
            if not replicaDict['OK']:
                self.log.error(replicaDict['Message'])
                return S_ERROR(fileName)
            if inFile not in replicaDict['Value']['Successful']:
                self.log.error(replicaDict['Value']['Failed'][inFile])
                return S_ERROR(fileName)
            # list() keeps the SE names indexable whatever keys() returns.
            seList = list(replicaDict['Value']['Successful'][inFile])
            if not seList:
                self.log.error("No replicas found for %s" % inFile)
                return S_ERROR(fileName)

            inputSE = StorageElement(seList[0])
            self.log.info('Retrieving from %s:' % inputSE.name, inFile)
            # lcg_util binding prevent multithreading, use subprocess instead
            res = pythonCall(2 * 3600, inputSE.getFile, inFile)
            if not res['OK']:
                self.log.error(res['Message'])
                return S_ERROR(fileName)
            ret = res['Value']
            if not ret['OK']:
                self.log.error(ret['Message'])
                return S_ERROR(fileName)
            if inFile not in ret['Value']['Successful']:
                self.log.error(ret['Value']['Failed'][inFile])
                return S_ERROR(fileName)

        if os.path.isfile(file):
            inBytes = os.path.getsize(file)

        outputPath = outputDict['OutputPath']
        outputFCName = outputDict['OutputFC']
        replicaManager = ReplicaManager()
        outFile = os.path.join(outputPath, os.path.basename(file))
        transferOK = False
        for outputSEName in List.fromChar(outputDict['OutputSE'], ","):
            outputSE = StorageElement(outputSEName)
            self.log.info('Trying to upload to %s:' % outputSE.name, outFile)
            # lcg_util binding prevent multithreading, use subprocess instead
            result = pythonCall(2 * 3600,
                                replicaManager.putAndRegister,
                                outFile,
                                os.path.realpath(file),
                                outputSE.name,
                                catalog=outputFCName)
            if not result['OK']:
                # The subprocess call itself failed
                self.log.error(result['Message'])
                continue
            putResult = result['Value']
            if not putResult['OK']:
                self.log.error(putResult['Message'])
                continue
            if outFile in putResult['Value']['Successful']:
                transferOK = True
                break
            self.log.error(putResult['Value']['Failed'][outFile])

        if not transferOK:
            # A fetched working copy is useless once every upload failed and
            # would otherwise pile up; a LocalDisk file is the source itself
            # and has to stay for the next attempt.
            if not isLocalInput and os.path.isfile(file):
                os.unlink(file)
            return S_ERROR(fileName)

        # The upload succeeded: the local file is no longer needed
        os.unlink(file)

        self.log.info("Finished transferring %s [%s bytes]" %
                      (inFile, inBytes))
        self.__okTransferredFiles += 1
        self.__okTransferredBytes += inBytes

        if isLocalInput:
            return S_OK(fileName)

        # Now the file is on final SE/FC, remove from input SE/FC
        for seName in seList:
            se = StorageElement(seName)
            self.log.info('Removing from %s:' % se.name, inFile)
            se.removeFile(inFile)

        inputFC.removeFile(inFile)

        return S_OK(fileName)

    @transferSync
    def transferCallback(self, threadedJob, submitResult):
        """Record the outcome of a finished transfer job.

        The file name is read from submitResult['Value'] on success and from
        submitResult['Message'] on failure. A success clears the failure
        counter of the file, a failure increments it. In both cases the file
        is taken out of the set of files being processed.

        :param threadedJob: the finished job (not used here)
        :param submitResult: result structure returned by the job
        """
        if submitResult['OK']:
            file = submitResult['Value']
            if file in self.__failedFiles:
                del self.__failedFiles[file]
        else:
            file = submitResult['Message']
            if file in self.__failedFiles:
                previousFailures = self.__failedFiles[file]
            else:
                previousFailures = 0
            self.__failedFiles[file] = previousFailures + 1
        # Take out from processing files (discard ignores a missing entry)
        self.__processingFiles.discard(file)