class MyProxyRenewalAgent(AgentModule):
  """Agent that renews proxies stored in the ProxyDB from a MyProxy server
  before they fall below a configured minimum lifetime.
  """

  def initialize(self):
    """Set up the ProxyDB connection and the renewal thread pool.

    :return: S_OK()
    """
    requiredLifeTime = self.am_getOption( "MinimumLifeTime", 3600 )
    renewedLifeTime = self.am_getOption( "RenewedLifeTime", 54000 )
    # NOTE: a dead read of /DIRAC/VOPolicy/MyProxyServer was removed here; the
    # value was never used -- ProxyDB resolves the MyProxy server itself
    # (see the getMyProxyServer() call below).
    self.proxyDB = ProxyDB( requireVoms = True,
                            useMyProxy = True
                          )

    gLogger.info( "Minimum Life time      : %s" % requiredLifeTime )
    gLogger.info( "Life time on renew     : %s" % renewedLifeTime )
    gLogger.info( "MyProxy server         : %s" % self.proxyDB.getMyProxyServer() )
    gLogger.info( "MyProxy max proxy time : %s" % self.proxyDB.getMyProxyMaxLifeTime() )

    # 1 to 10 worker threads for concurrent renewals
    self.__threadPool = ThreadPool( 1, 10 )
    return S_OK()

  def __renewProxyForCredentials( self, userDN, userGroup ):
    """Renew the proxy of a single DN/group pair from MyProxy.

    :param str userDN: user distinguished name
    :param str userGroup: DIRAC group
    """
    lifeTime = self.am_getOption( "RenewedLifeTime", 54000 )
    gLogger.info( "Renewing for %s@%s %s secs" % ( userDN, userGroup, lifeTime ) )
    retVal = self.proxyDB.renewFromMyProxy( userDN,
                                            userGroup,
                                            lifeTime = lifeTime )
    if not retVal[ 'OK' ]:
      gLogger.error( "Failed to renew for %s@%s : %s" %( userDN, userGroup, retVal[ 'Message' ] ) )
    else:
      gLogger.info( "Renewed proxy for %s@%s" % ( userDN, userGroup ) )

  def __treatRenewalCallback( self, oTJ, exceptionList ):
    """Thread-pool exception callback: log the exception raised by a job."""
    gLogger.exception( lException = exceptionList )

  def execute(self):
    """The main agent execution method.

    Purges expired requests/proxies and queues one renewal job per
    credential that is about to expire, then waits for all of them.
    """
    self.proxyDB.purgeLogs()
    gLogger.info( "Purging expired requests" )
    retVal = self.proxyDB.purgeExpiredRequests()
    if retVal[ 'OK' ]:
      gLogger.info( " purged %s requests" % retVal[ 'Value' ] )
    gLogger.info( "Purging expired proxies" )
    retVal = self.proxyDB.purgeExpiredProxies()
    if retVal[ 'OK' ]:
      gLogger.info( " purged %s proxies" % retVal[ 'Value' ] )
    retVal = self.proxyDB.getCredentialsAboutToExpire( self.am_getOption( "MinimumLifeTime" , 3600 ) )
    if not retVal[ 'OK' ]:
      return retVal
    data = retVal[ 'Value' ]
    gLogger.info( "Renewing %s proxies..." % len( data ) )
    for record in data:
      userDN = record[0]
      userGroup = record[1]
      self.__threadPool.generateJobAndQueueIt( self.__renewProxyForCredentials,
                                               args = ( userDN, userGroup ),
                                               oExceptionCallback = self.__treatRenewalCallback )
    self.__threadPool.processAllResults()
    return S_OK()
Beispiel #2
0
class MyProxyRenewalAgent(AgentModule):
  """Agent that keeps proxies stored in the ProxyDB alive by renewing them
  from MyProxy before they run out.
  """

  def initialize(self):
    """Prepare the ProxyDB handle and the renewal thread pool."""
    minimumLife = self.am_getOption( "MinimumLifeTime", 3600 )
    renewalLife = self.am_getOption( "RenewedLifeTime", 54000 )
    self.proxyDB = ProxyDB( useMyProxy = True )

    gLogger.info( "Minimum Life time      : %s" % minimumLife )
    gLogger.info( "Life time on renew     : %s" % renewalLife )
    gLogger.info( "MyProxy server         : %s" % self.proxyDB.getMyProxyServer() )
    gLogger.info( "MyProxy max proxy time : %s" % self.proxyDB.getMyProxyMaxLifeTime() )

    self.__threadPool = ThreadPool( 1, 10 )
    return S_OK()

  def __renewProxyForCredentials( self, userDN, userGroup ):
    """Renew a single DN/group credential from MyProxy and log the outcome."""
    lifeTime = self.am_getOption( "RenewedLifeTime", 54000 )
    gLogger.info( "Renewing for %s@%s %s secs" % ( userDN, userGroup, lifeTime ) )
    result = self.proxyDB.renewFromMyProxy( userDN,
                                            userGroup,
                                            lifeTime = lifeTime )
    if result[ 'OK' ]:
      gLogger.info( "Renewed proxy for %s@%s" % ( userDN, userGroup ) )
    else:
      gLogger.error( "Failed to renew proxy", "for %s@%s : %s" %( userDN, userGroup, result[ 'Message' ] ) )

  def __treatRenewalCallback( self, oTJ, exceptionList ):
    """Exception callback for the thread pool: just log the traceback."""
    gLogger.exception( lException = exceptionList )

  def execute(self):
    """Purge stale DB entries, then fan out one renewal job per credential
    that is close to expiry.
    """
    self.proxyDB.purgeLogs()
    gLogger.info( "Purging expired requests" )
    purged = self.proxyDB.purgeExpiredRequests()
    if purged[ 'OK' ]:
      gLogger.info( " purged %s requests" % purged[ 'Value' ] )
    gLogger.info( "Purging expired proxies" )
    purged = self.proxyDB.purgeExpiredProxies()
    if purged[ 'OK' ]:
      gLogger.info( " purged %s proxies" % purged[ 'Value' ] )
    expiring = self.proxyDB.getCredentialsAboutToExpire( self.am_getOption( "MinimumLifeTime" , 3600 ) )
    if not expiring[ 'OK' ]:
      return expiring
    credentials = expiring[ 'Value' ]
    gLogger.info( "Renewing %s proxies..." % len( credentials ) )
    for entry in credentials:
      userDN, userGroup = entry[0], entry[1]
      self.__threadPool.generateJobAndQueueIt( self.__renewProxyForCredentials,
                                               args = ( userDN, userGroup ),
                                               oExceptionCallback = self.__treatRenewalCallback )
    self.__threadPool.processAllResults()
    return S_OK()
class SystemAdministratorIntegrator:
  """Fan out SystemAdministrator RPC calls to a set of hosts in parallel
  and collect the per-host results.
  """

  def __init__( self, **kwargs ):
    """Constructor.

    :param kwargs: may contain 'hosts', an explicit host list; every other
                   keyword is forwarded to each SystemAdministratorClient.
    """
    if 'hosts' in kwargs:
      self.__hosts = kwargs.pop( 'hosts' )
    else:
      result = Registry.getHosts()
      self.__hosts = result['Value'] if result['OK'] else []

    self.__kwargs = dict( kwargs )
    self.__pool = ThreadPool( len( self.__hosts ) )
    self.__resultDict = {}

  def __getattr__( self, name ):
    # Any unknown attribute is interpreted as a remote method name:
    # remember it and hand back the generic executor.
    self.call = name
    return self.execute

  def __executeClient( self, host, method, *parms, **kwargs ):
    """Execute an RPC method on a given host, tagging the result with it."""
    hostName = Registry.getHostOption( host, 'Host', host )
    client = SystemAdministratorClient( hostName, **self.__kwargs )
    result = getattr( client, method )( *parms, **kwargs )
    result['Host'] = host
    return result

  def __processResult( self, id_, result ):
    """Thread-pool callback: store one host's result in the final dict."""
    self.__resultDict[ result.pop( 'Host' ) ] = result

  def execute(self, *args, **kwargs ):
    """Queue the remembered call on every host and wait for all replies.

    :return: S_OK with the per-host result dictionary
    """
    self.__resultDict = {}
    for host in self.__hosts:
      self.__pool.generateJobAndQueueIt( self.__executeClient,
                                         args = [ host, self.call ] + list( args ),
                                         kwargs = kwargs,
                                         oCallback = self.__processResult )
    self.__pool.processAllResults()
    return S_OK( self.__resultDict )
class SystemAdministratorIntegrator:
    """Broadcast SystemAdministrator service calls to many hosts at once,
    gathering the answers host by host.
    """

    def __init__(self, **kwargs):
        """Constructor.

        :param kwargs: 'hosts' selects the target hosts explicitly;
                       everything else is passed on to the per-host clients.
        """
        if 'hosts' in kwargs:
            self.__hosts = kwargs.pop('hosts')
        else:
            reply = Registry.getHosts()
            self.__hosts = reply['Value'] if reply['OK'] else []

        self.__kwargs = dict(kwargs)
        self.__pool = ThreadPool(len(self.__hosts))
        self.__resultDict = {}

    def __getattr__(self, name):
        # Unknown attributes are treated as remote method names.
        self.call = name
        return self.execute

    def __executeClient(self, host, method, *parms, **kwargs):
        """Invoke one RPC method on one host and tag the result with it."""
        hostName = Registry.getHostOption(host, 'Host', host)
        client = SystemAdministratorClient(hostName, **self.__kwargs)
        answer = getattr(client, method)(*parms, **kwargs)
        answer['Host'] = host
        return answer

    def __processResult(self, id_, result):
        """Pool callback collecting one host's answer into the result dict."""
        host = result['Host']
        del result['Host']
        self.__resultDict[host] = result

    def execute(self, *args, **kwargs):
        """Queue the remembered call on every host and wait for all replies."""
        self.__resultDict = {}
        for target in self.__hosts:
            self.__pool.generateJobAndQueueIt(
                self.__executeClient,
                args=[target, self.call] + list(args),
                kwargs=kwargs,
                oCallback=self.__processResult)

        self.__pool.processAllResults()
        return S_OK(self.__resultDict)
Beispiel #5
0
    def _updateServiceConfiguration(self, urlSet, fromMaster=False):
        """Update the configuration of a set of services in parallel.

        :param set urlSet: a set of service URLs
        :param fromMaster: flag to force updating from the master CS
        :return: Nothing
        """
        workers = ThreadPool(len(urlSet))
        for serviceUrl in urlSet:
            workers.generateJobAndQueueIt(self._forceServiceUpdate,
                                          args=[serviceUrl, fromMaster],
                                          kwargs={},
                                          oCallback=self.__processResults)
        workers.processAllResults()
Beispiel #6
0
  def __updateServiceConfiguration(self, urlSet, fromMaster=False):
    """Refresh the configuration of several services concurrently.

    :param set urlSet: a set of service URLs
    :param fromMaster: flag to force updating from the master CS
    :return: S_OK/S_ERROR, Value Successful/Failed dict with service URLs
    """
    workers = ThreadPool(len(urlSet))
    for serviceUrl in urlSet:
      workers.generateJobAndQueueIt(self.__forceServiceUpdate,
                                    args=[serviceUrl, fromMaster],
                                    kwargs={},
                                    oCallback=self.__processResults)
    workers.processAllResults()
    return S_OK(self.__updateResultDict)
Beispiel #7
0
    def __init__(self, **kwargs):
        """Constructor.

        :param kwargs: 'hosts' gives an explicit host list; 'exclude' removes
                       hosts from the Registry-provided list; remaining
                       keywords are forwarded to each client.
        """
        if 'hosts' in kwargs:
            self.__hosts = kwargs.pop('hosts')
        else:
            reply = Registry.getHosts()
            self.__hosts = reply['Value'] if reply['OK'] else []
            # Excluded hosts
            if 'exclude' in kwargs:
                # NOTE(review): 'exclude' is left in kwargs and therefore ends
                # up in self.__kwargs, forwarded to the clients -- confirm
                # that this is intended.
                self.__hosts = list(set(self.__hosts) - set(kwargs['exclude']))

        # Ping every candidate host; only those answering have a running
        # SystemAdministrator service and are kept.
        respondingHosts = []
        self.silentHosts = []
        self.__resultDict = {}
        self.__kwargs = {}
        pingPool = ThreadPool(len(self.__hosts))
        for host in self.__hosts:
            pingPool.generateJobAndQueueIt(self.__executeClient,
                                           args=[host, "ping"],
                                           kwargs={},
                                           oCallback=self.__processResult)

        pingPool.processAllResults()
        for host, pingReply in self.__resultDict.items():
            if pingReply['OK']:
                respondingHosts.append(host)
            else:
                self.silentHosts.append(host)
        del pingPool

        self.__hosts = respondingHosts

        self.__kwargs = dict(kwargs)
        self.__pool = ThreadPool(len(self.__hosts))
        self.__resultDict = {}
  def __init__( self, **kwargs ):
    """Constructor.

    Builds the host list (explicit 'hosts' keyword, or the Registry list
    minus any 'exclude' entries), pings every host and drops silent ones.
    """
    if 'hosts' in kwargs:
      self.__hosts = kwargs.pop( 'hosts' )
    else:
      result = Registry.getHosts()
      if result['OK']:
        self.__hosts = result['Value']
      else:
        self.__hosts = []
      # Excluded hosts
      if 'exclude' in kwargs:
        excluded = set( kwargs[ 'exclude' ] )
        self.__hosts = list( set( self.__hosts ) - excluded )

    # Ping the hosts to remove those that don't have a SystemAdministrator service
    self.silentHosts = []
    self.__resultDict = {}
    self.__kwargs = {}
    pool = ThreadPool( len( self.__hosts ) )
    for host in self.__hosts:
      pool.generateJobAndQueueIt( self.__executeClient,
                                  args = [ host, "ping" ],
                                  kwargs = {},
                                  oCallback = self.__processResult )

    pool.processAllResults()
    aliveHosts = [ h for h, res in self.__resultDict.items() if res['OK'] ]
    self.silentHosts = [ h for h, res in self.__resultDict.items() if not res['OK'] ]
    del pool

    self.__hosts = aliveHosts

    self.__kwargs = dict( kwargs )
    self.__pool = ThreadPool( len( self.__hosts ) )
    self.__resultDict = {}
Beispiel #9
0
class Publisher:
    """
  Class Publisher is in charge of getting dispersed information, to be published on the web.
  """

    # NOTE(review): this class uses Python 2-only constructs
    # ("raise Exc, arg" and indexing dict.keys()/values()); it will not
    # run unchanged on Python 3.

    #############################################################################

    def __init__(self,
                 VOExtension,
                 rsDBIn=None,
                 commandCallerIn=None,
                 infoGetterIn=None,
                 WMSAdminIn=None):
        """
    Standard constructor

    :params:
      :attr:`VOExtension`: string, VO Extension (e.g. 'LHCb')

      :attr:`rsDBIn`: optional ResourceStatusDB object
      (see :class: `DIRAC.ResourceStatusSystem.DB.ResourceStatusDB.ResourceStatusDB`)

      :attr:`commandCallerIn`: optional CommandCaller object
      (see :class: `DIRAC.ResourceStatusSystem.Command.CommandCaller.CommandCaller`)

      :attr:`infoGetterIn`: optional InfoGetter object
      (see :class: `DIRAC.ResourceStatusSystem.Utilities.InfoGetter.InfoGetter`)

      :attr:`WMSAdminIn`: optional RPCClient object for WMSAdmin
      (see :class: `DIRAC.Core.DISET.RPCClient.RPCClient`)
    """

        # Policy configuration module of the given VO extension
        self.configModule = __import__(
            VOExtension + "DIRAC.ResourceStatusSystem.Policy.Configurations",
            globals(), locals(), ['*'])

        # Each collaborator may be injected (tests) or built lazily here.
        if rsDBIn is not None:
            self.rsDB = rsDBIn
        else:
            from DIRAC.ResourceStatusSystem.DB.ResourceStatusDB import ResourceStatusDB
            self.rsDB = ResourceStatusDB()

        if commandCallerIn is not None:
            self.cc = commandCallerIn
        else:
            from DIRAC.ResourceStatusSystem.Command.CommandCaller import CommandCaller
            self.cc = CommandCaller()

        if infoGetterIn is not None:
            self.ig = infoGetterIn
        else:
            from DIRAC.ResourceStatusSystem.Utilities.InfoGetter import InfoGetter
            self.ig = InfoGetter(VOExtension)

        if WMSAdminIn is not None:
            self.WMSAdmin = WMSAdminIn
        else:
            from DIRAC.Core.DISET.RPCClient import RPCClient
            self.WMSAdmin = RPCClient("WorkloadManagement/WMSAdministrator")

        # 2-5 worker threads used by getInfo() to fetch panel info concurrently
        self.threadPool = ThreadPool(2, 5)

        # guards concurrent writes to infoForPanel_res from the pool threads
        self.lockObj = threading.RLock()

        # shared scratch dict filled by getInfoForPanel() worker threads
        self.infoForPanel_res = {}

#############################################################################

    def getInfo(self, granularity, name, useNewRes=False):
        """
    Standard method to get all the info to be published

    This method uses a ThreadPool (:class:`DIRAC.Core.Utilities.ThreadPool.ThreadPool`)
    with 2-5 threads. The threaded method is
    :meth:`DIRAC.ResourceStatusSystem.Utilities.Publisher.Publisher.getInfoForPanel`

    :params:
      :attr:`granularity`: string - a ValidRes

      :attr:`name`: string - name of the Validres

      :attr:`useNewRes`: boolean. When set to true, will get new results,
      otherwise it will get cached results (where available).
    """

        if granularity not in ValidRes:
            raise InvalidRes, where(self, self.getInfo)

        self.infoForPanel_res = {}

        status = None
        formerStatus = None
        siteType = None
        serviceType = None
        resourceType = None

        if granularity in ('Resource', 'Resources'):
            try:
                resourceType = self.rsDB.getMonitoredsList(
                    'Resource', ['ResourceType'], resourceName=name)[0][0]
            except IndexError:
                return "%s does not exist!" % name

        if granularity in ('StorageElement', 'StorageElements'):
            try:
                siteType = self.rsDB.getMonitoredsList(
                    'StorageElement', ['SiteType'],
                    storageElementName=name)[0][0]
            except IndexError:
                return "%s does not exist!" % name

        paramNames = [
            'Type', 'Group', 'Name', 'Policy', 'DIRAC Status', 'RSS Status',
            'Reason', 'Description'
        ]

        infoToGet = self.ig.getInfoToApply(('view_info', ),
                                           granularity,
                                           status=status,
                                           formerStatus=formerStatus,
                                           siteType=siteType,
                                           serviceType=serviceType,
                                           resourceType=resourceType,
                                           useNewRes=useNewRes)[0]['Panels']
        infoToGet_res = {}

        recordsList = []

        infosForPolicy = {}

        for panel in infoToGet.keys():

            (granularityForPanel,
             nameForPanel) = self.__getNameForPanel(granularity, name, panel)

            if not self._resExist(granularityForPanel, nameForPanel):
                #        completeInfoForPanel_res = None
                continue

            #take composite RSS result for name
            nameStatus_res = self._getStatus(nameForPanel, panel)

            recordBase = [None, None, None, None, None, None, None, None]

            recordBase[1] = panel.replace('_Panel', '')
            recordBase[2] = nameForPanel  #nameForPanel
            try:
                recordBase[4] = nameStatus_res[nameForPanel][
                    'DIRACStatus']  #DIRAC Status
            except:
                # DIRACStatus is only present for Site/SE panels; leave None
                pass
            recordBase[5] = nameStatus_res[nameForPanel][
                'RSSStatus']  #RSS Status

            record = copy.deepcopy(recordBase)
            record[0] = 'ResultsForResource'

            recordsList.append(record)

            #take info that goes into the panel
            infoForPanel = infoToGet[panel]

            for info in infoForPanel:

                self.threadPool.generateJobAndQueueIt(
                    self.getInfoForPanel,
                    args=(info, granularityForPanel, nameForPanel))

            self.threadPool.processAllResults()

            for policy in [x.keys()[0] for x in infoForPanel]:
                record = copy.deepcopy(recordBase)
                record[0] = 'SpecificInformation'
                record[3] = policy  #policyName
                record[4] = None  #DIRAC Status
                record[5] = self.infoForPanel_res[policy][
                    'Status']  #RSS status for the policy
                record[6] = self.infoForPanel_res[policy]['Reason']  #Reason
                record[7] = self.infoForPanel_res[policy]['desc']  #Description
                recordsList.append(record)

                infosForPolicy[policy] = self.infoForPanel_res[policy]['infos']

        infoToGet_res['TotalRecords'] = len(recordsList)
        infoToGet_res['ParameterNames'] = paramNames
        infoToGet_res['Records'] = recordsList

        infoToGet_res['Extras'] = infosForPolicy

        return infoToGet_res

#############################################################################

    def getInfoForPanel(self, info, granularityForPanel, nameForPanel):
        """Threaded worker: collect one policy's result, description and
        extra info into self.infoForPanel_res (protected by self.lockObj).
        """

        #get single RSS policy results
        policyResToGet = info.keys()[0]
        pol_res = self.rsDB.getPolicyRes(nameForPanel, policyResToGet)
        if pol_res != []:
            pol_res_dict = {'Status': pol_res[0], 'Reason': pol_res[1]}
        else:
            pol_res_dict = {'Status': 'Unknown', 'Reason': 'Unknown'}
        self.lockObj.acquire()
        try:
            self.infoForPanel_res[policyResToGet] = pol_res_dict
        finally:
            self.lockObj.release()

        #get policy description
        desc = self._getPolicyDesc(policyResToGet)

        #get other info
        othersInfo = info.values()[0]
        if not isinstance(othersInfo, list):
            othersInfo = [othersInfo]

        info_res = {}

        for oi in othersInfo:
            format = oi.keys()[0]
            what = oi.values()[0]

            info_bit_got = self._getInfo(granularityForPanel, nameForPanel,
                                         format, what)

            info_res[format] = info_bit_got

        self.lockObj.acquire()
        try:
            self.infoForPanel_res[policyResToGet]['infos'] = info_res
            self.infoForPanel_res[policyResToGet]['desc'] = desc
        finally:
            self.lockObj.release()

#############################################################################

    def _getStatus(self, name, panel):
        """Return {name: {'RSSStatus': ..., 'DIRACStatus': ...}};
        DIRACStatus is only filled for the Site_Panel / SE_Panel cases.
        """

        #get RSS status
        RSSStatus = self._getInfoFromRSSDB(name, panel)[0][1]

        #get DIRAC status
        if panel in ('Site_Panel', 'SE_Panel'):

            if panel == 'Site_Panel':
                DIRACStatus = self.WMSAdmin.getSiteMaskLogging(name)
                if DIRACStatus['OK']:
                    DIRACStatus = DIRACStatus['Value'][name].pop()[0]
                else:
                    raise RSSException, where(self, self._getStatus)

            elif panel == 'SE_Panel':
                ra = getStorageElementStatus(name, 'ReadAccess')['Value']
                wa = getStorageElementStatus(name, 'WriteAccess')['Value']
                DIRACStatus = {'ReadAccess': ra, 'WriteAccess': wa}

            status = {
                name: {
                    'RSSStatus': RSSStatus,
                    'DIRACStatus': DIRACStatus
                }
            }

        else:
            status = {name: {'RSSStatus': RSSStatus}}

        return status

#############################################################################

    def _getInfo(self, granularity, name, format, what):
        """Fetch one piece of panel info, either from the RSS DB
        ('RSS' format) or through a command invocation.
        """

        if format == 'RSS':
            info_bit_got = self._getInfoFromRSSDB(name, what)
        else:
            if isinstance(what, dict):
                command = what['CommandIn']
                extraArgs = what['args']
            else:
                command = what
                extraArgs = None

            info_bit_got = self.cc.commandInvocation(granularity, name, None,
                                                     None, command, extraArgs)

            try:
                info_bit_got = info_bit_got['Result']
            except:
                # result may not be a dict with 'Result'; use it as-is
                pass

        return info_bit_got

#############################################################################

    def _getInfoFromRSSDB(self, name, what):
        """Query rsDB.getMonitoredsList() with the granularity and parameter
        list derived from the 'what' selector.
        """

        paramsL = ['Status']

        siteName = None
        serviceName = None
        resourceName = None
        storageElementName = None
        serviceType = None
        gridSiteName = None

        if what == 'ServiceOfSite':
            gran = 'Service'
            paramsL.insert(0, 'ServiceName')
            paramsL.append('Reason')
            siteName = name
        elif what == 'ResOfCompService':
            gran = 'Resources'
            paramsL.insert(0, 'ResourceName')
            paramsL.append('Reason')
            serviceType = name.split('@')[0]
            gridSiteName = getGOCSiteName(name.split('@')[1])
            if not gridSiteName['OK']:
                raise RSSException, gridSiteName['Message']
            gridSiteName = gridSiteName['Value']
        elif what == 'ResOfStorService':
            gran = 'Resources'
            paramsL.insert(0, 'ResourceName')
            paramsL.append('Reason')
            serviceType = name.split('@')[0]
            gridSiteName = getGOCSiteName(name.split('@')[1])
            if not gridSiteName['OK']:
                raise RSSException, gridSiteName['Message']
            gridSiteName = gridSiteName['Value']
        elif what == 'ResOfStorEl':
            gran = 'StorageElements'
            paramsL.insert(0, 'ResourceName')
            paramsL.append('Reason')
            storageElementName = name
        elif what == 'StorageElementsOfSite':
            gran = 'StorageElements'
            paramsL.insert(0, 'StorageElementName')
            paramsL.append('Reason')
            if '@' in name:
                DIRACsiteName = name.split('@').pop()
            else:
                DIRACsiteName = name
            gridSiteName = getGOCSiteName(DIRACsiteName)
            if not gridSiteName['OK']:
                raise RSSException, gridSiteName['Message']
            gridSiteName = gridSiteName['Value']
        elif what == 'Site_Panel':
            gran = 'Site'
            paramsL.insert(0, 'SiteName')
            siteName = name
        elif what == 'Service_Computing_Panel':
            gran = 'Service'
            paramsL.insert(0, 'ServiceName')
            serviceName = name
        elif what == 'Service_Storage_Panel':
            gran = 'Service'
            paramsL.insert(0, 'ServiceName')
            serviceName = name
        elif what == 'Service_VO-BOX_Panel':
            gran = 'Services'
            paramsL.insert(0, 'ServiceName')
            serviceName = name
        elif what == 'Service_VOMS_Panel':
            gran = 'Services'
            paramsL.insert(0, 'ServiceName')
            serviceName = name
        elif what == 'Resource_Panel':
            gran = 'Resource'
            paramsL.insert(0, 'ResourceName')
            resourceName = name
        elif what == 'SE_Panel':
            gran = 'StorageElement'
            paramsL.insert(0, 'StorageElementName')
            storageElementName = name

        info_bit_got = self.rsDB.getMonitoredsList(
            gran,
            paramsList=paramsL,
            siteName=siteName,
            serviceName=serviceName,
            serviceType=serviceType,
            resourceName=resourceName,
            storageElementName=storageElementName,
            gridSiteName=gridSiteName)

        return info_bit_got

#############################################################################

    def _getPolicyDesc(self, policyName):
        """Return the 'Description' entry of a policy from the VO policy
        configuration module loaded in __init__."""

        return self.configModule.Policies[policyName]['Description']

#############################################################################

    def __getNameForPanel(self, granularity, name, panel):
        """Map (granularity, name) to the service-level pair used by site
        service panels, e.g. Site + Service_Computing_Panel -> 'Computing@<name>'.
        """

        if granularity in ('Site', 'Sites'):
            if panel == 'Service_Computing_Panel':
                granularity = 'Service'
                name = 'Computing@' + name
            elif panel == 'Service_Storage_Panel':
                granularity = 'Service'
                name = 'Storage@' + name
            elif panel == 'OtherServices_Panel':
                granularity = 'Service'
                name = 'OtherS@' + name
            elif panel == 'Service_VOMS_Panel':
                granularity = 'Service'
                name = 'VOMS@' + name
            elif panel == 'Service_VO-BOX_Panel':
                granularity = 'Service'
                name = 'VO-BOX@' + name
#      else:
#        granularity = granularity
#        name = name
#    else:
#      granularity = granularity
#      name = name

        return (granularity, name)

#############################################################################

    def _resExist(self, granularity, name):
        """Return True if the named entity exists in the RSS DB at the given
        granularity, False otherwise."""

        siteName = None
        serviceName = None
        resourceName = None
        storageElementName = None

        if granularity in ('Site', 'Sites'):
            siteName = name
        elif granularity in ('Service', 'Services'):
            serviceName = name
        elif granularity in ('Resource', 'Resources'):
            resourceName = name
        elif granularity in ('StorageElement', 'StorageElements'):
            storageElementName = name

        res = self.rsDB.getMonitoredsList(
            granularity,
            siteName=siteName,
            serviceName=serviceName,
            resourceName=resourceName,
            storageElementName=storageElementName)

        if res == []:
            return False
        else:
            return True
Beispiel #10
0
class SystemAdministratorIntegrator(object):
    """Parallel front-end to the SystemAdministrator service of many hosts.

    On construction the candidate hosts are pinged and only the responsive
    ones are kept; afterwards any unknown attribute access is interpreted
    as a remote method name to be executed on all of them.
    """

    def __init__(self, **kwargs):
        """Constructor.

        :param kwargs: 'hosts' gives an explicit host list; 'exclude' removes
                       hosts from the Registry list; the rest is forwarded
                       to each SystemAdministratorClient.
        """
        if 'hosts' in kwargs:
            self.__hosts = kwargs.pop('hosts')
        else:
            reply = Registry.getHosts()
            self.__hosts = reply['Value'] if reply['OK'] else []
            # Excluded hosts
            if 'exclude' in kwargs:
                self.__hosts = list(set(self.__hosts) - set(kwargs['exclude']))

        # Ping the hosts to remove those that don't have a SystemAdministrator service
        self.silentHosts = []
        self.__resultDict = {}
        self.__kwargs = {}
        pingPool = ThreadPool(len(self.__hosts))
        for host in self.__hosts:
            pingPool.generateJobAndQueueIt(self.__executeClient,
                                           args=[host, "ping"],
                                           kwargs={},
                                           oCallback=self.__processResult)

        pingPool.processAllResults()
        responding = []
        for host, pingReply in self.__resultDict.items():
            if pingReply['OK']:
                responding.append(host)
            else:
                self.silentHosts.append(host)
        del pingPool

        self.__hosts = responding

        self.__kwargs = dict(kwargs)
        self.__pool = ThreadPool(len(self.__hosts))
        self.__resultDict = {}

    def getSilentHosts(self):
        """Get a list of non-responding hosts.

        :return: list of hosts
        """
        return self.silentHosts

    def getRespondingHosts(self):
        """Get a list of responding hosts.

        :return: list of hosts
        """
        return self.__hosts

    def __getattr__(self, name):
        # Remember the requested remote method and return the executor.
        self.call = name
        return self.execute

    def __executeClient(self, host, method, *parms, **kwargs):
        """Execute an RPC method on a given host and tag the result with it."""
        hostName = Registry.getHostOption(host, 'Host', host)
        client = SystemAdministratorClient(hostName, **self.__kwargs)
        outcome = getattr(client, method)(*parms, **kwargs)
        outcome['Host'] = host
        return outcome

    def __processResult(self, id_, result):
        """Pool callback: file one host's result under its host name."""
        owner = result.pop('Host')
        self.__resultDict[owner] = result

    def execute(self, *args, **kwargs):
        """Run the remembered call on every responsive host.

        :return: S_OK with the dict of per-host results
        """
        self.__resultDict = {}
        for host in self.__hosts:
            self.__pool.generateJobAndQueueIt(self.__executeClient,
                                              args=[host, self.call] + list(args),
                                              kwargs=kwargs,
                                              oCallback=self.__processResult)

        self.__pool.processAllResults()
        return S_OK(self.__resultDict)
Beispiel #11
0
class RemovalAgent( AgentModule, RequestAgentMixIn ):
  """
    This Agent takes care of executing "removal" requests from the RequestManagement system
  """

  def __init__( self, *args ):
    """
    Initialize the base class and define some extra data members
    """
    AgentModule.__init__( self, *args )
    self.requestDBClient = None    # RequestClient, created in initialize()
    self.replicaManager = None     # ReplicaManager, created in initialize()
    self.maxNumberOfThreads = 4    # default, overridden by the 'NumberOfThreads' option
    self.maxRequestsInQueue = 100  # default, overridden by the 'RequestsInQueue' option
    self.threadPool = None         # ThreadPool, created in initialize()

  def initialize( self ):
    """
      Called by the framework upon startup, before any cycle (execute method below)
    """
    self.requestDBClient = RequestClient()
    # RequestAgentMixIn.finalizeRequest uses the capitalized attribute name;
    # without this alias finalizing a request fails with an AttributeError.
    self.RequestDBClient = self.requestDBClient
    self.replicaManager = ReplicaManager()

    gMonitor.registerActivity( "Iteration", "Agent Loops", "RemovalAgent", "Loops/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "Execute", "Request Processed", "RemovalAgent", "Requests/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "Done", "Request Completed", "RemovalAgent", "Requests/min", gMonitor.OP_SUM )

    gMonitor.registerActivity( "PhysicalRemovalAtt", "Physical removals attempted",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "PhysicalRemovalDone", "Successful physical removals",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "PhysicalRemovalFail", "Failed physical removals",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "PhysicalRemovalSize", "Physically removed size",
                               "RemovalAgent", "Bytes", gMonitor.OP_ACUM )

    gMonitor.registerActivity( "ReplicaRemovalAtt", "Replica removal attempted",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "ReplicaRemovalDone", "Successful replica removals",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "ReplicaRemovalFail", "Failed replica removals",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )

    gMonitor.registerActivity( "RemoveFileAtt", "File removal attempted",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "RemoveFileDone", "File removal done",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "RemoveFileFail", "File removal failed",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )

    self.maxNumberOfThreads = self.am_getOption( 'NumberOfThreads', self.maxNumberOfThreads )
    self.maxRequestsInQueue = self.am_getOption( 'RequestsInQueue', self.maxRequestsInQueue )
    self.threadPool = ThreadPool( 1, self.maxNumberOfThreads, self.maxRequestsInQueue )

    # Set the ThreadPool in daemon mode to process new ThreadedJobs as they are inserted
    self.threadPool.daemonize()

    # This sets the Default Proxy to used as that defined under
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )

    return S_OK()

  def execute( self ):
    """
    Fill the ThreadPool with ThreadedJobs
    """

    # Queue jobs until the ThreadPool queue is full: queueJob() returns an
    # error once maxRequestsInQueue is reached, which ends this cycle.  The
    # daemonized pool executes the jobs as they are inserted.
    while True:
      requestExecutor = ThreadedJob( self.executeRequest )
      ret = self.threadPool.queueJob( requestExecutor )
      if not ret['OK']:
        break

    return S_OK()

  def executeRequest( self ):
    """
    Do the actual work in the Thread: fetch one 'removal' request from the
    request database, execute each of its waiting sub-requests, and store
    the updated request back.
    """
    ################################################
    # Get a request from request DB
    gMonitor.addMark( "Iteration", 1 )
    res = self.requestDBClient.getRequest( 'removal' )
    if not res['OK']:
      gLogger.info( "RemovalAgent.execute: Failed to get request from database." )
      return S_OK()
    elif not res['Value']:
      gLogger.info( "RemovalAgent.execute: No requests to be executed found." )
      return S_OK()
    requestString = res['Value']['RequestString']
    requestName = res['Value']['RequestName']
    sourceServer = res['Value']['Server']
    try:
      jobID = int( res['Value']['JobID'] )
    except ( ValueError, TypeError, KeyError ):
      # JobID may be absent, None or malformed: 0 means "not tied to a job"
      jobID = 0
    gLogger.info( "RemovalAgent.execute: Obtained request %s" % requestName )

    result = self.requestDBClient.getCurrentExecutionOrder( requestName, sourceServer )
    if result['OK']:
      currentOrder = result['Value']
    else:
      gLogger.error( 'Can not get the request execution order' )
      return S_OK( 'Can not get the request execution order' )

    oRequest = RequestContainer( request = requestString )

    ################################################
    # Find the number of sub-requests from the request
    res = oRequest.getNumSubRequests( 'removal' )
    if not res['OK']:
      errStr = "RemovalAgent.execute: Failed to obtain number of removal subrequests."
      gLogger.error( errStr, res['Message'] )
      return S_OK()
    gLogger.info( "RemovalAgent.execute: Found %s sub requests." % res['Value'] )

    ################################################
    # For all the sub-requests in the request
    modified = False
    for ind in range( res['Value'] ):
      gMonitor.addMark( "Execute", 1 )
      gLogger.info( "RemovalAgent.execute: Processing sub-request %s." % ind )
      subRequestAttributes = oRequest.getSubRequestAttributes( ind, 'removal' )['Value']
      subExecutionOrder = int( subRequestAttributes['ExecutionOrder'] )
      subStatus = subRequestAttributes['Status']
      # Only execute sub-requests that are still waiting and whose
      # execution order has been reached
      if subStatus == 'Waiting' and subExecutionOrder <= currentOrder:
        subRequestFiles = oRequest.getSubRequestFiles( ind, 'removal' )['Value']
        operation = subRequestAttributes['Operation']

        ################################################
        #  If the sub-request is a physical removal operation
        if operation == 'physicalRemoval':
          gLogger.info( "RemovalAgent.execute: Attempting to execute %s sub-request." % operation )
          diracSEs = subRequestAttributes['TargetSE'].split( ',' )
          physicalFiles = []
          pfnToLfn = {}
          for subRequestFile in subRequestFiles:
            if subRequestFile['Status'] == 'Waiting':
              pfn = str( subRequestFile['PFN'] )
              lfn = str( subRequestFile['LFN'] )
              pfnToLfn[pfn] = lfn
              physicalFiles.append( pfn )
          gMonitor.addMark( 'PhysicalRemovalAtt', len( physicalFiles ) )
          failed = {}
          errMsg = {}
          for diracSE in diracSEs:
            res = self.replicaManager.removeStorageFile( physicalFiles, diracSE )
            if res['OK']:
              for pfn in res['Value']['Failed'].keys():
                if not failed.has_key( pfn ):
                  failed[pfn] = {}
                failed[pfn][diracSE] = res['Value']['Failed'][pfn]
            else:
              # The whole SE call failed: mark every file as failed there,
              # 'Completely' flags a full-SE failure rather than a per-file one
              errMsg[diracSE] = res['Message']
              for pfn in physicalFiles:
                if not failed.has_key( pfn ):
                  failed[pfn] = {}
                failed[pfn][diracSE] = 'Completely'
          # Now analyse the results
          failedPFNs = failed.keys()
          pfnsOK = [pfn for pfn in physicalFiles if not pfn in failedPFNs]
          gMonitor.addMark( 'PhysicalRemovalDone', len( pfnsOK ) )
          for pfn in pfnsOK:
            gLogger.info( "RemovalAgent.execute: Successfully removed %s at %s" % ( pfn, str( diracSEs ) ) )
            res = oRequest.setSubRequestFileAttributeValue( ind, 'removal', pfnToLfn[pfn], 'Status', 'Done' )
            if not res['OK']:
              gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', pfnToLfn[pfn] ) )
            modified = True
          if failed:
            gMonitor.addMark( 'PhysicalRemovalFail', len( failedPFNs ) )
            for pfn in failedPFNs:
              for diracSE in failed[pfn].keys():
                if type( failed[pfn][diracSE] ) in StringTypes:
                  # A file that is already gone counts as successfully removed
                  if re.search( 'no such file or directory', failed[pfn][diracSE].lower() ):
                    gLogger.info( "RemovalAgent.execute: File did not exist.", pfn )
                    res = oRequest.setSubRequestFileAttributeValue( ind, 'removal', pfnToLfn[pfn], 'Status', 'Done' )
                    if not res['OK']:
                      gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', pfnToLfn[pfn] ) )
                    modified = True
                  else:
                    gLogger.info( "RemovalAgent.execute: Failed to remove file.", "%s at %s - %s" % ( pfn, diracSE, failed[pfn][diracSE] ) )
          if errMsg:
            for diracSE in errMsg.keys():
              # Use % formatting: building a ( format, value ) tuple here would
              # log the raw format string instead of the SE name
              errStr = "RemovalAgent.execute: Completely failed to remove replicas. At %s" % diracSE
              gLogger.error( errStr, errMsg[diracSE] )


        ################################################
        #  If the sub-request is a file removal operation
        elif operation == 'removeFile':
          gLogger.info( "RemovalAgent.execute: Attempting to execute %s sub-request." % operation )
          lfns = []
          for subRequestFile in subRequestFiles:
            if subRequestFile['Status'] == 'Waiting':
              lfn = str( subRequestFile['LFN'] )
              lfns.append( lfn )
          gMonitor.addMark( 'RemoveFileAtt', len( lfns ) )
          res = self.replicaManager.removeFile( lfns )
          if res['OK']:
            gMonitor.addMark( 'RemoveFileDone', len( res['Value']['Successful'].keys() ) )
            for lfn in res['Value']['Successful'].keys():
              gLogger.info( "RemovalAgent.execute: Successfully removed %s." % lfn )
              result = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done' )
              if not result['OK']:
                gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', lfn ) )
              modified = True
            gMonitor.addMark( 'RemoveFileFail', len( res['Value']['Failed'].keys() ) )
            for lfn in res['Value']['Failed'].keys():
              if type( res['Value']['Failed'][lfn] ) in StringTypes:
                # A file that is already gone counts as successfully removed
                if re.search( 'no such file or directory', res['Value']['Failed'][lfn].lower() ):
                  gLogger.info( "RemovalAgent.execute: File did not exist.", lfn )
                  result = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done' )
                  if not result['OK']:
                    gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', lfn ) )
                  modified = True
                else:
                  gLogger.info( "RemovalAgent.execute: Failed to remove file:",
                                "%s %s" % ( lfn, res['Value']['Failed'][lfn] ) )
          else:
            gMonitor.addMark( 'RemoveFileFail', len( lfns ) )
            errStr = "RemovalAgent.execute: Completely failed to remove files."
            gLogger.error( errStr, res['Message'] )

        ################################################
        #  If the sub-request is a replica removal operation
        elif operation == 'replicaRemoval':
          gLogger.info( "RemovalAgent.execute: Attempting to execute %s sub-request." % operation )
          diracSEs = subRequestAttributes['TargetSE'].split( ',' )
          lfns = []
          for subRequestFile in subRequestFiles:
            if subRequestFile['Status'] == 'Waiting':
              lfn = str( subRequestFile['LFN'] )
              lfns.append( lfn )
          gMonitor.addMark( 'ReplicaRemovalAtt', len( lfns ) )

          failed = {}
          errMsg = {}
          for diracSE in diracSEs:
            res = self.replicaManager.removeReplica( diracSE, lfns )
            if res['OK']:
              for lfn in res['Value']['Failed'].keys():
                if not failed.has_key( lfn ):
                  failed[lfn] = {}
                failed[lfn][diracSE] = res['Value']['Failed'][lfn]
            else:
              # The whole SE call failed: mark every file as failed there,
              # 'Completely' flags a full-SE failure rather than a per-file one
              errMsg[diracSE] = res['Message']
              for lfn in lfns:
                if not failed.has_key( lfn ):
                  failed[lfn] = {}
                failed[lfn][diracSE] = 'Completely'
          # Now analyse the results
          failedLFNs = failed.keys()
          lfnsOK = [lfn for lfn in lfns if not lfn in failedLFNs]
          gMonitor.addMark( 'ReplicaRemovalDone', len( lfnsOK ) )
          for lfn in lfnsOK:
            gLogger.info( "RemovalAgent.execute: Successfully removed %s at %s" % ( lfn, str( diracSEs ) ) )
            res = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done' )
            if not res['OK']:
              gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', lfn ) )
            modified = True
          if failed:
            gMonitor.addMark( 'PhysicalRemovalFail', len( failedLFNs ) )
            for lfn in failedLFNs:
              for diracSE in failed[lfn].keys():
                if type( failed[lfn][diracSE] ) in StringTypes:
                  # A file that is already gone counts as successfully removed
                  if re.search( 'no such file or directory', failed[lfn][diracSE].lower() ):
                    gLogger.info( "RemovalAgent.execute: File did not exist.", lfn )
                    res = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done' )
                    if not res['OK']:
                      gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', lfn ) )
                    modified = True
                  else:
                    gLogger.info( "RemovalAgent.execute: Failed to remove file.", "%s at %s - %s" % ( lfn, diracSE, failed[lfn][diracSE] ) )
          if errMsg:
            for diracSE in errMsg.keys():
              # Use % formatting: building a ( format, value ) tuple here would
              # log the raw format string instead of the SE name
              errStr = "RemovalAgent.execute: Completely failed to remove replicas. At %s" % diracSE
              gLogger.error( errStr, errMsg[diracSE] )

        ################################################
        #  If the sub-request is a request to the online system to retransfer
        elif operation == 'reTransfer':
          gLogger.info( "RemovalAgent.execute: Attempting to execute %s sub-request." % operation )
          diracSE = subRequestAttributes['TargetSE']
          for subRequestFile in subRequestFiles:
            if subRequestFile['Status'] == 'Waiting':
              pfn = str( subRequestFile['PFN'] )
              lfn = str( subRequestFile['LFN'] )
              res = self.replicaManager.onlineRetransfer( diracSE, pfn )
              if res['OK']:
                if res['Value']['Successful'].has_key( pfn ):
                  gLogger.info( "RemovalAgent.execute: Successfully requested retransfer of %s." % pfn )
                  result = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done' )
                  if not result['OK']:
                    gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', lfn ) )
                  modified = True
                else:
                  errStr = "RemovalAgent.execute: Failed to request retransfer."
                  gLogger.error( errStr, "%s %s %s" % ( pfn, diracSE, res['Value']['Failed'][pfn] ) )
              else:
                errStr = "RemovalAgent.execute: Completely failed to request retransfer."
                gLogger.error( errStr, res['Message'] )
            else:
              gLogger.info( "RemovalAgent.execute: File already completed." )

        ################################################
        #  If the sub-request is none of the above types
        else:
          gLogger.error( "RemovalAgent.execute: Operation not supported.", operation )

        ################################################
        #  Determine whether there are any active files
        if oRequest.isSubRequestEmpty( ind, 'removal' )['Value']:
          oRequest.setSubRequestStatus( ind, 'removal', 'Done' )
          gMonitor.addMark( "Done", 1 )

      ################################################
      #  If the sub-request is already in terminal state
      else:
        gLogger.info( "RemovalAgent.execute:",
                      "Sub-request %s is status '%s' and not to be executed." %
                      ( ind, subRequestAttributes['Status'] ) )

    ################################################
    #  Generate the new request string after operation
    requestString = oRequest.toXML()['Value']
    res = self.requestDBClient.updateRequest( requestName, requestString, sourceServer )
    if not res['OK']:
      # Do not silently drop the updated state: the request will be retried
      # on a later cycle, but the failure must be visible in the logs
      gLogger.error( "RemovalAgent.execute: Failed to update request",
                     "%s : %s" % ( requestName, res['Message'] ) )

    if modified and jobID:
      result = self.finalizeRequest( requestName, jobID, sourceServer )

    return S_OK()

  def finalize( self ):
    """
    Called by the Agent framework to cleanly end execution.
    In this case this module will wait until all pending ThreadedJobs in the
    ThreadPool get executed
    """

    self.threadPool.processAllResults()
    return S_OK()
Beispiel #12
0
class RemovalAgent(AgentModule, RequestAgentMixIn):
    """
    This Agent takes care of executing "removal" request from the RequestManagement system
  """
    def __init__(self, *args):
        """
    Initialize the base class and define some extra data members
    """
        AgentModule.__init__(self, *args)
        self.requestDBClient = None
        self.replicaManager = None
        self.maxNumberOfThreads = 4
        self.maxRequestsInQueue = 100
        self.threadPool = None
        self.timeOutCounter = 0
        self.pendingRequests = True

    def initialize(self):
        """
      Called by the framework upon startup, before any cycle (execute method bellow)
    """
        self.requestDBClient = RequestClient()
        # the RequestAgentMixIn needs the capitalized version, until is is fixed keep this.
        self.RequestDBClient = self.requestDBClient
        self.replicaManager = ReplicaManager()

        gMonitor.registerActivity("Iteration", "Agent Loops", "RemovalAgent",
                                  "Loops/min", gMonitor.OP_SUM)
        gMonitor.registerActivity("Execute", "Request Processed",
                                  "RemovalAgent", "Requests/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("Done", "Request Completed", "RemovalAgent",
                                  "Requests/min", gMonitor.OP_SUM)

        gMonitor.registerActivity("PhysicalRemovalAtt",
                                  "Physical removals attempted",
                                  "RemovalAgent", "Removal/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("PhysicalRemovalDone",
                                  "Successful physical removals",
                                  "RemovalAgent", "Removal/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("PhysicalRemovalFail",
                                  "Failed physical removals", "RemovalAgent",
                                  "Removal/min", gMonitor.OP_SUM)
        gMonitor.registerActivity("PhysicalRemovalSize",
                                  "Physically removed size", "RemovalAgent",
                                  "Bytes", gMonitor.OP_ACUM)

        gMonitor.registerActivity("ReplicaRemovalAtt",
                                  "Replica removal attempted", "RemovalAgent",
                                  "Removal/min", gMonitor.OP_SUM)
        gMonitor.registerActivity("ReplicaRemovalDone",
                                  "Successful replica removals",
                                  "RemovalAgent", "Removal/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("ReplicaRemovalFail",
                                  "Failed replica removals", "RemovalAgent",
                                  "Removal/min", gMonitor.OP_SUM)

        gMonitor.registerActivity("RemoveFileAtt", "File removal attempted",
                                  "RemovalAgent", "Removal/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("RemoveFileDone", "File removal done",
                                  "RemovalAgent", "Removal/min",
                                  gMonitor.OP_SUM)
        gMonitor.registerActivity("RemoveFileFail", "File removal failed",
                                  "RemovalAgent", "Removal/min",
                                  gMonitor.OP_SUM)

        self.maxNumberOfThreads = self.am_getOption('NumberOfThreads',
                                                    self.maxNumberOfThreads)
        self.maxRequestsInQueue = self.am_getOption('RequestsInQueue',
                                                    self.maxRequestsInQueue)
        self.threadPool = ThreadPool(1, self.maxNumberOfThreads,
                                     self.maxRequestsInQueue)

        # Set the ThreadPool in daemon mode to process new ThreadedJobs as they are inserted
        self.threadPool.daemonize()

        self.maxRequests = self.am_getOption('MaxRequestsPerCycle', 1200.)

        # This sets the Default Proxy to used as that defined under
        # /Operations/Shifter/DataManager
        # the shifterProxy option in the Configuration can be used to change this default.
        self.am_setOption('shifterProxy', 'DataManager')

        return S_OK()

    def execute(self):
        """
    Fill the TreadPool with ThreadJobs
    """
        self.pendingRequests = True
        self.maxRequests = min(
            10000., self.am_getOption('MaxRequestsPerCycle', self.maxRequests))
        requestCounter = 0
        while self.pendingRequests:
            if requestCounter > self.maxRequests:
                break
            requestCounter += 1
            requestExecutor = ThreadedJob(self.executeRequest)
            ret = self.threadPool.queueJob(requestExecutor)
            if not ret['OK']:
                break
            time.sleep(0.1)

        if self.timeOutCounter:
            gLogger.error('Timeouts during removal execution:',
                          self.timeOutCounter)

        return S_OK()

    def executeRequest(self):
        """
    Do the actual work in the Thread
    """
        ################################################
        # Get a request from request DB
        gMonitor.addMark("Iteration", 1)
        res = self.requestDBClient.getRequest('removal')
        if not res['OK']:
            gLogger.info(
                "RemovalAgent.execute: Failed to get request from database.")
            return S_OK()
        elif not res['Value']:
            gLogger.info(
                "RemovalAgent.execute: No requests to be executed found.")
            self.pendingRequests = False
            return S_OK()
        requestString = res['Value']['RequestString']
        requestName = res['Value']['RequestName']
        sourceServer = res['Value']['Server']

        jobID = 0
        try:
            jobID = int(res['Value']['JobID'])
        except:
            gLogger.warn(
                "RemovalAgent.execute: JobID not present or malformed in request '%s', will use 0 instead."
                % requestName)

        gLogger.info("RemovalAgent.execute: Obtained request %s" % requestName)

        try:

            result = self.requestDBClient.getCurrentExecutionOrder(
                requestName, sourceServer)
            if result['OK']:
                currentOrder = result['Value']
            else:
                gLogger.error('Can not get the request execution order')
                self.requestDBClient.updateRequest(requestName, requestString,
                                                   sourceServer)
                return S_OK('Can not get the request execution order')

            oRequest = RequestContainer(request=requestString)

            ################################################
            # Find the number of sub-requests from the request
            res = oRequest.getNumSubRequests('removal')
            if not res['OK']:
                errStr = "RemovalAgent.execute: Failed to obtain number of removal subrequests."
                gLogger.error(errStr, res['Message'])
                return S_OK()
            gLogger.info("RemovalAgent.execute: Found %s sub requests." %
                         res['Value'])

            ################################################
            # For all the sub-requests in the request
            modified = False
            for ind in range(res['Value']):
                gMonitor.addMark("Execute", 1)
                gLogger.info(
                    "RemovalAgent.execute: Processing sub-request %s." % ind)
                subRequestAttributes = oRequest.getSubRequestAttributes(
                    ind, 'removal')['Value']
                subExecutionOrder = int(subRequestAttributes['ExecutionOrder'])
                subStatus = subRequestAttributes['Status']
                if subStatus == 'Waiting' and subExecutionOrder <= currentOrder:
                    subRequestFiles = oRequest.getSubRequestFiles(
                        ind, 'removal')['Value']
                    operation = subRequestAttributes['Operation']

                    ################################################
                    #  If the sub-request is a physical removal operation
                    if operation == 'physicalRemoval':
                        gLogger.info(
                            "RemovalAgent.execute: Attempting to execute %s sub-request."
                            % operation)
                        diracSEs = subRequestAttributes['TargetSE'].split(',')
                        physicalFiles = []
                        pfnToLfn = {}
                        for subRequestFile in subRequestFiles:
                            if subRequestFile['Status'] == 'Waiting':
                                pfn = str(subRequestFile['PFN'])
                                lfn = str(subRequestFile['LFN'])
                                pfnToLfn[pfn] = lfn
                                physicalFiles.append(pfn)
                        gMonitor.addMark('PhysicalRemovalAtt',
                                         len(physicalFiles))
                        failed = {}
                        errMsg = {}
                        for diracSE in diracSEs:
                            res = self.replicaManager.removeStorageFile(
                                physicalFiles, diracSE)
                            if res['OK']:
                                for pfn in res['Value']['Failed'].keys():
                                    if not failed.has_key(pfn):
                                        failed[pfn] = {}
                                    failed[pfn][diracSE] = res['Value'][
                                        'Failed'][pfn]
                            else:
                                errMsg[diracSE] = res['Message']
                                for pfn in physicalFiles:
                                    if not failed.has_key(pfn):
                                        failed[pfn] = {}
                                    failed[pfn][diracSE] = 'Completely'
                        # Now analyse the results
                        failedPFNs = failed.keys()
                        pfnsOK = [
                            pfn for pfn in physicalFiles
                            if not pfn in failedPFNs
                        ]
                        gMonitor.addMark('PhysicalRemovalDone', len(pfnsOK))
                        for pfn in pfnsOK:
                            gLogger.info(
                                "RemovalAgent.execute: Successfully removed %s at %s"
                                % (pfn, str(diracSEs)))
                            res = oRequest.setSubRequestFileAttributeValue(
                                ind, 'removal', pfnToLfn[pfn], 'Status',
                                'Done')
                            if not res['OK']:
                                gLogger.error(
                                    "RemovalAgent.execute: Error setting status to %s for %s"
                                    % ('Done', pfnToLfn[pfn]))
                            modified = True
                        if failed:
                            gMonitor.addMark('PhysicalRemovalFail',
                                             len(failedPFNs))
                            for pfn in failedPFNs:
                                for diracSE in failed[pfn].keys():
                                    if type(failed[pfn]
                                            [diracSE]) in StringTypes:
                                        if re.search(
                                                'no such file or directory',
                                                failed[pfn][diracSE].lower()):
                                            gLogger.info(
                                                "RemovalAgent.execute: File did not exist.",
                                                pfn)
                                            res = oRequest.setSubRequestFileAttributeValue(
                                                ind, 'removal', pfnToLfn[pfn],
                                                'Status', 'Done')
                                            if not res['OK']:
                                                gLogger.error(
                                                    "RemovalAgent.execute: Error setting status to %s for %s"
                                                    % ('Done', pfnToLfn[pfn]))
                                            modified = True
                                        else:
                                            gLogger.info(
                                                "RemovalAgent.execute: Failed to remove file.",
                                                "%s at %s - %s" %
                                                (pfn, diracSE,
                                                 failed[pfn][diracSE]))
                        if errMsg:
                            for diracSE in errMsg.keys():
                                errStr = "RemovalAgent.execute: Completely failed to remove replicas. At %s", diracSE
                                gLogger.error(errStr, errMsg[diracSE])

                    ################################################
                    #  If the sub-request is a physical removal operation
                    elif operation == 'removeFile':
                        gLogger.info(
                            "RemovalAgent.execute: Attempting to execute %s sub-request."
                            % operation)
                        lfns = []
                        for subRequestFile in subRequestFiles:
                            if subRequestFile['Status'] == 'Waiting':
                                lfn = str(subRequestFile['LFN'])
                                lfns.append(lfn)
                        gMonitor.addMark('RemoveFileAtt', len(lfns))
                        res = self.replicaManager.removeFile(lfns)
                        if res['OK']:
                            gMonitor.addMark(
                                'RemoveFileDone',
                                len(res['Value']['Successful'].keys()))
                            for lfn in res['Value']['Successful'].keys():
                                gLogger.info(
                                    "RemovalAgent.execute: Successfully removed %s."
                                    % lfn)
                                result = oRequest.setSubRequestFileAttributeValue(
                                    ind, 'removal', lfn, 'Status', 'Done')
                                if not result['OK']:
                                    gLogger.error(
                                        "RemovalAgent.execute: Error setting status to %s for %s"
                                        % ('Done', lfn))
                                modified = True
                            gMonitor.addMark(
                                'RemoveFileFail',
                                len(res['Value']['Failed'].keys()))
                            for lfn in res['Value']['Failed'].keys():
                                if type(res['Value']['Failed']
                                        [lfn]) in StringTypes:
                                    if re.search(
                                            'no such file or directory',
                                            res['Value']['Failed']
                                        [lfn].lower()):
                                        gLogger.info(
                                            "RemovalAgent.execute: File did not exist.",
                                            lfn)
                                        result = oRequest.setSubRequestFileAttributeValue(
                                            ind, 'removal', lfn, 'Status',
                                            'Done')
                                        if not result['OK']:
                                            gLogger.error(
                                                "RemovalAgent.execute: Error setting status to %s for %s"
                                                % ('Done', lfn))
                                        modified = True
                                    else:
                                        gLogger.info(
                                            "RemovalAgent.execute: Failed to remove file:",
                                            "%s %s" %
                                            (lfn, res['Value']['Failed'][lfn]))
                        else:
                            gMonitor.addMark('RemoveFileFail', len(lfns))
                            errStr = "RemovalAgent.execute: Completely failed to remove files files."
                            gLogger.error(errStr, res['Message'])

                    ################################################
                    #  If the sub-request is a physical removal operation
                    elif operation == 'replicaRemoval':
                        gLogger.info(
                            "RemovalAgent.execute: Attempting to execute %s sub-request."
                            % operation)
                        diracSEs = subRequestAttributes['TargetSE'].split(',')
                        lfns = []
                        for subRequestFile in subRequestFiles:
                            if subRequestFile['Status'] == 'Waiting':
                                lfn = str(subRequestFile['LFN'])
                                lfns.append(lfn)
                        gMonitor.addMark('ReplicaRemovalAtt', len(lfns))

                        failed = {}
                        errMsg = {}
                        for diracSE in diracSEs:
                            res = self.replicaManager.removeReplica(
                                diracSE, lfns)
                            if res['OK']:
                                for lfn in res['Value']['Failed'].keys():
                                    errorMessage = str(
                                        res['Value']['Failed'][lfn])
                                    if errorMessage.find(
                                            'Write access not permitted for this credential.'
                                    ) != -1:
                                        if self.__getProxyAndRemoveReplica(
                                                diracSE, lfn):
                                            continue
                                    if errorMessage.find(
                                            'seconds timeout for "__gfal_wrapper" call'
                                    ) != -1:
                                        self.timeOutCounter += 1
                                    if not failed.has_key(lfn):
                                        failed[lfn] = {}
                                    failed[lfn][diracSE] = res['Value'][
                                        'Failed'][lfn]
                            else:
                                errMsg[diracSE] = res['Message']
                                for lfn in lfns:
                                    if not failed.has_key(lfn):
                                        failed[lfn] = {}
                                    failed[lfn][diracSE] = 'Completely'
                        # Now analyse the results
                        failedLFNs = failed.keys()
                        lfnsOK = [lfn for lfn in lfns if not lfn in failedLFNs]
                        gMonitor.addMark('ReplicaRemovalDone', len(lfnsOK))
                        for lfn in lfnsOK:
                            gLogger.info(
                                "RemovalAgent.execute: Successfully removed %s at %s"
                                % (lfn, str(diracSEs)))
                            res = oRequest.setSubRequestFileAttributeValue(
                                ind, 'removal', lfn, 'Status', 'Done')
                            if not res['OK']:
                                gLogger.error(
                                    "RemovalAgent.execute: Error setting status to %s for %s"
                                    % ('Done', lfn))
                            modified = True
                        if failed:
                            gMonitor.addMark('PhysicalRemovalFail',
                                             len(failedLFNs))
                            for lfn in failedLFNs:
                                for diracSE in failed[lfn].keys():
                                    if type(failed[lfn]
                                            [diracSE]) in StringTypes:
                                        if re.search(
                                                'no such file or directory',
                                                failed[lfn][diracSE].lower()):
                                            gLogger.info(
                                                "RemovalAgent.execute: File did not exist.",
                                                lfn)
                                            res = oRequest.setSubRequestFileAttributeValue(
                                                ind, 'removal', lfn, 'Status',
                                                'Done')
                                            if not res['OK']:
                                                gLogger.error(
                                                    "RemovalAgent.execute: Error setting status to %s for %s"
                                                    % ('Done', lfn))
                                            modified = True
                                        else:
                                            gLogger.info(
                                                "RemovalAgent.execute: Failed to remove file.",
                                                "%s at %s - %s" %
                                                (lfn, diracSE,
                                                 failed[lfn][diracSE]))
                        if errMsg:
                            for diracSE in errMsg.keys():
                                errStr = "RemovalAgent.execute: Completely failed to remove replicas. At %s", diracSE
                                gLogger.error(errStr, errMsg[diracSE])

                    ################################################
                    #  If the sub-request is a request to the online system to retransfer
                    elif operation == 'reTransfer':
                        gLogger.info(
                            "RemovalAgent.execute: Attempting to execute %s sub-request."
                            % operation)
                        diracSE = subRequestAttributes['TargetSE']
                        for subRequestFile in subRequestFiles:
                            if subRequestFile['Status'] == 'Waiting':
                                pfn = str(subRequestFile['PFN'])
                                lfn = str(subRequestFile['LFN'])
                                res = self.replicaManager.onlineRetransfer(
                                    diracSE, pfn)
                                if res['OK']:
                                    if res['Value']['Successful'].has_key(pfn):
                                        gLogger.info(
                                            "RemovalAgent.execute: Successfully requested retransfer of %s."
                                            % pfn)
                                        result = oRequest.setSubRequestFileAttributeValue(
                                            ind, 'removal', lfn, 'Status',
                                            'Done')
                                        if not result['OK']:
                                            gLogger.error(
                                                "RemovalAgent.execute: Error setting status to %s for %s"
                                                % ('Done', lfn))
                                        modified = True
                                    else:
                                        errStr = "RemovalAgent.execute: Failed to request retransfer."
                                        gLogger.error(
                                            errStr, "%s %s %s" %
                                            (pfn, diracSE,
                                             res['Value']['Failed'][pfn]))
                                else:
                                    errStr = "RemovalAgent.execute: Completely failed to request retransfer."
                                    gLogger.error(errStr, res['Message'])
                            else:
                                gLogger.info(
                                    "RemovalAgent.execute: File already completed."
                                )

                    ################################################
                    #  If the sub-request is none of the above types
                    else:
                        gLogger.error(
                            "RemovalAgent.execute: Operation not supported.",
                            operation)

                    ################################################
                    #  Determine whether there are any active files
                    if oRequest.isSubRequestEmpty(ind, 'removal')['Value']:
                        oRequest.setSubRequestStatus(ind, 'removal', 'Done')
                        gMonitor.addMark("Done", 1)

                ################################################
                #  If the sub-request is already in terminal state
                else:
                    gLogger.info(
                        "RemovalAgent.execute:",
                        "Sub-request %s is status '%s' and not to be executed."
                        % (ind, subRequestAttributes['Status']))

            ################################################
            #  Generate the new request string after operation
            newrequestString = oRequest.toXML()['Value']
        except:
            # if something fails return the original request back to the server
            res = self.requestDBClient.updateRequest(requestName,
                                                     requestString,
                                                     sourceServer)
            return S_OK()

        res = self.requestDBClient.updateRequest(requestName, newrequestString,
                                                 sourceServer)

        if modified and jobID:
            result = self.finalizeRequest(requestName, jobID, sourceServer)

        return S_OK()

    def __getProxyAndRemoveReplica(self, diracSE, lfn):
        """
    Download a limited VOMS proxy for the owner of the file and retry the
    replica removal with it.

    :param diracSE: DIRAC SE name holding the replica
    :param lfn: LFN of the replica to remove
    :return: True if the removal succeeded, False otherwise
    """

        # The owner role/DN come from the catalog metadata of the file.
        result = self.replicaManager.getCatalogDirectoryMetadata(
            lfn, singleFile=True)
        if not result['OK']:
            gLogger.error("Could not get metadata info", result['Message'])
            return False
        ownerRole = result['Value']['OwnerRole']
        ownerDN = result['Value']['OwnerDN']
        # VOMS roles are expected to be absolute ("/role"); normalise if needed.
        if ownerRole[0] != "/":
            ownerRole = "/%s" % ownerRole

        # Try every group carrying this VOMS attribute until a proxy download works.
        userProxy = ''
        for ownerGroup in Registry.getGroupsWithVOMSAttribute(ownerRole):
            result = gProxyManager.downloadVOMSProxy(
                ownerDN,
                ownerGroup,
                limited=True,
                requiredVOMSAttribute=ownerRole)
            if not result['OK']:
                gLogger.verbose(
                    'Failed to retrieve voms proxy for %s : %s:' %
                    (ownerDN, ownerRole), result['Message'])
                continue
            userProxy = result['Value']
            gLogger.verbose("Got proxy for %s@%s [%s]" %
                            (ownerDN, ownerGroup, ownerRole))
            break
        if not userProxy:
            return False

        result = userProxy.dumpAllToFile()
        if not result['OK']:
            gLogger.verbose(result['Message'])
            return False

        upFile = result['Value']
        # BUGFIX: os.environ['X509_USER_PROXY'] raised KeyError when the
        # variable was not set. Remember None in that case so the original
        # (unset) state can be restored afterwards.
        prevProxyEnv = os.environ.get('X509_USER_PROXY')
        os.environ['X509_USER_PROXY'] = upFile

        try:
            res = self.replicaManager.removeReplica(diracSE, lfn)
            if res['OK'] and lfn in res['Value']['Successful']:
                gLogger.verbose('Removed %s from %s' % (lfn, diracSE))
                return True
        finally:
            # Always restore the previous environment and drop the proxy file.
            if prevProxyEnv is None:
                os.environ.pop('X509_USER_PROXY', None)
            else:
                os.environ['X509_USER_PROXY'] = prevProxyEnv
            os.unlink(upFile)

        return False

    def finalize(self):
        """
    Called by the Agent framework to cleanly end execution.

    Blocks until every pending ThreadedJob queued in the ThreadPool has been
    processed, then reports success.
    """

        self.threadPool.processAllResults()
        return S_OK()
Beispiel #13
0
class FTSMonitorAgent( AgentModule ):
  """
  .. class:: FTSMonitorAgent

  Monitor submitted FTS jobs.
  """
  # # transfer DB handle
  transferDB = None
  # # thread pool
  threadPool = None
  # # min threads
  minThreads = 1
  # # max threads
  maxThreads = 10

  # # regexp patterns signalling that the source file is missing
  missingSourceErrors = [
    re.compile( r"SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] Failed" ),
    re.compile( r"SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] No such file or directory" ),
    re.compile( r"SOURCE error during PREPARATION phase: \[INVALID_PATH\] Failed" ),
    re.compile( r"SOURCE error during PREPARATION phase: \[INVALID_PATH\] The requested file either does not exist" ),
    re.compile( r"TRANSFER error during TRANSFER phase: \[INVALID_PATH\] the server sent an error response: 500 500"\
               " Command failed. : open error: No such file or directory" ),
    re.compile( r"SOURCE error during TRANSFER_PREPARATION phase: \[USER_ERROR\] source file doesnt exist" ) ]

  def initialize( self ):
    """ agent's initialisation

    Creates the TransferDB handle and the monitoring thread pool.
    """
    self.transferDB = TransferDB()
    self.am_setOption( "shifterProxy", "DataManager" )
    self.minThreads = self.am_getOption( "MinThreads", self.minThreads )
    self.maxThreads = self.am_getOption( "MaxThreads", self.maxThreads )
    # normalise the configured bounds: take absolute values and make sure
    # min <= max regardless of how the options were set
    minmax = ( abs( self.minThreads ), abs( self.maxThreads ) )
    self.minThreads, self.maxThreads = min( minmax ), max( minmax )
    self.log.info( "ThreadPool min threads = %s" % self.minThreads )
    self.log.info( "ThreadPool max threads = %s" % self.maxThreads )
    self.threadPool = ThreadPool( self.minThreads, self.maxThreads )
    self.threadPool.daemonize()
    return S_OK()

  def execute( self ):
    """ push jobs to the thread pool

    Reads all FTS requests to monitor from the TransferDB and queues one
    monitoring job per request.
    """
    self.log.info( "Obtaining requests to monitor" )
    res = self.transferDB.getFTSReq()
    if not res["OK"]:
      self.log.error( "Failed to get FTS requests", res['Message'] )
      return res
    if not res["Value"]:
      self.log.info( "No FTS requests found to monitor." )
      return S_OK()
    ftsReqs = res["Value"]
    self.log.info( "Found %s FTS jobs" % len( ftsReqs ) )
    i = 1
    for ftsJob in ftsReqs:
      # retry queueing until the thread pool accepts the job
      while True:
        self.log.debug( "submitting FTS Job %s FTSReqID=%s to monitor" % ( i, ftsJob["FTSReqID"] ) )
        ret = self.threadPool.generateJobAndQueueIt( self.monitorTransfer, args = ( ftsJob, ), )
        if ret["OK"]:
          i += 1
          break
        # # sleep 1 second to proceed
        time.sleep( 1 )

    self.threadPool.processAllResults()
    return S_OK()

  def ftsJobExpired( self, ftsReqID, channelID ):
    """ clean up when FTS job had expired on the server side

    :param int ftsReqID: FTSReq.FTSReqID
    :param int channelID: FTSReq.ChannelID
    """
    log = gLogger.getSubLogger( "@%s" % str( ftsReqID ) )
    fileIDs = self.transferDB.getFTSReqFileIDs( ftsReqID )
    if not fileIDs["OK"]:
      log.error( "Unable to retrieve FileIDs associated to %s request" % ftsReqID )
      return fileIDs
    fileIDs = fileIDs["Value"]

    # # update FileToFTS table, this is just a clean up, no worry if something goes wrong
    for fileID in fileIDs:
      fileStatus = self.transferDB.setFileToFTSFileAttribute( ftsReqID, fileID,
                                                              "Status", "Failed" )
      if not fileStatus["OK"]:
        log.error( "Unable to set FileToFTS status to 'Failed' for FileID %s: %s" % ( fileID,
                                                                                     fileStatus["Message"] ) )

      failReason = self.transferDB.setFileToFTSFileAttribute( ftsReqID, fileID,
                                                              "Reason", "FTS job expired on server" )
      if not failReason["OK"]:
        log.error( "Unable to set FileToFTS reason for FileID %s: %s" % ( fileID,
                                                                         failReason["Message"] ) )
    # # update Channel table so the files are retried
    resetChannels = self.transferDB.resetFileChannelStatus( channelID, fileIDs )
    if not resetChannels["OK"]:
      log.error( "Failed to reset Channel table for files to retry" )
      return resetChannels

    # # update FTSReq table so this job is not monitored again
    log.info( "Setting FTS request status to 'Finished'" )
    ftsReqStatus = self.transferDB.setFTSReqStatus( ftsReqID, "Finished" )
    if not ftsReqStatus["OK"]:
      log.error( "Failed update FTS Request status", ftsReqStatus["Message"] )
      return ftsReqStatus

    # # if we land here, everything should be OK
    return S_OK()

  def monitorTransfer( self, ftsReqDict ):
    """ monitors transfer obtained from TransferDB

    Performs a summary update of the FTS request; if the request turns out
    to be terminal, delegates the final processing to :meth:`terminalRequest`.

    :param dict ftsReqDict: FTS job dictionary
    """
    ftsReqID = ftsReqDict.get( "FTSReqID" )
    ftsGUID = ftsReqDict.get( "FTSGuid" )
    ftsServer = ftsReqDict.get( "FTSServer" )
    channelID = ftsReqDict.get( "ChannelID" )
    sourceSE = ftsReqDict.get( "SourceSE" )
    targetSE = ftsReqDict.get( "TargetSE" )

    oFTSRequest = FTSRequest()
    oFTSRequest.setFTSServer( ftsServer )
    oFTSRequest.setFTSGUID( ftsGUID )
    oFTSRequest.setSourceSE( sourceSE )
    oFTSRequest.setTargetSE( targetSE )

    log = gLogger.getSubLogger( "@%s" % str( ftsReqID ) )

    #########################################################################
    # Perform summary update of the FTS Request and update FTSReq entries.
    log.info( "Perform summary update of the FTS Request" )
    infoStr = [ "glite-transfer-status -s %s -l %s" % ( ftsServer, ftsGUID ) ]
    infoStr.append( "FTS GUID:   %s" % ftsGUID )
    infoStr.append( "FTS Server: %s" % ftsServer )
    log.info( "\n".join( infoStr ) )
    res = oFTSRequest.summary()
    self.transferDB.setFTSReqLastMonitor( ftsReqID )
    if not res["OK"]:
      log.error( "Failed to update the FTS request summary", res["Message"] )
      if "getTransferJobSummary2: Not authorised to query request" in res["Message"]:
        log.error( "FTS job is not existing at the FTS server anymore, will clean it up on TransferDB side" )
        cleanUp = self.ftsJobExpired( ftsReqID, channelID )
        if not cleanUp["OK"]:
          log.error( cleanUp["Message"] )
        return cleanUp
      return res

    res = oFTSRequest.dumpSummary()
    if not res['OK']:
      log.error( "Failed to get FTS request summary", res["Message"] )
      return res
    log.info( res['Value'] )
    res = oFTSRequest.getPercentageComplete()
    if not res['OK']:
      log.error( "Failed to get FTS percentage complete", res["Message"] )
      return res
    log.info( 'FTS Request found to be %.1f percent complete' % res["Value"] )
    self.transferDB.setFTSReqAttribute( ftsReqID, "PercentageComplete", res["Value"] )
    self.transferDB.addLoggingEvent( ftsReqID, res["Value"] )

    #########################################################################
    # Update the information in the TransferDB if the transfer is terminal.
    res = oFTSRequest.isRequestTerminal()
    if not res["OK"]:
      log.error( "Failed to determine whether FTS request terminal", res["Message"] )
      return res
    if not res["Value"]:
      return S_OK()
    # # request is terminal
    return self.terminalRequest( oFTSRequest, ftsReqID, channelID, sourceSE )

  def terminalRequest( self, oFTSRequest, ftsReqID, channelID, sourceSE ):
    """ process terminal FTS job

    :param FTSRequest oFTSRequest: FTSRequest instance
    :param int ftsReqID: FTSReq.FTSReqID
    :param int channelID: FTSReq.ChannelID
    :param str sourceSE: FTSReq.SourceSE
    """
    log = gLogger.getSubLogger( "@%s" % ftsReqID )

    log.info( "FTS Request found to be terminal, updating file states" )
    #########################################################################
    # Get the LFNS associated to the FTS request
    log.info( "Obtaining the LFNs associated to this request" )
    res = self.transferDB.getFTSReqLFNs( ftsReqID, channelID, sourceSE )
    if not res["OK"]:
      log.error( "Failed to obtain FTS request LFNs", res['Message'] )
      return res
    files = res["Value"]
    if not files:
      log.error( "No files present for transfer" )
      return S_ERROR( "No files were found in the DB" )

    lfns = files.keys()
    log.debug( "Obtained %s files" % len( lfns ) )
    for lfn in lfns:
      oFTSRequest.setLFN( lfn )

    res = oFTSRequest.monitor()
    if not res["OK"]:
      log.error( "Failed to perform detailed monitoring of FTS request", res["Message"] )
      return res
    res = oFTSRequest.getFailed()
    if not res["OK"]:
      log.error( "Failed to obtained failed files for FTS request", res["Message"] )
      return res
    failedFiles = res["Value"]
    res = oFTSRequest.getDone()
    if not res["OK"]:
      log.error( "Failed to obtained successful files for FTS request", res["Message"] )
      return res
    completedFiles = res["Value"]

    # An LFN can be included more than once if it was entered into more than one Request.
    # FTS will only do the transfer once. We need to identify all FileIDs
    res = self.transferDB.getFTSReqFileIDs( ftsReqID )
    if not res["OK"]:
      log.error( "Failed to get FileIDs associated to FTS Request", res["Message"] )
      return res
    fileIDs = res["Value"]
    res = self.transferDB.getAttributesForFilesList( fileIDs, ["LFN"] )
    if not res["OK"]:
      log.error( "Failed to get LFNs associated to FTS Request", res["Message"] )
      return res
    fileIDDict = res["Value"]

    fileToFTSUpdates = []
    completedFileIDs = []
    filesToRetry = []
    filesToFail = []

    for fileID, fileDict in fileIDDict.items():
      lfn = fileDict['LFN']
      if lfn in completedFiles:
        completedFileIDs.append( fileID )
        transferTime = 0
        res = oFTSRequest.getTransferTime( lfn )
        if res["OK"]:
          transferTime = res["Value"]
        fileToFTSUpdates.append( ( fileID, "Completed", "", 0, transferTime ) )

      if lfn in failedFiles:
        failReason = ""
        res = oFTSRequest.getFailReason( lfn )
        if res["OK"]:
          failReason = res["Value"]
        # checksum mismatch: reschedule the file, do not retry on this channel
        if "Source file/user checksum mismatch" in failReason:
          filesToFail.append( fileID )
          continue
        if self.missingSource( failReason ):
          log.error( "The source SURL does not exist.", "%s %s" % ( lfn, oFTSRequest.getSourceSURL( lfn ) ) )
          filesToFail.append( fileID )
        else:
          filesToRetry.append( fileID )
        log.error( "Failed to replicate file on channel.", "%s %s" % ( channelID, failReason ) )
        fileToFTSUpdates.append( ( fileID, "Failed", failReason, 0, 0 ) )

    # # update TransferDB.FileToFTS table
    updateFileToFTS = self.updateFileToFTS( ftsReqID, channelID,
                                            filesToRetry, filesToFail,
                                            completedFileIDs, fileToFTSUpdates )

    if updateFileToFTS["OK"] and updateFileToFTS["Value"]:
      res = oFTSRequest.finalize()
      if not res["OK"]:
        log.error( "Failed to perform the finalization for the FTS request", res["Message"] )
        return res

      log.info( 'Adding logging event for FTS request' )
      # Now set the FTSReq status to terminal so that it is not monitored again
      res = self.transferDB.addLoggingEvent( ftsReqID, 'Finished' )
      if not res['OK']:
        log.error( 'Failed to add logging event for FTS Request', res['Message'] )

      # update TransferDB.FileToCat table
      updateFileToCat = self.updateFileToCat( oFTSRequest, channelID, fileIDDict, completedFiles, filesToFail )
      if not updateFileToCat["OK"]:
        log.error( updateFileToCat["Message"] )

      log.debug( "Updating FTS request status" )
      res = self.transferDB.setFTSReqStatus( ftsReqID, 'Finished' )
      if not res['OK']:
        log.error( 'Failed update FTS Request status', res['Message'] )
    return S_OK()


  def updateFileToFTS( self, ftsReqID, channelID, filesToRetry, filesToFail, completedFileIDs, fileToFTSUpdates ):
    """ update TransferDB.FileToFTS table for finished request

    :param int ftsReqID: FTSReq.FTSReqID
    :param int channelID: FTSReq.ChannelID
    :param list filesToRetry: FileIDs to retry
    :param list filesToFail: FileIDs for failed files
    :param list completedFileIDs: files completed
    :param list fileToFTSUpdates: tuples of ( fileID, status, failReason,
        0, transferTime ) as built in :meth:`terminalRequest`
        (4th element presumably a retry counter -- TODO confirm)
    :return: S_OK( allUpdated ) where allUpdated tells whether every DB
        update succeeded
    """
    log = gLogger.getSubLogger( "@%s" % ftsReqID )

    allUpdated = True

    res = self.transferDB.resetFileChannelStatus( channelID, filesToRetry ) if filesToRetry else S_OK()
    if not res["OK"]:
      log.error( "Failed to update the Channel table for file to retry.", res["Message"] )
      allUpdated = False

    for fileID in filesToFail:
      log.info( "Updating the Channel table for files to reschedule" )
      res = self.transferDB.setFileToReschedule( fileID )
      if not res["OK"]:
        log.error( "Failed to update Channel table for failed files.", res["Message"] )
        allUpdated = False
      elif res["Value"] == "max reschedule attempt reached":
        # BUGFIX: the original format string had no placeholder, so the
        # "%" operator raised TypeError instead of logging the message
        log.error( "setting Channel status to 'Failed' : %s" % res["Value"] )
        res = self.transferDB.setFileChannelStatus( channelID, fileID, 'Failed' )
        if not res["OK"]:
          log.error( "Failed to update Channel table for failed files.", res["Message"] )
          allUpdated = False

    if completedFileIDs:
      res = self.transferDB.updateCompletedChannelStatus( channelID, completedFileIDs )
      if not res["OK"]:
        log.error( "Failed to update the Channel table for successful files.", res["Message"] )
        allUpdated = False
      res = self.transferDB.updateAncestorChannelStatus( channelID, completedFileIDs )
      if not res["OK"]:
        log.error( 'Failed to update the Channel table for ancestors of successful files.', res['Message'] )
        allUpdated = False

    if fileToFTSUpdates:
      res = self.transferDB.setFileToFTSFileAttributes( ftsReqID, channelID, fileToFTSUpdates )
      if not res["OK"]:
        log.error( "Failed to update the FileToFTS table for files.", res["Message"] )
        allUpdated = False

    return S_OK( allUpdated )

  def updateFileToCat( self, oFTSRequest, channelID, fileIDDict, completedFiles, filesToFail ):
    """ update TransferDB.FileToCat table for finished request

    :param FTSRequest oFTSRequest: FTSRequest instance
    :param int channelID: FTSReq.ChannelID
    :param dict fileIDDict: fileID -> file attributes dictionary
    :param list completedFiles: LFNs transferred successfully
    :param list filesToFail: FileIDs for failed files
    """
    # NOTE(review): res['OK'] is not checked here -- assumes
    # getFailedRegistrations cannot fail; confirm against FTSRequest
    res = oFTSRequest.getFailedRegistrations()
    failedRegistrations = res["Value"]
    regFailedFileIDs = []
    regDoneFileIDs = []
    regForgetFileIDs = []
    for fileID, fileDict in fileIDDict.items():
      lfn = fileDict['LFN']

      if lfn in failedRegistrations:
        regFailedFileIDs.append( fileID )
        # if the LFN appears more than once, FileToCat needs to be reset only once
        del failedRegistrations[lfn]
      elif lfn in completedFiles:
        regDoneFileIDs.append( fileID )
      elif fileID in filesToFail:
        regForgetFileIDs.append( fileID )

    res = self.transferDB.setRegistrationWaiting( channelID, regFailedFileIDs ) if regFailedFileIDs else S_OK()
    if not res["OK"]:
      res["Message"] = "Failed to reset entries in FileToCat: %s" % res["Message"]
      return res

    res = self.transferDB.setRegistrationDone( channelID, regDoneFileIDs ) if regDoneFileIDs else S_OK()
    if not res["OK"]:
      res["Message"] = "Failed to set entries Done in FileToCat: %s" % res["Message"]
      return res

    # This entries could also be set to Failed, but currently there is no method to do so.
    res = self.transferDB.setRegistrationDone( channelID, regForgetFileIDs ) if regForgetFileIDs else S_OK()
    if not res["OK"]:
      res["Message"] = "Failed to set entries Done in FileToCat: %s" % res["Message"]
      return res

    return S_OK()

  @classmethod
  def missingSource( cls, failReason ):
    """ check if the message sent by the FTS server concerns a missing source file

    :param str failReason: message sent by FTS server
    :return: truthy if any known missing-source pattern matches, falsy otherwise
    """
    return any( error.search( failReason ) for error in cls.missingSourceErrors )
Beispiel #14
0
class RemovalAgent( AgentModule, RequestAgentMixIn ):
  """
    This Agent takes care of executing "removal" request from the RequestManagement system
  """

  def __init__( self, *args ):
    """
    Initialize the base class and define some extra data members
    """
    AgentModule.__init__( self, *args )
    self.requestDBClient = None      # RequestClient, created in initialize()
    self.replicaManager = None       # ReplicaManager, created in initialize()
    self.maxNumberOfThreads = 4      # default, overridden by the 'NumberOfThreads' option
    self.maxRequestsInQueue = 100    # default, overridden by the 'RequestsInQueue' option
    self.threadPool = None           # ThreadPool, created in initialize()
    self.timeOutCounter = 0          # counts gfal timeouts observed during replica removals
    self.pendingRequests = True      # cleared when the request DB returns no more work

  def initialize( self ):
    """
      Called by the framework upon startup, before any cycle (execute method bellow).

      Creates the clients, registers all monitoring activities and sets up the
      daemonized thread pool that runs executeRequest().
    """
    self.requestDBClient = RequestClient()
    # the RequestAgentMixIn needs the capitalized version, until is is fixed keep this.
    self.RequestDBClient = self.requestDBClient
    self.replicaManager = ReplicaManager()

    # Per-cycle / per-request counters
    gMonitor.registerActivity( "Iteration", "Agent Loops", "RemovalAgent", "Loops/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "Execute", "Request Processed", "RemovalAgent", "Requests/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "Done", "Request Completed", "RemovalAgent", "Requests/min", gMonitor.OP_SUM )

    # Counters for the 'physicalRemoval' operation
    gMonitor.registerActivity( "PhysicalRemovalAtt", "Physical removals attempted",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "PhysicalRemovalDone", "Successful physical removals",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "PhysicalRemovalFail", "Failed physical removals",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "PhysicalRemovalSize", "Physically removed size",
                               "RemovalAgent", "Bytes", gMonitor.OP_ACUM )

    # Counters for the 'replicaRemoval' operation
    gMonitor.registerActivity( "ReplicaRemovalAtt", "Replica removal attempted",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "ReplicaRemovalDone", "Successful replica removals",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "ReplicaRemovalFail", "Failed replica removals",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )

    # Counters for the 'removeFile' operation
    gMonitor.registerActivity( "RemoveFileAtt", "File removal attempted",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "RemoveFileDone", "File removal done",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )
    gMonitor.registerActivity( "RemoveFileFail", "File removal failed",
                               "RemovalAgent", "Removal/min", gMonitor.OP_SUM )

    self.maxNumberOfThreads = self.am_getOption( 'NumberOfThreads', self.maxNumberOfThreads )
    self.maxRequestsInQueue = self.am_getOption( 'RequestsInQueue', self.maxRequestsInQueue )
    self.threadPool = ThreadPool( 1, self.maxNumberOfThreads, self.maxRequestsInQueue )

    # Set the ThreadPool in daemon mode to process new ThreadedJobs as they are inserted
    self.threadPool.daemonize()

    # This sets the Default Proxy to used as that defined under
    # /Operations/Shifter/DataManager
    # the shifterProxy option in the Configuration can be used to change this default.
    self.am_setOption( 'shifterProxy', 'DataManager' )

    return S_OK()

  def execute( self ):
    """
    Fill the TreadPool with ThreadJobs.

    Keeps queueing executeRequest jobs until either the request DB runs dry
    (executeRequest clears self.pendingRequests) or the pool queue is full
    (queueJob returns an error).
    """
    self.pendingRequests = True
    while self.pendingRequests:
      requestExecutor = ThreadedJob( self.executeRequest )
      ret = self.threadPool.queueJob( requestExecutor )
      if not ret['OK']:
        # Thread pool queue is full; stop submitting for this cycle
        break
      # small throttle between submissions
      time.sleep( 0.1 )

    if self.timeOutCounter:
      gLogger.error( 'Timeouts during removal execution:', self.timeOutCounter )

    return S_OK()

  def executeRequest( self ):
    """
    Do the actual work in the Thread.

    Pops one 'removal' request from the RequestDB, executes all of its waiting
    sub-requests (physicalRemoval, removeFile, replicaRemoval, reTransfer),
    and writes the updated request back to the server.
    """
    ################################################
    # Get a request from request DB
    gMonitor.addMark( "Iteration", 1 )
    res = self.requestDBClient.getRequest( 'removal' )
    if not res['OK']:
      gLogger.info( "RemovalAgent.execute: Failed to get request from database." )
      return S_OK()
    elif not res['Value']:
      gLogger.info( "RemovalAgent.execute: No requests to be executed found." )
      # Signals execute() to stop queueing further jobs this cycle
      self.pendingRequests = False
      return S_OK()
    requestString = res['Value']['RequestString']
    requestName = res['Value']['RequestName']
    sourceServer = res['Value']['Server']
    try:
      jobID = int( res['Value']['JobID'] )
    except ValueError:
      # No (or non-numeric) JobID attached to the request
      jobID = 0
    gLogger.info( "RemovalAgent.execute: Obtained request %s" % requestName )

    try:

      result = self.requestDBClient.getCurrentExecutionOrder( requestName, sourceServer )
      if result['OK']:
        currentOrder = result['Value']
      else:
        gLogger.error( 'Can not get the request execution order' )
        # Give the request back untouched so it can be retried
        self.requestDBClient.updateRequest( requestName, requestString, sourceServer )
        return S_OK( 'Can not get the request execution order' )

      oRequest = RequestContainer( request = requestString )

      ################################################
      # Find the number of sub-requests from the request
      res = oRequest.getNumSubRequests( 'removal' )
      if not res['OK']:
        errStr = "RemovalAgent.execute: Failed to obtain number of removal subrequests."
        gLogger.error( errStr, res['Message'] )
        return S_OK()
      gLogger.info( "RemovalAgent.execute: Found %s sub requests." % res['Value'] )

      ################################################
      # For all the sub-requests in the request
      modified = False
      for ind in range( res['Value'] ):
        gMonitor.addMark( "Execute", 1 )
        gLogger.info( "RemovalAgent.execute: Processing sub-request %s." % ind )
        subRequestAttributes = oRequest.getSubRequestAttributes( ind, 'removal' )['Value']
        subExecutionOrder = int( subRequestAttributes['ExecutionOrder'] )
        subStatus = subRequestAttributes['Status']
        # Only waiting sub-requests whose execution order has been reached are run
        if subStatus == 'Waiting' and subExecutionOrder <= currentOrder:
          subRequestFiles = oRequest.getSubRequestFiles( ind, 'removal' )['Value']
          operation = subRequestAttributes['Operation']

          ################################################
          #  If the sub-request is a physical removal operation
          if operation == 'physicalRemoval':
            gLogger.info( "RemovalAgent.execute: Attempting to execute %s sub-request." % operation )
            diracSEs = subRequestAttributes['TargetSE'].split( ',' )
            physicalFiles = []
            pfnToLfn = {}
            for subRequestFile in subRequestFiles:
              if subRequestFile['Status'] == 'Waiting':
                pfn = str( subRequestFile['PFN'] )
                lfn = str( subRequestFile['LFN'] )
                pfnToLfn[pfn] = lfn
                physicalFiles.append( pfn )
            gMonitor.addMark( 'PhysicalRemovalAtt', len( physicalFiles ) )
            # failed: pfn -> { SE -> reason }; errMsg: SE -> error message
            failed = {}
            errMsg = {}
            for diracSE in diracSEs:
              res = self.replicaManager.removeStorageFile( physicalFiles, diracSE )
              if res['OK']:
                for pfn in res['Value']['Failed'].keys():
                  if not failed.has_key( pfn ):
                    failed[pfn] = {}
                  failed[pfn][diracSE] = res['Value']['Failed'][pfn]
              else:
                # The whole call failed: mark every PFN as failed at this SE
                errMsg[diracSE] = res['Message']
                for pfn in physicalFiles:
                  if not failed.has_key( pfn ):
                    failed[pfn] = {}
                  failed[pfn][diracSE] = 'Completely'
            # Now analyse the results
            failedPFNs = failed.keys()
            pfnsOK = [pfn for pfn in physicalFiles if not pfn in failedPFNs]
            gMonitor.addMark( 'PhysicalRemovalDone', len( pfnsOK ) )
            for pfn in pfnsOK:
              gLogger.info( "RemovalAgent.execute: Successfully removed %s at %s" % ( pfn, str( diracSEs ) ) )
              res = oRequest.setSubRequestFileAttributeValue( ind, 'removal', pfnToLfn[pfn], 'Status', 'Done' )
              if not res['OK']:
                gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', pfnToLfn[pfn] ) )
              modified = True
            if failed:
              gMonitor.addMark( 'PhysicalRemovalFail', len( failedPFNs ) )
              for pfn in failedPFNs:
                for diracSE in failed[pfn].keys():
                  if type( failed[pfn][diracSE] ) in StringTypes:
                    # A file that no longer exists counts as successfully removed
                    if re.search( 'no such file or directory', failed[pfn][diracSE].lower() ):
                      gLogger.info( "RemovalAgent.execute: File did not exist.", pfn )
                      res = oRequest.setSubRequestFileAttributeValue( ind, 'removal', pfnToLfn[pfn], 'Status', 'Done' )
                      if not res['OK']:
                        gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', pfnToLfn[pfn] ) )
                      modified = True
                    else:
                      gLogger.info( "RemovalAgent.execute: Failed to remove file.", "%s at %s - %s" % ( pfn, diracSE, failed[pfn][diracSE] ) )
            if errMsg:
              for diracSE in errMsg.keys():
                # NOTE(review): the trailing comma makes errStr a tuple, not a
                # formatted string — '%' was probably intended; confirm before fixing
                errStr = "RemovalAgent.execute: Completely failed to remove replicas. At %s", diracSE
                gLogger.error( errStr, errMsg[diracSE] )


          ################################################
          #  If the sub-request is a file removal operation
          elif operation == 'removeFile':
            gLogger.info( "RemovalAgent.execute: Attempting to execute %s sub-request." % operation )
            lfns = []
            for subRequestFile in subRequestFiles:
              if subRequestFile['Status'] == 'Waiting':
                lfn = str( subRequestFile['LFN'] )
                lfns.append( lfn )
            gMonitor.addMark( 'RemoveFileAtt', len( lfns ) )
            res = self.replicaManager.removeFile( lfns )
            if res['OK']:
              gMonitor.addMark( 'RemoveFileDone', len( res['Value']['Successful'].keys() ) )
              for lfn in res['Value']['Successful'].keys():
                gLogger.info( "RemovalAgent.execute: Successfully removed %s." % lfn )
                result = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done' )
                if not result['OK']:
                  gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', lfn ) )
                modified = True
              gMonitor.addMark( 'RemoveFileFail', len( res['Value']['Failed'].keys() ) )
              for lfn in res['Value']['Failed'].keys():
                if type( res['Value']['Failed'][lfn] ) in StringTypes:
                  # Missing files are treated as already removed
                  if re.search( 'no such file or directory', res['Value']['Failed'][lfn].lower() ):
                    gLogger.info( "RemovalAgent.execute: File did not exist.", lfn )
                    result = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done' )
                    if not result['OK']:
                      gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', lfn ) )
                    modified = True
                  else:
                    gLogger.info( "RemovalAgent.execute: Failed to remove file:",
                                  "%s %s" % ( lfn, res['Value']['Failed'][lfn] ) )
            else:
              gMonitor.addMark( 'RemoveFileFail', len( lfns ) )
              errStr = "RemovalAgent.execute: Completely failed to remove files files."
              gLogger.error( errStr, res['Message'] )

          ################################################
          #  If the sub-request is a replica removal operation
          elif operation == 'replicaRemoval':
            gLogger.info( "RemovalAgent.execute: Attempting to execute %s sub-request." % operation )
            diracSEs = subRequestAttributes['TargetSE'].split( ',' )
            lfns = []
            for subRequestFile in subRequestFiles:
              if subRequestFile['Status'] == 'Waiting':
                lfn = str( subRequestFile['LFN'] )
                lfns.append( lfn )
            gMonitor.addMark( 'ReplicaRemovalAtt', len( lfns ) )

            # failed: lfn -> { SE -> reason }; errMsg: SE -> error message
            failed = {}
            errMsg = {}
            for diracSE in diracSEs:
              res = self.replicaManager.removeReplica( diracSE, lfns )
              if res['OK']:
                for lfn in res['Value']['Failed'].keys():
                  errorMessage = str( res['Value']['Failed'][lfn] )
                  if errorMessage.find( 'Write access not permitted for this credential.' ) != -1:
                    # Retry with a proxy of the file's owner before declaring failure
                    if self.__getProxyAndRemoveReplica( diracSE, lfn ):
                      continue
                  if errorMessage.find( 'seconds timeout for "__gfal_wrapper" call' ) != -1:
                    self.timeOutCounter += 1
                  if not failed.has_key( lfn ):
                    failed[lfn] = {}
                  failed[lfn][diracSE] = res['Value']['Failed'][lfn]
              else:
                errMsg[diracSE] = res['Message']
                for lfn in lfns:
                  if not failed.has_key( lfn ):
                    failed[lfn] = {}
                  failed[lfn][diracSE] = 'Completely'
            # Now analyse the results
            failedLFNs = failed.keys()
            lfnsOK = [lfn for lfn in lfns if not lfn in failedLFNs]
            gMonitor.addMark( 'ReplicaRemovalDone', len( lfnsOK ) )
            for lfn in lfnsOK:
              gLogger.info( "RemovalAgent.execute: Successfully removed %s at %s" % ( lfn, str( diracSEs ) ) )
              res = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done' )
              if not res['OK']:
                gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', lfn ) )
              modified = True
            if failed:
              # NOTE(review): this marks 'PhysicalRemovalFail' in the replica-removal
              # branch — 'ReplicaRemovalFail' looks intended; confirm before changing
              gMonitor.addMark( 'PhysicalRemovalFail', len( failedLFNs ) )
              for lfn in failedLFNs:
                for diracSE in failed[lfn].keys():
                  if type( failed[lfn][diracSE] ) in StringTypes:
                    if re.search( 'no such file or directory', failed[lfn][diracSE].lower() ):
                      gLogger.info( "RemovalAgent.execute: File did not exist.", lfn )
                      res = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done' )
                      if not res['OK']:
                        gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', lfn ) )
                      modified = True
                    else:
                      gLogger.info( "RemovalAgent.execute: Failed to remove file.", "%s at %s - %s" % ( lfn, diracSE, failed[lfn][diracSE] ) )
            if errMsg:
              for diracSE in errMsg.keys():
                # NOTE(review): same trailing-comma tuple issue as in the
                # physicalRemoval branch above
                errStr = "RemovalAgent.execute: Completely failed to remove replicas. At %s", diracSE
                gLogger.error( errStr, errMsg[diracSE] )

          ################################################
          #  If the sub-request is a request to the online system to retransfer
          elif operation == 'reTransfer':
            gLogger.info( "RemovalAgent.execute: Attempting to execute %s sub-request." % operation )
            diracSE = subRequestAttributes['TargetSE']
            for subRequestFile in subRequestFiles:
              if subRequestFile['Status'] == 'Waiting':
                pfn = str( subRequestFile['PFN'] )
                lfn = str( subRequestFile['LFN'] )
                res = self.replicaManager.onlineRetransfer( diracSE, pfn )
                if res['OK']:
                  if res['Value']['Successful'].has_key( pfn ):
                    gLogger.info( "RemovalAgent.execute: Successfully requested retransfer of %s." % pfn )
                    result = oRequest.setSubRequestFileAttributeValue( ind, 'removal', lfn, 'Status', 'Done' )
                    if not result['OK']:
                      gLogger.error( "RemovalAgent.execute: Error setting status to %s for %s" % ( 'Done', lfn ) )
                    modified = True
                  else:
                    errStr = "RemovalAgent.execute: Failed to request retransfer."
                    gLogger.error( errStr, "%s %s %s" % ( pfn, diracSE, res['Value']['Failed'][pfn] ) )
                else:
                  errStr = "RemovalAgent.execute: Completely failed to request retransfer."
                  gLogger.error( errStr, res['Message'] )
              else:
                gLogger.info( "RemovalAgent.execute: File already completed." )

          ################################################
          #  If the sub-request is none of the above types
          else:
            gLogger.error( "RemovalAgent.execute: Operation not supported.", operation )

          ################################################
          #  Determine whether there are any active files
          if oRequest.isSubRequestEmpty( ind, 'removal' )['Value']:
            oRequest.setSubRequestStatus( ind, 'removal', 'Done' )
            gMonitor.addMark( "Done", 1 )

        ################################################
        #  If the sub-request is already in terminal state
        else:
          gLogger.info( "RemovalAgent.execute:",
                        "Sub-request %s is status '%s' and not to be executed." %
                        ( ind, subRequestAttributes['Status'] ) )

      ################################################
      #  Generate the new request string after operation
      newrequestString = oRequest.toXML()['Value']
    except:
      # if something fails return the original request back to the server
      # (the broad except is deliberate: whatever goes wrong, the request
      # must not be lost — it is re-queued unmodified)
      res = self.requestDBClient.updateRequest( requestName, requestString, sourceServer )
      return S_OK()

    res = self.requestDBClient.updateRequest( requestName, newrequestString, sourceServer )

    if modified and jobID:
      # Let the RequestAgentMixIn finalize the associated job if anything changed
      result = self.finalizeRequest( requestName, jobID, sourceServer )

    return S_OK()

  def __getProxyAndRemoveReplica( self, diracSE, lfn ):
    """
    get a proxy from the owner of the file and try to remove it
    returns True if it succeeds, False otherwise

    :param str diracSE: DIRAC SE name the replica should be removed from
    :param str lfn: LFN of the replica to remove
    """

    result = self.replicaManager.getCatalogDirectoryMetadata( lfn, singleFile = True )
    if not result[ 'OK' ]:
      gLogger.error( "Could not get metadata info", result[ 'Message' ] )
      return False
    ownerRole = result[ 'Value' ][ 'OwnerRole' ]
    ownerDN = result[ 'Value' ][ 'OwnerDN' ]
    # VOMS roles are expected to start with '/'
    if ownerRole[0] != "/":
      ownerRole = "/%s" % ownerRole

    # Try every group carrying the owner's VOMS role until a proxy is obtained
    userProxy = ''
    for ownerGroup in Registry.getGroupsWithVOMSAttribute( ownerRole ):
      result = gProxyManager.downloadVOMSProxy( ownerDN, ownerGroup, limited = True,
                                                requiredVOMSAttribute = ownerRole )
      if not result[ 'OK' ]:
        gLogger.verbose ( 'Failed to retrieve voms proxy for %s : %s:' % ( ownerDN, ownerRole ),
                          result[ 'Message' ] )
        continue
      userProxy = result[ 'Value' ]
      gLogger.verbose( "Got proxy for %s@%s [%s]" % ( ownerDN, ownerGroup, ownerRole ) )
      break
    if not userProxy:
      return False

    result = userProxy.dumpAllToFile()
    if not result[ 'OK' ]:
      gLogger.verbose( result[ 'Message' ] )
      return False

    upFile = result[ 'Value' ]
    # Temporarily switch the proxy used by the underlying tools.
    # NOTE(review): this assumes X509_USER_PROXY is already set in the
    # environment — a KeyError is raised otherwise; confirm against the
    # agent's runtime environment.
    prevProxyEnv = os.environ[ 'X509_USER_PROXY' ]
    os.environ[ 'X509_USER_PROXY' ] = upFile

    try:
      res = self.replicaManager.removeReplica( diracSE, lfn )
      if res['OK'] and lfn in res[ 'Value' ]['Successful']:
        gLogger.verbose( 'Removed %s from %s' % ( lfn, diracSE ) )
        return True
    finally:
      # Always restore the previous proxy and delete the temporary file
      os.environ[ 'X509_USER_PROXY' ] = prevProxyEnv
      os.unlink( upFile )

    return False

  def finalize( self ):
    """
    Called by the Agent framework to cleanly end execution.
    In this case this module will wait until all pending ThreadedJbos in the
    ThreadPool get executed
    """

    self.threadPool.processAllResults()
    return S_OK()
class OutputDataExecutor:
  """
  Move files from an input location (local disk or a FileCatalog/SE pair) to an
  output SE and register them in an output FileCatalog, driven by transfer-path
  definitions read from the Configuration Service.  Transfers are executed
  asynchronously in a daemonized ThreadPool.
  """

  def __init__( self, csPath = "" ):
    """
    :param str csPath: CS section holding the transfer path definitions;
                       defaults to /Operations/<VO>/OutputData
    """
    self.log = gLogger.getSubLogger( "OutputDataExecutor" )
    if not csPath:
      vo = gConfig.getValue( "/DIRAC/VirtualOrganization", "" )
      self.__transfersCSPath = '/Operations/%s/OutputData' % vo
    else:
      self.__transfersCSPath = csPath
    self.log.verbose( "Reading transfer paths from %s" % self.__transfersCSPath )
    # Options every transfer-path section must define to be usable
    self.__requiredCSOptions = ['InputPath', 'InputFC', 'OutputPath', 'OutputFC', 'OutputSE']

    self.__threadPool = ThreadPool( gConfig.getValue( "%s/MinTransfers" % self.__transfersCSPath, 1 ),
                                    gConfig.getValue( "%s/MaxTransfers" % self.__transfersCSPath, 4 ),
                                    gConfig.getValue( "%s/MaxQueuedTransfers" % self.__transfersCSPath, 100 ) )
    # daemonize so queued transfer jobs run as soon as they are inserted
    self.__threadPool.daemonize()
    self.__processingFiles = set()    # file names currently being transferred
    self.__okTransferredFiles = 0     # successful transfer count
    self.__okTransferredBytes = 0     # successfully transferred volume
    self.__failedFiles = {}           # file name -> consecutive failure count

  def getNumOKTransferredFiles( self ):
    """ :return: number of files transferred successfully so far """
    return self.__okTransferredFiles

  def getNumOKTransferredBytes( self ):
    """ :return: total bytes transferred successfully so far """
    return self.__okTransferredBytes

  def transfersPending( self ):
    """ :return: True while the thread pool still has work in flight """
    return self.__threadPool.isWorking()

  def getDefinedTransferPaths( self ):
    """ Read all valid transfer-path definitions from the CS.

    :return: S_OK( { name : optionsDict } ); sections missing any required
             option are skipped with an error message
    """
    result = gConfig.getSections( self.__transfersCSPath )
    if not result['OK']:
      self.log.info( 'No Input/Output Pair defined in CS' )
      # Return an empty dict (not None) so callers can iterate the result safely
      return S_OK( {} )

    pathList = result['Value']

    tPaths = {}
    for name in pathList:
      csPath = self.__transfersCSPath + '/%s' % name
      result = gConfig.getOptionsDict( csPath )
      if not result['OK']:
        continue
      transferDict = result['Value']
      ok = True
      for i in self.__requiredCSOptions:
        if i not in transferDict:
          self.log.error( 'Missing Option %s in %s' % ( i, csPath ) )
          ok = False
          break
      if not ok:
        continue
      tPaths[ name ] = transferDict

    return S_OK( tPaths )

  def getNumLocalOutgoingFiles( self ):
    """ :return: number of files waiting on local disk across all LocalDisk-input paths """
    result = self.getDefinedTransferPaths()
    if not result[ 'OK' ]:
      return 0
    localOutgoing = 0
    tPaths = result[ 'Value' ]
    for name in tPaths:
      transferDict = tPaths[ name ]
      # Only local-disk inputs are counted here
      if 'LocalDisk' != transferDict['InputFC']:
        continue
      localOutgoing += len( self.getOutgoingFiles( transferDict ) )
    return localOutgoing

  def getOutgoingFiles( self, transferDict ):
    """
    Get list of files to be processed from InputPath

    :param dict transferDict: one transfer-path definition (InputFC, InputPath, ...)
    :return: list of file names (best effort; empty list on any error)
    """
    inputFCName = transferDict['InputFC']
    inputPath = transferDict['InputPath']

    if inputFCName == 'LocalDisk':
      files = []
      try:
        for fileName in os.listdir( inputPath ):
          if os.path.isfile( os.path.join( inputPath, fileName ) ):
            files.append( fileName )
      except OSError:
        # best effort: a missing/unreadable directory just yields no files
        pass
      return files

    inputFC = FileCatalog( [inputFCName] )
    result = inputFC.listDirectory( inputPath, True )

    if not result['OK']:
      self.log.error( result['Message'] )
      return []
    if not inputPath in result['Value']['Successful']:
      self.log.error( result['Value']['Failed'][inputPath] )
      return []

    subDirs = result['Value']['Successful'][inputPath]['SubDirs']
    files = result['Value']['Successful'][inputPath]['Files']
    # Only flat directories are supported; subdirectories are skipped
    for subDir in subDirs:
      self.log.info( 'Ignoring subdirectory:', subDir )
    return files.keys()

  def checkForTransfers( self ):
    """
    Check for transfers to do and start them
    """
    result = self.getDefinedTransferPaths()
    if not result[ 'OK' ]:
      return result
    tPaths = result[ 'Value' ]
    for name in tPaths:
      transferPath = tPaths[ name ]
      self.log.verbose( "Checking %s transfer path" % name )
      filesToTransfer = self.getOutgoingFiles( tPaths[ name ] )
      self.log.info( "Transfer path %s has %d files" % ( name, len( filesToTransfer ) ) )
      ret = self.__addFilesToThreadPool( filesToTransfer, transferPath )
      if not ret['OK']:
        # The thread pool got full
        break

  def processAllPendingTransfers( self ):
    """ Block until every queued transfer job has been processed. """
    self.__threadPool.processAllResults()

  @transferSync
  def __addFilesToThreadPool( self, files, transferDict ):
    """ Queue one transfer job per file not already being processed.

    :return: S_OK, or the queueing error when the pool is full
    """
    for fileName in files:
      fileName = os.path.basename( fileName )
      if fileName in self.__processingFiles:
        continue
      self.__processingFiles.add( fileName )
      time.sleep( 1 )
      ret = self.__threadPool.generateJobAndQueueIt( self.__transferIfNotRegistered,
                                            args = ( fileName, transferDict ),
                                            oCallback = self.transferCallback,
                                            blocking = False )
      if not ret['OK']:
        # The thread pool got full
        return ret
    return S_OK()

  def __transferIfNotRegistered( self, file, transferDict ):
    """ Transfer *file* unless the output catalog already knows it;
    if it is already registered, remove the input copy instead.
    """
    result = self.isRegisteredInOutputCatalog( file, transferDict )
    if not result[ 'OK' ]:
      self.log.error( result[ 'Message' ] )
      return result
    #Already registered. Need to delete
    if result[ 'Value' ]:
      self.log.info( "Transfer file %s is already registered in the output catalog" % file )
      #Delete
      filePath = os.path.join( transferDict[ 'InputPath' ], file )
      if transferDict[ 'InputFC' ] == 'LocalDisk':
        os.unlink( filePath )
      else:
        # The catalog entry is the full input path (filePath), the same key
        # used when replicas are looked up in __retrieveAndUploadFile
        # (this replaces the previously undefined name 'inFile')
        inputFC = FileCatalog( [ transferDict['InputFC'] ] )
        replicaDict = inputFC.getReplicas( filePath )
        if not replicaDict['OK']:
          self.log.error( "Error deleting file", replicaDict['Message'] )
        elif not filePath in replicaDict['Value']['Successful']:
          self.log.error( "Error deleting file", replicaDict['Value']['Failed'][filePath] )
        else:
          seList = replicaDict['Value']['Successful'][filePath].keys()
          for se in seList:
            se = StorageElement( se )
            self.log.info( 'Removing from %s:' % se.name, filePath )
            se.removeFile( filePath )
          inputFC.removeFile( filePath )
      self.log.info( "File %s deleted from %s" % ( file, transferDict[ 'InputFC' ] ) )
      self.__processingFiles.discard( file )
      return S_OK( file )
    #Do the transfer
    return self.__retrieveAndUploadFile( file, transferDict )

  def isRegisteredInOutputCatalog( self, file, transferDict ):
    """ Check whether *file* already has a replica on one of the output SEs.

    :return: S_OK( True/False ), or the catalog error
    """
    fc = FileCatalog( [ transferDict[ 'OutputFC' ] ] )
    lfn = os.path.join( transferDict['OutputPath'], os.path.basename( file ) )
    result = fc.getReplicas( lfn )
    if not result[ 'OK' ]:
      return result
    if lfn not in result[ 'Value' ][ 'Successful' ]:
      return S_OK( False )
    replicas = result[ 'Value' ][ 'Successful' ][ lfn ]
    for seName in List.fromChar( transferDict[ 'OutputSE' ], "," ):
      if seName in replicas:
        self.log.verbose( "Transfer file %s is already registered in %s SE" % ( file, seName ) )
        return S_OK( True )
    return S_OK( False )

  def __retrieveAndUploadFile( self, file, outputDict ):
    """
    Retrieve, Upload, and remove

    :param str file: bare file name
    :param dict outputDict: transfer-path definition
    :return: S_OK( fileName ) on success, S_ERROR( fileName ) on any failure
             (the file name is carried in the result so transferCallback can
             book-keep per-file outcomes)
    """
    fileName = file
    inputPath = outputDict['InputPath']
    inputFCName = outputDict['InputFC']
    inBytes = 0
    if inputFCName == 'LocalDisk':
      inFile = file
      file = os.path.join( inputPath, file )
    else:
      inputFC = FileCatalog( [inputFCName] )

      inFile = os.path.join( inputPath, file )
      replicaDict = inputFC.getReplicas( inFile )
      if not replicaDict['OK']:
        self.log.error( replicaDict['Message'] )
        return S_ERROR( fileName )
      if not inFile in replicaDict['Value']['Successful']:
        self.log.error( replicaDict['Value']['Failed'][inFile] )
        return S_ERROR( fileName )
      seList = replicaDict['Value']['Successful'][inFile].keys()

      inputSE = StorageElement( seList[0] )
      self.log.info( 'Retrieving from %s:' % inputSE.name, inFile )
      # ret = inputSE.getFile( inFile )
      # lcg_util binding prevent multithreading, use subprocess instead
      res = pythonCall( 2 * 3600, inputSE.getFile, inFile )
      if not res['OK']:
        self.log.error( res['Message'] )
        return S_ERROR( fileName )
      ret = res['Value']
      if not ret['OK']:
        self.log.error( ret['Message'] )
        return S_ERROR( fileName )
      if not inFile in ret['Value']['Successful']:
        self.log.error( ret['Value']['Failed'][inFile] )
        return S_ERROR( fileName )

    if os.path.isfile( file ):
      # st_size of the local copy
      inBytes = os.stat( file )[6]

    outputPath = outputDict['OutputPath']
    outputFCName = outputDict['OutputFC']
    replicaManager = ReplicaManager()
    outFile = os.path.join( outputPath, os.path.basename( file ) )
    transferOK = False
    # Try the configured output SEs in order until one upload succeeds
    for outputSEName in List.fromChar( outputDict['OutputSE'], "," ):
      outputSE = StorageElement( outputSEName )
      self.log.info( 'Trying to upload to %s:' % outputSE.name, outFile )
      # ret = replicaManager.putAndRegister( outFile, os.path.realpath( file ), outputSE.name, catalog=outputFCName )
      # lcg_util binding prevent multithreading, use subprocess instead
      result = pythonCall( 2 * 3600, replicaManager.putAndRegister, outFile, os.path.realpath( file ), outputSE.name, catalog = outputFCName )
      if result['OK'] and result['Value']['OK']:
        if outFile in result['Value']['Value']['Successful']:
          transferOK = True
          break
        else:
          self.log.error( result['Value']['Value']['Failed'][outFile] )
      else:
        if result['OK']:
          self.log.error( result['Value']['Message'] )
        else:
          self.log.error( result['Message'] )

    if not transferOK:
      return S_ERROR( fileName )

    # Remove the local working copy of the file
    if result['OK'] or not inputFCName == 'LocalDisk':
      os.unlink( file )

    # Defensive guard kept from the original code; at this point transferOK
    # implies result['OK'], so this branch is not expected to trigger
    if not result['OK']:
      self.log.error( ret['Message'] )
      return S_ERROR( fileName )

    self.log.info( "Finished transferring %s [%s bytes]" % ( inFile, inBytes ) )
    self.__okTransferredFiles += 1
    self.__okTransferredBytes += inBytes

    if inputFCName == 'LocalDisk':
      return S_OK( fileName )

    # Now the file is on final SE/FC, remove from input SE/FC
    for se in seList:
      se = StorageElement( se )
      self.log.info( 'Removing from %s:' % se.name, inFile )
      se.removeFile( inFile )

    inputFC.removeFile( inFile )

    return S_OK( fileName )

  @transferSync
  def transferCallback( self, threadedJob, submitResult ):
    """ Book-keeping callback run after each transfer job.

    Failed transfers bump a per-file failure counter; successful ones clear it.
    The file is always released from the in-progress set.
    """
    if not submitResult['OK']:
      fileName = submitResult['Message']
      if fileName not in self.__failedFiles:
        self.__failedFiles[fileName] = 0
      self.__failedFiles[fileName] += 1
    else:
      fileName = submitResult['Value']
      if fileName in self.__failedFiles:
        del self.__failedFiles[fileName]
    #Take out from processing files
    if fileName in self.__processingFiles:
      self.__processingFiles.discard( fileName )
# Beispiel #16  (non-Python artifact left by the code-example scraper; commented out so the module parses)
# 0
class Publisher:
  """
  Class Publisher is in charge of getting dispersed information, to be published on the web.
  """

#############################################################################

  def __init__(self, VOExtension, rsDBIn = None, commandCallerIn = None, infoGetterIn = None,
               WMSAdminIn = None):
    """
    Standard constructor

    :params:
      :attr:`VOExtension`: string, VO Extension (e.g. 'LHCb')

      :attr:`rsDBIn`: optional ResourceStatusDB object
      (see :class: `DIRAC.ResourceStatusSystem.DB.ResourceStatusDB.ResourceStatusDB`)

      :attr:`commandCallerIn`: optional CommandCaller object
      (see :class: `DIRAC.ResourceStatusSystem.Command.CommandCaller.CommandCaller`)

      :attr:`infoGetterIn`: optional InfoGetter object
      (see :class: `DIRAC.ResourceStatusSystem.Utilities.InfoGetter.InfoGetter`)

      :attr:`WMSAdminIn`: optional RPCClient object for WMSAdmin
      (see :class: `DIRAC.Core.DISET.RPCClient.RPCClient`)
    """

    self.configModule = Utils.voimport("DIRAC.ResourceStatusSystem.Policy.Configurations", VOExtension)

    # Collaborators can be injected (e.g. for testing); otherwise build the
    # default ones. Imports are local so the fallbacks are only paid for
    # when actually needed.
    if rsDBIn is not None:
      self.rsDB = rsDBIn
    else:
      from DIRAC.ResourceStatusSystem.DB.ResourceStatusDB import ResourceStatusDB
      self.rsDB = ResourceStatusDB()

    from DIRAC.ResourceStatusSystem.DB.ResourceManagementDB import ResourceManagementDB
    self.rmDB = ResourceManagementDB()

    if commandCallerIn is not None:
      self.cc = commandCallerIn
    else:
      from DIRAC.ResourceStatusSystem.Command.CommandCaller import CommandCaller
      self.cc = CommandCaller()

    if infoGetterIn is not None:
      self.ig = infoGetterIn
    else:
      from DIRAC.ResourceStatusSystem.Utilities.InfoGetter import InfoGetter
      self.ig = InfoGetter(VOExtension)

    if WMSAdminIn is not None:
      self.WMSAdmin = WMSAdminIn
    else:
      from DIRAC.Core.DISET.RPCClient import RPCClient
      self.WMSAdmin = RPCClient("WorkloadManagement/WMSAdministrator")

    self.threadPool = ThreadPool( 2, 5 )

    # protects concurrent writes to self.infoForPanel_res from pool threads
    self.lockObj = threading.RLock()

    self.infoForPanel_res = {}

#############################################################################

  def getInfo(self, granularity, name, useNewRes = False):
    """
    Standard method to get all the info to be published

    This method uses a ThreadPool (:class:`DIRAC.Core.Utilities.ThreadPool.ThreadPool`)
    with 2-5 threads. The threaded method is
    :meth:`DIRAC.ResourceStatusSystem.Utilities.Publisher.Publisher.getInfoForPanel`

    :params:
      :attr:`granularity`: string - a ValidRes

      :attr:`name`: string - name of the Validres

      :attr:`useNewRes`: boolean. When set to true, will get new results,
      otherwise it will get cached results (where available).
    """

    if granularity not in ValidRes:
      raise InvalidRes( Utils.where( self, self.getInfo ) )

    self.infoForPanel_res = {}

    status = None
    formerStatus = None
    siteType = None
    serviceType = None
    resourceType = None

    if granularity in ('Resource', 'Resources'):
      try:
        resourceType = self.rsDB.getMonitoredsList('Resource', ['ResourceType'],
                                              resourceName = name)[0][0]
      except IndexError:
        return "%s does not exist!" %name

    if granularity in ('StorageElement', 'StorageElements'):
      try:
        siteType = self.rsDB.getMonitoredsList('StorageElement', ['SiteType'],
                                              storageElementName = name)[0][0]
      except IndexError:
        return "%s does not exist!" %name

    paramNames = ['Type', 'Group', 'Name', 'Policy', 'DIRAC Status',
                  'RSS Status', 'Reason', 'Description']

    infoToGet = self.ig.getInfoToApply(('view_info', ), granularity, status = status,
                                       formerStatus = formerStatus, siteType = siteType,
                                       serviceType = serviceType, resourceType = resourceType,
                                       useNewRes = useNewRes)[0]['Panels']
    infoToGet_res = {}

    recordsList = []

    infosForPolicy = {}

    for panel in infoToGet.keys():

      (granularityForPanel, nameForPanel) = self.__getNameForPanel(granularity, name, panel)

      if not self._resExist(granularityForPanel, nameForPanel):
        continue

      # take composite RSS result for name
      nameStatus_res = self._getStatus(nameForPanel, panel)

      # record layout follows paramNames above
      recordBase = [None, None, None, None, None, None, None, None]

      recordBase[1] = panel.replace('_Panel', '')
      recordBase[2] = nameForPanel
      try:
        recordBase[4] = nameStatus_res[nameForPanel]['DIRACStatus'] #DIRAC Status
      except KeyError:
        # 'DIRACStatus' is only filled for Site/SE panels (see _getStatus)
        pass
      recordBase[5] = nameStatus_res[nameForPanel]['RSSStatus'] #RSS Status

      record = copy.deepcopy(recordBase)
      record[0] = 'ResultsForResource'

      recordsList.append(record)

      #take info that goes into the panel
      infoForPanel = infoToGet[panel]

      for info in infoForPanel:

        self.threadPool.generateJobAndQueueIt(self.getInfoForPanel,
                                              args = (info, granularityForPanel, nameForPanel) )

      # wait for all getInfoForPanel threads before reading infoForPanel_res
      self.threadPool.processAllResults()

      for policy in [next(iter(x)) for x in infoForPanel]:
        record = copy.deepcopy(recordBase)
        record[0] = 'SpecificInformation'
        record[3] = policy #policyName
        record[4] = None #DIRAC Status
        record[5] = self.infoForPanel_res[policy]['Status'] #RSS status for the policy
        record[6] = self.infoForPanel_res[policy]['Reason'] #Reason
        record[7] = self.infoForPanel_res[policy]['desc'] #Description
        recordsList.append(record)

        infosForPolicy[policy] = self.infoForPanel_res[policy]['infos']

    infoToGet_res['TotalRecords'] = len(recordsList)
    infoToGet_res['ParameterNames'] = paramNames
    infoToGet_res['Records'] = recordsList

    infoToGet_res['Extras'] = infosForPolicy

    return infoToGet_res

#############################################################################

  def getInfoForPanel(self, info, granularityForPanel, nameForPanel):
    """
    Threaded worker for :meth:`getInfo`: collects the single-policy result,
    its description and any extra info, and stores them (under the lock)
    in self.infoForPanel_res keyed by policy name.

    :params:
      :attr:`info`: single-key dict { policyName : formats }

      :attr:`granularityForPanel`: string - a ValidRes for the panel

      :attr:`nameForPanel`: string - resource name for the panel
    """

    #get single RSS policy results
    policyResToGet = next(iter(info))
    pol_res = self.rmDB.getPolicyRes(nameForPanel, policyResToGet)
    if pol_res != []:
      pol_res_dict = {'Status' : pol_res[0], 'Reason' : pol_res[1]}
    else:
      pol_res_dict = {'Status' : 'Unknown', 'Reason' : 'Unknown'}
    self.lockObj.acquire()
    try:
      self.infoForPanel_res[policyResToGet] = pol_res_dict
    finally:
      self.lockObj.release()

    #get policy description
    desc = self._getPolicyDesc(policyResToGet)

    #get other info
    othersInfo = next(iter(info.values()))
    if not isinstance(othersInfo, list):
      othersInfo = [othersInfo]

    info_res = {}

    for oi in othersInfo:
      format_ = next(iter(oi))
      what = next(iter(oi.values()))

      info_bit_got = self._getInfo(granularityForPanel, nameForPanel, format_, what)

      info_res[format_] = info_bit_got

    self.lockObj.acquire()
    try:
      self.infoForPanel_res[policyResToGet]['infos'] = info_res
      self.infoForPanel_res[policyResToGet]['desc'] = desc
    finally:
      self.lockObj.release()

#############################################################################

  def _getStatus(self, name, panel):
    """
    Return { name : { 'RSSStatus': ..., 'DIRACStatus': ... } } for the given
    panel; 'DIRACStatus' is only present for 'Site_Panel' and 'SE_Panel'.
    """

    #get RSS status
    RSSStatus = self._getInfoFromRSSDB(name, panel)[0][1]

    #get DIRAC status
    if panel in ('Site_Panel', 'SE_Panel'):

      if panel == 'Site_Panel':
        DIRACStatus = self.WMSAdmin.getSiteMaskLogging(name)
        if DIRACStatus['OK']:
          DIRACStatus = DIRACStatus['Value'][name].pop()[0]
        else:
          raise RSSException( Utils.where( self, self._getStatus ) )

      elif panel == 'SE_Panel':
        ra = getStorageElementStatus(name, 'ReadAccess')['Value']
        wa = getStorageElementStatus(name, 'WriteAccess')['Value']
        DIRACStatus = {'ReadAccess': ra, 'WriteAccess': wa}

      status = { name : { 'RSSStatus': RSSStatus, 'DIRACStatus': DIRACStatus } }

    else:
      status = { name : { 'RSSStatus': RSSStatus} }

    return status

#############################################################################

  def _getInfo(self, granularity, name, format_, what):
    """
    Fetch one piece of panel info, either directly from the RSS DB
    (format_ == 'RSS') or through a command invocation.
    """

    if format_ == 'RSS':
      info_bit_got = self._getInfoFromRSSDB(name, what)
    else:
      if isinstance(what, dict):
        command = what['CommandIn']
        extraArgs = what['args']
      else:
        command = what
        extraArgs = None

      info_bit_got = self.cc.commandInvocation(granularity, name, None,
                                               None, command, extraArgs)

      # commands may return a bare value or a dict with a 'Result' key;
      # unwrap the latter, pass the former through unchanged
      try:
        info_bit_got = info_bit_got['Result']
      except (KeyError, TypeError):
        pass

    return info_bit_got

#############################################################################

  def _getInfoFromRSSDB(self, name, what):
    """
    Query rsDB.getMonitoredsList with granularity, parameter list and
    selection arguments derived from the panel/section identifier `what`.
    """

    paramsL = ['Status']

    siteName = None
    serviceName = None
    resourceName = None
    storageElementName = None
    serviceType = None
    gridSiteName = None

    if what == 'ServiceOfSite':
      gran = 'Service'
      paramsL.insert(0, 'ServiceName')
      paramsL.append('Reason')
      siteName = name
    elif what == 'ResOfCompService':
      gran = 'Resources'
      paramsL.insert(0, 'ResourceName')
      paramsL.append('Reason')
      serviceType = name.split('@')[0]
      gridSiteName = getGOCSiteName(name.split('@')[1])
      if not gridSiteName['OK']:
        raise RSSException( gridSiteName['Message'] )
      gridSiteName = gridSiteName['Value']
    elif what == 'ResOfStorService':
      gran = 'Resources'
      paramsL.insert(0, 'ResourceName')
      paramsL.append('Reason')
      serviceType = name.split('@')[0]
      gridSiteName = getGOCSiteName(name.split('@')[1])
      if not gridSiteName['OK']:
        raise RSSException( gridSiteName['Message'] )
      gridSiteName = gridSiteName['Value']
    elif what == 'ResOfStorEl':
      gran = 'StorageElements'
      paramsL.insert(0, 'ResourceName')
      paramsL.append('Reason')
      storageElementName = name
    elif what == 'StorageElementsOfSite':
      gran = 'StorageElements'
      paramsL.insert(0, 'StorageElementName')
      paramsL.append('Reason')
      if '@' in name:
        DIRACsiteName = name.split('@').pop()
      else:
        DIRACsiteName = name
      gridSiteName = getGOCSiteName(DIRACsiteName)
      if not gridSiteName['OK']:
        raise RSSException( gridSiteName['Message'] )
      gridSiteName = gridSiteName['Value']
    elif what == 'Site_Panel':
      gran = 'Site'
      paramsL.insert(0, 'SiteName')
      siteName = name
    elif what == 'Service_Computing_Panel':
      gran = 'Service'
      paramsL.insert(0, 'ServiceName')
      serviceName = name
    elif what == 'Service_Storage_Panel':
      gran = 'Service'
      paramsL.insert(0, 'ServiceName')
      serviceName = name
    elif what == 'Service_VO-BOX_Panel':
      gran = 'Services'
      paramsL.insert(0, 'ServiceName')
      serviceName = name
    elif what == 'Service_VOMS_Panel':
      gran = 'Services'
      paramsL.insert(0, 'ServiceName')
      serviceName = name
    elif what == 'Resource_Panel':
      gran = 'Resource'
      paramsL.insert(0, 'ResourceName')
      resourceName = name
    elif what == 'SE_Panel':
      gran = 'StorageElement'
      paramsL.insert(0, 'StorageElementName')
      storageElementName = name
    # NOTE(review): an unrecognised `what` leaves `gran` unbound and raises
    # NameError below; presumably callers only pass the values above - confirm

    info_bit_got = self.rsDB.getMonitoredsList(gran, paramsList = paramsL, siteName = siteName,
                                               serviceName = serviceName, serviceType = serviceType,
                                               resourceName = resourceName,
                                               storageElementName = storageElementName,
                                               gridSiteName = gridSiteName)

    return info_bit_got

#############################################################################

  def _getPolicyDesc(self, policyName):
    """
    Return the human-readable description of `policyName` from the
    VO-specific policy configuration module.
    """

    return self.configModule.Policies[policyName]['Description']

#############################################################################

  def __getNameForPanel(self, granularity, name, panel):
    """
    For site granularity, map service panels to ('Service', '<type>@<site>');
    everything else is returned unchanged.
    """

    if granularity in ('Site', 'Sites'):
      if panel == 'Service_Computing_Panel':
        granularity = 'Service'
        name = 'Computing@' + name
      elif panel == 'Service_Storage_Panel':
        granularity = 'Service'
        name = 'Storage@' + name
      elif panel == 'OtherServices_Panel':
        granularity = 'Service'
        name = 'OtherS@' + name
      elif panel == 'Service_VOMS_Panel':
        granularity = 'Service'
        name = 'VOMS@' + name
      elif panel == 'Service_VO-BOX_Panel':
        granularity = 'Service'
        name = 'VO-BOX@' + name

    return (granularity, name)

#############################################################################

  def _resExist(self, granularity, name):
    """
    Return True if the monitored resource `name` exists in the RSS DB at
    the given granularity, False otherwise.
    """

    siteName = None
    serviceName = None
    resourceName = None
    storageElementName = None

    if granularity in ('Site', 'Sites'):
      siteName = name
    elif granularity in ('Service', 'Services'):
      serviceName = name
    elif granularity in ('Resource', 'Resources'):
      resourceName = name
    elif granularity in ('StorageElement', 'StorageElements'):
      storageElementName = name

    res = self.rsDB.getMonitoredsList(granularity, siteName = siteName,
                                      serviceName = serviceName, resourceName = resourceName,
                                      storageElementName = storageElementName)

    if res == []:
      return False
    else:
      return True
Beispiel #17
0
class FTSMonitorAgent(AgentModule):
    """
  .. class:: FTSMonitorAgent

  Monitor submitted FTS jobs.
  """
    # # transfer DB handle
    transferDB = None
    # # thread pool
    threadPool = None
    # # min threads
    minThreads = 1
    # # max threads
    maxThreads = 10

    # # missing source regexp patterns
    missingSourceErrors = [
      re.compile( r"SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] Failed" ),
      re.compile( r"SOURCE error during TRANSFER_PREPARATION phase: \[INVALID_PATH\] No such file or directory" ),
      re.compile( r"SOURCE error during PREPARATION phase: \[INVALID_PATH\] Failed" ),
      re.compile( r"SOURCE error during PREPARATION phase: \[INVALID_PATH\] The requested file either does not exist" ),
      re.compile( r"TRANSFER error during TRANSFER phase: \[INVALID_PATH\] the server sent an error response: 500 500"\
                 " Command failed. : open error: No such file or directory" ),
      re.compile( r"SOURCE error during TRANSFER_PREPARATION phase: \[USER_ERROR\] source file doesnt exist" ) ]

    def initialize(self):
        """ agent's initialisation """
        self.transferDB = TransferDB()
        self.am_setOption("shifterProxy", "DataManager")
        self.minThreads = self.am_getOption("MinThreads", self.minThreads)
        self.maxThreads = self.am_getOption("MaxThreads", self.maxThreads)
        # normalise the configured bounds: take absolute values and make
        # sure min <= max even if the options were given the wrong way round
        minmax = (abs(self.minThreads), abs(self.maxThreads))
        self.minThreads, self.maxThreads = min(minmax), max(minmax)
        self.log.info("ThreadPool min threads = %s" % self.minThreads)
        self.log.info("ThreadPool max threads = %s" % self.maxThreads)
        self.threadPool = ThreadPool(self.minThreads, self.maxThreads)
        self.threadPool.daemonize()
        return S_OK()

    def execute(self):
        """ push jobs to the thread pool """
        self.log.info("Obtaining requests to monitor")
        res = self.transferDB.getFTSReq()
        if not res["OK"]:
            self.log.error("Failed to get FTS requests", res['Message'])
            return res
        if not res["Value"]:
            self.log.info("No FTS requests found to monitor.")
            return S_OK()
        ftsReqs = res["Value"]
        self.log.info("Found %s FTS jobs" % len(ftsReqs))
        i = 1
        for ftsJob in ftsReqs:
            # retry queueing until the pool accepts the job; a full queue is
            # the expected reason for failure, so just back off one second
            while True:
                self.log.debug("submitting FTS Job %s FTSReqID=%s to monitor" %
                               (i, ftsJob["FTSReqID"]))
                ret = self.threadPool.generateJobAndQueueIt(
                    self.monitorTransfer,
                    args=(ftsJob, ),
                )
                if ret["OK"]:
                    i += 1
                    break
                # # sleep 1 second to proceed
                time.sleep(1)

        self.threadPool.processAllResults()
        return S_OK()

    def ftsJobExpired(self, ftsReqID, channelID):
        """ clean up when FTS job had expired on the server side

    :param int ftsReqID: FTSReq.FTSReqID
    :param int channelID: FTSReq.ChannelID
    """
        log = gLogger.getSubLogger("@%s" % str(ftsReqID))
        fileIDs = self.transferDB.getFTSReqFileIDs(ftsReqID)
        if not fileIDs["OK"]:
            log.error("Unable to retrieve FileIDs associated to %s request" %
                      ftsReqID)
            return fileIDs
        fileIDs = fileIDs["Value"]

        # # update FileToFTS table, this is just a clean up, no worry if something goes wrong
        for fileID in fileIDs:
            fileStatus = self.transferDB.setFileToFTSFileAttribute(
                ftsReqID, fileID, "Status", "Failed")
            if not fileStatus["OK"]:
                log.error(
                    "Unable to set FileToFTS status to 'Failed' for FileID %s: %s"
                    % (fileID, fileStatus["Message"]))

            failReason = self.transferDB.setFileToFTSFileAttribute(
                ftsReqID, fileID, "Reason", "FTS job expired on server")
            if not failReason["OK"]:
                log.error("Unable to set FileToFTS reason for FileID %s: %s" %
                          (fileID, failReason["Message"]))
        # # update Channel table
        resetChannels = self.transferDB.resetFileChannelStatus(
            channelID, fileIDs)
        if not resetChannels["OK"]:
            log.error("Failed to reset Channel table for files to retry")
            return resetChannels

        # # update FTSReq table
        log.info("Setting FTS request status to 'Finished'")
        ftsReqStatus = self.transferDB.setFTSReqStatus(ftsReqID, "Finished")
        if not ftsReqStatus["OK"]:
            log.error("Failed update FTS Request status",
                      ftsReqStatus["Message"])
            return ftsReqStatus

        # # if we land here, everything should be OK
        return S_OK()

    def monitorTransfer(self, ftsReqDict):
        """ monitors transfer obtained from TransferDB

    :param dict ftsReqDict: FTS job dictionary
    """
        ftsReqID = ftsReqDict.get("FTSReqID")
        ftsGUID = ftsReqDict.get("FTSGuid")
        ftsServer = ftsReqDict.get("FTSServer")
        channelID = ftsReqDict.get("ChannelID")
        sourceSE = ftsReqDict.get("SourceSE")
        targetSE = ftsReqDict.get("TargetSE")

        oFTSRequest = FTSRequest()
        oFTSRequest.setFTSServer(ftsServer)
        oFTSRequest.setFTSGUID(ftsGUID)
        oFTSRequest.setSourceSE(sourceSE)
        oFTSRequest.setTargetSE(targetSE)

        log = gLogger.getSubLogger("@%s" % str(ftsReqID))

        #########################################################################
        # Perform summary update of the FTS Request and update FTSReq entries.
        log.info("Perform summary update of the FTS Request")
        infoStr = ["glite-transfer-status -s %s -l %s" % (ftsServer, ftsGUID)]
        infoStr.append("FTS GUID:   %s" % ftsGUID)
        infoStr.append("FTS Server: %s" % ftsServer)
        log.info("\n".join(infoStr))
        res = oFTSRequest.summary()
        self.transferDB.setFTSReqLastMonitor(ftsReqID)
        if not res["OK"]:
            log.error("Failed to update the FTS request summary",
                      res["Message"])
            if "getTransferJobSummary2: Not authorised to query request" in res[
                    "Message"]:
                log.error(
                    "FTS job is not existing at the FTS server anymore, will clean it up on TransferDB side"
                )
                cleanUp = self.ftsJobExpired(ftsReqID, channelID)
                if not cleanUp["OK"]:
                    log.error(cleanUp["Message"])
                return cleanUp
            return res

        res = oFTSRequest.dumpSummary()
        if not res['OK']:
            log.error("Failed to get FTS request summary", res["Message"])
            return res
        log.info(res['Value'])
        res = oFTSRequest.getPercentageComplete()
        if not res['OK']:
            log.error("Failed to get FTS percentage complete", res["Message"])
            return res
        log.info('FTS Request found to be %.1f percent complete' %
                 res["Value"])
        self.transferDB.setFTSReqAttribute(ftsReqID, "PercentageComplete",
                                           res["Value"])
        self.transferDB.addLoggingEvent(ftsReqID, res["Value"])

        #########################################################################
        # Update the information in the TransferDB if the transfer is terminal.
        res = oFTSRequest.isRequestTerminal()
        if not res["OK"]:
            log.error("Failed to determine whether FTS request terminal",
                      res["Message"])
            return res
        if not res["Value"]:
            return S_OK()
        # # request is terminal
        return self.terminalRequest(oFTSRequest, ftsReqID, channelID, sourceSE)

    def terminalRequest(self, oFTSRequest, ftsReqID, channelID, sourceSE):
        """ process terminal FTS job

    :param FTSRequest oFTSRequest: FTSRequest instance
    :param int ftsReqID: FTSReq.FTSReqID
    :param int channelID: FTSReq.ChannelID
    :param str sourceSE: FTSReq.SourceSE
    """
        log = gLogger.getSubLogger("@%s" % ftsReqID)

        log.info("FTS Request found to be terminal, updating file states")
        #########################################################################
        # Get the LFNS associated to the FTS request
        log.info("Obtaining the LFNs associated to this request")
        res = self.transferDB.getFTSReqLFNs(ftsReqID, channelID, sourceSE)
        if not res["OK"]:
            log.error("Failed to obtain FTS request LFNs", res['Message'])
            return res
        files = res["Value"]
        if not files:
            log.error("No files present for transfer")
            return S_ERROR("No files were found in the DB")

        lfns = files.keys()
        log.debug("Obtained %s files" % len(lfns))
        for lfn in lfns:
            oFTSRequest.setLFN(lfn)

        res = oFTSRequest.monitor()
        if not res["OK"]:
            log.error("Failed to perform detailed monitoring of FTS request",
                      res["Message"])
            return res
        res = oFTSRequest.getFailed()
        if not res["OK"]:
            log.error("Failed to obtained failed files for FTS request",
                      res["Message"])
            return res
        failedFiles = res["Value"]
        res = oFTSRequest.getDone()
        if not res["OK"]:
            log.error("Failed to obtained successful files for FTS request",
                      res["Message"])
            return res
        completedFiles = res["Value"]

        # An LFN can be included more than once if it was entered into more than one Request.
        # FTS will only do the transfer once. We need to identify all FileIDs
        res = self.transferDB.getFTSReqFileIDs(ftsReqID)
        if not res["OK"]:
            log.error("Failed to get FileIDs associated to FTS Request",
                      res["Message"])
            return res
        fileIDs = res["Value"]
        res = self.transferDB.getAttributesForFilesList(fileIDs, ["LFN"])
        if not res["OK"]:
            log.error("Failed to get LFNs associated to FTS Request",
                      res["Message"])
            return res
        fileIDDict = res["Value"]

        fileToFTSUpdates = []
        completedFileIDs = []
        filesToRetry = []
        filesToFail = []

        for fileID, fileDict in fileIDDict.items():
            lfn = fileDict['LFN']
            if lfn in completedFiles:
                completedFileIDs.append(fileID)
                transferTime = 0
                res = oFTSRequest.getTransferTime(lfn)
                if res["OK"]:
                    transferTime = res["Value"]
                fileToFTSUpdates.append(
                    (fileID, "Completed", "", 0, transferTime))

            if lfn in failedFiles:
                failReason = ""
                res = oFTSRequest.getFailReason(lfn)
                if res["OK"]:
                    failReason = res["Value"]
                # checksum mismatches are permanent failures: no retry
                if "Source file/user checksum mismatch" in failReason:
                    filesToFail.append(fileID)
                    continue
                if self.missingSource(failReason):
                    log.error("The source SURL does not exist.",
                              "%s %s" % (lfn, oFTSRequest.getSourceSURL(lfn)))
                    filesToFail.append(fileID)
                else:
                    filesToRetry.append(fileID)
                log.error("Failed to replicate file on channel.",
                          "%s %s" % (channelID, failReason))
                fileToFTSUpdates.append((fileID, "Failed", failReason, 0, 0))

        # # update TransferDB.FileToFTS table
        updateFileToFTS = self.updateFileToFTS(ftsReqID, channelID,
                                               filesToRetry, filesToFail,
                                               completedFileIDs,
                                               fileToFTSUpdates)

        if updateFileToFTS["OK"] and updateFileToFTS["Value"]:
            res = oFTSRequest.finalize()
            if not res["OK"]:
                log.error(
                    "Failed to perform the finalization for the FTS request",
                    res["Message"])
                return res

            log.info('Adding logging event for FTS request')
            # Now set the FTSReq status to terminal so that it is not monitored again
            res = self.transferDB.addLoggingEvent(ftsReqID, 'Finished')
            if not res['OK']:
                log.error('Failed to add logging event for FTS Request',
                          res['Message'])

            # update TransferDB.FileToCat table
            updateFileToCat = self.updateFileToCat(oFTSRequest, channelID,
                                                   fileIDDict, completedFiles,
                                                   filesToFail)
            if not updateFileToCat["OK"]:
                log.error(updateFileToCat["Message"])

            log.debug("Updating FTS request status")
            res = self.transferDB.setFTSReqStatus(ftsReqID, 'Finished')
            if not res['OK']:
                log.error('Failed update FTS Request status', res['Message'])
        return S_OK()

    def updateFileToFTS(self, ftsReqID, channelID, filesToRetry, filesToFail,
                        completedFileIDs, fileToFTSUpdates):
        """ update TransferDB.FileToFTS table for finished request

    :param int ftsReqID: FTSReq.FTSReqID
    :param int channelID: FTSReq.ChannelID
    :param list filesToRetry: FileIDs to retry
    :param list filesToFail: FileIDs for failed files
    :param list completedFileIDs: files completed
    :param list fileToFTSUpdates: tuples of attribute updates for FileToFTS
    """
        log = gLogger.getSubLogger("@%s" % ftsReqID)

        allUpdated = True

        res = self.transferDB.resetFileChannelStatus(
            channelID, filesToRetry) if filesToRetry else S_OK()
        if not res["OK"]:
            log.error("Failed to update the Channel table for file to retry.",
                      res["Message"])
            allUpdated = False

        for fileID in filesToFail:
            log.info("Updating the Channel table for files to reschedule")
            res = self.transferDB.setFileToReschedule(fileID)
            if not res["OK"]:
                log.error("Failed to update Channel table for failed files.",
                          res["Message"])
                allUpdated = False
            elif res["Value"] == "max reschedule attempt reached":
                # bugfix: the original used '"..." % res["Value"]' with no
                # conversion specifier in the string, which raises TypeError
                log.error("setting Channel status to 'Failed' : %s" %
                          res["Value"])
                res = self.transferDB.setFileChannelStatus(
                    channelID, fileID, 'Failed')
                if not res["OK"]:
                    log.error(
                        "Failed to update Channel table for failed files.",
                        res["Message"])
                    allUpdated = False

        if completedFileIDs:
            res = self.transferDB.updateCompletedChannelStatus(
                channelID, completedFileIDs)
            if not res["OK"]:
                log.error(
                    "Failed to update the Channel table for successful files.",
                    res["Message"])
                allUpdated = False
            res = self.transferDB.updateAncestorChannelStatus(
                channelID, completedFileIDs)
            if not res["OK"]:
                log.error(
                    'Failed to update the Channel table for ancestors of successful files.',
                    res['Message'])
                allUpdated = False

        if fileToFTSUpdates:
            res = self.transferDB.setFileToFTSFileAttributes(
                ftsReqID, channelID, fileToFTSUpdates)
            if not res["OK"]:
                log.error("Failed to update the FileToFTS table for files.",
                          res["Message"])
                allUpdated = False

        return S_OK(allUpdated)

    def updateFileToCat(self, oFTSRequest, channelID, fileIDDict,
                        completedFiles, filesToFail):
        """ update TransferDB.FileToCat table for finished request

    :param FTSRequest oFTSRequest: FTSRequest instance
    :param int channelID: FTSReq.ChannelID
    :param dict fileIDDict: fileIDs dictionary
    :param list completedFiles: LFNs transferred successfully
    :param list filesToFail: FileIDs of permanently failed files
    """
        # NOTE(review): res["OK"] is not checked before reading res["Value"];
        # presumably getFailedRegistrations cannot fail - confirm upstream
        res = oFTSRequest.getFailedRegistrations()
        failedRegistrations = res["Value"]
        regFailedFileIDs = []
        regDoneFileIDs = []
        regForgetFileIDs = []
        for fileID, fileDict in fileIDDict.items():
            lfn = fileDict['LFN']

            if lfn in failedRegistrations:
                regFailedFileIDs.append(fileID)
                # if the LFN appears more than once, FileToCat needs to be reset only once
                del failedRegistrations[lfn]
            elif lfn in completedFiles:
                regDoneFileIDs.append(fileID)
            elif fileID in filesToFail:
                regForgetFileIDs.append(fileID)

        res = self.transferDB.setRegistrationWaiting(
            channelID, regFailedFileIDs) if regFailedFileIDs else S_OK()
        if not res["OK"]:
            res["Message"] = "Failed to reset entries in FileToCat: %s" % res[
                "Message"]
            return res

        res = self.transferDB.setRegistrationDone(
            channelID, regDoneFileIDs) if regDoneFileIDs else S_OK()
        if not res["OK"]:
            res["Message"] = "Failed to set entries Done in FileToCat: %s" % res[
                "Message"]
            return res

        # This entries could also be set to Failed, but currently there is no method to do so.
        res = self.transferDB.setRegistrationDone(
            channelID, regForgetFileIDs) if regForgetFileIDs else S_OK()
        if not res["OK"]:
            res["Message"] = "Failed to set entries Done in FileToCat: %s" % res[
                "Message"]
            return res

        return S_OK()

    @classmethod
    def missingSource(cls, failReason):
        """ check if message sent by FTS server is concerning missing source file

    :param str failReason: message sent by FTS server
    """
        for error in cls.missingSourceErrors:
            if error.search(failReason):
                return 1
        return 0
class SystemAdministratorIntegrator( object ):
  """ Invokes SystemAdministrator service methods on a set of hosts in parallel.

  Any unknown attribute looked up on an instance is treated as a remote
  method name (see __getattr__) and is executed on every responding host,
  the per-host results being collected into a single dictionary.
  """

  def __init__( self, **kwargs ):
    """ Constructor

    :param list hosts: optional explicit list of hosts to work with; when
                       absent the host list is taken from the Registry
    :param list exclude: hosts to drop from the Registry-derived list
                         (only honoured when 'hosts' is not given)

    All remaining keyword arguments are forwarded to every
    SystemAdministratorClient created by this object.
    """
    if 'hosts' in kwargs:
      self.__hosts = kwargs['hosts']
      del kwargs['hosts']
    else:
      result = Registry.getHosts()
      if result['OK']:
        self.__hosts = result['Value']
      else:
        self.__hosts = []
      # Excluded hosts
      if 'exclude' in kwargs:
        self.__hosts = list( set( self.__hosts ) - set( kwargs['exclude'] ) )
    # 'exclude' is consumed above: drop it so that it is not forwarded to
    # SystemAdministratorClient together with the remaining kwargs
    if 'exclude' in kwargs:
      del kwargs['exclude']

    # Ping the hosts to remove those that don't have a SystemAdministrator service
    sysAdminHosts = []
    self.silentHosts = []
    self.__resultDict = {}
    # __kwargs must exist (empty) while pinging: __executeClient reads it
    self.__kwargs = {}
    pool = ThreadPool( len( self.__hosts ) )
    for host in self.__hosts:
      pool.generateJobAndQueueIt( self.__executeClient,
                                  args = [ host, "ping" ],
                                  kwargs = {},
                                  oCallback = self.__processResult )

    pool.processAllResults()
    for host, result in self.__resultDict.items():
      if result['OK']:
        sysAdminHosts.append( host )
      else:
        self.silentHosts.append( host )
    del pool

    self.__hosts = sysAdminHosts

    self.__kwargs = dict( kwargs )
    self.__pool = ThreadPool( len( self.__hosts ) )
    self.__resultDict = {}

  def getSilentHosts( self ):
    """ Get a list of non-responding hosts

    :return: list of hosts
    """
    return self.silentHosts

  def getRespondingHosts( self ):
    """ Get a list of responding hosts

    :return: list of hosts
    """
    return self.__hosts

  def __getattr__( self, name ):
    # Any unknown attribute is interpreted as a remote method name:
    # remember it and hand back the generic executor
    self.call = name
    return self.execute

  def __executeClient( self, host, method, *parms, **kwargs ):
    """ Execute RPC method on a given host
    """
    hostName = Registry.getHostOption( host, 'Host', host )
    client = SystemAdministratorClient( hostName, **self.__kwargs )
    result = getattr( client, method )( *parms, **kwargs )
    # Tag the result so __processResult can attribute it to its host
    result['Host'] = host
    return result

  def __processResult( self, id_, result ):
    """ Collect results in the final structure
    """
    host = result['Host']
    del result['Host']
    self.__resultDict[host] = result

  def execute( self, *args, **kwargs ):
    """ Main execution method: run the remembered call on all hosts

    :return: S_OK( { host : result } )
    """
    self.__resultDict = {}
    for host in self.__hosts:
      self.__pool.generateJobAndQueueIt( self.__executeClient,
                                         args = [ host, self.call ] + list( args ),
                                         kwargs = kwargs,
                                         oCallback = self.__processResult )

    self.__pool.processAllResults()
    return S_OK( self.__resultDict )
# Beispiel #19 (dataset separator artifact; original score line: 0)
class OutputDataExecutor:
    def __init__(self, csPath=""):
        """Set up logging, the transfer CS location and the transfer thread pool.

        :param str csPath: configuration section holding the transfer
            definitions; defaults to /Operations/<vo>/OutputData
        """
        self.log = gLogger.getSubLogger("OutputDataExecutor")
        if csPath:
            self.__transfersCSPath = csPath
        else:
            vo = gConfig.getValue("/DIRAC/VirtualOrganization", "")
            self.__transfersCSPath = '/Operations/%s/OutputData' % vo
        self.log.verbose("Reading transfer paths from %s" %
                         self.__transfersCSPath)
        # Options every transfer path definition must provide
        self.__requiredCSOptions = [
            'InputPath', 'InputFC', 'OutputPath', 'OutputFC', 'OutputSE'
        ]

        minTransfers = gConfig.getValue(
            "%s/MinTransfers" % self.__transfersCSPath, 1)
        maxTransfers = gConfig.getValue(
            "%s/MaxTransfers" % self.__transfersCSPath, 4)
        maxQueued = gConfig.getValue(
            "%s/MaxQueuedTransfers" % self.__transfersCSPath, 100)
        self.__threadPool = ThreadPool(minTransfers, maxTransfers, maxQueued)
        self.__threadPool.daemonize()

        # Bookkeeping of in-flight and completed transfers
        self.__processingFiles = set()
        self.__okTransferredFiles = 0
        self.__okTransferredBytes = 0
        self.__failedFiles = {}

    def getNumOKTransferredFiles(self):
        """Return the number of files transferred successfully so far."""
        return self.__okTransferredFiles

    def getNumOKTransferredBytes(self):
        """Return the total number of bytes transferred successfully so far."""
        return self.__okTransferredBytes

    def transfersPending(self):
        """Tell whether the thread pool still has transfer work in flight."""
        return self.__threadPool.isWorking()

    def getDefinedTransferPaths(self):
        """Read the transfer path definitions from the configuration.

        :return: S_OK( { name : transferDict } ) with one entry per fully
            defined transfer path; an empty dict when nothing is defined
        """
        result = gConfig.getSections(self.__transfersCSPath)
        if not result['OK']:
            self.log.info('No Input/Output Pair defined in CS')
            # Return an empty dict rather than a bare S_OK(): callers
            # (checkForTransfers, getNumLocalOutgoingFiles) iterate over
            # 'Value' and would crash on None
            return S_OK({})

        pathList = result['Value']

        tPaths = {}
        for name in pathList:
            csPath = self.__transfersCSPath + '/%s' % name
            result = gConfig.getOptionsDict(csPath)
            if not result['OK']:
                continue
            transferDict = result['Value']
            # Skip transfer paths missing any mandatory option
            ok = True
            for i in self.__requiredCSOptions:
                if i not in transferDict:
                    self.log.error('Missing Option %s in %s' % (i, csPath))
                    ok = False
                    break
            if not ok:
                continue
            tPaths[name] = transferDict

        return S_OK(tPaths)

    def getNumLocalOutgoingFiles(self):
        """Count pending files across all transfer paths fed from local disk."""
        result = self.getDefinedTransferPaths()
        if not result['OK']:
            return 0
        tPaths = result['Value']
        # Only paths whose input catalog is the local disk hold files here
        return sum(len(self.getOutgoingFiles(tDict))
                   for tDict in tPaths.values()
                   if tDict['InputFC'] == 'LocalDisk')

    def getOutgoingFiles(self, transferDict):
        """Get the list of files to be processed from InputPath.

        :param dict transferDict: transfer path definition with at least
            'InputFC' and 'InputPath' keys
        :return: list of file names (empty on any error)
        """
        inputFCName = transferDict['InputFC']
        inputPath = transferDict['InputPath']

        if inputFCName == 'LocalDisk':
            files = []
            try:
                for fileName in os.listdir(inputPath):
                    if os.path.isfile(os.path.join(inputPath, fileName)):
                        files.append(fileName)
            except OSError:
                # Input directory may not exist (yet): best-effort, treat
                # as "nothing to transfer".  Narrowed from a bare except,
                # which also hid genuine programming errors.
                pass
            return files

        inputFC = FileCatalog([inputFCName])
        result = inputFC.listDirectory(inputPath, True)

        if not result['OK']:
            self.log.error(result['Message'])
            return []
        if inputPath not in result['Value']['Successful']:
            self.log.error(result['Value']['Failed'][inputPath])
            return []

        dirContents = result['Value']['Successful'][inputPath]
        for subDir in dirContents['SubDirs']:
            # Recursing into subdirectories is not supported
            self.log.info('Ignoring subdirectory:', subDir)
        return dirContents['Files'].keys()

    def checkForTransfers(self):
        """Scan every defined transfer path and queue its pending files."""
        result = self.getDefinedTransferPaths()
        if not result['OK']:
            return result
        for name, transferPath in result['Value'].items():
            self.log.verbose("Checking %s transfer path" % name)
            filesToTransfer = self.getOutgoingFiles(transferPath)
            self.log.info("Transfer path %s has %d files" %
                          (name, len(filesToTransfer)))
            ret = self.__addFilesToThreadPool(filesToTransfer, transferPath)
            if not ret['OK']:
                # The thread pool got full
                break

    def processAllPendingTransfers(self):
        """Block until every queued transfer job has been processed."""
        self.__threadPool.processAllResults()

    @transferSync
    def __addFilesToThreadPool(self, files, transferDict):
        """Queue one transfer job per file, skipping files already in flight.

        :return: the pool's error result when its queue is full, S_OK otherwise
        """
        for entry in files:
            baseName = os.path.basename(entry)
            if baseName in self.__processingFiles:
                continue
            self.__processingFiles.add(baseName)
            # NOTE(review): 1s pause between submissions — presumably a
            # throttle on job submission; confirm before removing
            time.sleep(1)
            ret = self.__threadPool.generateJobAndQueueIt(
                self.__transferIfNotRegistered,
                args=(baseName, transferDict),
                oCallback=self.transferCallback,
                blocking=False)
            if not ret['OK']:
                # The thread pool got full
                return ret
        return S_OK()

    def __transferIfNotRegistered(self, file, transferDict):
        """Transfer a file unless it is already registered in the output catalog.

        If the file is already registered, the input copy (local file or
        catalog replicas) is deleted instead of transferred.

        :param str file: base name of the file to process
        :param dict transferDict: transfer path definition
        """
        result = self.isRegisteredInOutputCatalog(file, transferDict)
        if not result['OK']:
            self.log.error(result['Message'])
            return result
        # Already registered. Need to delete
        if result['Value']:
            self.log.info(
                "Transfer file %s is already registered in the output catalog"
                % file)
            # Delete
            filePath = os.path.join(transferDict['InputPath'], file)
            if transferDict['InputFC'] == 'LocalDisk':
                os.unlink(filePath)
            else:
                inputFC = FileCatalog([transferDict['InputFC']])
                replicaDict = inputFC.getReplicas(filePath)
                if not replicaDict['OK']:
                    self.log.error("Error deleting file",
                                   replicaDict['Message'])
                # Fixed: this branch previously referenced the undefined
                # name 'inFile' (NameError at runtime); the lookup key is
                # the catalog path 'filePath' used in getReplicas above
                elif filePath not in replicaDict['Value']['Successful']:
                    self.log.error("Error deleting file",
                                   replicaDict['Value']['Failed'][filePath])
                else:
                    seList = replicaDict['Value']['Successful'][filePath].keys()
                    for seName in seList:
                        se = StorageElement(seName)
                        self.log.info('Removing from %s:' % se.name, filePath)
                        se.removeFile(filePath)
                    # Remove the catalog entry by its full path as well
                    inputFC.removeFile(filePath)
            self.log.info("File %s deleted from %s" %
                          (file, transferDict['InputFC']))
            self.__processingFiles.discard(file)
            return S_OK(file)
        # Do the transfer
        return self.__retrieveAndUploadFile(file, transferDict)

    def isRegisteredInOutputCatalog(self, file, transferDict):
        """Check whether the file already has a replica on one of the output SEs.

        :return: S_OK(True/False), or the error structure from the catalog
        """
        outputFC = FileCatalog([transferDict['OutputFC']])
        lfn = os.path.join(transferDict['OutputPath'], os.path.basename(file))
        result = outputFC.getReplicas(lfn)
        if not result['OK']:
            return result
        successful = result['Value']['Successful']
        if lfn not in successful:
            return S_OK(False)
        replicas = successful[lfn]
        registered = False
        for seName in List.fromChar(transferDict['OutputSE'], ","):
            if seName in replicas:
                self.log.verbose(
                    "Transfer file %s is already registered in %s SE" %
                    (file, seName))
                registered = True
                break
        return S_OK(registered)

    def __retrieveAndUploadFile(self, file, outputDict):
        """Retrieve a file from its input location, upload it to the output
        SE/catalog, then remove the input copy.

        :param str file: file name (base name when InputFC is 'LocalDisk',
            otherwise relative to InputPath)
        :param dict outputDict: transfer path definition (InputPath, InputFC,
            OutputPath, OutputFC, OutputSE keys)
        :return: S_OK(fileName) on success, S_ERROR(fileName) on any failure
        """
        fileName = file
        inputPath = outputDict['InputPath']
        inputFCName = outputDict['InputFC']
        inBytes = 0
        if inputFCName == 'LocalDisk':
            inFile = file
            # 'file' is rebound to the full local path from here on
            file = os.path.join(inputPath, file)
        else:
            # Input lives in a catalog: locate a replica and download it
            inputFC = FileCatalog([inputFCName])

            inFile = os.path.join(inputPath, file)
            replicaDict = inputFC.getReplicas(inFile)
            if not replicaDict['OK']:
                self.log.error(replicaDict['Message'])
                return S_ERROR(fileName)
            if not inFile in replicaDict['Value']['Successful']:
                self.log.error(replicaDict['Value']['Failed'][inFile])
                return S_ERROR(fileName)
            seList = replicaDict['Value']['Successful'][inFile].keys()

            # Download from the first SE that holds a replica
            inputSE = StorageElement(seList[0])
            self.log.info('Retrieving from %s:' % inputSE.name, inFile)
            # ret = inputSE.getFile( inFile )
            # lcg_util binding prevent multithreading, use subprocess instead
            res = pythonCall(2 * 3600, inputSE.getFile, inFile)
            if not res['OK']:
                self.log.error(res['Message'])
                return S_ERROR(fileName)
            ret = res['Value']
            if not ret['OK']:
                self.log.error(ret['Message'])
                return S_ERROR(fileName)
            if not inFile in ret['Value']['Successful']:
                self.log.error(ret['Value']['Failed'][inFile])
                return S_ERROR(fileName)

        # os.stat index 6 is st_size: record the size for accounting
        if os.path.isfile(file):
            inBytes = os.stat(file)[6]

        outputPath = outputDict['OutputPath']
        outputFCName = outputDict['OutputFC']
        replicaManager = ReplicaManager()
        outFile = os.path.join(outputPath, os.path.basename(file))
        transferOK = False
        # Try each configured output SE in order until one upload succeeds
        for outputSEName in List.fromChar(outputDict['OutputSE'], ","):
            outputSE = StorageElement(outputSEName)
            self.log.info('Trying to upload to %s:' % outputSE.name, outFile)
            # ret = replicaManager.putAndRegister( outFile, os.path.realpath( file ), outputSE.name, catalog=outputFCName )
            # lcg_util binding prevent multithreading, use subprocess instead
            result = pythonCall(2 * 3600,
                                replicaManager.putAndRegister,
                                outFile,
                                os.path.realpath(file),
                                outputSE.name,
                                catalog=outputFCName)
            if result['OK'] and result['Value']['OK']:
                if outFile in result['Value']['Value']['Successful']:
                    transferOK = True
                    break
                else:
                    self.log.error(result['Value']['Value']['Failed'][outFile])
            else:
                if result['OK']:
                    self.log.error(result['Value']['Message'])
                else:
                    self.log.error(result['Message'])

        if not transferOK:
            return S_ERROR(fileName)

        # NOTE(review): 'result' here is the last pythonCall result from the
        # loop; transferOK=True implies result['OK'] is True, so the local
        # copy is always unlinked here and the branch below never fires
        if result['OK'] or not inputFCName == 'LocalDisk':
            os.unlink(file)

        if not result['OK']:
            self.log.error(ret['Message'])
            return S_ERROR(fileName)

        self.log.info("Finished transferring %s [%s bytes]" %
                      (inFile, inBytes))
        self.__okTransferredFiles += 1
        self.__okTransferredBytes += inBytes

        if inputFCName == 'LocalDisk':
            return S_OK(fileName)

        # Now the file is on final SE/FC, remove from input SE/FC
        for se in seList:
            se = StorageElement(se)
            self.log.info('Removing from %s:' % se.name, inFile)
            se.removeFile(inFile)

        inputFC.removeFile(inFile)

        return S_OK(fileName)

    @transferSync
    def transferCallback(self, threadedJob, submitResult):
        """Thread-pool callback: update success/failure bookkeeping for a file."""
        if submitResult['OK']:
            fileName = submitResult['Value']
            # A success clears any previous failure record for this file
            if fileName in self.__failedFiles:
                del self.__failedFiles[fileName]
        else:
            fileName = submitResult['Message']
            self.__failedFiles[fileName] = self.__failedFiles.get(fileName, 0) + 1
        # Take out from processing files
        self.__processingFiles.discard(fileName)