コード例 #1
0
class StorageFactory:
  """ Factory producing storage plug-in objects, either from explicitly supplied
      parameters (getStorage) or from the storage element description found in
      the CS below '/Resources/StorageElements' (getStorages).
  """

  def __init__( self, useProxy = False, vo = None ):
    """ c'tor

    :param useProxy: when true, every storage object is built with the 'Proxy' plug-in
    :param vo: VO name; when None it is looked up with getVOfromProxyGroup()
    """
    self.rootConfigPath = '/Resources/StorageElements'
    self.proxy = useProxy
    self.resourceStatus = ResourceStatus()
    self.vo = vo
    if self.vo is None:
      result = getVOfromProxyGroup()
      if result['OK']:
        self.vo = result['Value']
      else:
        # The original code built a RuntimeError here without raising it, so the
        # failure was completely silent. The problem is reported, but the object
        # stays usable: getStorages() returns S_ERROR when the VO is missing.
        gLogger.warn( "Can not get the current VO context: %s" % result.get( 'Message', '' ) )
    self.remotePlugins = []
    self.localPlugins = []
    self.name = ''
    self.options = {}
    self.protocolDetails = []
    self.storages = []

  ###########################################################################################
  #
  # Below are public methods for obtaining storage objects
  #

  def getStorageName( self, initialName ):
    """ Resolve the 'Alias' references of the given storage name.

    :param initialName: storage section name in the CS
    :return: the result structure of _getConfigStorageName
    """
    return self._getConfigStorageName( initialName, 'Alias' )

  def getStorage( self, parameterDict ):
    """ This instantiates a single storage for the details provided and doesn't check the CS.

    :param parameterDict: must hold 'StorageName' and 'PluginName' (or, for backward
                          compatibility, 'ProtocolName'); the whole dictionary is
                          passed to the storage plug-in constructor
    :return: S_OK( storage object ) or S_ERROR
    """
    # The storage name must be supplied.
    if 'StorageName' not in parameterDict:
      errStr = "StorageFactory.getStorage: StorageName must be supplied"
      gLogger.error( errStr )
      return S_ERROR( errStr )
    storageName = parameterDict['StorageName']

    # PluginName must be supplied otherwise nothing will work.
    if 'PluginName' in parameterDict:
      pluginName = parameterDict['PluginName']
    # Temporary fix for backward compatibility
    elif 'ProtocolName' in parameterDict:
      pluginName = parameterDict['ProtocolName']
    else:
      errStr = "StorageFactory.getStorage: PluginName must be supplied"
      gLogger.error( errStr )
      return S_ERROR( errStr )

    return self.__generateStorageObject( storageName, pluginName, parameterDict )

  def getStorages( self, storageName, pluginList = None ):
    """ Get an instance of a Storage based on the DIRAC SE name based on the CS entries CS

        'storageName' is the DIRAC SE name i.e. 'CERN-RAW'
        'pluginList' is an optional list of protocols if a sub-set is desired i.e ['SRM2','SRM1']

        :return: S_OK( dict ) with the keys 'StorageName', 'StorageOptions', 'StorageObjects',
                 'LocalPlugins', 'RemotePlugins', 'ProtocolOptions' and 'TurlProtocols',
                 or S_ERROR when the CS lookup fails or no plug-in could be instantiated
    """
    # Reset the state left over from a previous call
    self.remotePlugins = []
    self.localPlugins = []
    self.name = ''
    self.options = {}
    self.protocolDetails = []
    self.storages = []
    if pluginList is None:
      pluginList = []
    if not self.vo:
      return S_ERROR( 'Mandatory vo parameter is not defined' )

    # Get the name of the storage provided
    res = self._getConfigStorageName( storageName, 'Alias' )
    if not res['OK']:
      return res
    storageName = res['Value']
    self.name = storageName

    # In case the storage is made from a base SE, get this information
    res = self._getConfigStorageName( storageName, 'BaseSE' )
    if not res['OK']:
      return res
    storageName = res['Value']

    # Get the options defined in the CS for this storage
    res = self._getConfigStorageOptions( storageName )
    if not res['OK']:
      return res
    self.options = res['Value']

    # Get the protocol specific details
    res = self._getConfigStorageProtocols( storageName )
    if not res['OK']:
      return res
    self.protocolDetails = res['Value']

    requestedLocalPlugins = []
    requestedRemotePlugins = []
    requestedProtocolDetails = []
    turlProtocols = []
    # Generate the protocol specific plug-ins
    for protocolDict in self.protocolDetails:
      pluginName = protocolDict.get( 'PluginName' )
      # An empty pluginList means "all plug-ins"
      if pluginList and pluginName not in pluginList:
        continue
      protocol = protocolDict['Protocol']
      result = self.__generateStorageObject( storageName, pluginName, protocolDict )
      if result['OK']:
        self.storages.append( result['Value'] )
        if pluginName in self.localPlugins:
          turlProtocols.append( protocol )
          requestedLocalPlugins.append( pluginName )
        if pluginName in self.remotePlugins:
          requestedRemotePlugins.append( pluginName )
        requestedProtocolDetails.append( protocolDict )
      else:
        # A plug-in that fails to load is skipped, the others are still used
        gLogger.info( result['Message'] )

    if not self.storages:
      errStr = "StorageFactory.getStorages: Failed to instantiate any storage protocols."
      gLogger.error( errStr, self.name )
      return S_ERROR( errStr )

    resDict = {}
    resDict['StorageName'] = self.name
    resDict['StorageOptions'] = self.options
    resDict['StorageObjects'] = self.storages
    resDict['LocalPlugins'] = requestedLocalPlugins
    resDict['RemotePlugins'] = requestedRemotePlugins
    resDict['ProtocolOptions'] = requestedProtocolDetails
    resDict['TurlProtocols'] = turlProtocols
    return S_OK( resDict )

  ###########################################################################################
  #
  # Below are internal methods for obtaining section/option/value configuration
  #

  def _getConfigStorageName( self, storageName, referenceType ):
    """
      This gets the name of the storage from the configuration service.
      If the storage is a reference to another SE the resolution is performed.

      'storageName' is the storage section to check in the CS
      'referenceType' is the option holding the reference, i.e. 'Alias' or 'BaseSE'

      :return: S_OK( resolved storage name ) or S_ERROR
    """
    configPath = '%s/%s' % ( self.rootConfigPath, storageName )
    res = gConfig.getOptions( configPath )
    if not res['OK']:
      errStr = "StorageFactory._getConfigStorageName: Failed to get storage options"
      gLogger.error( errStr, res['Message'] )
      return S_ERROR( errStr )
    if not res['Value']:
      errStr = "StorageFactory._getConfigStorageName: Supplied storage doesn't exist."
      gLogger.error( errStr, configPath )
      return S_ERROR( errStr )
    if referenceType not in res['Value']:
      # Not a reference, the name is already the final one
      return S_OK( storageName )

    configPath = cfgPath( self.rootConfigPath, storageName, referenceType )
    referenceName = gConfig.getValue( configPath )
    # The referenced storage may itself be an alias, whatever the reference type was
    result = self._getConfigStorageName( referenceName, 'Alias' )
    if not result['OK']:
      return result
    return S_OK( result['Value'] )

  def _getConfigStorageOptions( self, storageName ):
    """ Get the options associated to the StorageElement as defined in the CS

        The '<statusType>Access' options of the CS are ignored; the statuses
        returned by the ResourceStatus client are merged in instead.

        :return: S_OK( dict of options ) or S_ERROR
    """
    storageConfigPath = cfgPath( self.rootConfigPath, storageName )
    res = gConfig.getOptions( storageConfigPath )
    if not res['OK']:
      errStr = "StorageFactory._getStorageOptions: Failed to get storage options."
      gLogger.error( errStr, "%s: %s" % ( storageName, res['Message'] ) )
      return S_ERROR( errStr )
    options = res['Value']
    optionsDict = {}
    for option in options:

      if option in [ 'ReadAccess', 'WriteAccess', 'CheckAccess', 'RemoveAccess']:
        continue
      optionConfigPath = cfgPath( storageConfigPath, option )
      # 'VO' is read with a list default, every other option with a string default
      if option in [ 'VO' ]:
        optionsDict[option] = gConfig.getValue( optionConfigPath, [] )
      else:
        optionsDict[option] = gConfig.getValue( optionConfigPath, '' )

    res = self.resourceStatus.getStorageElementStatus( storageName )
    if not res[ 'OK' ]:
      errStr = "StorageFactory._getStorageOptions: Failed to get storage status"
      gLogger.error( errStr, "%s: %s" % ( storageName, res['Message'] ) )
      return S_ERROR( errStr )

    # For safety, we did not add the ${statusType}Access keys
    # this requires modifications in the StorageElement class

    # We add the dictionary with the statusTypes and values
    # { 'statusType1' : 'status1', 'statusType2' : 'status2' ... }
    optionsDict.update( res[ 'Value' ][ storageName ] )

    return S_OK( optionsDict )

  def _getConfigStorageProtocols( self, storageName ):
    """ Protocol specific information is present as sections in the Storage configuration

        :return: S_OK( list of protocol dictionaries, ordered by sorted section name )
                 or S_ERROR as soon as one section cannot be parsed
    """
    storageConfigPath = cfgPath( self.rootConfigPath, storageName )
    res = gConfig.getSections( storageConfigPath )
    if not res['OK']:
      errStr = "StorageFactory._getConfigStorageProtocols: Failed to get storage sections"
      gLogger.error( errStr, "%s: %s" % ( storageName, res['Message'] ) )
      return S_ERROR( errStr )
    protocolSections = res['Value']
    sortedProtocolSections = sortList( protocolSections )
    protocolDetails = []
    for protocolSection in sortedProtocolSections:
      res = self._getConfigStorageProtocolDetails( storageName, protocolSection )
      if not res['OK']:
        return res
      protocolDetails.append( res['Value'] )
    return S_OK( protocolDetails )

  def _getConfigStorageProtocolDetails( self, storageName, protocolSection ):
    """
      Parse the contents of the protocol block

      Side effect: the plug-in name is appended to self.remotePlugins or
      self.localPlugins depending on the 'Access' option.

      :return: S_OK( protocol dictionary ) or S_ERROR
    """
    # First obtain the options that are available
    protocolConfigPath = cfgPath( self.rootConfigPath, storageName, protocolSection )
    res = gConfig.getOptions( protocolConfigPath )
    if not res['OK']:
      errStr = "StorageFactory.__getProtocolDetails: Failed to get protocol options."
      gLogger.error( errStr, "%s: %s" % ( storageName, protocolSection ) )
      return S_ERROR( errStr )
    options = res['Value']

    # We must have certain values internally even if not supplied in CS
    protocolDict = {'Access':'', 'Host':'', 'Path':'', 'Port':'', 'Protocol':'', 'PluginName':'', 'SpaceToken':'', 'WSUrl':''}
    for option in options:
      configPath = cfgPath( protocolConfigPath, option )
      optionValue = gConfig.getValue( configPath, '' )
      protocolDict[option] = optionValue

    # This is a temporary for backward compatibility
    if "ProtocolName" in protocolDict and not protocolDict['PluginName']:
      protocolDict['PluginName'] = protocolDict['ProtocolName']
    protocolDict.pop( 'ProtocolName', None )

    # The PluginName option must be defined. This is checked before the plug-in
    # lists are touched so that an empty name never ends up in them.
    if not protocolDict['PluginName']:
      errStr = "StorageFactory.__getProtocolDetails: 'PluginName' option is not defined."
      gLogger.error( errStr, "%s: %s" % ( storageName, protocolSection ) )
      return S_ERROR( errStr )

    # Evaluate the base path taking into account possible VO specific setting
    if self.vo:
      result = gConfig.getOptionsDict( cfgPath( protocolConfigPath, 'VOPath' ) )
      voPath = ''
      if result['OK']:
        voPath = result['Value'].get( self.vo, '' )
      if voPath:
        protocolDict['Path'] = voPath

    # Now update the local and remote protocol lists.
    # A warning will be given if the Access option is not set.
    access = protocolDict['Access'].lower()
    if access == 'remote':
      self.remotePlugins.append( protocolDict['PluginName'] )
    elif access == 'local':
      self.localPlugins.append( protocolDict['PluginName'] )
    else:
      errStr = "StorageFactory.__getProtocolDetails: The 'Access' option for %s:%s is neither 'local' or 'remote'." % ( storageName, protocolSection )
      gLogger.warn( errStr )

    return S_OK( protocolDict )

  ###########################################################################################
  #
  # Below is the method for obtaining the object instantiated for a provided storage configuration
  #

  def __generateStorageObject( self, storageName, pluginName, parameters ):
    """ Load the '<type>Storage' class from 'Resources.Storage.<type>Storage' and
        instantiate it; <type> is 'Proxy' when the factory was created with
        useProxy, the plug-in name otherwise.

        :return: S_OK( storage object ) or S_ERROR
    """
    storageType = pluginName
    if self.proxy:
      storageType = 'Proxy'

    objectLoader = ObjectLoader()
    result = objectLoader.loadObject( 'Resources.Storage.%sStorage' % storageType, storageType + 'Storage' )
    if not result['OK']:
      gLogger.error( 'Failed to load storage object: %s' % result['Message'] )
      return result

    storageClass = result['Value']
    try:
      storage = storageClass( storageName, parameters )
    except Exception as x:
      # Plug-in constructors are foreign code: report any failure as S_ERROR
      errStr = "StorageFactory._generateStorageObject: Failed to instantiate %s: %s" % ( storageName, x )
      gLogger.exception( errStr )
      return S_ERROR( errStr )

    return S_OK( storage )
コード例 #2
0
ファイル: dirac-dms-show-se-status.py プロジェクト: bmb/DIRAC
# CS section holding one sub-section per storage element
storageCFGBase = "/Resources/StorageElements"

# List the storage elements defined in the CS
res = gConfig.getSections( storageCFGBase, True )
if not res[ 'OK' ]:
  gLogger.error( 'Failed to get storage element info' )
  gLogger.error( res[ 'Message' ] )
  DIRAC.exit( -1 )

gLogger.info( "%s %s %s" % ( 'Storage Element'.ljust( 25 ), 'Read Status'.rjust( 15 ), 'Write Status'.rjust( 15 ) ) )

seList = sortList( res[ 'Value' ] )

resourceStatus = ResourceStatus()

res = resourceStatus.getStorageElementStatus( seList )
if not res[ 'OK' ]:
  gLogger.error( "Failed to get StorageElement status for %s" % str( seList ) )
  gLogger.error( res[ 'Message' ] )
  # Without this exit the loop below would fail with a KeyError on res[ 'Value' ]
  DIRAC.exit( -1 )

# Iterate in sorted order: the status dictionary does not keep the order of seList
for seName, statusDict in sorted( res[ 'Value' ].items() ):
  # A status type that is not reported is displayed as 'Active'
  readState = statusDict.get( 'Read', 'Active' )
  writeState = statusDict.get( 'Write', 'Active' )
  gLogger.notice("%s %s %s" % ( seName.ljust(25),readState.rjust(15),writeState.rjust(15)) )

コード例 #3
0
ファイル: FTS3Placement.py プロジェクト: CinziaLu/DIRAC
class FTS3Placement( FTSAbstractPlacement ):

  """
  This class manages all the FTS strategies, routes and what not
  """

  # server selection policy, one of 'Random', 'Sequence' or 'Failover'
  __serverPolicy = "Random"
  # index of the server returned by the next call of the 'Sequence' policy
  __nextServerID = 0
  # list of FTS3 servers, filled in the c'tor
  __serverList = None
  # NOTE(review): not read anywhere in this class, the instance attribute
  # 'maxAttempts' set in the c'tor is used instead
  __maxAttempts = 0


  def __init__( self, csPath = None, ftsHistoryViews = None ):
    """
        Call the init of the parent, and initialize the list of FTS3 servers

        :param csPath: passed unchanged to the parent c'tor
        :param ftsHistoryViews: passed unchanged to the parent c'tor
    """

    self.log = gLogger.getSubLogger( "FTS3Placement" )
    super( FTS3Placement, self ).__init__( csPath = csPath, ftsHistoryViews = ftsHistoryViews )
    srvList = getFTS3Servers()
    if not srvList['OK']:
      self.log.error( srvList['Message'] )

    # An empty list is used when the servers could not be obtained
    self.__serverList = srvList.get( 'Value', [] )
    # One attempt per known server
    self.maxAttempts = len( self.__serverList )

    self.rssClient = ResourceStatus()



  def getReplicationTree( self, sourceSEs, targetSEs, size, strategy = None ):
    """ For multiple source to multiple destination, find the optimal replication
        strategy.

       :param sourceSEs : list of source SE
       :param targetSEs : list of destination SE
       :param size : size of the File (currently not used)
       :param strategy : which strategy to use (currently not used)

       :returns S_OK(dict) < route name :  { dict with key Ancestor, SourceSE, TargetSE, Strategy } >
                or S_ERROR when no source SE is given

       For the time being, we are waiting for FTS3 to provide advisory mechanisms. So we just use
       simple techniques
    """

    # random.choice raises IndexError on an empty sequence
    if not sourceSEs:
      return S_ERROR( "FTS3Placement.getReplicationTree: no source SE supplied" )

    # We will use a single random source
    sourceSE = random.choice( sourceSEs )

    tree = {}
    for targetSE in targetSEs:
      tree["%s#%s" % ( sourceSE, targetSE )] = { "Ancestor" : False, "SourceSE" : sourceSE,
                           "TargetSE" : targetSE, "Strategy" : "FTS3Simple" }

    return S_OK( tree )



  def refresh( self, ftsHistoryViews ):
    """
    Refresh, whatever that means... recalculate all what you need,
    fetches the latest conf and what not.

    Simply delegates to the parent class.
    """
    return super( FTS3Placement, self ).refresh( ftsHistoryViews = ftsHistoryViews )



  def __failoverServerPolicy(self, attempt = 0):
    """
       Returns always the server at a given position (normally the first one)

       :param attempt: position of the server in the list
       :raises Exception: when attempt is beyond the end of the server list
    """
    if attempt >= len( self.__serverList ):
      raise Exception( "FTS3Placement.__failoverServerPolicy: attempt to reach non existing server index" )

    return self.__serverList[attempt]

  def __sequenceServerPolicy( self ):
    """
       Every time this policy is called, return the next server on the list,
       wrapping around at the end
    """

    fts3server = self.__serverList[self.__nextServerID]
    self.__nextServerID = ( self.__nextServerID + 1 ) % len( self.__serverList )
    return fts3server

  def __randomServerPolicy(self):
    """
      return a random server from the list
    """
    return random.choice( self.__serverList )


  def __chooseFTS3Server( self ):
    """
      Choose the appropriate FTS3 server depending on the policy

      :return: S_OK( server ) or S_ERROR when no server was found within
               self.maxAttempts attempts (always the case for an empty server list)
    """

    fts3Server = None
    attempt = 0
    # FIXME : need to get real value from RSS
    ftsServerStatus = True

    while not fts3Server and attempt < self.maxAttempts:
      if self.__serverPolicy == 'Random':
        fts3Server = self.__randomServerPolicy()
      elif self.__serverPolicy == 'Sequence':
        fts3Server = self.__sequenceServerPolicy()
      elif self.__serverPolicy == 'Failover':
        fts3Server = self.__failoverServerPolicy( attempt = attempt )
      else:
        self.log.error( 'Unknown server policy %s. Using Random instead' % self.__serverPolicy )
        fts3Server = self.__randomServerPolicy()

      if not ftsServerStatus:
        self.log.warn( 'FTS server %s is not in good shape. Choose another one' % fts3Server )
        fts3Server = None
      attempt += 1

        # FIXME : I need to get the FTS server status from RSS
#       ftsStatusFromRss = rss.ftsStatusOrSomethingLikeThat

    if fts3Server:
      return S_OK( fts3Server )

    return S_ERROR ( "Could not find an FTS3 server (max attempt reached)" )

  def findRoute( self, sourceSE, targetSE ):
    """ Find the appropriate route from point A to B
      :param sourceSE : source SE
      :param targetSE : destination SE

      :returns S_OK(FTSRoute) or the S_ERROR of the server selection

    """

    fts3server = self.__chooseFTS3Server()

    if not fts3server['OK']:
      return fts3server

    fts3server = fts3server['Value']

    route = FTSRoute( sourceSE, targetSE, fts3server )

    return S_OK( route )

  def isRouteValid( self, route ):
    """
        FIXME: until RSS is ready, I check manually the status
        In FTS3, all routes are valid a priori.
        If a route was not valid for some reason, then FTS would know it
        thanks to the blacklist sent by RSS, and would deal with it itself.

        The route is accepted when the source SE 'ReadAccess' and the target SE
        'WriteAccess' are both either 'Active' or 'Degraded'.

       :param route : FTSRoute

       :returns S_OK or S_ERROR(reason)
    """

    rAccess = self.rssClient.getStorageElementStatus( route.sourceSE, "ReadAccess" )
    self.log.debug( "se read %s %s" % ( route.sourceSE, rAccess ) )
    if not rAccess["OK"]:
      self.log.error( rAccess["Message"] )
      return rAccess

    if rAccess["Value"][route.sourceSE]["ReadAccess"] not in ( "Active", "Degraded" ):
      return S_ERROR( "Source SE is not readable" )

    wAccess = self.rssClient.getStorageElementStatus( route.targetSE, "WriteAccess" )
    self.log.debug( "se write %s %s" % ( route.targetSE, wAccess ) )
    if not wAccess["OK"]:
      self.log.error( wAccess["Message"] )
      return wAccess
    if wAccess["Value"][route.targetSE]["WriteAccess"] not in ( "Active", "Degraded" ):
      return S_ERROR( "Target SE is not writable" )

    return S_OK()
コード例 #4
0
ファイル: StrategyHandler.py プロジェクト: jemtchou/DIRAC
class StrategyHandler( object ):
  """
  .. class:: StrategyHandler

  StrategyHandler is a helper class for determining optimal replication tree for given
  source files, their replicas and target storage elements.
  """

  def __init__( self, configSection, channels=None, bandwidths=None, failedFiles=None ):
    """c'tor

    Reads its options from the CS, then builds the FTS graph by calling setup().

    :param self: self reference
    :param str configSection: path on CS to ReplicationScheduler agent
    :param bandwidths: observed throughput on active channels
    :param channels: active channels
    :param failedFiles: distinct failed files per channel
    :raises SHGraphCreationError: when setup() does not return OK
    """
    ## own section in the CS, named after the class
    self.configSection = configSection + "/" + self.__class__.__name__

    def csValue( option, default ):
      """ read :option: from the own CS section """
      return gConfig.getValue( self.configSection + "/" + option, default )

    ## sublogger
    self.log = gLogger.getSubLogger( "StrategyHandler", child=True )
    self.log.setLevel( csValue( "LogLevel", "DEBUG" ) )

    self.supportedStrategies = [ 'Simple', 'DynamicThroughput', 'Swarm', 'MinimiseTotalWait' ]
    self.log.info( "Supported strategies = %s" % ", ".join( self.supportedStrategies ) )

    ## options, each one logged right after it has been read
    self.sigma = csValue( "HopSigma", 0.0 )
    self.log.info( "HopSigma = %s" % self.sigma )

    self.schedulingType = csValue( "SchedulingType", 'File' )
    self.log.info( "SchedulingType = %s" % self.schedulingType )

    self.activeStrategies = csValue( "ActiveStrategies", ['MinimiseTotalWait'] )
    self.log.info( "ActiveStrategies = %s" % ", ".join( self.activeStrategies ) )

    self.numberOfStrategies = len( self.activeStrategies )
    self.log.info( "Number of active strategies = %s" % self.numberOfStrategies )

    self.acceptableFailureRate = csValue( "AcceptableFailureRate", 75 )
    self.log.info( "AcceptableFailureRate = %s" % self.acceptableFailureRate )

    self.acceptableFailedFiles = csValue( "AcceptableFailedFiles", 5 )
    self.log.info( "AcceptableFailedFiles = %s" % self.acceptableFailedFiles )

    ## RSS update period: read as seconds, kept as timedelta
    updatePeriod = csValue( "RssRWUpdatePeriod", 300 )
    self.log.info( "RSSUpdatePeriod = %s s" % updatePeriod )
    self.rwUpdatePeriod = datetime.timedelta( seconds=updatePeriod )

    ## bandwidths, channels and distinct failed files per channel
    self.bandwidths = bandwidths or {}
    self.channels = channels or {}
    self.failedFiles = failedFiles or {}
    ## chosen strategy
    self.chosenStrategy = 0
    ## fts graph, created in setup
    self.ftsGraph = None
    ## timestamp for last update
    self.lastRssUpdate = datetime.datetime.now()
    ## dispatcher: strategy name -> bound method
    self.strategyDispatcher = { "MinimiseTotalWait" : self.minimiseTotalWait,
                                "DynamicThroughput" : self.dynamicThroughput,
                                "Simple" : self.simple,
                                "Swarm" : self.swarm }
    ## own RSS client
    self.resourceStatus = ResourceStatus()
    ## create fts graph
    setupResult = self.setup( self.channels, self.bandwidths, self.failedFiles )
    if not setupResult["OK"]:
      raise SHGraphCreationError( setupResult["Message"] )
    self.log.info("%s has been constructed" % self.__class__.__name__ )

  def setup( self, channels, bandwithds, failedFiles ):
    """ build the FTS graph: one node per site, one edge per channel

    The new graph is stored in self.ftsGraph and self.lastRssUpdate is reset.

    :param dict channels: { channelID : { "Files" : long, "Size" : long, "ChannelName" : str,
                                          "Source" : str, "Destination" : str, "Status" : str } }
    :param dict bandwithds: { channelID : { "Throughput" : float, "Fileput" : float,
                                            "SuccessfulFiles" : long, "FailedFiles" : long } }
    :param dict failedFiles: { channelID : int }
    :return: S_OK() or the S_ERROR of the site mapping / RW access lookup
    """
    ftsGraph = FTSGraph( "sites" )

    mapping = getStorageElementSiteMapping()
    if not mapping['OK']:
      return mapping

    ## nodes: one LCGSite per site, holding the RW access of its SEs
    for site, seList in mapping['Value'].items():
      rwAccess = self.__getRWAccessForSE( seList )
      if not rwAccess["OK"]:
        return rwAccess
      ## for a dotted site name the second field is used as node name
      nodeName = site.split('.')[1] if '.' in site else site
      ftsGraph.addNode( LCGSite( nodeName, { "SEs" : rwAccess["Value"] } ) )

    ## edges: one FTSChannel per channel whose both ends are known nodes
    for channelID, channelDict in channels.items():
      sourceNode = ftsGraph.getNode( channelDict["Source"] )
      targetNode = ftsGraph.getNode( channelDict["Destination"] )
      if not ( sourceNode and targetNode ):
        continue
      ## attributes that change while scheduling
      mutableAttrs = {
        "status" : channelDict["Status"],
        "files" : channelDict["Files"],
        "size" : channelDict["Size"],
        "successfulAttempts" : bandwithds[channelID]["SuccessfulFiles"],
        "failedAttempts" : bandwithds[channelID]["FailedFiles"],
        "distinctFailedFiles" : failedFiles.get( channelID, 0 ),
        "fileput" : bandwithds[channelID]["Fileput"],
        "throughput" : bandwithds[channelID]["Throughput"],
      }
      ## attributes fixed for the life time of the channel
      fixedAttrs = {
        "channelID" : channelID,
        "channelName" : channelDict["ChannelName"],
        "acceptableFailureRate" : self.acceptableFailureRate,
        "acceptableFailedFiles" : self.acceptableFailedFiles,
        "schedulingType" : self.schedulingType,
      }
      ftsGraph.addEdge( FTSChannel( sourceNode, targetNode, mutableAttrs, fixedAttrs ) )

    self.ftsGraph = ftsGraph
    self.lastRssUpdate = datetime.datetime.now()
    return S_OK()

  def updateGraph( self, rwAccess=False, replicationTree=None, size=0.0 ):
    """ refresh the FTS graph

    :param rwAccess: when true, re-read the RW access of the SEs of every node (site)
    :param replicationTree: channels in this tree get one more file and :size: more bytes
    :param size: file size added to every channel of the tree
    :return: S_OK() or the S_ERROR of the RW access lookup
    """
    replicationTree = replicationTree or {}
    size = size or 0.0

    ## nodes: refresh the SEs access
    if rwAccess:
      for site in self.ftsGraph.nodes():
        access = self.__getRWAccessForSE( site.SEs.keys() )
        if not access["OK"]:
          return access
        site.SEs = access["Value"]

    ## edges: account for the newly scheduled file
    if replicationTree:
      for ftsChannel in self.ftsGraph.edges():
        if ftsChannel.channelID not in replicationTree:
          continue
        ftsChannel.size += size
        ftsChannel.files += 1

    return S_OK()
          
  def simple( self, sourceSEs, targetSEs ):
    """ simple strategy - one source, many targets

    Every target is reached directly from the single source; each channel may be
    used only once.

    :param list sourceSEs: list with only one sourceSE name
    :param list targetSEs: list with target SE names
    :return: S_OK( { channelID : { "Ancestor", "SourceSE", "DestSE", "Strategy" } } ) or S_ERROR
    """
    if len( sourceSEs ) != 1:
      return S_ERROR( "simple: wrong argument supplied for sourceSEs, only one sourceSE allowed" )
    sourceSE = sourceSEs[0]

    replicationTree = {}
    for targetSE in targetSEs:
      found = self.ftsGraph.findChannel( sourceSE, targetSE )
      if not found["OK"]:
        return S_ERROR( found["Message"] )
      ftsChannel = found["Value"]

      ## both ends have to be usable
      if not ftsChannel.fromNode.SEs[sourceSE]["read"]:
        return S_ERROR( "simple: sourceSE '%s' in banned for reading rigth now" % sourceSE )
      if not ftsChannel.toNode.SEs[targetSE]["write"]:
        return S_ERROR( "simple: targetSE '%s' is banned for writing rigth now" % targetSE )

      ## two targets behind the same channel cannot be served
      if ftsChannel.channelID in replicationTree:
        return S_ERROR( "simple: unable to create replication tree, channel '%s' cannot be used twice" %\
                          ftsChannel.channelName )

      replicationTree[ftsChannel.channelID] = { "Ancestor" : False,
                                                "SourceSE" : sourceSE,
                                                "DestSE" : targetSE,
                                                "Strategy" : "Simple" }

    return S_OK( replicationTree )
    
  def swarm( self, sourceSEs, targetSEs ):
    """ swarm strategy - one target, many sources, pick up the fastest

    Among the active channels leading to the target, the one with the smallest
    timeToStart is selected.

    :param list sourceSEs: list of source SE
    :param list targetSEs: one element list with name of target SE
    :return: S_OK( { channelID : { "Ancestor", "SourceSE", "DestSE", "Strategy" } } ) or S_ERROR
    """
    tree = {}
    channels = []
    ## exactly one target is required; an empty list used to end in an IndexError below
    if len(targetSEs) != 1:
      return S_ERROR("swarm: wrong argument supplied for targetSEs, only one targetSE allowed")
    targetSE = targetSEs[0]
    ## find channels
    for sourceSE in sourceSEs:
      channel = self.ftsGraph.findChannel( sourceSE, targetSE )
      if not channel["OK"]:
        self.log.warn( "swarm: %s" % channel["Message"] )
        continue
      channels.append( ( sourceSE, channel["Value"] ) )
    ## exit - no channels
    if not channels:
      return S_ERROR("swarm: unable to find FTS channels between '%s' and '%s'" % ( ",".join(sourceSEs), targetSE ) )
    ## filter out non active channels
    channels = [ ( sourceSE, channel ) for sourceSE, channel in channels
                 if channel.fromNode.SEs[sourceSE]["read"] and channel.toNode.SEs[targetSE]["write"] and
                 channel.status == "Active" and channel.timeToStart < float("inf") ]
    ## exit - no active channels
    if not channels:
      return S_ERROR( "swarm: no active channels found between %s and %s" % ( sourceSEs, targetSE ) )

    ## find min timeToStart
    minTimeToStart = float("inf")
    selSourceSE = selChannel = None
    for sourceSE, ftsChannel in channels:
      if ftsChannel.timeToStart < minTimeToStart:
        minTimeToStart = ftsChannel.timeToStart
        selSourceSE = sourceSE
        selChannel = ftsChannel

    if not selSourceSE:
      return S_ERROR( "swarm: no active channels found between %s and %s" % ( sourceSEs, targetSE ) )

    tree[selChannel.channelID] = { "Ancestor" : False, "SourceSE" : selSourceSE,
                                   "DestSE" : targetSE, "Strategy" : "Swarm" }
    return S_OK( tree )
          
  def minimiseTotalWait( self, sourceSEs, targetSEs ):
    """ find dag that minimises start time

    In every round the active, not yet used channel with the smallest time to
    start is selected (a channel inside one site wins immediately, ties are
    broken at random). Its target then becomes an additional source for the
    following rounds; channels starting from such a secondary source are
    penalised by self.sigma.

    NOTE: both argument lists are modified in place - every selected target is
    appended to sourceSEs and removed from targetSEs.

    :param list sourceSEs: list of available source SEs
    :param list targetSEs: list of target SEs
    :return: S_OK( { channelID : { "Ancestor", "SourceSE", "DestSE", "Strategy" } } ) or S_ERROR
    """
    tree = {}
    ## snapshot of the original sources: sourceSEs grows below, so keeping a plain
    ## reference would make every source look primary and sigma would never apply
    primarySources = list( sourceSEs )
    while targetSEs:
      minTimeToStart = float("inf")
      channels = []
      for targetSE in targetSEs:
        for sourceSE in sourceSEs:
          ftsChannel = self.ftsGraph.findChannel( sourceSE, targetSE )
          if not ftsChannel["OK"]:
            self.log.warn( "minimiseTotalWait: %s" % ftsChannel["Message"] )
            continue
          ftsChannel = ftsChannel["Value"]
          channels.append( ( ftsChannel, sourceSE, targetSE ) )
      if not channels:
        msg = "minimiseTotalWait: FTS channels between %s and %s not defined" % ( ",".join(sourceSEs),
                                                                                  ",".join(targetSEs) )
        self.log.error( msg )
        return S_ERROR( msg )
      ## filter out already used channels
      channels = [ (channel, sourceSE, targetSE) for channel, sourceSE, targetSE in channels
                   if channel.channelID not in tree ]
      if not channels:
        msg = "minimiseTotalWait: all FTS channels between %s and %s are already used in tree" % ( ",".join(sourceSEs),
                                                                                                   ",".join(targetSEs) )
        self.log.error( msg )
        return S_ERROR( msg )

      self.log.debug("minimiseTotalWait: found %s candiate channels, checking activity" % len( channels) )
      ## keep only channels that are active and usable on both ends
      channels = [ ( channel, sourceSE, targetSE ) for channel, sourceSE, targetSE in channels
                   if channel.fromNode.SEs[sourceSE]["read"] and channel.toNode.SEs[targetSE]["write"]
                   and channel.status == "Active" and channel.timeToStart < float("inf") ]

      if not channels:
        self.log.error("minimiseTotalWait: no active FTS channels found" )
        return S_ERROR("minimiseTotalWait: no active FTS channels found" )

      candidates = []
      for channel, sourceSE, targetSE in channels:
        timeToStart = channel.timeToStart
        ## penalty for an additional hop
        if sourceSE not in primarySources:
          timeToStart += self.sigma
        ## local found
        if channel.fromNode == channel.toNode:
          self.log.debug("minimiseTotalWait: found local channel '%s'" % channel.channelName )
          candidates = [ ( channel, sourceSE, targetSE ) ]
          break
        ## strictly better: start a new candidate list; equal: extend it
        ## (with '<=' here the tie branch could never be reached)
        if timeToStart < minTimeToStart:
          minTimeToStart = timeToStart
          candidates = [ ( channel, sourceSE, targetSE ) ]
        elif timeToStart == minTimeToStart:
          candidates.append( (channel, sourceSE, targetSE ) )

      if not candidates:
        return S_ERROR("minimiseTotalWait: unable to find candidate FTS channels minimising total wait time")

      ## random choice among equally good candidates
      random.shuffle( candidates )
      selChannel, selSourceSE, selTargetSE = candidates[0]
      ancestor = False
      for channelID, treeItem in tree.items():
        ## "DestSE" is a string: compare for equality, 'in' would be a substring match
        if selSourceSE == treeItem["DestSE"]:
          ancestor = channelID
      tree[selChannel.channelID] = { "Ancestor" : ancestor,
                                     "SourceSE" : selSourceSE,
                                     "DestSE" : selTargetSE,
                                     "Strategy" : "MinimiseTotalWait" }
      sourceSEs.append( selTargetSE )
      targetSEs.remove( selTargetSE )

    return S_OK(tree)

  def dynamicThroughput( self, sourceSEs, targetSEs ):
    """ dynamic throughput - many sources, many targets

    Builds the replication tree greedily: on every pass the not yet used, active FTS channel
    with the smallest estimated time to start is selected, its target SE becomes an additional
    source for the following passes and is removed from the targets still to be served.

    The estimated time to start of a channel is its own :timeToStart:, plus :self.sigma: when
    the source SE is not one of the SEs originally passed in :sourceSEs: (extra hop), plus the
    time recorded for reaching the source SE when that SE was itself chosen as a destination
    earlier on. A channel having the same node on both ends is taken right away.

    The lists passed in are not modified.

    :param self: self reference
    :param list sourceSEs: list of available source SE names
    :param list targetSEs: list of target SE names
    :return: S_OK( { channelID : { "Ancestor" : channelID or False, "SourceSE" : str, "DestSE" : str,
                                   "Strategy" : "DynamicThroughput" } } ) or S_ERROR( message )
    """
    tree = {}
    ## snapshot of original sources - an alias would grow together with :sourceSEs: below,
    ## every hop would look like a primary source and :self.sigma: would never be added
    primarySources = list( sourceSEs )
    ## work on private copies, caller's lists have to stay untouched
    sourceSEs = list( sourceSEs )
    targetSEs = list( targetSEs )
    ## estimated time for the file to show up at SEs already placed in the tree
    timeToSite = {}
    while targetSEs:
      channels = []
      for targetSE in targetSEs:
        for sourceSE in sourceSEs:
          ftsChannel = self.ftsGraph.findChannel( sourceSE, targetSE )
          if not ftsChannel["OK"]:
            self.log.warn( "dynamicThroughput: %s" % ftsChannel["Message"] )
            continue
          channels.append( ( ftsChannel["Value"], sourceSE, targetSE ) )
      ## no candidate channels found
      if not channels:
        msg = "dynamicThroughput: FTS channels between %s and %s are not defined" % ( ",".join( sourceSEs ),
                                                                                      ",".join( targetSEs ) )
        self.log.error( msg )
        return S_ERROR( msg )
      ## filter out already used channels
      channels = [ ( channel, sourceSE, targetSE ) for channel, sourceSE, targetSE in channels
                   if channel.channelID not in tree ]
      if not channels:
        msg = "dynamicThroughput: all FTS channels between %s and %s are already used in tree" % ( ",".join( sourceSEs ),
                                                                                                   ",".join( targetSEs ) )
        self.log.error( msg )
        return S_ERROR( msg )
      ## filter out non-active channels
      self.log.debug( "dynamicThroughput: found %s candidate channels, checking activity" % len( channels ) )
      channels = [ ( channel, sourceSE, targetSE ) for channel, sourceSE, targetSE in channels
                   if channel.fromNode.SEs[sourceSE]["read"] and channel.toNode.SEs[targetSE]["write"]
                   and channel.status == "Active" and channel.timeToStart < float( "inf" ) ]
      if not channels:
        self.log.info( "dynamicThroughput: active candidate channels not found" )
        return S_ERROR( "dynamicThroughput: no active candidate FTS channels" )

      candidates = []
      minTimeToStart = float( "inf" )
      selTimeToStart = None
      for channel, sourceSE, targetSE in channels:
        timeToStart = channel.timeToStart
        if sourceSE not in primarySources:
          timeToStart += self.sigma
        if sourceSE in timeToSite:
          timeToStart += timeToSite[sourceSE]
        ## local found
        if channel.fromNode == channel.toNode:
          self.log.debug( "dynamicThroughput: found local channel '%s'" % channel.channelName )
          candidates = [ ( channel, sourceSE, targetSE ) ]
          selTimeToStart = timeToStart
          break
        ## strictly better (or the very first one) starts a new candidates list,
        ## equally good ones are collected so one of them can be drawn at random
        if not candidates or timeToStart < minTimeToStart:
          selTimeToStart = minTimeToStart = timeToStart
          candidates = [ ( channel, sourceSE, targetSE ) ]
        elif timeToStart == minTimeToStart:
          candidates.append( ( channel, sourceSE, targetSE ) )

      if not candidates:
        return S_ERROR( "dynamicThroughput: unable to find candidate FTS channels" )

      selChannel, selSourceSE, selTargetSE = random.choice( candidates )
      ## ancestor is the channel which delivers the file to the selected source,
      ## SE names are compared as a whole, not as substrings
      ancestor = False
      for channelID, treeItem in tree.items():
        if treeItem["DestSE"] == selSourceSE:
          ancestor = channelID
      tree[selChannel.channelID] = { "Ancestor" : ancestor,
                                     "SourceSE" : selSourceSE,
                                     "DestSE" : selTargetSE,
                                     "Strategy" : "DynamicThroughput" }
      timeToSite[selTargetSE] = selTimeToStart
      sourceSEs.append( selTargetSE )
      targetSEs.remove( selTargetSE )

    return S_OK( tree )

  def reset( self ):
    """ Put :chosenStrategy: back to its initial value (0).

    :chosenStrategy: is the index :__selectStrategy: reads from :activeStrategies:,
    so after this call the selection starts again from the first active strategy.

    :param self: self reference
    """
    firstStrategy = 0
    self.chosenStrategy = firstStrategy

  def getSupportedStrategies( self ):
    """ Tell which strategies are supported.

    :param self: self reference
    :return: the :supportedStrategies: attribute itself (no copy is made)
    """
    supported = self.supportedStrategies
    return supported

  def replicationTree( self, sourceSEs, targetSEs, size, strategy=None ):
    """ Build replication tree using one of supported strategies.

    When more than :rwUpdatePeriod: has passed since :lastRssUpdate:, the read/write access
    kept in the FTS graph is refreshed first; a failing refresh is only logged as a warning.
    After the tree has been built the graph is updated with the tree and the file size.

    :param self: self reference
    :param list sourceSEs: list of source SE names to use
    :param list targetSEs: list of target SE names to use
    :param long size: file size
    :param str strategy: strategy name, when not given :__selectStrategy: picks one
    :return: result returned by the strategy (S_OK( tree )), or S_ERROR for unsupported strategy,
             or failed result of the strategy or of the graph update
    """
    ## refresh SEs rw access when the cached one got too old
    timestamp = datetime.datetime.now()
    if timestamp - self.lastRssUpdate > self.rwUpdatePeriod:
      refreshed = self.updateGraph( rwAccess=True )
      if refreshed["OK"]:
        self.lastRssUpdate = timestamp
      else:
        self.log.warn("replicationTree: unable to update FTS graph: %s" % refreshed["Message"] )
    ## pick up strategy
    if not strategy:
      strategy = self.__selectStrategy()
    if strategy not in self.getSupportedStrategies():
      return S_ERROR("replicationTree: unsupported strategy '%s'" % strategy )

    logArgs = ( strategy, sourceSEs, targetSEs, size )
    self.log.info( "replicationTree: strategy=%s sourceSEs=%s targetSEs=%s size=%s" % logArgs )
    ## dispatch to the method implementing the strategy
    buildTree = self.strategyDispatcher[strategy]
    tree = buildTree( sourceSEs, targetSEs )
    if not tree["OK"]:
      self.log.error( "replicationTree: %s" % tree["Message"] )
      return tree
    ## account for the new transfers on graph edges
    updated = self.updateGraph( replicationTree=tree["Value"], size=size )
    if updated["OK"]:
      return tree
    self.log.error( "replicationTree: unable to update FTS graph: %s" % updated["Message"] )
    return updated
    
  def __selectStrategy( self ):
    """ Pick up active strategies one after the other.

    Returns the entry of :activeStrategies: found at index :chosenStrategy: and moves
    the index one step forward, going back to 0 once it reaches :numberOfStrategies:.

    :param self: self reference
    :return: selected strategy
    """
    picked = self.activeStrategies[self.chosenStrategy]
    following = self.chosenStrategy + 1
    self.chosenStrategy = 0 if following == self.numberOfStrategies else following
    return picked

  def __getRWAccessForSE( self, seList ):
    """ get RSS R/W for :seList:

    RSS is asked for 'ReadAccess' and then for 'WriteAccess' status of SEs in :seList:.
    An SE is flagged as readable (writable) only if the status is present in the answer
    and is either 'Active' or 'Degraded'.

    :param self: self reference
    :param list seList: SE list
    :return: S_OK( { se : { "read" : bool, "write" : bool } } ) or the failed result of the RSS query
    """
    usable = ( "Active", "Degraded" )
    rwDict = dict( ( se, { "read" : False, "write" : False } ) for se in seList )
    rAccess = self.resourceStatus.getStorageElementStatus( seList, statusType = "ReadAccess", default = 'Unknown' )
    if not rAccess["OK"]:
      ## hand over the whole error structure: callers test result["OK"],
      ## a bare message string would blow up there
      return rAccess
    readable = set( se for se, status in rAccess["Value"].items()
                    if "ReadAccess" in status and status["ReadAccess"] in usable )
    wAccess = self.resourceStatus.getStorageElementStatus( seList, statusType = "WriteAccess", default = 'Unknown' )
    if not wAccess["OK"]:
      return wAccess
    writable = set( se for se, status in wAccess["Value"].items()
                    if "WriteAccess" in status and status["WriteAccess"] in usable )
    for se, flags in rwDict.items():
      flags["read"] = se in readable
      flags["write"] = se in writable
    return S_OK( rwDict )
コード例 #5
0
    result = getVOfromProxyGroup()
    if not result['OK']:
        gLogger.notice('Error:', result['Message'])
        DIRAC.exit(1)
    vo = result['Value']
    resources = Resources(vo=vo)
    result = resources.getEligibleStorageElements()
    if not result['OK']:
        gLogger.notice('Error:', result['Message'])
        DIRAC.exit(2)
    seList = sortList(result['Value'])

    resourceStatus = ResourceStatus()

    result = resourceStatus.getStorageElementStatus(seList)
    if not result['OK']:
        gLogger.notice('Error:', result['Message'])
        DIRAC.exit(3)

    for k, v in result['Value'].items():

        readState, writeState = 'Active', 'Active'

        if v.has_key('ReadAccess'):
            readState = v['ReadAccess']

        if v.has_key('WriteAccess'):
            writeState = v['WriteAccess']
        gLogger.notice(
            "%s %s %s" %
コード例 #6
0
        gLogger.error('The provided site (%s) is not known.' % site)
        DIRAC.exit(-1)
    ses.extend(res['Value']['SE'].replace(' ', '').split(','))

if not ses:
    gLogger.error('There were no SEs provided')
    DIRAC.exit(-1)

readBanned = []
writeBanned = []
checkBanned = []
removeBanned = []

resourceStatus = ResourceStatus()

res = resourceStatus.getStorageElementStatus(ses)
if not res['OK']:
    gLogger.error("Storage Element %s does not exist" % ses)
    DIRAC.exit(-1)

reason = 'Forced with dirac-admin-ban-se by %s' % userName

for se, seOptions in res['Value'].items():

    resW = resC = resR = {'OK': False}

    # Eventually, we will get rid of the notion of InActive, as we always write Banned.
    if read and seOptions.has_key('ReadAccess'):

        if not seOptions['ReadAccess'] in ['Active', 'Degraded', 'Probing']:
            gLogger.notice('Read option for %s is %s, instead of %s' %
コード例 #7
0
class FTS3Placement(FTSAbstractPlacement):
    """
    This class manages all the FTS3 strategies, routes and what not:
    it chooses the FTS3 server to use, builds replication trees and
    routes, and checks routes against the RSS status of their SEs.
    """

    # policy used to choose the server: 'Random', 'Sequence' or 'Failover',
    # anything else is reported and treated as 'Random'
    __serverPolicy = "Random"
    # index of the server handed out next by the 'Sequence' policy
    __nextServerID = 0
    # list of FTS3 servers, filled in by the constructor
    __serverList = None
    # NOTE(review): not read anywhere in this class, the constructor sets
    # the public 'maxAttempts' attribute instead - confirm before removing
    __maxAttempts = 0

    def __init__(self, csPath=None, ftsHistoryViews=None):
        """
        Call the init of the parent, and initialize the list of FTS3 servers

        :param csPath: passed as is to the parent constructor
        :param ftsHistoryViews: passed as is to the parent constructor
        """

        self.log = gLogger.getSubLogger("FTS3Placement")
        super(FTS3Placement, self).__init__(csPath=csPath,
                                            ftsHistoryViews=ftsHistoryViews)
        srvList = getFTS3Servers()
        if not srvList['OK']:
            self.log.error(srvList['Message'])

        # falls back to an empty list when the result carries no 'Value'
        self.__serverList = srvList.get('Value', [])
        # at most one attempt per known server when choosing one
        self.maxAttempts = len(self.__serverList)

        self.rssClient = ResourceStatus()

    def getReplicationTree(self, sourceSEs, targetSEs, size, strategy=None):
        """
        For multiple source to multiple destination, find the optimal
        replication strategy.

        For the time being, we are waiting for FTS3 to provide advisory
        mechanisms. So we just use simple techniques: a single source is
        drawn at random and used for every target. 'size' and 'strategy'
        are not used.

        :param sourceSEs: list of source SE
        :param targetSEs: list of destination SE
        :param size: size of the File
        :param strategy: which strategy to use

        :returns: S_OK(dict) with one entry per target SE,
                  { "<sourceSE>#<targetSE>" : { "Ancestor" : False,
                    "SourceSE" : sourceSE, "TargetSE" : targetSE,
                    "Strategy" : "FTS3Simple" } },
                  or S_ERROR when no source SE is given
        """

        # random.choice raises IndexError on an empty sequence, report the
        # problem the same way as every other failure instead
        if not sourceSEs:
            return S_ERROR("getReplicationTree: no source SE supplied")

        # We will use a single random source
        sourceSE = random.choice(sourceSEs)

        tree = {}
        for targetSE in targetSEs:
            tree["%s#%s" % (sourceSE, targetSE)] = {
                "Ancestor": False,
                "SourceSE": sourceSE,
                "TargetSE": targetSE,
                "Strategy": "FTS3Simple"
            }

        return S_OK(tree)

    def refresh(self, ftsHistoryViews):
        """
        Refresh, whatever that means... recalculate all what you need,
        fetches the latest conf and what not.

        Nothing is done here apart from delegating to the parent class.

        :param ftsHistoryViews: passed as is to the parent's refresh
        :returns: whatever the parent's refresh returns
        """
        return super(FTS3Placement,
                     self).refresh(ftsHistoryViews=ftsHistoryViews)

    def __failoverServerPolicy(self, attempt=0):
        """
        Returns always the server at a given position (normally the first one)

        :param attempt: position of the server in the list
        :returns: the server found at that position
        :raises Exception: when 'attempt' is past the end of the server list
        """
        if attempt >= len(self.__serverList):
            raise Exception(
                "FTS3Placement.__failoverServerPolicy: attempt to reach non existing server index"
            )

        return self.__serverList[attempt]

    def __sequenceServerPolicy(self):
        """
        Every time this policy is called, return the next server on the
        list, starting over from the first one after the last one.
        """

        fts3server = self.__serverList[self.__nextServerID]
        self.__nextServerID = (self.__nextServerID + 1) % len(
            self.__serverList)
        return fts3server

    def __randomServerPolicy(self):
        """
        Return a random server from the list
        """
        return random.choice(self.__serverList)

    def __chooseFTS3Server(self):
        """
        Choose the appropriate FTS3 server depending on the policy

        :returns: S_OK(server), or S_ERROR when no server was selected
                  within 'maxAttempts' attempts
        """

        fts3Server = None
        attempt = 0
        # FIXME : need to get real value from RSS, for the time being
        # every server is considered to be in good shape
        ftsServerStatus = True

        while not fts3Server and attempt < self.maxAttempts:
            if self.__serverPolicy == 'Random':
                fts3Server = self.__randomServerPolicy()
            elif self.__serverPolicy == 'Sequence':
                fts3Server = self.__sequenceServerPolicy()
            elif self.__serverPolicy == 'Failover':
                fts3Server = self.__failoverServerPolicy(attempt=attempt)
            else:
                self.log.error(
                    'Unknown server policy %s. Using Random instead' %
                    self.__serverPolicy)
                fts3Server = self.__randomServerPolicy()

            # drop the selection and try again when the server is not usable
            if not ftsServerStatus:
                self.log.warn(
                    'FTS server %s is not in good shape. Choose another one' %
                    fts3Server)
                fts3Server = None
            attempt += 1

            # FIXME : I need to get the FTS server status from RSS

        if fts3Server:
            return S_OK(fts3Server)

        return S_ERROR("Could not find an FTS3 server (max attempt reached)")

    def findRoute(self, sourceSE, targetSE):
        """
        Find the appropriate route from point A to B

        :param sourceSE: source SE
        :param targetSE: destination SE

        :returns: S_OK(FTSRoute), or the S_ERROR obtained when no FTS3
                  server could be chosen
        """

        fts3server = self.__chooseFTS3Server()

        if not fts3server['OK']:
            return fts3server

        fts3server = fts3server['Value']

        route = FTSRoute(sourceSE, targetSE, fts3server)

        return S_OK(route)

    def isRouteValid(self, route):
        """
        FIXME: until RSS is ready, I check manually the status
        In FTS3, all routes are valid a priori.
        If a route was not valid for some reason, then FTS would know it
        thanks to the blacklist sent by RSS, and would deal with it itself.

        The route is valid when its source SE has 'ReadAccess' and its
        target SE has 'WriteAccess' reported as 'Active' or 'Degraded'.

        :param route: FTSRoute

        :returns: S_OK or S_ERROR(reason)
        """

        rAccess = self.rssClient.getStorageElementStatus(
            route.sourceSE, "ReadAccess")
        self.log.debug("se read %s %s" % (route.sourceSE, rAccess))
        if not rAccess["OK"]:
            self.log.error(rAccess["Message"])
            return rAccess

        if rAccess["Value"][route.sourceSE]["ReadAccess"] not in ("Active",
                                                                  "Degraded"):
            return S_ERROR("Source SE is not readable")

        wAccess = self.rssClient.getStorageElementStatus(
            route.targetSE, "WriteAccess")
        self.log.debug("se write %s %s" % (route.targetSE, wAccess))
        if not wAccess["OK"]:
            self.log.error(wAccess["Message"])
            return wAccess
        if wAccess["Value"][route.targetSE]["WriteAccess"] not in ("Active",
                                                                   "Degraded"):
            return S_ERROR("Target SE is not writable")

        return S_OK()
コード例 #8
0
ファイル: StrategyHandler.py プロジェクト: ptakha/DIRAC-1
class StrategyHandler(object):
    """
  .. class:: StrategyHandler

  StrategyHandler is a helper class for determining optimal replication tree for given
  source files, their replicas and target storage elements.
  """
    def __init__(self,
                 configSection,
                 channels=None,
                 bandwidths=None,
                 failedFiles=None):
        """ Read the options from CS and build the FTS graph.

        :param self: self reference
        :param str configSection: path on CS to ReplicationScheduler agent, the options
                                  are read from its <class name> subsection
        :param channels: active channels, empty dict when not given
        :param bandwidths: observed throughput on active channels, empty dict when not given
        :param failedFiles: distinct failed files per channel, empty dict when not given
        :raises SHGraphCreationError: when :setup: is not able to create the FTS graph
        """
        ## save config section
        section = configSection + "/" + self.__class__.__name__
        self.configSection = section

        def option(name, default):
            """ read option :name: from handler's own CS section """
            return gConfig.getValue(section + "/" + name, default)

        ## sublogger
        self.log = gLogger.getSubLogger("StrategyHandler", child=True)
        self.log.setLevel(option("LogLevel", "DEBUG"))

        self.supportedStrategies = [
            'Simple', 'DynamicThroughput', 'Swarm', 'MinimiseTotalWait'
        ]
        self.log.info("Supported strategies = %s" %
                      ", ".join(self.supportedStrategies))

        self.sigma = option('HopSigma', 0.0)
        self.log.info("HopSigma = %s" % self.sigma)

        self.schedulingType = option('SchedulingType', 'File')
        self.log.info("SchedulingType = %s" % self.schedulingType)

        self.activeStrategies = option('ActiveStrategies',
                                       ['MinimiseTotalWait'])
        self.log.info("ActiveStrategies = %s" %
                      ", ".join(self.activeStrategies))

        self.numberOfStrategies = len(self.activeStrategies)
        self.log.info("Number of active strategies = %s" %
                      self.numberOfStrategies)

        self.acceptableFailureRate = option('AcceptableFailureRate', 75)
        self.log.info("AcceptableFailureRate = %s" %
                      self.acceptableFailureRate)

        self.acceptableFailedFiles = option("AcceptableFailedFiles", 5)
        self.log.info("AcceptableFailedFiles = %s" %
                      self.acceptableFailedFiles)

        ## period is given in seconds on CS, but kept as timedelta
        periodSeconds = option("RssRWUpdatePeriod", 600)
        self.log.info("RSSUpdatePeriod = %s s" % periodSeconds)
        self.rwUpdatePeriod = datetime.timedelta(seconds=periodSeconds)

        ## bandwidths, channels and distinct failed files per channel
        self.bandwidths = bandwidths or {}
        self.channels = channels or {}
        self.failedFiles = failedFiles or {}
        ## index of the strategy to be chosen next
        self.chosenStrategy = 0
        ## fts graph, created in setup
        self.ftsGraph = None
        ## timestamp for last update
        self.lastRssUpdate = datetime.datetime.now()
        ## dispatcher: strategy name -> method implementing it
        self.strategyDispatcher = {
            "MinimiseTotalWait": self.minimiseTotalWait,
            "DynamicThroughput": self.dynamicThroughput,
            "Simple": self.simple,
            "Swarm": self.swarm
        }
        ## own RSS client
        self.resourceStatus = ResourceStatus()
        ## create fts graph
        created = self.setup(self.channels, self.bandwidths, self.failedFiles)
        if not created["OK"]:
            raise SHGraphCreationError(created["Message"])
        self.log.info("%s has been constructed" % self.__class__.__name__)

    def setup(self, channels, bandwithds, failedFiles):
        """ Create the FTS graph: one node per site, one edge per FTS channel.

        Sites and their SEs come from getStorageElementSiteMapping, every node
        holds the R/W access of its SEs. For a dotted site name the second
        dot-separated field is used as the node name. A channel is added only
        if both its source and its destination have a node in the graph.
        On success :ftsGraph: and :lastRssUpdate: are set.

        :param self: self reference
        :param dict channels: { channelID : { "Files" : ..., "Size" : ..., "ChannelName" : ...,
                                              "Source" : ..., "Destination" : ..., "Status" : ... } }
        :param dict bandwithds: { channelID : { "Throughput" : ..., "Fileput" : ...,
                                                "SuccessfulFiles" : ..., "FailedFiles" : ... } }
        :param dict failedFiles: { channelID : distinct failed files }, 0 is used for missing channels
        :return: S_OK() or the failed result of site mapping / RSS query
        """
        ftsGraph = FTSGraph("sites")

        mapping = getStorageElementSiteMapping()
        if not mapping['OK']:
            return mapping

        ## nodes: sites together with R/W access of their SEs
        for site, ses in mapping['Value'].items():
            seAccess = self.__getRWAccessForSE(ses)
            if not seAccess["OK"]:
                return seAccess
            nodeName = site.split('.')[1] if '.' in site else site
            ftsGraph.addNode(LCGSite(nodeName, {"SEs": seAccess["Value"]}))

        ## edges: FTS channels
        for channelID, channelDict in channels.items():
            fromNode = ftsGraph.getNode(channelDict["Source"])
            toNode = ftsGraph.getNode(channelDict["Destination"])
            if not (fromNode and toNode):
                ## at least one end of the channel is not in the graph
                continue
            ## attributes changing while scheduling goes on
            rwAttrs = {
                "status": channelDict["Status"],
                "files": channelDict["Files"],
                "size": channelDict["Size"]
            }
            observed = bandwithds[channelID]
            rwAttrs["successfulAttempts"] = observed["SuccessfulFiles"]
            rwAttrs["failedAttempts"] = observed["FailedFiles"]
            rwAttrs["distinctFailedFiles"] = failedFiles.get(channelID, 0)
            rwAttrs["fileput"] = observed["Fileput"]
            rwAttrs["throughput"] = observed["Throughput"]
            ## fixed attributes
            roAttrs = {
                "channelID": channelID,
                "channelName": channelDict["ChannelName"],
                "acceptableFailureRate": self.acceptableFailureRate,
                "acceptableFailedFiles": self.acceptableFailedFiles,
                "schedulingType": self.schedulingType
            }
            ftsGraph.addEdge(FTSChannel(fromNode, toNode, rwAttrs, roAttrs))

        self.ftsGraph = ftsGraph
        self.lastRssUpdate = datetime.datetime.now()
        return S_OK()

    def updateGraph(self, rwAccess=False, replicationTree=None, size=0.0):
        """ Update R/W access of graph nodes (sites) and size and files of edges (channels).

        :param self: self reference
        :param bool rwAccess: when set, R/W access of SEs is read again for every node
        :param dict replicationTree: tree keyed by channelID; every channel found in it
                                     gets :size: added to its size and one more file
        :param float size: file size, 0.0 is used for anything false
        :return: S_OK() or the failed result of the R/W access query
        """
        tree = replicationTree or {}
        increment = size or 0.0
        ## nodes: refresh rw access for SEs
        if rwAccess:
            for node in self.ftsGraph.nodes():
                access = self.__getRWAccessForSE(node.SEs.keys())
                if not access["OK"]:
                    return access
                node.SEs = access["Value"]
        ## edges: account for the new transfer on channels used in tree
        if tree:
            used = (edge for edge in self.ftsGraph.edges()
                    if edge.channelID in tree)
            for edge in used:
                edge.size += increment
                edge.files += 1
        return S_OK()

    def simple(self, sourceSEs, targetSEs):
        """ simple strategy - one source, many targets

        Every target SE is served directly from the only source SE. The first problem
        found (no channel, source banned for reading, target banned for writing or
        a channel needed twice) stops the procedure.

        :param self: self reference
        :param list sourceSEs: list with only one sourceSE name
        :param list targetSEs: list with target SE names
        :return: S_OK( { channelID : { "Ancestor" : False, "SourceSE" : str, "DestSE" : str,
                                       "Strategy" : "Simple" } } ) or S_ERROR( message )
        """
        if len(sourceSEs) != 1:
            return S_ERROR(
                "simple: wrong argument supplied for sourceSEs, only one sourceSE allowed"
            )
        sourceSE = sourceSEs[0]
        tree = {}
        for targetSE in targetSEs:
            found = self.ftsGraph.findChannel(sourceSE, targetSE)
            if not found["OK"]:
                return S_ERROR(found["Message"])
            ftsChannel = found["Value"]
            readable = ftsChannel.fromNode.SEs[sourceSE]["read"]
            if not readable:
                return S_ERROR(
                    "simple: sourceSE '%s' in banned for reading rigth now" %
                    sourceSE)
            writable = ftsChannel.toNode.SEs[targetSE]["write"]
            if not writable:
                return S_ERROR(
                    "simple: targetSE '%s' is banned for writing rigth now" %
                    targetSE)
            ## tree is keyed by channelID, a channel can serve one target only
            if ftsChannel.channelID in tree:
                return S_ERROR(
                    "simple: unable to create replication tree, channel '%s' cannot be used twice"
                    % ftsChannel.channelName)
            tree[ftsChannel.channelID] = dict(Ancestor=False,
                                              SourceSE=sourceSE,
                                              DestSE=targetSE,
                                              Strategy="Simple")

        return S_OK(tree)

    def swarm(self, sourceSEs, targetSEs):
        """ swarm strategy - one target, many sources, pick up the fastest

        Among the channels leading from any of the sources to the only target, the
        active one with the smallest :timeToStart: is chosen (the first one found
        in case of a tie).

        :param self: self reference
        :param list sourceSEs: list of source SE
        :param list targetSEs: one element list with name of target SE
        :return: S_OK( { channelID : { "Ancestor" : False, "SourceSE" : str, "DestSE" : str,
                                       "Strategy" : "Swarm" } } ) or S_ERROR( message )
        """
        tree = {}
        channels = []
        ## exactly one target is a must, an empty list would otherwise
        ## end up with IndexError a few lines below
        if len(targetSEs) != 1:
            return S_ERROR(
                "swarm: wrong argument supplied for targetSEs, only one targetSE allowed"
            )
        targetSE = targetSEs[0]
        ## find channels
        for sourceSE in sourceSEs:
            channel = self.ftsGraph.findChannel(sourceSE, targetSE)
            if not channel["OK"]:
                self.log.warn("swarm: %s" % channel["Message"])
                continue
            channels.append((sourceSE, channel["Value"]))
        ## exit - no channels
        if not channels:
            return S_ERROR(
                "swarm: unable to find FTS channels between '%s' and '%s'" %
                (",".join(sourceSEs), targetSE))
        ## filter out non active channels
        channels = [
            (sourceSE, channel) for sourceSE, channel in channels
            if channel.fromNode.SEs[sourceSE]["read"]
            and channel.toNode.SEs[targetSE]["write"] and channel.status ==
            "Active" and channel.timeToStart < float("inf")
        ]
        ## exit - no active channels
        if not channels:
            return S_ERROR(
                "swarm: no active channels found between %s and %s" %
                (sourceSEs, targetSE))

        ## find min timeToStart
        minTimeToStart = float("inf")
        selSourceSE = selChannel = None
        for sourceSE, ftsChannel in channels:
            if ftsChannel.timeToStart < minTimeToStart:
                minTimeToStart = ftsChannel.timeToStart
                selSourceSE = sourceSE
                selChannel = ftsChannel

        if not selSourceSE:
            return S_ERROR(
                "swarm: no active channels found between %s and %s" %
                (sourceSEs, targetSE))

        tree[selChannel.channelID] = {
            "Ancestor": False,
            "SourceSE": selSourceSE,
            "DestSE": targetSE,
            "Strategy": "Swarm"
        }
        return S_OK(tree)

    def minimiseTotalWait(self, sourceSEs, targetSEs):
        """ find dag that minimises start time

        Builds the replication tree greedily: on every pass the not yet used, active
        FTS channel with the smallest time to start is selected, its target SE becomes
        an additional source for the following passes and is removed from the targets
        still to be served. :self.sigma: is added to the time to start when the source
        SE is not one of the SEs originally passed in :sourceSEs: (extra hop).
        A channel having the same node on both ends is taken right away.

        The lists passed in are not modified.

        :param self: self reference
        :param list sourceSEs: list of available source SEs
        :param list targetSEs: list of target SEs
        :return: S_OK( { channelID : { "Ancestor" : channelID or False, "SourceSE" : str,
                         "DestSE" : str, "Strategy" : "MinimiseTotalWait" } } ) or S_ERROR( message )
        """
        tree = {}
        ## snapshot of original sources - an alias would grow together with
        ## :sourceSEs: below, every hop would look like a primary source and
        ## :self.sigma: would never be added
        primarySources = list(sourceSEs)
        ## work on private copies, caller's lists have to stay untouched
        sourceSEs = list(sourceSEs)
        targetSEs = list(targetSEs)
        while targetSEs:
            channels = []
            for targetSE in targetSEs:
                for sourceSE in sourceSEs:
                    ftsChannel = self.ftsGraph.findChannel(sourceSE, targetSE)
                    if not ftsChannel["OK"]:
                        self.log.warn("minimiseTotalWait: %s" %
                                      ftsChannel["Message"])
                        continue
                    channels.append((ftsChannel["Value"], sourceSE, targetSE))
            if not channels:
                msg = "minimiseTotalWait: FTS channels between %s and %s not defined" % (
                    ",".join(sourceSEs), ",".join(targetSEs))
                self.log.error(msg)
                return S_ERROR(msg)
            ## filter out already used channels
            channels = [(channel, sourceSE, targetSE)
                        for channel, sourceSE, targetSE in channels
                        if channel.channelID not in tree]
            if not channels:
                msg = "minimiseTotalWait: all FTS channels between %s and %s are already used in tree" % (
                    ",".join(sourceSEs), ",".join(targetSEs))
                self.log.error(msg)
                return S_ERROR(msg)

            ## filter out non-active channels
            self.log.debug(
                "minimiseTotalWait: found %s candiate channels, checking activity"
                % len(channels))
            channels = [
                (channel, sourceSE, targetSE)
                for channel, sourceSE, targetSE in channels
                if channel.fromNode.SEs[sourceSE]["read"]
                and channel.toNode.SEs[targetSE]["write"] and channel.status ==
                "Active" and channel.timeToStart < float("inf")
            ]

            if not channels:
                self.log.error(
                    "minimiseTotalWait: no active FTS channels found")
                return S_ERROR(
                    "minimiseTotalWait: no active FTS channels found")

            candidates = []
            minTimeToStart = float("inf")
            for channel, sourceSE, targetSE in channels:
                timeToStart = channel.timeToStart
                if sourceSE not in primarySources:
                    timeToStart += self.sigma
                ## local found
                if channel.fromNode == channel.toNode:
                    self.log.debug(
                        "minimiseTotalWait: found local channel '%s'" %
                        channel.channelName)
                    candidates = [(channel, sourceSE, targetSE)]
                    break
                ## strictly better (or the very first one) starts a new
                ## candidates list, equally good ones are collected so one
                ## of them can be drawn at random
                if not candidates or timeToStart < minTimeToStart:
                    minTimeToStart = timeToStart
                    candidates = [(channel, sourceSE, targetSE)]
                elif timeToStart == minTimeToStart:
                    candidates.append((channel, sourceSE, targetSE))

            if not candidates:
                return S_ERROR(
                    "minimiseTotalWait: unable to find candidate FTS channels minimising total wait time"
                )

            selChannel, selSourceSE, selTargetSE = random.choice(candidates)
            ## ancestor is the channel which delivers the file to the selected
            ## source, SE names are compared as a whole, not as substrings
            ancestor = False
            for channelID, treeItem in tree.items():
                if treeItem["DestSE"] == selSourceSE:
                    ancestor = channelID
            tree[selChannel.channelID] = {
                "Ancestor": ancestor,
                "SourceSE": selSourceSE,
                "DestSE": selTargetSE,
                "Strategy": "MinimiseTotalWait"
            }
            sourceSEs.append(selTargetSE)
            targetSEs.remove(selTargetSE)

        return S_OK(tree)

    def dynamicThroughput(self, sourceSEs, targetSEs):
        """ dynamic throughput - many sources, many targets - build a replication tree,
        picking at every step the channel with the smallest estimated time to start

        Both :sourceSEs: and :targetSEs: are modified in place: each selected target SE
        is appended to :sourceSEs: and removed from :targetSEs:.

        :param self: self reference
        :param list sourceSEs: list of available source SE names
        :param list targetSEs: list of target SE names
        :return: S_OK( tree ), tree being a dict { channelID : { "Ancestor", "SourceSE",
                 "DestSE", "Strategy" } }, or S_ERROR when no usable FTS channel is left
        """
        tree = {}
        ## snapshot of the initial sources: :sourceSEs: grows with every scheduled target,
        ## so an alias would make every SE look primary and :sigma: would never be applied
        primarySources = list(sourceSEs)
        ## time to start of the channel chosen to feed each already scheduled target SE
        timeToSite = {}
        while targetSEs:
            minTimeToStart = float("inf")
            channels = []
            for targetSE in targetSEs:
                for sourceSE in sourceSEs:
                    ftsChannel = self.ftsGraph.findChannel(sourceSE, targetSE)
                    if not ftsChannel["OK"]:
                        self.log.warn("dynamicThroughput: %s" %
                                      ftsChannel["Message"])
                        continue
                    ftsChannel = ftsChannel["Value"]
                    channels.append((ftsChannel, sourceSE, targetSE))
            ## no candidate channels found
            if not channels:
                msg = "dynamicThroughput: FTS channels between %s and %s are not defined" % (
                    ",".join(sourceSEs), ",".join(targetSEs))
                self.log.error(msg)
                return S_ERROR(msg)
            ## filter out already used channels
            channels = [(channel, sourceSE, targetSE)
                        for channel, sourceSE, targetSE in channels
                        if channel.channelID not in tree]
            if not channels:
                msg = "dynamicThroughput: all FTS channels between %s and %s are already used in tree" % (
                    ",".join(sourceSEs), ",".join(targetSEs))
                self.log.error(msg)
                return S_ERROR(msg)
            ## filter out non-active channels
            self.log.debug(
                "dynamicThroughput: found %s candidate channels, checking activity"
                % len(channels))
            channels = [
                (channel, sourceSE, targetSE)
                for channel, sourceSE, targetSE in channels
                if channel.fromNode.SEs[sourceSE]["read"]
                and channel.toNode.SEs[targetSE]["write"] and channel.status ==
                "Active" and channel.timeToStart < float("inf")
            ]
            if not channels:
                self.log.info(
                    "dynamicThroughput: active candidate channels not found")
                return S_ERROR(
                    "dynamicThroughput: no active candidate FTS channels")

            candidates = []
            selTimeToStart = None
            for channel, sourceSE, targetSE in channels:
                timeToStart = channel.timeToStart
                ## sources that are not among the initial ones get the :sigma: penalty
                if sourceSE not in primarySources:
                    timeToStart += self.sigma
                ## add the time needed to feed the intermediate source itself
                if sourceSE in timeToSite:
                    timeToStart += timeToSite[sourceSE]
                ## local found
                if channel.fromNode == channel.toNode:
                    self.log.debug(
                        "dynamicThroughput: found local channel '%s'" %
                        channel.channelName)
                    candidates = [(channel, sourceSE, targetSE)]
                    selTimeToStart = timeToStart
                    break
                if timeToStart < minTimeToStart:
                    ## strictly better channel: restart the candidates list
                    selTimeToStart = timeToStart
                    minTimeToStart = timeToStart
                    candidates = [(channel, sourceSE, targetSE)]
                elif timeToStart == minTimeToStart:
                    ## tie: keep every equally good channel, one is drawn at random below
                    selTimeToStart = timeToStart
                    candidates.append((channel, sourceSE, targetSE))

            if not candidates:
                return S_ERROR(
                    "dynamicThroughput: unable to find candidate FTS channels")

            random.shuffle(candidates)
            selChannel, selSourceSE, selTargetSE = candidates[0]
            ## the ancestor is the channel that delivers the replica to the selected source;
            ## "DestSE" holds a single SE name, hence equality and not a substring test
            ancestor = False
            for channelID, treeItem in tree.items():
                if selSourceSE == treeItem["DestSE"]:
                    ancestor = channelID
            tree[selChannel.channelID] = {
                "Ancestor": ancestor,
                "SourceSE": selSourceSE,
                "DestSE": selTargetSE,
                "Strategy": "DynamicThroughput"
            }
            timeToSite[selTargetSE] = selTimeToStart
            ## the freshly scheduled target may serve as a source for the remaining ones
            sourceSEs.append(selTargetSE)
            targetSEs.remove(selTargetSE)

        return S_OK(tree)

    def reset(self):
        """ put :chosenStrategy: back to 0

        :param self: self reference
        """
        self.chosenStrategy = 0

    def getSupportedStrategies(self):
        """ accessor for the supported strategies

        :param self: self reference
        :return: the object stored in :supportedStrategies:
        """
        supported = self.supportedStrategies
        return supported

    def replicationTree(self, sourceSEs, targetSEs, size, strategy=None):
        """ build a replication tree using the requested (or next active) strategy

        :param self: self reference
        :param list sourceSEs: list of sources SE names to use
        :param list targetSEs: list of target SE names to use
        :param long size: file size
        :param str strategy: strategy name, when empty the next active strategy is used
        :return: result of the strategy (S_OK( tree ) on success), or the error
                 structure of the step that failed
        """
        ## refresh SEs R/W access once :rwUpdatePeriod: has elapsed since the last refresh
        timestamp = datetime.datetime.now()
        elapsed = timestamp - self.lastRssUpdate
        if elapsed > self.rwUpdatePeriod:
            refreshed = self.updateGraph(rwAccess=True)
            if refreshed["OK"]:
                self.lastRssUpdate = timestamp
            else:
                ## not fatal: carry on with the access flags already in the graph
                self.log.warn(
                    "replicationTree: unable to update FTS graph: %s" %
                    refreshed["Message"])

        ## pick up the strategy
        if not strategy:
            strategy = self.__selectStrategy()
        if strategy not in self.getSupportedStrategies():
            return S_ERROR("replicationTree: unsupported strategy '%s'" %
                           strategy)

        self.log.info(
            "replicationTree: strategy=%s sourceSEs=%s targetSEs=%s size=%s" %
            (strategy, sourceSEs, targetSEs, size))

        ## run the strategy through the dispatcher
        buildTree = self.strategyDispatcher[strategy]
        tree = buildTree(sourceSEs, targetSEs)
        if not tree["OK"]:
            self.log.error("replicationTree: %s" % tree["Message"])
            return tree

        ## let the graph edges know about the newly scheduled transfers
        edgesUpdate = self.updateGraph(replicationTree=tree["Value"],
                                       size=size)
        if edgesUpdate["OK"]:
            return tree
        self.log.error("replicationTree: unable to update FTS graph: %s" %
                       edgesUpdate["Message"])
        return edgesUpdate

    def __selectStrategy(self):
        """ Hand out the active strategies one after the other, starting over
        once the last one has been used.

        :param self: self reference
        :return: entry of :activeStrategies: found at index :chosenStrategy:
        """
        picked = self.activeStrategies[self.chosenStrategy]
        following = self.chosenStrategy + 1
        ## wrap around after the last active strategy
        self.chosenStrategy = 0 if following == self.numberOfStrategies else following
        return picked

    def __getRWAccessForSE(self, seList):
        """ ask the resource status for read and write access of the SEs in :seList:

        An SE is flagged readable (writable) when its "ReadAccess" ("WriteAccess")
        status is either "Active" or "Degraded".

        :param list seList: SE list
        :return: S_OK( { seName : { "read" : bool, "write" : bool } } ) or the error
                 structure returned by the failing status query
        """
        rwDict = dict((seName, {"read": False, "write": False})
                      for seName in seList)
        granted = {}
        ## same query for both access kinds, read first
        for flag, statusType in (("read", "ReadAccess"), ("write",
                                                          "WriteAccess")):
            status = self.resourceStatus.getStorageElementStatus(
                seList, statusType=statusType, default='Unknown')
            if not status["OK"]:
                return status
            granted[flag] = [
                seName for seName, seStatus in status["Value"].items()
                if statusType in seStatus
                and seStatus[statusType] in ("Active", "Degraded")
            ]
        for seName, flags in rwDict.items():
            flags["read"] = seName in granted["read"]
            flags["write"] = seName in granted["write"]
        return S_OK(rwDict)
コード例 #9
0
ファイル: InputDataAgent.py プロジェクト: Teddy22/DIRAC
class InputDataAgent( OptimizerModule ):
  """
      The specific Optimizer must provide the following methods:
      - initializeOptimizer() before each execution cycle
      - checkJob() - the main method called for each job
  """

  #############################################################################
  def initializeOptimizer( self ):
    """Initialize specific parameters for InputDataAgent.

       :return: S_OK()
    """
    self.failedMinorStatus = self.am_getOption( '/FailedJobStatus', 'Input Data Not Available' )
    #this will ignore failover SE files
    self.checkFileMetadata = self.am_getOption( 'CheckFileMetadata', True )

    self.dataManager = DataManager()
    self.resourceStatus = ResourceStatus()
    self.fc = FileCatalog()

    # SE name -> list of sites cache, emptied every cacheLength seconds
    self.seToSiteMapping = {}
    self.lastCScheck = 0
    self.cacheLength = 600

    return S_OK()

  #############################################################################
  def checkJob( self, job, classAdJob ):
    """
    This method does the optimization corresponding to this Agent,
    it is called for each job by the Optimizer framework

    :param job: job identifier
    :param classAdJob: not used by this optimizer
    :return: result of setNextOptimizer( job ), or the error structure of the failing step
    """

    result = self.jobDB.getInputData( job )
    if not result['OK']:
      self.log.warn( 'Failed to get input data from JobdB for %s' % ( job ) )
      self.log.warn( result['Message'] )
      return result
    if not result['Value']:
      self.log.verbose( 'Job %s has no input data requirement' % ( job ) )
      return self.setNextOptimizer( job )

    #Check if we already executed this Optimizer and the input data is resolved
    res = self.getOptimizerJobInfo( job, self.am_getModuleParam( 'optimizerName' ) )
    if res['OK'] and len( res['Value'] ):
      return self.setNextOptimizer( job )

    self.log.verbose( 'Job %s has an input data requirement and will be processed' % ( job ) )
    inputData = result['Value']
    result = self.__resolveInputData( job, inputData )
    if not result['OK']:
      self.log.warn( result['Message'] )
      return result

    return self.setNextOptimizer( job )

  #############################################################################
  def __resolveInputData( self, job, inputData ):
    """This method checks the file catalog for replica information.

       :param job: job identifier
       :param inputData: iterable of input file names, an 'LFN:' marker is stripped from each
       :return: S_OK( { 'Value' : replica lookup result, 'SiteCandidates' : site summary } )
                or the error structure of the failing step
    """
    lfns = [ fname.replace( 'LFN:', '' ) for fname in inputData ]

    start = time.time()
    # In order to place jobs on Hold if a certain SE is banned we need first to check
    # if the replicas are really available
    replicas = self.dataManager.getActiveReplicas( lfns )
    timing = time.time() - start
    self.log.verbose( 'Catalog Replicas Lookup Time: %.2f seconds ' % ( timing ) )
    if not replicas['OK']:
      self.log.warn( replicas['Message'] )
      return replicas

    replicaDict = replicas['Value']

    siteCandidates = self.__checkReplicas( job, replicaDict )

    if not siteCandidates['OK']:
      self.log.warn( siteCandidates['Message'] )
      return siteCandidates

    if self.checkFileMetadata:
      guids = True
      start = time.time()
      guidDict = self.fc.getFileMetadata( lfns )
      timing = time.time() - start
      self.log.info( 'Catalog Metadata Lookup Time: %.2f seconds ' % ( timing ) )

      if not guidDict['OK']:
        # a failed lookup carries no 'Value': carry on with the plain replica information
        self.log.warn( guidDict['Message'] )
        guids = False
      else:
        failed = guidDict['Value']['Failed']
        if failed:
          self.log.warn( 'Failed to establish some GUIDs' )
          self.log.warn( failed )
          guids = False

      if guids:
        # merge the replicas into the metadata and hand over the combined structure
        for lfn, reps in replicaDict['Successful'].items():
          guidDict['Value']['Successful'][lfn].update( reps )
        replicas = guidDict

    resolvedData = {}
    resolvedData['Value'] = replicas
    resolvedData['SiteCandidates'] = siteCandidates['Value']
    result = self.setOptimizerJobInfo( job, self.am_getModuleParam( 'optimizerName' ), resolvedData )
    if not result['OK']:
      self.log.warn( result['Message'] )
      return result
    return S_OK( resolvedData )

  #############################################################################
  def __checkReplicas( self, job, replicaDict ):
    """Check that all input lfns have valid replicas and can all be found at least in one single site.

       :param job: job identifier
       :param dict replicaDict: replica lookup result with 'Successful' and optionally 'Failed' keys
       :return: result of __getSiteCandidates, or S_ERROR if replicas are missing
    """
    badLFNs = []

    if 'Successful' not in replicaDict:
      return S_ERROR( 'No replica Info available' )

    for lfn, reps in replicaDict['Successful'].items():
      if not reps:
        badLFNs.append( 'LFN:%s Problem: No replicas available' % ( lfn ) )

    if 'Failed' in replicaDict:
      for lfn, cause in replicaDict['Failed'].items():
        badLFNs.append( 'LFN:%s Problem: %s' % ( lfn, cause ) )

    if badLFNs:
      self.log.info( 'Found %s problematic LFN(s) for job %s' % ( len( badLFNs ), job ) )
      param = '\n'.join( badLFNs )
      self.log.info( param )
      # best effort: a failure to store the job parameter is only logged
      result = self.setJobParam( job, self.am_getModuleParam( 'optimizerName' ), param )
      if not result['OK']:
        self.log.error( result['Message'] )
      return S_ERROR( 'Input Data Not Available' )

    return self.__getSiteCandidates( replicaDict['Successful'] )

  #############################################################################
  # FIXME: right now this is unused...
  def __checkActiveSEs( self, job, replicaDict ):
    """
    Check active SE and replicas and identify possible Site candidates for
    the execution of the job

    :param job: job identifier
    :param dict replicaDict: replica lookup result
    :return: S_OK( { 'Value' : active replicas result, 'SiteCandidates' : site summary } )
             or S_ERROR with an "On Hold" message
    """
    # Now let's check if some replicas might not be available due to banned SE's
    activeReplicas = self.dataManager.checkActiveReplicas( replicaDict )
    if not activeReplicas['OK']:
      # due to banned SE's input data might not be available
      msg = "On Hold: Missing replicas due to banned SE"
      self.log.info( msg )
      self.log.warn( activeReplicas['Message'] )
      return S_ERROR( msg )

    activeReplicaDict = activeReplicas['Value']

    siteCandidates = self.__checkReplicas( job, activeReplicaDict )

    if not siteCandidates['OK']:
      # due to banned SE's input data is not available at a single site
      msg = "On Hold: Input data not Available due to banned SE"
      self.log.info( msg )
      self.log.warn( siteCandidates['Message'] )
      return S_ERROR( msg )

    resolvedData = {}
    resolvedData['Value'] = activeReplicas
    resolvedData['SiteCandidates'] = siteCandidates['Value']
    result = self.setOptimizerJobInfo( job, self.am_getModuleParam( 'optimizerName' ), resolvedData )
    if not result['OK']:
      self.log.warn( result['Message'] )
      return result
    return S_OK( resolvedData )


  #############################################################################
  def __getSitesForSE( self, se ):
    """ Returns a list of sites having the given SE as a local one.
        Uses the local cache of the site-se information

        :param str se: SE name
        :return: S_OK( list of sites ) or the error structure from getSitesForSE
    """

    # Empty the cache if too old
    if ( time.time() - self.lastCScheck ) > self.cacheLength:
      self.log.verbose( 'Resetting the SE to site mapping cache' )
      self.seToSiteMapping = {}
      self.lastCScheck = time.time()

    if se in self.seToSiteMapping:
      return S_OK( self.seToSiteMapping[se] )

    # only successful lookups are cached
    sites = getSitesForSE( se )
    if sites['OK']:
      self.seToSiteMapping[se] = list( sites['Value'] )
    return sites

  #############################################################################
  def __getSiteCandidates( self, inputData ):
    """This method returns a list of possible site candidates based on the
       job input data requirement.  For each site candidate, the number of files
       on disk and tape is resolved.

       :param dict inputData: { lfn : { se : ... } } replica information
       :return: S_OK( { site : { 'disk' : number of files, 'tape' : number of files } } )
                or S_ERROR when no site holds all the files
    """

    # sites at which each file is available
    fileSEs = {}
    for lfn, replicas in inputData.items():
      siteList = []
      for se in replicas.keys():
        sites = self.__getSitesForSE( se )
        if sites['OK']:
          siteList += sites['Value']
      fileSEs[lfn] = uniqueElements( siteList )

    # candidate sites are those common to all the files
    siteCandidates = None
    for sites in fileSEs.values():
      if siteCandidates is None:
        siteCandidates = list( sites )
      else:
        siteCandidates = [ site for site in siteCandidates if site in sites ]

    if not siteCandidates:
      return S_ERROR( 'No candidate sites available' )

    #In addition, check number of files on tape and disk for each site
    #for optimizations during scheduling
    siteResult = {}
    for site in siteCandidates:
      siteResult[site] = { 'disk': [], 'tape': [] }

    seDict = {}
    for lfn, replicas in inputData.items():
      for se in replicas.keys():
        if se not in seDict:
          sites = self.__getSitesForSE( se )
          if not sites['OK']:
            continue
          try:
            result = self.resourceStatus.getStorageElementStatus( se, statusType = 'ReadAccess' )
            if not result['OK']:
              continue
            seParams = result['Value'][se]
            result = getStorageElementOptions( se )
            if not result['OK']:
              continue
            seParams.update( result['Value'] )
          except Exception:
            self.log.exception( 'Failed to instantiate StorageElement( %s )' % se )
            continue
          # register the SE only once status and options are both resolved, a
          # half-filled entry would break the lookups below for the next file
          seDict[se] = { 'Sites': sites['Value'], 'SEParams': seParams }
        seParams = seDict[se]['SEParams']
        # NOTE(review): 'ReadAccess' is used by truthiness only; if it holds a status
        # name, every non-empty status (banned included) passes - confirm this is intended
        for site in seDict[se]['Sites']:
          if site in siteCandidates:
            if seParams['ReadAccess'] and seParams['DiskSE']:
              if lfn not in siteResult[site]['disk']:
                siteResult[site]['disk'].append( lfn )
                # a disk replica takes precedence over a tape one
                if lfn in siteResult[site]['tape']:
                  siteResult[site]['tape'].remove( lfn )
            if seParams['ReadAccess'] and seParams['TapeSE']:
              if lfn not in siteResult[site]['tape'] and lfn not in siteResult[site]['disk']:
                siteResult[site]['tape'].append( lfn )

    # only the counts are reported
    for site in siteResult:
      siteResult[site]['disk'] = len( siteResult[site]['disk'] )
      siteResult[site]['tape'] = len( siteResult[site]['tape'] )
    return S_OK( siteResult )
コード例 #10
0
ファイル: StorageFactory.py プロジェクト: atsareg/DIRAC
class StorageFactory(object):
    """ Factory of storage plug-in objects, built either from an explicit parameter
        dictionary or from the '/Resources/StorageElements' section of the CS.
    """

    def __init__(self, useProxy=False, vo=None):
        """ c'tor

        :param useProxy: when true the 'Proxy' plug-in is loaded instead of the requested one
        :param str vo: VO name, looked up with getVOfromProxyGroup() when None
        """
        self.rootConfigPath = '/Resources/StorageElements'
        self.proxy = useProxy
        self.resourceStatus = ResourceStatus()
        self.vo = vo
        if self.vo is None:
            result = getVOfromProxyGroup()
            if result['OK']:
                self.vo = result['Value']
            # NOTE(review): on failure the VO stays None. The code used to build (but never
            # raise) a RuntimeError here; kept non-raising as getStorages() warns about a
            # missing VO and the VO specific path is simply skipped - confirm this is intended
        self.remotePlugins = []
        self.localPlugins = []
        self.name = ''
        self.options = {}
        self.protocolDetails = []
        self.storages = []

    ###########################################################################################
    #
    # Below are public methods for obtaining storage objects
    #

    def getStorageName(self, initialName):
        """ Resolve the 'Alias' references of :initialName:

        :param str initialName: storage section name in the CS
        :return: S_OK( resolved name ) or S_ERROR
        """
        return self._getConfigStorageName(initialName, 'Alias')

    def getStorage(self, parameterDict, hideExceptions=False):
        """ This instantiates a single storage for the details provided and doesn't check the CS.

        :param dict parameterDict: must hold 'StorageName' and 'PluginName'
                                   ('ProtocolName' is accepted for backward compatibility)
        :param hideExceptions: passed to the object loader
        :return: S_OK( storage object ) or S_ERROR
        """
        # The storage name must be supplied.
        if 'StorageName' not in parameterDict:
            errStr = "StorageFactory.getStorage: StorageName must be supplied"
            gLogger.error(errStr)
            return S_ERROR(errStr)
        storageName = parameterDict['StorageName']

        # PluginName must be supplied otherwise nothing will work.
        if 'PluginName' in parameterDict:
            pluginName = parameterDict['PluginName']
        # Temporary fix for backward compatibility
        elif 'ProtocolName' in parameterDict:
            pluginName = parameterDict['ProtocolName']
        else:
            errStr = "StorageFactory.getStorage: PluginName must be supplied"
            gLogger.error(errStr)
            return S_ERROR(errStr)

        return self.__generateStorageObject(storageName,
                                            pluginName,
                                            parameterDict,
                                            hideExceptions=hideExceptions)

    def getStorages(self, storageName, pluginList=None, hideExceptions=False):
        """ Get an instance of a Storage based on the DIRAC SE name based on the CS entries

        'storageName' is the DIRAC SE name i.e. 'CERN-RAW'
        'pluginList' is an optional list of protocols if a sub-set is desired i.e ['SRM2','SRM1']

        :return: S_OK( dict ) with keys 'StorageName', 'StorageOptions', 'StorageObjects',
                 'LocalPlugins', 'RemotePlugins', 'ProtocolOptions' and 'TurlProtocols',
                 or S_ERROR when nothing could be instantiated
        """
        # every call starts from a clean state
        self.remotePlugins = []
        self.localPlugins = []
        self.name = ''
        self.options = {}
        self.protocolDetails = []
        self.storages = []
        if pluginList is None:
            pluginList = []
        elif isinstance(pluginList, basestring):
            pluginList = [pluginList]
        if not self.vo:
            gLogger.warn('No VO information available')

        # Get the name of the storage provided
        res = self._getConfigStorageName(storageName, 'Alias')
        if not res['OK']:
            return res
        storageName = res['Value']
        self.name = storageName

        # In case the storage is made from a base SE, get this information
        res = self._getConfigStorageName(storageName, 'BaseSE')
        if not res['OK']:
            return res
        # If the storage is derived from another one, keep the information
        if res['Value'] != storageName:
            derivedStorageName = storageName
            storageName = res['Value']
        else:
            derivedStorageName = None

        # Get the options defined in the CS for this storage
        res = self._getConfigStorageOptions(
            storageName, derivedStorageName=derivedStorageName)
        if not res['OK']:
            return res
        self.options = res['Value']

        # Get the protocol specific details
        res = self._getConfigStorageProtocols(
            storageName, derivedStorageName=derivedStorageName)
        if not res['OK']:
            return res
        self.protocolDetails = res['Value']

        requestedLocalPlugins = []
        requestedRemotePlugins = []
        requestedProtocolDetails = []
        turlProtocols = []
        # Generate the protocol specific plug-ins
        for protocolDict in self.protocolDetails:
            pluginName = protocolDict.get('PluginName')
            if pluginList and pluginName not in pluginList:
                continue
            protocol = protocolDict['Protocol']
            result = self.__generateStorageObject(
                storageName,
                pluginName,
                protocolDict,
                hideExceptions=hideExceptions)
            if result['OK']:
                self.storages.append(result['Value'])
                if pluginName in self.localPlugins:
                    turlProtocols.append(protocol)
                    requestedLocalPlugins.append(pluginName)
                if pluginName in self.remotePlugins:
                    requestedRemotePlugins.append(pluginName)
                requestedProtocolDetails.append(protocolDict)
            else:
                # a plug-in failing to load is not fatal as long as another one works
                gLogger.info(result['Message'])

        if not self.storages:
            errStr = "StorageFactory.getStorages: Failed to instantiate any storage protocols."
            gLogger.error(errStr, self.name)
            return S_ERROR(errStr)

        resDict = {}
        resDict['StorageName'] = self.name
        resDict['StorageOptions'] = self.options
        resDict['StorageObjects'] = self.storages
        resDict['LocalPlugins'] = requestedLocalPlugins
        resDict['RemotePlugins'] = requestedRemotePlugins
        resDict['ProtocolOptions'] = requestedProtocolDetails
        resDict['TurlProtocols'] = turlProtocols
        return S_OK(resDict)

    ###########################################################################################
    #
    # Below are internal methods for obtaining section/option/value configuration
    #

    def _getConfigStorageName(self, storageName, referenceType):
        """
        This gets the name of the storage from the configuration service.
        If the storage is a reference to another SE the resolution is performed.

        'storageName' is the storage section to check in the CS
        'referenceType' is the option holding the reference, i.e. 'Alias' or 'BaseSE'

        :return: S_OK( resolved storage name ) or S_ERROR
        """
        configPath = '%s/%s' % (self.rootConfigPath, storageName)
        res = gConfig.getOptions(configPath)
        if not res['OK']:
            errStr = "StorageFactory._getConfigStorageName: Failed to get storage options"
            gLogger.error(errStr, res['Message'])
            return S_ERROR(errStr)
        if not res['Value']:
            errStr = "StorageFactory._getConfigStorageName: Supplied storage doesn't exist."
            gLogger.error(errStr, configPath)
            return S_ERROR(errStr)
        if referenceType not in res['Value']:
            return S_OK(storageName)

        configPath = cfgPath(self.rootConfigPath, storageName,
                             referenceType)
        referenceName = gConfig.getValue(configPath)
        # the referenced storage may itself be an alias
        result = self._getConfigStorageName(referenceName, 'Alias')
        if not result['OK']:
            return result
        return S_OK(result['Value'])

    def _getConfigStorageOptions(self, storageName, derivedStorageName=None):
        """ Get the options associated to the StorageElement as defined in the CS

        :param str storageName: name of the (base) storage
        :param str derivedStorageName: name of the storage derived from :storageName:, if any
        :return: S_OK( dict of options, including the status of the SE ) or S_ERROR
        """
        optionsDict = {}
        # We first get the options of the baseSE, and then overwrite with the derivedSE
        seNames = [storageName]
        if derivedStorageName:
            seNames.append(derivedStorageName)
        for seName in seNames:
            storageConfigPath = cfgPath(self.rootConfigPath, seName)
            res = gConfig.getOptions(storageConfigPath)
            if not res['OK']:
                errStr = "StorageFactory._getStorageOptions: Failed to get storage options."
                gLogger.error(errStr, "%s: %s" % (seName, res['Message']))
                return S_ERROR(errStr)
            # the access flags are not read from the CS, they come from the status below
            for option in set(res['Value']) - set(
                ('ReadAccess', 'WriteAccess', 'CheckAccess', 'RemoveAccess')):
                optionConfigPath = cfgPath(storageConfigPath, option)
                default = [] if option in ['VO'] else ''
                optionsDict[option] = gConfig.getValue(optionConfigPath,
                                                       default)

        # The status is that of the derived SE only
        seName = derivedStorageName if derivedStorageName else storageName
        res = self.resourceStatus.getStorageElementStatus(seName)
        if not res['OK']:
            errStr = "StorageFactory._getStorageOptions: Failed to get storage status"
            gLogger.error(errStr, "%s: %s" % (seName, res['Message']))
            return S_ERROR(errStr)

        # For safety, we did not add the ${statusType}Access keys
        # this requires modifications in the StorageElement class

        # We add the dictionary with the statusTypes and values
        # { 'statusType1' : 'status1', 'statusType2' : 'status2' ... }
        optionsDict.update(res['Value'][seName])

        return S_OK(optionsDict)

    def __getProtocolsSections(self, storageName):
        """ Get the names of the protocol sections defined for :storageName: in the CS

        :return: S_OK( list of section names ) or S_ERROR
        """
        storageConfigPath = cfgPath(self.rootConfigPath, storageName)
        res = gConfig.getSections(storageConfigPath)
        if not res['OK']:
            errStr = "StorageFactory._getConfigStorageProtocols: Failed to get storage sections"
            gLogger.error(errStr, "%s: %s" % (storageName, res['Message']))
            return S_ERROR(errStr)
        protocolSections = res['Value']
        return S_OK(protocolSections)

    @staticmethod
    def _mergeProtocolOverrides(protocolDetails, detail):
        """ Overwrite, in place, the first entry of :protocolDetails: having the same
        'PluginName' as :detail: with the non-empty values of :detail:

        :param list protocolDetails: protocol dictionaries of the base SE
        :param dict detail: protocol dictionary of the derived SE
        """
        pluginName = detail.get('PluginName')
        if not pluginName:
            return
        for protocolDetail in protocolDetails:
            if protocolDetail.get('PluginName') == pluginName:
                for key, val in detail.items():
                    if val:
                        protocolDetail[key] = val
                # stop at the matching entry only, the others must still be looked at
                break

    def _getConfigStorageProtocols(self, storageName, derivedStorageName=None):
        """ Protocol specific information is present as sections in the Storage configuration

        :param str storageName: name of the (base) storage
        :param str derivedStorageName: name of the storage derived from :storageName:, if any
        :return: S_OK( list of protocol dictionaries ) or S_ERROR
        """
        res = self.__getProtocolsSections(storageName)
        if not res['OK']:
            return res
        protocolSections = res['Value']
        sortedProtocolSections = sorted(protocolSections)
        protocolDetails = []
        for protocolSection in sortedProtocolSections:
            res = self._getConfigStorageProtocolDetails(
                storageName, protocolSection)
            if not res['OK']:
                return res
            protocolDetails.append(res['Value'])
        if derivedStorageName:
            # We may have parameters overwriting the baseSE protocols
            res = self.__getProtocolsSections(derivedStorageName)
            if not res['OK']:
                return res
            for protocolSection in res['Value']:
                res = self._getConfigStorageProtocolDetails(derivedStorageName,
                                                            protocolSection,
                                                            checkAccess=False)
                if not res['OK']:
                    return res
                self._mergeProtocolOverrides(protocolDetails, res['Value'])
        return S_OK(protocolDetails)

    def _getConfigStorageProtocolDetails(self,
                                         storageName,
                                         protocolSection,
                                         checkAccess=True):
        """
        Parse the contents of the protocol block

        :param str storageName: storage section name
        :param str protocolSection: protocol section name inside the storage section
        :param checkAccess: when true the plug-in is recorded as local or remote
                            according to its 'Access' option
        :return: S_OK( protocol dictionary ) or S_ERROR
        """
        # First obtain the options that are available
        protocolConfigPath = cfgPath(self.rootConfigPath, storageName,
                                     protocolSection)
        res = gConfig.getOptions(protocolConfigPath)
        if not res['OK']:
            errStr = "StorageFactory.__getProtocolDetails: Failed to get protocol options."
            gLogger.error(errStr, "%s: %s" % (storageName, protocolSection))
            return S_ERROR(errStr)
        options = res['Value']

        # We must have certain values internally even if not supplied in CS
        protocolDict = {
            'Access': '',
            'Host': '',
            'Path': '',
            'Port': '',
            'Protocol': '',
            'SpaceToken': '',
            'WSUrl': ''
        }
        for option in options:
            configPath = cfgPath(protocolConfigPath, option)
            optionValue = gConfig.getValue(configPath, '')
            protocolDict[option] = optionValue

        # This is a temporary for backward compatibility: move ProtocolName to PluginName
        protocolDict.setdefault('PluginName',
                                protocolDict.pop('ProtocolName', None))

        # Evaluate the base path taking into account possible VO specific setting
        if self.vo:
            result = gConfig.getOptionsDict(
                cfgPath(protocolConfigPath, 'VOPath'))
            voPath = ''
            if result['OK']:
                voPath = result['Value'].get(self.vo, '')
            if voPath:
                protocolDict['Path'] = voPath

        # Now update the local and remote protocol lists.
        # A warning will be given if the Access option is not set.
        if checkAccess:
            access = protocolDict['Access'].lower()
            if access == 'remote':
                self.remotePlugins.append(protocolDict['PluginName'])
            elif access == 'local':
                self.localPlugins.append(protocolDict['PluginName'])
            else:
                errStr = "StorageFactory.__getProtocolDetails: The 'Access' option for %s:%s is neither 'local' or 'remote'." % (
                    storageName, protocolSection)
                gLogger.warn(errStr)

        # The PluginName option must be defined
        if not protocolDict['PluginName']:
            errStr = "StorageFactory.__getProtocolDetails: 'PluginName' option is not defined."
            gLogger.error(errStr, "%s: %s" % (storageName, protocolSection))
            return S_ERROR(errStr)

        return S_OK(protocolDict)

    ###########################################################################################
    #
    # Below is the method for obtaining the object instantiated for a provided storage configuration
    #

    def __generateStorageObject(self,
                                storageName,
                                pluginName,
                                parameters,
                                hideExceptions=False):
        """ Load the '<plug-in>Storage' class and instantiate it

        :param str storageName: storage name handed to the plug-in constructor
        :param str pluginName: plug-in name, replaced by 'Proxy' when the factory uses a proxy
        :param dict parameters: parameters handed to the plug-in constructor
        :param hideExceptions: passed to the object loader
        :return: S_OK( storage object ) or S_ERROR
        """
        storageType = pluginName
        if self.proxy:
            storageType = 'Proxy'

        objectLoader = ObjectLoader()
        result = objectLoader.loadObject('Resources.Storage.%sStorage' %
                                         storageType,
                                         storageType + 'Storage',
                                         hideExceptions=hideExceptions)
        if not result['OK']:
            gLogger.error('Failed to load storage object: %s' %
                          result['Message'])
            return result

        storageClass = result['Value']
        try:
            storage = storageClass(storageName, parameters)
        except Exception as x:
            # whatever goes wrong inside the plug-in constructor is reported as S_ERROR
            errStr = "StorageFactory._generateStorageObject: Failed to instantiate %s: %s" % (
                storageName, x)
            gLogger.exception(errStr)
            return S_ERROR(errStr)

        return S_OK(storage)
コード例 #11
0
class StrategyHandler(object):
    """
    .. class:: StrategyHandler

    StrategyHandler is a helper class for determining optimal replication tree for given
    source files, their replicas and target storage elements.

    A replication tree is a dict keyed by channel ID; every value is a dict with the keys
    "Ancestor", "SourceSE", "DestSE" and "Strategy".
    """
    def __init__(self,
                 configSection,
                 bandwidths=None,
                 channels=None,
                 failedFiles=None):
        """c'tor

        :param self: self reference
        :param str configSection: CS path; the class name is appended to form this handler's section
        :param dict bandwidths: observed throughput records, keyed by channel ID
        :param dict channels: channel queue records, keyed by channel ID
        :param dict failedFiles: distinct failed files counters, keyed by channel ID
        """
        ## save config section
        self.configSection = configSection + "/" + self.__class__.__name__
        ## sublogger
        self.log = gLogger.getSubLogger("StrategyHandler", child=True)
        self.log.setLevel(
            gConfig.getValue(self.configSection + "/LogLevel", "DEBUG"))

        self.supportedStrategies = [
            'Simple', 'DynamicThroughput', 'Swarm', 'MinimiseTotalWait'
        ]
        self.log.debug("Supported strategies = %s" %
                       ", ".join(self.supportedStrategies))

        ## options read from the CS, each with its built-in default
        self.sigma = gConfig.getValue(self.configSection + '/HopSigma', 0.0)
        self.log.debug("HopSigma = %s" % self.sigma)
        self.schedulingType = gConfig.getValue(
            self.configSection + '/SchedulingType', 'File')
        self.log.debug("SchedulingType = %s" % self.schedulingType)
        self.activeStrategies = gConfig.getValue(
            self.configSection + '/ActiveStrategies', ['MinimiseTotalWait'])
        self.log.debug("ActiveStrategies = %s" %
                       ", ".join(self.activeStrategies))
        self.numberOfStrategies = len(self.activeStrategies)
        self.log.debug("Number of active strategies = %s" %
                       self.numberOfStrategies)
        self.acceptableFailureRate = gConfig.getValue(
            self.configSection + '/AcceptableFailureRate', 75)
        self.log.debug("AcceptableFailureRate = %s" %
                       self.acceptableFailureRate)
        self.acceptableFailedFiles = gConfig.getValue(
            self.configSection + "/AcceptableFailedFiles", 5)
        self.log.debug("AcceptableFailedFiles = %s" %
                       self.acceptableFailedFiles)

        self.bandwidths = bandwidths if bandwidths else {}
        self.channels = channels if channels else {}
        self.failedFiles = failedFiles if failedFiles else {}
        ## index into activeStrategies of the strategy to hand out next
        self.chosenStrategy = 0

        # dispatcher: compiled strategy name pattern -> bound method implementing it
        self.strategyDispatcher = {
            re.compile("MinimiseTotalWait"): self.__minimiseTotalWait,
            re.compile("DynamicThroughput"): self.__dynamicThroughput,
            re.compile("Simple"): self.__simple,
            re.compile("Swarm"): self.__swarm
        }

        self.resourceStatus = ResourceStatus()

        self.log.debug("strategyDispatcher entries:")
        for key, value in self.strategyDispatcher.items():
            self.log.debug("%s : %s" % (key.pattern, value.__name__))

        self.log.debug("%s has been constructed" % self.__class__.__name__)

    def reset(self):
        """ reset :chosenStrategy: so the next automatic selection starts from the first active strategy

        :param self: self reference
        """
        self.chosenStrategy = 0

    def setFailedFiles(self, failedFiles):
        """ set the failed FTS files counters (a falsy value resets them to an empty dict)

        :param self: self reference
        :param dict failedFiles: observed distinct failed files, keyed by channel ID
        """
        self.failedFiles = failedFiles if failedFiles else {}

    def setBandwiths(self, bandwidths):
        """ set the bandwidths (a falsy value resets them to an empty dict)

        :param self: self reference
        :param dict bandwidths: observed throughput of active FTS channels, keyed by channel ID
        """
        self.bandwidths = bandwidths if bandwidths else {}

    def setChannels(self, channels):
        """ set the channels (a falsy value resets them to an empty dict)

        :param self: self reference
        :param dict channels: active channels queues, keyed by channel ID
        """
        self.channels = channels if channels else {}

    def getSupportedStrategies(self):
        """ Get supported strategies.

        :param self: self reference
        :return: list of supported strategy names
        """
        return self.supportedStrategies

    def determineReplicationTree(self,
                                 sourceSE,
                                 targetSEs,
                                 replicas,
                                 size,
                                 strategy=None,
                                 sigma=None):
        """ resolve and find replication tree given source and target storage elements, active replicas,
        and file size.

        The "Files" and "Size" counters of every channel used by the tree are bumped in
        self.channels. The caller's targetSEs list is left untouched.

        :param self: self reference
        :param str sourceSE: source storage element name (None: any replica may act as source)
        :param list targetSEs: list of target storage elements
        :param dict replicas: active replicas, keyed by storage element name
        :param int size: file size
        :param str strategy: strategy to use (default: next active one); a "<name>_<float>" value
                             also sets the hop sigma
        :param float sigma: hop sigma, stored on the handler when not None
        :return: S_OK( tree ) or S_ERROR when "Simple" is asked for a source holding no replica
        """
        if not strategy:
            strategy = self.__selectStrategy()
        self.log.debug("determineReplicationTree: will use %s strategy" %
                       strategy)

        # compare against None: an explicit 0.0 is a valid request to drop the hop penalty
        if sigma is not None:
            self.log.debug("determineReplicationTree: sigma = %s" % sigma)
            self.sigma = sigma

        # For each strategy implemented an 'if' must be placed here
        tree = {}
        for reStrategy, strategyCall in self.strategyDispatcher.items():
            self.log.debug(reStrategy.pattern)
            if not reStrategy.search(strategy):
                continue
            if "_" in strategy:
                try:
                    self.sigma = float(strategy.split("_")[1])
                    self.log.debug("determineReplicationTree: new sigma %s" %
                                   self.sigma)
                except ValueError:
                    self.log.warn(
                        "determineReplicationTree: can't set new sigma value from '%s'"
                        % strategy)
            if reStrategy.pattern in ("MinimiseTotalWait", "DynamicThroughput"):
                # a real list is needed: the tree builders extend their source list
                replicasToUse = list(replicas) if sourceSE is None else [sourceSE]
                tree = strategyCall(replicasToUse, targetSEs)
            elif reStrategy.pattern == "Simple":
                if sourceSE not in replicas:
                    return S_ERROR(
                        "File does not exist at specified source site")
                tree = self.__simple(sourceSE, targetSEs)
            elif reStrategy.pattern == "Swarm":
                # without any target there is nothing to schedule
                tree = self.__swarm(targetSEs[0],
                                    list(replicas)) if targetSEs else {}

        # Now update the queues to reflect the chosen strategies
        for channelID in tree:
            self.channels[channelID]["Files"] += 1
            self.channels[channelID]["Size"] += size

        return S_OK(tree)

    def __selectStrategy(self):
        """ If more than one active strategy use one after the other.

        :param self: self reference
        :return: name of the strategy to use now
        """
        position = self.chosenStrategy
        chosenStrategy = self.activeStrategies[position]
        position += 1
        # wrap around after the last active strategy
        self.chosenStrategy = 0 if position == self.numberOfStrategies else position
        return chosenStrategy

    def __simple(self, sourceSE, destSEs):
        """ This just does a simple replication from the source to all the targets.

        :param self: self reference
        :param str sourceSE: source storage element name
        :param list destSEs: destination storage elements
        :return: replication tree, empty when the source is not reported as active
        """
        tree = {}
        if not self.__getActiveSEs([sourceSE]):
            return tree
        sourceSites = self.__getChannelSitesForSE(sourceSE)
        for destSE in destSEs:
            destSites = self.__getChannelSitesForSE(destSE)
            for channelID, channelDict in self.channels.items():
                # a channel already in the tree keeps its first assignment
                if channelID in tree:
                    continue
                if channelDict["Source"] in sourceSites and channelDict[
                        "Destination"] in destSites:
                    tree[channelID] = {
                        "Ancestor": False,
                        "SourceSE": sourceSE,
                        "DestSE": destSE,
                        "Strategy": "Simple"
                    }
        return tree

    def __swarm(self, destSE, replicas):
        """ This strategy is to be used to get the data to the target site as quickly as possible
        from any source.

        :param self: self reference
        :param str destSE: destination storage element
        :param list replicas: replicas dictionary keys
        :return: replication tree holding at most one channel
        """
        tree = {}
        res = self.__getTimeToStart()
        if not res["OK"]:
            self.log.error(res["Message"])
            return tree
        channelInfo = res["Value"]
        minTimeToStart = float("inf")

        sourceSEs = self.__getActiveSEs(replicas)
        destSites = self.__getChannelSitesForSE(destSE)
        # site lookups do not depend on the destination site, so do them once
        sourceSitesBySE = [(sourceSE, self.__getChannelSitesForSE(sourceSE))
                           for sourceSE in sourceSEs]

        selectedChannelID = None
        selectedSourceSE = None
        selectedDestSE = None

        for destSite in destSites:
            for sourceSE, sourceSites in sourceSitesBySE:
                for sourceSite in sourceSites:
                    channelName = "%s-%s" % (sourceSite, destSite)
                    if channelName not in channelInfo:
                        errStr = "__swarm: Channel not defined"
                        self.log.warn(errStr, channelName)
                        continue
                    channelTimeToStart = channelInfo[channelName][
                        "TimeToStart"]
                    # '<=' : on equal times the channel examined last wins
                    if channelTimeToStart <= minTimeToStart:
                        minTimeToStart = channelTimeToStart
                        selectedSourceSE = sourceSE
                        selectedDestSE = destSE
                        selectedChannelID = channelInfo[channelName][
                            "ChannelID"]

        # test the ID against None: a channel ID of 0 is falsy but still a valid selection
        if selectedChannelID is not None and selectedSourceSE and selectedDestSE:
            tree[selectedChannelID] = {
                "Ancestor": False,
                "SourceSE": selectedSourceSE,
                "DestSE": selectedDestSE,
                "Strategy": "Swarm"
            }
        return tree

    def __dynamicThroughput(self, sourceSEs, destSEs):
        """ This creates a replication tree based on observed throughput on the channels.

        One destination is served per round; once served it becomes a possible source for the
        following rounds, its accumulated time plus the hop sigma being added to onward channels.

        :param self: self reference
        :param list sourceSEs: source storage elements names
        :param list destSEs: destination storage elements names
        :return: replication tree, possibly partial when no usable channel is left
        :raises StrategyHandlerChannelNotDefined: when a source-destination site pair has no channel
        """
        tree = {}
        res = self.__getTimeToStart()
        if not res["OK"]:
            self.log.error(res["Message"])
            return tree
        channelInfo = res["Value"]

        # private copies: both lists are modified below and belong to the caller
        sourceSEs = list(sourceSEs)
        destSEs = list(destSEs)

        timeToSite = {}  # Maintains time to site including previous hops
        siteAncestor = {}  # Maintains the ancestor channel for a site

        while destSEs:
            minTotalTimeToStart = float("inf")
            candidateChannels = []
            selectedPathTimeToStart = None
            sourceSitesBySE = [
                (sourceSE, self.__getChannelSitesForSE(sourceSE))
                for sourceSE in self.__getActiveSEs(sourceSEs)
            ]
            try:
                for destSE in destSEs:
                    for destSite in self.__getChannelSitesForSE(destSE):
                        for sourceSE, sourceSites in sourceSitesBySE:
                            for sourceSite in sourceSites:
                                channelName = "%s-%s" % (sourceSite, destSite)
                                if channelName not in channelInfo:
                                    self.log.warn(
                                        "dynamicThroughput: bailing out! channel %s not defined "
                                        % channelName)
                                    raise StrategyHandlerChannelNotDefined(
                                        channelName)

                                channelID = channelInfo[channelName][
                                    "ChannelID"]
                                if channelID in tree:
                                    continue

                                totalTimeToStart = channelInfo[channelName][
                                    "TimeToStart"]
                                if sourceSE in timeToSite:
                                    totalTimeToStart += timeToSite[
                                        sourceSE] + self.sigma

                                # a local transfer always wins: stop searching
                                if sourceSite == destSite:
                                    selectedPathTimeToStart = totalTimeToStart
                                    candidateChannels = [(sourceSE, destSE,
                                                          channelID)]
                                    raise StrategyHandlerLocalFound(
                                        candidateChannels)

                                if totalTimeToStart < minTotalTimeToStart:
                                    minTotalTimeToStart = totalTimeToStart
                                    selectedPathTimeToStart = totalTimeToStart
                                    candidateChannels = [(sourceSE, destSE,
                                                          channelID)]
                                elif totalTimeToStart == minTotalTimeToStart and totalTimeToStart < float(
                                        "inf"):
                                    # tie with the current best: keep it as an alternative
                                    candidateChannels.append(
                                        (sourceSE, destSE, channelID))

            except StrategyHandlerLocalFound:
                pass

            # nothing usable is left (no active source, or only unattractive channels)
            if not candidateChannels:
                return tree

            ## pick one of the equally good candidates at random
            selectedSourceSE, selectedDestSE, selectedChannelID = random.choice(
                candidateChannels)
            timeToSite[selectedDestSE] = selectedPathTimeToStart
            siteAncestor[selectedDestSE] = selectedChannelID

            tree[selectedChannelID] = {
                "Ancestor": siteAncestor.get(selectedSourceSE, False),
                "SourceSE": selectedSourceSE,
                "DestSE": selectedDestSE,
                "Strategy": "DynamicThroughput"
            }
            sourceSEs.append(selectedDestSE)
            destSEs.remove(selectedDestSE)
        return tree

    def __minimiseTotalWait(self, sourceSEs, destSEs):
        """ This creates a replication tree based on observed throughput on the channels.

        One destination is served per round; once served it becomes a possible source for the
        following rounds, channels leaving such a non-primary source being penalised by the hop sigma.

        :param self: self reference
        :param list sourceSEs: source storage elements names
        :param list destSEs: destination storage elements names
        :return: replication tree, possibly partial when no usable channel is left
        """

        self.log.debug("sourceSEs = %s" % sourceSEs)
        self.log.debug("destSEs = %s" % destSEs)

        tree = {}
        res = self.__getTimeToStart()
        if not res["OK"]:
            self.log.error(res["Message"])
            return tree
        channelInfo = res["Value"]

        # private copies: both lists are modified below and belong to the caller
        sourceSEs = list(sourceSEs)
        destSEs = list(destSEs)
        # snapshot (not an alias) of the initial sources, otherwise SEs added as intermediate
        # sources would count as primary and never get the sigma penalty
        primarySources = list(sourceSEs)

        siteAncestor = {}  # Maintains the ancestor channel for a site

        while destSEs:
            minTotalTimeToStart = float("inf")
            candidateChannels = []
            sourceSitesBySE = [
                (sourceSE, self.__getChannelSitesForSE(sourceSE))
                for sourceSE in self.__getActiveSEs(sourceSEs)
            ]
            try:
                for destSE in destSEs:
                    for destSite in self.__getChannelSitesForSE(destSE):
                        for sourceSE, sourceSites in sourceSitesBySE:
                            for sourceSite in sourceSites:
                                channelName = "%s-%s" % (sourceSite, destSite)

                                if channelName not in channelInfo:
                                    continue

                                channelID = channelInfo[channelName][
                                    "ChannelID"]
                                # If this channel is already used, look for another sourceSE
                                if channelID in tree:
                                    continue
                                channelTimeToStart = channelInfo[channelName][
                                    "TimeToStart"]
                                if sourceSE not in primarySources:
                                    channelTimeToStart += self.sigma
                                ## local transfer found, bail out to save rainforests
                                if sourceSite == destSite:
                                    candidateChannels = [(sourceSE, destSE,
                                                          channelID)]
                                    raise StrategyHandlerLocalFound(
                                        candidateChannels)
                                if channelTimeToStart < minTotalTimeToStart:
                                    minTotalTimeToStart = channelTimeToStart
                                    candidateChannels = [(sourceSE, destSE,
                                                          channelID)]
                                elif channelTimeToStart == minTotalTimeToStart and channelTimeToStart != float(
                                        "inf"):
                                    # tie with the current best: keep it as an alternative
                                    candidateChannels.append(
                                        (sourceSE, destSE, channelID))

            except StrategyHandlerLocalFound:
                pass

            if not candidateChannels:
                return tree

            ## pick one of the equally good candidates at random
            selectedSourceSE, selectedDestSE, selectedChannelID = random.choice(
                candidateChannels)
            siteAncestor[selectedDestSE] = selectedChannelID

            tree[selectedChannelID] = {
                "Ancestor": siteAncestor.get(selectedSourceSE, False),
                "SourceSE": selectedSourceSE,
                "DestSE": selectedDestSE,
                "Strategy": "MinimiseTotalWait"
            }
            sourceSEs.append(selectedDestSE)
            destSEs.remove(selectedDestSE)

        return tree

    def __getTimeToStart(self):
        """ Generate the dictionary of times to start based on task queue contents and observed throughput.

        :param self: self reference
        :return: S_OK( { channelName : { "ChannelID" : ..., "TimeToStart" : ... } } ) or
                 S_ERROR when the scheduling type is neither 'File' nor 'Throughput'
        """

        if self.schedulingType not in ("File", "Throughput"):
            errStr = "__getTimeToStart: CS SchedulingType entry must be either 'File' or 'Throughput'"
            self.log.error(errStr)
            return S_ERROR(errStr)

        channelInfo = {}
        for channelID, bandwidth in self.bandwidths.items():

            channelDict = self.channels[channelID]
            channelName = channelDict["ChannelName"]

            if channelDict["Status"] != "Active":
                ## channel not active, make it unattractive
                timeToStart = float("inf")
            else:
                ## stays 0.0 when no positive transfer speed has been observed
                timeToStart = 0.0

                channelFileSuccess = bandwidth["SuccessfulFiles"]
                channelFileFailed = bandwidth["FailedFiles"]
                attempted = channelFileSuccess + channelFileFailed

                successRate = 100.0
                if attempted != 0:
                    successRate = 100.0 * (channelFileSuccess /
                                           float(attempted))

                ## get distinct failed files counter
                distinctFailedFiles = self.failedFiles.get(channelID, 0)

                ## success rate too low and more than acceptable distinct files are affected?, make channel unattractive
                if (successRate < self.acceptableFailureRate) and (
                        distinctFailedFiles > self.acceptableFailedFiles):
                    timeToStart = float("inf")
                else:
                    if self.schedulingType == "File":
                        transferSpeed = bandwidth["Fileput"]
                        waitingTransfers = channelDict["Files"]
                    else:
                        ## scheduling type == Throughput
                        transferSpeed = bandwidth["Throughput"]
                        waitingTransfers = channelDict["Size"]

                    if transferSpeed > 0:
                        timeToStart = waitingTransfers / float(transferSpeed)

            # setdefault: the first record seen for a channel name is the one kept
            channelInfo.setdefault(channelName, {
                "ChannelID": channelID,
                "TimeToStart": timeToStart
            })

        return S_OK(channelInfo)

    def __getActiveSEs(self, seList, access="Read"):
        """Get active storage elements.

        :param self: self reference
        :param list seList: storage element list
        :param str access: storage element access, could be 'Read' (default) or 'Write'
        :return: names whose status for the given access is "Active" or "Bad",
                 an empty list when the status query fails
        """
        res = self.resourceStatus.getStorageElementStatus(seList,
                                                          statusType=access,
                                                          default='Unknown')
        if not res["OK"]:
            return []
        return [
            seName for seName, seStatus in res["Value"].items()
            if seStatus.get(access) in ("Active", "Bad")
        ]

    def __getChannelSitesForSE(self, storageElement):
        """Get sites for given storage element.

        :param self: self reference
        :param str storageElement: storage element name
        :return: list of the distinct second dot-separated fields of the site names
                 (names without a dot are skipped), empty when the lookup fails
        """
        res = getSitesForSE(storageElement)
        if not res["OK"]:
            return []
        sites = []
        for site in res["Value"]:
            nameParts = site.split(".")
            if len(nameParts) > 1 and nameParts[1] not in sites:
                sites.append(nameParts[1])
        return sites
コード例 #12
0
ファイル: StrategyHandler.py プロジェクト: bmb/DIRAC
class StrategyHandler( object ):
  """
  .. class:: StrategyHandler

  StrategyHandler is a helper class for determining optimal replication tree for given
  source files, their replicas and target storage elements.
  """

  def __init__( self, configSection, bandwidths=None, channels=None, failedFiles=None ):
    """ Read the handler options from the CS and build the strategy dispatcher.

    :param self: self reference
    :param str configSection: CS path; the class name is appended to form this handler's section
    :param bandwidths: initial value of self.bandwidths (empty dict when falsy)
    :param channels: initial value of self.channels (empty dict when falsy)
    :param failedFiles: initial value of self.failedFiles (empty dict when falsy)
    """
    ## this handler's own CS section
    section = configSection + "/" + self.__class__.__name__
    self.configSection = section

    ## sublogger, its level comes from the CS
    self.log = logger = gLogger.getSubLogger( "StrategyHandler", child=True )
    logger.setLevel( gConfig.getValue( section + "/LogLevel", "DEBUG"  ) )

    self.supportedStrategies = [ 'Simple', 'DynamicThroughput', 'Swarm', 'MinimiseTotalWait' ]
    logger.debug( "Supported strategies = %s" % ", ".join( self.supportedStrategies ) )

    ## CS options, each one logged right after it is read
    self.sigma = gConfig.getValue( section + '/HopSigma', 0.0 )
    logger.debug( "HopSigma = %s" % self.sigma )

    self.schedulingType = gConfig.getValue( section + '/SchedulingType', 'File' )
    logger.debug( "SchedulingType = %s" % self.schedulingType )

    self.activeStrategies = gConfig.getValue( section + '/ActiveStrategies', ['MinimiseTotalWait'] )
    logger.debug( "ActiveStrategies = %s" % ", ".join( self.activeStrategies ) )

    self.numberOfStrategies = len( self.activeStrategies )
    logger.debug( "Number of active strategies = %s" % self.numberOfStrategies )

    self.acceptableFailureRate = gConfig.getValue( section + '/AcceptableFailureRate', 75 )
    logger.debug( "AcceptableFailureRate = %s" % self.acceptableFailureRate )

    self.acceptableFailedFiles = gConfig.getValue( section + "/AcceptableFailedFiles", 5 )
    logger.debug( "AcceptableFailedFiles = %s" % self.acceptableFailedFiles )

    ## observed state handed in by the caller, falsy values become empty dicts
    self.bandwidths = bandwidths or {}
    self.channels = channels or {}
    self.failedFiles = failedFiles or {}
    ## index into activeStrategies of the strategy to hand out next
    self.chosenStrategy = 0

    ## dispatcher: compiled strategy name pattern -> bound method implementing it
    implementations = ( ( "MinimiseTotalWait", self.__minimiseTotalWait ),
                        ( "DynamicThroughput", self.__dynamicThroughput ),
                        ( "Simple", self.__simple ),
                        ( "Swarm", self.__swarm ) )
    self.strategyDispatcher = dict( ( re.compile( name ), method ) for name, method in implementations )

    self.resourceStatus = ResourceStatus()

    logger.debug( "strategyDispatcher entries:" )
    for rePattern, method in self.strategyDispatcher.items():
      logger.debug( "%s : %s" % ( rePattern.pattern, method.__name__ ) )

    logger.debug("%s has been constructed" % self.__class__.__name__ )

  def reset( self ):
    """ reset :chosenStrategy: 

    :param self: self reference
    """
    self.chosenStrategy = 0

  def setFailedFiles( self, failedFiles ):
    """ set the failed FTS files counters

    :param self: self reference
    :param failedFiles: observed distinct failed files
    """
    self.failedFiles = failedFiles if failedFiles else {}

  def setBandwiths( self, bandwidths ):
    """ set the bandwidths 

    :param self: self reference
    :param bandwithds: observed througput of active FTS channels
    """
  
    self.bandwidths = bandwidths if bandwidths else {}

  def setChannels( self, channels ):
    """ set the channels
    
    :param self: self reference
    :param channels: active channels queues
    """
    self.channels = channels if channels else {}

  def getSupportedStrategies( self ):
    """ Get supported strategies.

    :param self: self reference
    """    
    return self.supportedStrategies

  def determineReplicationTree( self, sourceSE, targetSEs, replicas, size, strategy = None, sigma = None ):
    """ resolve and find replication tree given source and target storage elements, active replicas,
    and file size.

    The "Files" and "Size" counters of every channel used by the tree are bumped in
    self.channels. The caller's targetSEs list is left untouched.

    :param self: self reference
    :param str sourceSE: source storage element name (None: any replica may act as source)
    :param list targetSEs: list of target storage elements
    :param dict replicas: active replicas, keyed by storage element name
    :param int size: file size
    :param str strategy: strategy to use (default: next active one); a "<name>_<float>" value
                         also sets the hop sigma
    :param float sigma: hop sigma, stored on the handler when not None
    :return: S_OK( tree ) or S_ERROR when "Simple" is asked for a source holding no replica
    """
    if not strategy:
      strategy = self.__selectStrategy()
    self.log.debug( "determineReplicationTree: will use %s strategy"  % strategy )

    # compare against None: an explicit 0.0 is a valid request to drop the hop penalty
    if sigma is not None:
      self.log.debug( "determineReplicationTree: sigma = %s"  % sigma )
      self.sigma = sigma

    # For each strategy implemented an 'if' must be placed here
    tree = {}
    for reStrategy in self.strategyDispatcher:
      self.log.debug( reStrategy.pattern )
      if not reStrategy.search( strategy ):
        continue
      if "_" in strategy:
        try:
          self.sigma = float( strategy.split( "_" )[1] )
          self.log.debug("determineReplicationTree: new sigma %s" % self.sigma )
        except ValueError:
          self.log.warn("determineReplicationTree: can't set new sigma value from '%s'" % strategy )
      if reStrategy.pattern in ( "MinimiseTotalWait", "DynamicThroughput" ):
        # the tree builders append to / remove from their arguments: hand them real lists
        # of their own, so dict views work and the caller's targetSEs is not emptied
        replicasToUse = list( replicas ) if sourceSE is None else [ sourceSE ]
        tree = self.strategyDispatcher[ reStrategy ]( replicasToUse, list( targetSEs ) )
      elif reStrategy.pattern == "Simple":
        if sourceSE not in replicas:
          return S_ERROR( "File does not exist at specified source site" )
        tree = self.__simple( sourceSE, targetSEs )
      elif reStrategy.pattern == "Swarm":
        # without any target there is nothing to schedule
        tree = self.__swarm( targetSEs[0], list( replicas ) ) if targetSEs else {}

    # Now update the queues to reflect the chosen strategies
    for channelID in tree:
      self.channels[channelID]["Files"] += 1
      self.channels[channelID]["Size"] += size

    return S_OK( tree )

  def __selectStrategy( self ):
    """ If more than one active strategy use one after the other.

    :param self: self reference
    """
    chosenStrategy = self.activeStrategies[self.chosenStrategy]
    self.chosenStrategy += 1
    if self.chosenStrategy == self.numberOfStrategies:
      self.chosenStrategy = 0
    return chosenStrategy

  def __simple( self, sourceSE, destSEs ):
    """ Direct replication: one hop from the source to every target that a channel reaches.

    :param self: self reference
    :param str sourceSE: source storage element name
    :param list destSEs: destination storage elements
    :return: replication tree, empty when the source is not reported as active
    """
    replicationTree = {}
    if not self.__getActiveSEs( [ sourceSE ] ):
      return replicationTree

    fromSites = self.__getChannelSitesForSE( sourceSE )
    for targetSE in destSEs:
      toSites = self.__getChannelSitesForSE( targetSE )
      for chanID, chanRecord in self.channels.items():
        # a channel already in the tree keeps its first assignment
        if chanID in replicationTree:
          continue
        if chanRecord["Source"] not in fromSites or chanRecord["Destination"] not in toSites:
          continue
        replicationTree[chanID] = { "Ancestor" : False,
                                    "SourceSE" : sourceSE,
                                    "DestSE" : targetSE,
                                    "Strategy" : "Simple" }
    return replicationTree

  def __swarm( self, destSE, replicas ):
    """ This strategy is to be used to get the data to the target site as quickly as possible
    from any source.

    :param self: self reference
    :param str destSE: destination storage element
    :param list replicas: replicas dictionary keys
    :return: replication tree holding at most one channel
    """
    tree = {}
    res = self.__getTimeToStart()
    if not res["OK"]:
      self.log.error( res["Message"] )
      return tree
    channelInfo = res["Value"]
    minTimeToStart = float( "inf" )

    sourceSEs = self.__getActiveSEs( replicas )
    destSites = self.__getChannelSitesForSE( destSE )

    selectedChannelID = None
    selectedSourceSE = None
    selectedDestSE = None

    for destSite in destSites:
      for sourceSE in sourceSEs:
        for sourceSite in self.__getChannelSitesForSE( sourceSE ):
          channelName = "%s-%s" % ( sourceSite, destSite )
          if channelName not in channelInfo:
            errStr = "__swarm: Channel not defined"
            self.log.warn( errStr, channelName )
            continue
          channelTimeToStart = channelInfo[channelName]["TimeToStart"]
          # '<=' : on equal times the channel examined last wins
          if channelTimeToStart <= minTimeToStart:
            minTimeToStart = channelTimeToStart
            selectedSourceSE = sourceSE
            selectedDestSE = destSE
            selectedChannelID = channelInfo[channelName]["ChannelID"]

    # test the ID against None: a channel ID of 0 is falsy but still a valid selection
    if selectedChannelID is not None and selectedSourceSE and selectedDestSE:
      tree[selectedChannelID] = { "Ancestor" : False,
                                  "SourceSE" : selectedSourceSE,
                                  "DestSE" : selectedDestSE,
                                  "Strategy" : "Swarm" }
    return tree

  def __dynamicThroughput( self, sourceSEs, destSEs ):
    """ Create a replication tree based on observed throughput on the channels.

    On every pass over the remaining destinations the channel with the smallest total time to start
    is selected (ties are resolved randomly). The total is the channel time to start plus, for a source
    that was itself scheduled by this tree, the time to reach that source and self.sigma.
    A channel joining a site with itself is selected as soon as it is seen.
    The selected destination is then moved from :destSEs: to :sourceSEs:, i.e. both input lists
    are modified in place.

    :param self: self reference
    :param list sourceSEs: source storage elements names
    :param list destSEs: destination storage elements names
    :return: dict { channelID : { "Ancestor" : ..., "SourceSE" : ..., "DestSE" : ..., "Strategy" : ... } },
             empty when times to start can't be obtained, partial when no usable channel is left
    :raise StrategyHandlerChannelNotDefined: when a source site - destination site channel is not defined
    """
    tree = {}
    res = self.__getTimeToStart()
    if not res["OK"]:
      self.log.error( res["Message"] )
      return tree
    channelInfo = res["Value"]

    timeToSite = {}   # Maintains time to site including previous hops
    siteAncestor = {} # Maintains the ancestor channel for a site

    while destSEs:
      minTotalTimeToStart = float( "inf" )
      selectedPathTimeToStart = None
      candidateChannels = []
      try:
        sourceActiveSEs = self.__getActiveSEs( sourceSEs )
        for destSE in destSEs:
          destSites = self.__getChannelSitesForSE( destSE )
          for destSite in destSites:
            for sourceSE in sourceActiveSEs:
              sourceSites = self.__getChannelSitesForSE( sourceSE )
              for sourceSite in sourceSites:
                channelName = "%s-%s" % ( sourceSite, destSite )
                if channelName not in channelInfo:
                  self.log.warn( "dynamicThroughput: bailing out! channel %s not defined " % channelName )
                  raise StrategyHandlerChannelNotDefined( channelName )

                channelID = channelInfo[channelName]["ChannelID"]
                # a channel can be used only once in the tree
                if channelID in tree:
                  continue

                totalTimeToStart = channelInfo[channelName]["TimeToStart"]
                # the source is fed by this tree: add the time needed to get the file there first
                if sourceSE in timeToSite:
                  totalTimeToStart += timeToSite[sourceSE] + self.sigma

                # local transfer found, no need to look any further
                if sourceSite == destSite:
                  selectedPathTimeToStart = totalTimeToStart
                  candidateChannels = [ ( sourceSE, destSE, channelID ) ]
                  raise StrategyHandlerLocalFound( candidateChannels )

                if totalTimeToStart < minTotalTimeToStart:
                  minTotalTimeToStart = totalTimeToStart
                  selectedPathTimeToStart = totalTimeToStart
                  candidateChannels = [ ( sourceSE, destSE, channelID ) ]
                elif totalTimeToStart == minTotalTimeToStart and totalTimeToStart < float( "inf" ):
                  # equally good channel, keep it as an alternative
                  candidateChannels.append( ( sourceSE, destSE, channelID ) )

      except StrategyHandlerLocalFound:
        pass

      # No active source or only channels with infinite time to start: nothing can be
      # scheduled any more, so stop here instead of failing on candidateChannels[0]
      if not candidateChannels:
        self.log.warn( "dynamicThroughput: no usable channel found for destinations %s" % destSEs )
        return tree

      ## shuffle candidates and pick the 1st one
      random.shuffle( candidateChannels )
      selectedSourceSE, selectedDestSE, selectedChannelID = candidateChannels[0]
      timeToSite[selectedDestSE] = selectedPathTimeToStart
      siteAncestor[selectedDestSE] = selectedChannelID

      # the channel which has to finish before this one can start (False if there is none)
      waitingChannel = siteAncestor.get( selectedSourceSE, False )

      tree[selectedChannelID] = { "Ancestor" : waitingChannel,
                                  "SourceSE" : selectedSourceSE,
                                  "DestSE" : selectedDestSE,
                                  "Strategy" : "DynamicThroughput" }
      sourceSEs.append( selectedDestSE )
      destSEs.remove( selectedDestSE )
    return tree

  def __minimiseTotalWait( self, sourceSEs, destSEs ):
    """ Create a replication tree picking on every pass the channel with the smallest time to start.

    The time to start of a channel whose source is not one of the sources supplied by the caller
    (i.e. a replica scheduled by this tree) is increased by self.sigma. Undefined channels are skipped,
    ties are resolved randomly and a channel joining a site with itself is selected as soon as it is seen.
    The selected destination is moved from :destSEs: to :sourceSEs:, i.e. both input lists
    are modified in place.

    :param self: self reference
    :param list sourceSEs: source storage elements names
    :param list destSEs: destination storage elements names
    :return: dict { channelID : { "Ancestor" : ..., "SourceSE" : ..., "DestSE" : ..., "Strategy" : ... } },
             empty when times to start can't be obtained, partial when no usable channel is left
    """

    self.log.debug( "sourceSEs = %s" % sourceSEs )
    self.log.debug( "destSEs = %s" % destSEs )

    tree = {}
    res = self.__getTimeToStart()
    if not res["OK"]:
      self.log.error( res["Message"] )
      return tree
    channelInfo = res["Value"]

    siteAncestor = {}              # Maintains the ancestor channel for a site
    # Copy, not alias: sourceSEs is extended below with every selected destination and
    # those secondary sources have to be told apart from the original ones
    primarySources = list( sourceSEs )

    while destSEs:
      minTotalTimeToStart = float( "inf" )
      candidateChannels = []
      try:
        sourceActiveSEs = self.__getActiveSEs( sourceSEs )
        for destSE in destSEs:
          destSites = self.__getChannelSitesForSE( destSE )
          for destSite in destSites:
            for sourceSE in sourceActiveSEs:
              sourceSites = self.__getChannelSitesForSE( sourceSE )
              for sourceSite in sourceSites:
                channelName = "%s-%s" % ( sourceSite, destSite )

                if channelName not in channelInfo:
                  continue

                channelID = channelInfo[channelName]["ChannelID"]
                # If this channel is already used, look for another sourceSE
                if channelID in tree:
                  continue
                channelTimeToStart = channelInfo[channelName]["TimeToStart"]
                # penalty for sources which have to be populated by this tree first
                if sourceSE not in primarySources:
                  channelTimeToStart += self.sigma
                ## local transfer found
                if sourceSite == destSite:
                  candidateChannels = [ ( sourceSE, destSE, channelID ) ]
                  ## bail out to save rainforests
                  raise StrategyHandlerLocalFound( candidateChannels )
                if channelTimeToStart < minTotalTimeToStart:
                  minTotalTimeToStart = channelTimeToStart
                  candidateChannels = [ ( sourceSE, destSE, channelID ) ]
                elif channelTimeToStart == minTotalTimeToStart and channelTimeToStart != float( "inf" ):
                  # equally good channel, keep it as an alternative
                  candidateChannels.append( ( sourceSE, destSE, channelID ) )

      except StrategyHandlerLocalFound:
        pass

      # nothing more can be scheduled
      if not candidateChannels:
        return tree

      ## shuffle candidates and pick the 1st one
      random.shuffle( candidateChannels )
      selectedSourceSE, selectedDestSE, selectedChannelID = candidateChannels[0]
      siteAncestor[selectedDestSE] = selectedChannelID
      # the channel which has to finish before this one can start (False if there is none)
      waitingChannel = siteAncestor.get( selectedSourceSE, False )

      tree[selectedChannelID] = { "Ancestor" : waitingChannel,
                                  "SourceSE" : selectedSourceSE,
                                  "DestSE" : selectedDestSE,
                                  "Strategy" : "MinimiseTotalWait" }
      sourceSEs.append( selectedDestSE )
      destSEs.remove( selectedDestSE )

    return tree

  def __getTimeToStart( self ):
    """ Build the map of channel names to channel IDs and estimated times to start.

    For an active channel the time to start is the amount of waiting transfers divided by the observed
    transfer speed (files and "Fileput" for scheduling type "File", size and "Throughput" otherwise),
    or 0.0 when the speed is not positive. A channel which is not active, or whose success rate is below
    self.acceptableFailureRate while more than self.acceptableFailedFiles distinct files failed,
    gets an infinite time to start.

    :param self: self reference
    :return: S_OK( { channelName : { "ChannelID" : channelID, "TimeToStart" : timeToStart } } ) or
             S_ERROR when self.schedulingType is neither 'File' nor 'Throughput'
    """

    if self.schedulingType != "File" and self.schedulingType != "Throughput":
      errStr = "__getTimeToStart: CS SchedulingType entry must be either 'File' or 'Throughput'"
      self.log.error( errStr )
      return S_ERROR( errStr )

    # value making a channel unattractive for the strategies
    unusable = float( "inf" )

    channelInfo = {}
    for channelID, bandwidth in self.bandwidths.items():

      channelDict = self.channels[channelID]
      channelName = channelDict["ChannelName"]

      if channelDict["Status"] != "Active":
        ## channel not active
        timeToStart = unusable
      else:
        succeeded = bandwidth["SuccessfulFiles"]
        failed = bandwidth["FailedFiles"]
        attempted = succeeded + failed

        ## nothing attempted so far counts as a full success
        if attempted != 0:
          successRate = 100.0 * ( succeeded / float( attempted ) )
        else:
          successRate = 100.0

        ## get distinct failed files counter
        distinctFailedFiles = self.failedFiles.get( channelID, 0 )

        if successRate < self.acceptableFailureRate and distinctFailedFiles > self.acceptableFailedFiles:
          ## too many failures affecting too many distinct files
          timeToStart = unusable
        else:
          ## scheduling type == Throughput
          transferSpeed = bandwidth["Throughput"]
          waitingTransfers = channelDict["Size"]

          ## scheduling type == File, count files instead of bytes
          if self.schedulingType == "File":
            transferSpeed = bandwidth["Fileput"]
            waitingTransfers = channelDict["Files"]

          timeToStart = 0.0
          if transferSpeed > 0:
            timeToStart = waitingTransfers / float( transferSpeed )

      ## the first entry seen for a channel name wins
      if channelName not in channelInfo:
        channelInfo[channelName] = { "ChannelID" : channelID,
                                     "TimeToStart" : timeToStart }

    return S_OK( channelInfo )

  def __getActiveSEs( self, seList, access = "Read" ):
    """ Select the storage elements whose status for the given access type is 'Active' or 'Bad'.

    :param self: self reference
    :param list seList: storage element list
    :param str access: storage element access type, 'Read' (default) or 'Write'
    :return: list of storage element names, empty if the status query fails
    """
    res = self.resourceStatus.getStorageElementStatus( seList, statusType = access, default = 'Unknown' )
    if not res["OK"]:
      return []

    activeSEs = []
    for seName, seStatus in res["Value"].items():
      # no status reported for this access type
      if access not in seStatus:
        continue
      accessStatus = seStatus[access]
      if accessStatus == "Active" or accessStatus == "Bad":
        activeSEs.append( seName )
    return activeSEs
   
  def __getChannelSitesForSE( self, storageElement ):
    """ Get the channel site names for the given storage element.

    The channel site name is the second dot separated field of the site name returned by
    getSitesForSE; names without a dot are ignored and duplicates are removed keeping the order.

    :param self: self reference
    :param str storageElement: storage element name
    :return: list of channel site names, empty if the sites can't be obtained
    """
    res = getSitesForSE( storageElement )
    if not res["OK"]:
      return []

    channelSites = []
    for fullSiteName in res["Value"]:
      nameFields = fullSiteName.split( "." )
      if len( nameFields ) <= 1:
        continue
      channelSite = nameFields[1]
      if channelSite not in channelSites:
        channelSites.append( channelSite )
    return channelSites
コード例 #13
0
ファイル: StorageFactory.py プロジェクト: coberger/DIRAC
class StorageFactory:

  def __init__( self, useProxy = False, vo = None ):
    """ c'tor

    :param bool useProxy: when set, the storage objects are created with the 'Proxy' plug-in
    :param str vo: VO name used to pick a VO specific base path ('VOPath') from the protocol sections
    """
    self.rootConfigPath = '/Resources/StorageElements'
    self.valid = True
    self.proxy = useProxy
    self.resourceStatus = ResourceStatus()
    self.vo = vo

    # State (re)filled by getStorages(). It is created here as well, because the _getConfig* helpers
    # append to the protocol lists and would otherwise fail on a freshly constructed object.
    self.remoteProtocols = []
    self.localProtocols = []
    self.name = ''
    self.options = {}
    self.protocolDetails = []
    self.storages = []

  ###########################################################################################
  #
  # Below are public methods for obtaining storage objects
  #

  def getStorageName( self, initialName ):
    """ Resolve the storage name using the configuration.

    :param str initialName: storage name to be resolved
    :return: the result of self._getConfigStorageName for :initialName:
    """
    resolved = self._getConfigStorageName( initialName )
    return resolved

  def getStorage( self, parameterDict ):
    """ This instantiates a single storage for the details provided and doesn't check the CS.

    :param dict parameterDict: storage description; 'StorageName' and 'ProtocolName' are mandatory,
                               'Protocol', 'Port', 'Host', 'Path', 'SpaceToken' and 'WSUrl' default to ''
    :return: S_ERROR if a mandatory key is missing, otherwise the result of self.__generateStorageObject
    """
    # The storage name must be supplied.
    if 'StorageName' not in parameterDict:
      errStr = "StorageFactory.getStorage: StorageName must be supplied"
      gLogger.error( errStr )
      return S_ERROR( errStr )
    storageName = parameterDict['StorageName']

    # ProtocolName must be supplied otherwise nothing will work.
    if 'ProtocolName' not in parameterDict:
      errStr = "StorageFactory.getStorage: ProtocolName must be supplied"
      gLogger.error( errStr )
      return S_ERROR( errStr )
    protocolName = parameterDict['ProtocolName']

    # The other options need not always be specified
    protocol = parameterDict.get( 'Protocol', '' )
    port = parameterDict.get( 'Port', '' )
    host = parameterDict.get( 'Host', '' )
    path = parameterDict.get( 'Path', '' )
    spaceToken = parameterDict.get( 'SpaceToken', '' )
    wsPath = parameterDict.get( 'WSUrl', '' )

    return self.__generateStorageObject( storageName, protocolName, protocol, path, host, port, spaceToken, wsPath, parameterDict )


  def getStorages( self, storageName, protocolList = None ):
    """ Get an instance of a Storage based on the DIRAC SE name based on the CS entries CS

        'storageName' is the DIRAC SE name i.e. 'CERN-RAW'
        'protocolList' is an optional list of protocols if a sub-set is desired i.e ['SRM2','SRM1'];
                       all the configured protocols are used when it is empty or not given

        Returns S_OK with a dictionary with the keys 'StorageName', 'StorageOptions', 'StorageObjects',
        'LocalProtocols', 'RemoteProtocols', 'ProtocolOptions' and 'TurlProtocols', or S_ERROR if
        the configuration can't be read or no storage object could be instantiated.
    """
    self.remoteProtocols = []
    self.localProtocols = []
    self.name = ''
    self.options = {}
    self.protocolDetails = []
    self.storages = []

    # Get the name of the storage provided
    res = self._getConfigStorageName( storageName )
    if not res['OK']:
      self.valid = False
      return res
    storageName = res['Value']
    self.name = storageName

    # Get the options defined in the CS for this storage
    res = self._getConfigStorageOptions( storageName )
    if not res['OK']:
      self.valid = False
      return res
    self.options = res['Value']

    # Get the protocol specific details
    res = self._getConfigStorageProtocols( storageName )
    if not res['OK']:
      self.valid = False
      return res
    self.protocolDetails = res['Value']

    requestedLocalProtocols = []
    requestedRemoteProtocols = []
    requestedProtocolDetails = []
    turlProtocols = []
    # Generate the protocol specific plug-ins
    for protocolDict in self.protocolDetails:
      protocolName = protocolDict['ProtocolName']
      # Skip the protocols which were not asked for
      if protocolList and protocolName not in protocolList:
        continue

      protocol = protocolDict['Protocol']
      host = protocolDict['Host']
      path = protocolDict['Path']
      port = protocolDict['Port']
      spaceToken = protocolDict['SpaceToken']
      wsUrl = protocolDict['WSUrl']
      res = self.__generateStorageObject( storageName, protocolName, protocol,
                                          path = path, host = host, port = port,
                                          spaceToken = spaceToken, wsUrl = wsUrl,
                                          parameters = protocolDict )
      if not res['OK']:
        # A plug-in which can't be created is not fatal, the others are still tried
        gLogger.info( res['Message'] )
        continue

      self.storages.append( res['Value'] )
      if protocolName in self.localProtocols:
        turlProtocols.append( protocol )
        requestedLocalProtocols.append( protocolName )
      if protocolName in self.remoteProtocols:
        requestedRemoteProtocols.append( protocolName )
      requestedProtocolDetails.append( protocolDict )

    if self.storages:
      resDict = {}
      resDict['StorageName'] = self.name
      resDict['StorageOptions'] = self.options
      resDict['StorageObjects'] = self.storages
      resDict['LocalProtocols'] = requestedLocalProtocols
      resDict['RemoteProtocols'] = requestedRemoteProtocols
      resDict['ProtocolOptions'] = requestedProtocolDetails
      resDict['TurlProtocols'] = turlProtocols
      return S_OK( resDict )

    errStr = "StorageFactory.getStorages: Failed to instantiate any storage protocols."
    gLogger.error( errStr, self.name )
    return S_ERROR( errStr )
  ###########################################################################################
  #
  # Below are internal methods for obtaining section/option/value configuration
  #

  def _getConfigStorageName( self, storageName ):
    """
      Get the name of the storage from the configuration service.
      If the storage section has an 'Alias' option, the aliased name is resolved recursively.

      'storageName' is the storage section to check in the CS

      Returns S_OK( resolved name ) or S_ERROR if the options can't be read, the section
      has no options or the alias can't be resolved.
    """
    storagePath = '%s/%s' % ( self.rootConfigPath, storageName )
    optionsRes = gConfig.getOptions( storagePath )
    if not optionsRes['OK']:
      errStr = "StorageFactory._getConfigStorageName: Failed to get storage options"
      gLogger.error( errStr, optionsRes['Message'] )
      return S_ERROR( errStr )
    if not optionsRes['Value']:
      errStr = "StorageFactory._getConfigStorageName: Supplied storage doesn't exist."
      gLogger.error( errStr, storagePath )
      return S_ERROR( errStr )

    # Not an alias, the name is already the final one
    if 'Alias' not in optionsRes['Value']:
      return S_OK( storageName )

    aliasPath = '%s/Alias' % storagePath
    aliasRes = self._getConfigStorageName( gConfig.getValue( aliasPath ) )
    if aliasRes['OK']:
      return S_OK( aliasRes['Value'] )

    errStr = "StorageFactory._getConfigStorageName: Supplied storage doesn't exist."
    gLogger.error( errStr, aliasPath )
    return S_ERROR( errStr )

  def _getConfigStorageOptions( self, storageName ):
    """ Get the options associated to the StorageElement as defined in the CS

        The 'ReadAccess', 'WriteAccess', 'CheckAccess' and 'RemoveAccess' options are not copied from
        the CS; the dictionary reported for this storage by self.resourceStatus is merged in instead.

        Returns S_OK( options dictionary ) or S_ERROR if the options or the status can't be obtained.
    """
    storageConfigPath = '%s/%s' % ( self.rootConfigPath, storageName )
    optionsRes = gConfig.getOptions( storageConfigPath )
    if not optionsRes['OK']:
      errStr = "StorageFactory._getStorageOptions: Failed to get storage options."
      gLogger.error( errStr, "%s: %s" % ( storageName, optionsRes['Message'] ) )
      return S_ERROR( errStr )

    accessOptions = ( 'ReadAccess', 'WriteAccess', 'CheckAccess', 'RemoveAccess' )
    optionsDict = {}
    for option in optionsRes['Value']:
      if option not in accessOptions:
        optionsDict[option] = gConfig.getValue( '%s/%s' % ( storageConfigPath, option ), '' )

    statusRes = self.resourceStatus.getStorageElementStatus( storageName )
    if not statusRes[ 'OK' ]:
      errStr = "StorageFactory._getStorageOptions: Failed to get storage status"
      gLogger.error( errStr, "%s: %s" % ( storageName, statusRes['Message'] ) )
      return S_ERROR( errStr )

    # For safety, the ${statusType}Access keys are not added,
    # this would require modifications in the StorageElement class.
    # What is added is the dictionary with the statusTypes and values
    # { 'statusType1' : 'status1', 'statusType2' : 'status2' ... }
    seStatus = statusRes[ 'Value' ][ storageName ]
    optionsDict.update( seStatus )

    return S_OK( optionsDict )

  def _getConfigStorageProtocols( self, storageName ):
    """ Protocol specific information is present as sections in the Storage configuration

        The sections are processed in the order given by sortList and the first section which
        can't be parsed stops the processing. On success self.protocols is set to the local
        protocols followed by the remote ones.

        Returns S_OK( list of protocol dictionaries ) or S_ERROR.
    """
    storageConfigPath = '%s/%s' % ( self.rootConfigPath, storageName )
    sectionsRes = gConfig.getSections( storageConfigPath )
    if not sectionsRes['OK']:
      errStr = "StorageFactory._getConfigStorageProtocols: Failed to get storage sections"
      gLogger.error( errStr, "%s: %s" % ( storageName, sectionsRes['Message'] ) )
      return S_ERROR( errStr )

    protocolDetails = []
    for protocolSection in sortList( sectionsRes['Value'] ):
      detailsRes = self._getConfigStorageProtocolDetails( storageName, protocolSection )
      if not detailsRes['OK']:
        return detailsRes
      protocolDetails.append( detailsRes['Value'] )

    self.protocols = self.localProtocols + self.remoteProtocols
    return S_OK( protocolDetails )

  def _getConfigStorageProtocolDetails( self, storageName, protocol ):
    """
      Parse the contents of the protocol block

      'storageName' is the storage section in the CS
      'protocol' is the protocol section inside the storage section

      The protocol name is registered in self.remoteProtocols or self.localProtocols according to
      the 'Access' option. Returns S_OK( protocol dictionary ) or S_ERROR if the options can't be
      read or 'ProtocolName' is not defined.
    """
    # First obtain the options that are available
    protocolConfigPath = '%s/%s/%s' % ( self.rootConfigPath, storageName, protocol )
    optionsRes = gConfig.getOptions( protocolConfigPath )
    if not optionsRes['OK']:
      errStr = "StorageFactory.__getProtocolDetails: Failed to get protocol options."
      gLogger.error( errStr, "%s: %s" % ( storageName, protocol ) )
      return S_ERROR( errStr )

    # We must have certain values internally even if not supplied in CS
    protocolDict = {}
    for mandatoryKey in ( 'Access', 'Host', 'Path', 'Port', 'Protocol', 'ProtocolName', 'SpaceToken', 'WSUrl' ):
      protocolDict[mandatoryKey] = ''
    for option in optionsRes['Value']:
      protocolDict[option] = gConfig.getValue( '%s/%s' % ( protocolConfigPath, option ), '' )

    # Evaluate the base path taking into account possible VO specific setting
    if self.vo:
      voPathRes = gConfig.getOptionsDict( cfgPath( protocolConfigPath, 'VOPath' ) )
      if voPathRes['OK']:
        voPath = voPathRes['Value'].get( self.vo, '' )
      else:
        voPath = ''
      if voPath:
        protocolDict['Path'] = voPath

    # Now update the local and remote protocol lists.
    # A warning will be given if the Access option is not set.
    accessType = protocolDict['Access']
    if accessType == 'remote':
      self.remoteProtocols.append( protocolDict['ProtocolName'] )
    elif accessType == 'local':
      self.localProtocols.append( protocolDict['ProtocolName'] )
    else:
      gLogger.warn( "StorageFactory.__getProtocolDetails: The 'Access' option for %s:%s is neither 'local' or 'remote'." % ( storageName, protocol ) )

    # The ProtocolName option must be defined
    if protocolDict['ProtocolName']:
      return S_OK( protocolDict )

    errStr = "StorageFactory.__getProtocolDetails: 'ProtocolName' option is not defined."
    gLogger.error( errStr, "%s: %s" % ( storageName, protocol ) )
    return S_ERROR( errStr )

  ###########################################################################################
  #
  # Below is the method for obtaining the object instantiated for a provided storage configuration
  #

  def __generateStorageObject( self, storageName, protocolName, protocol, path = None,
                              host = None, port = None, spaceToken = None, wsUrl = None, parameters = None ):
    """ Instantiate the storage plug-in for the given protocol.

        The plug-in class <type>Storage is looked for in the module Resources/Storage/<type>Storage.py
        of the installed extensions, where <type> is 'Proxy' if the factory was created with useProxy
        and :protocolName: otherwise. The first extension providing the module file is used.

        'storageName', 'protocol', 'path', 'host', 'port', 'spaceToken' and 'wsUrl' are passed to
                       the plug-in constructor; trailing slashes are removed from 'path' and an empty
                       or missing path becomes '/'
        'parameters' is an optional dictionary passed to the setParameters method of the plug-in

        Returns S_OK( storage object ) or S_ERROR if the plug-in can't be found, imported, instantiated
        or configured.
    """
    storageType = protocolName
    if self.proxy:
      storageType = 'Proxy'
    # This enforces the convention that the plug in must be named after the protocol
    moduleName = "%sStorage" % ( storageType )

    # path is None when not supplied by the caller
    path = ( path or '' ).rstrip( '/' )
    if not path:
      path = '/'

    for moduleRootPath in getInstalledExtensions():
      gLogger.debug( "Trying to load from root path %s" % moduleRootPath )
      moduleFile = os.path.join( rootPath, moduleRootPath, "Resources", "Storage", "%s.py" % moduleName )
      gLogger.debug( "Looking for file %s" % moduleFile )
      if not os.path.isfile( moduleFile ):
        continue

      try:
        storageModule = __import__( '%s.Resources.Storage.%s' % ( moduleRootPath, moduleName ),
                                    globals(), locals(), [moduleName] )
      except Exception as x:
        errStr = "StorageFactory._generateStorageObject: Failed to import %s: %s" % ( storageName, x )
        gLogger.exception( errStr )
        return S_ERROR( errStr )

      try:
        # NOTE: the class used to be looked up with eval() on a string built from the protocol name;
        # getattr() finds the same class without evaluating configuration supplied text as code
        storageClass = getattr( storageModule, moduleName )
        storage = storageClass( storageName, protocol, path, host, port, spaceToken, wsUrl )
        if not storage.isOK():
          errStr = "StorageFactory._generateStorageObject: Failed to instantiate storage plug in."
          gLogger.error( errStr, "%s" % ( moduleName ) )
          return S_ERROR( errStr )
      except Exception as x:
        errStr = "StorageFactory._generateStorageObject: Failed to instantiate %s(): %s" % ( moduleName, x )
        gLogger.exception( errStr )
        return S_ERROR( errStr )

      # Set extra parameters if any
      if parameters:
        result = storage.setParameters( parameters )
        if not result['OK']:
          return result

      # If use proxy, keep the original protocol name
      if self.proxy:
        storage.protocolName = protocolName
      return S_OK( storage )

    # None of the installed extensions provides the plug-in: report it instead of
    # returning None, the callers expect a S_OK / S_ERROR structure
    errStr = "StorageFactory._generateStorageObject: Failed to find storage plug in."
    gLogger.error( errStr, moduleName )
    return S_ERROR( errStr )
コード例 #14
0
ファイル: StorageFactory.py プロジェクト: sposs/DIRAC
class StorageFactory:

  def __init__( self, useProxy=False ):
    """ c'tor

    :param bool useProxy: stored as self.proxy
    """
    self.rootConfigPath = '/Resources/StorageElements'
    self.valid = True
    self.proxy = useProxy
    self.resourceStatus = ResourceStatus()

    # State (re)filled by getStorages(). It is created here as well, because the _getConfig* helpers
    # append to the protocol lists and would otherwise fail on a freshly constructed object.
    self.remoteProtocols = []
    self.localProtocols = []
    self.name = ''
    self.options = {}
    self.protocolDetails = []
    self.storages = []


  ###########################################################################################
  #
  # Below are public methods for obtaining storage objects
  #

  def getStorageName( self, initialName ):
    """ Resolve the storage name using the configuration.

    :param str initialName: storage name to be resolved
    :return: the result of self._getConfigStorageName for :initialName:
    """
    resolved = self._getConfigStorageName( initialName )
    return resolved

  def getStorage( self, parameterDict ):
    """ This instantiates a single storage for the details provided and doesn't check the CS.

    :param dict parameterDict: storage description; 'StorageName' and 'ProtocolName' are mandatory,
                               'Protocol', 'Port', 'Host', 'Path', 'SpaceToken' and 'WSUrl' default to ''
    :return: S_ERROR if a mandatory key is missing, otherwise the result of self.__generateStorageObject
    """
    # The storage name must be supplied.
    if 'StorageName' not in parameterDict:
      errStr = "StorageFactory.getStorage: StorageName must be supplied"
      gLogger.error( errStr )
      return S_ERROR( errStr )
    storageName = parameterDict['StorageName']

    # ProtocolName must be supplied otherwise nothing will work.
    if 'ProtocolName' not in parameterDict:
      errStr = "StorageFactory.getStorage: ProtocolName must be supplied"
      gLogger.error( errStr )
      return S_ERROR( errStr )
    protocolName = parameterDict['ProtocolName']

    # The other options need not always be specified
    protocol = parameterDict.get( 'Protocol', '' )
    port = parameterDict.get( 'Port', '' )
    host = parameterDict.get( 'Host', '' )
    path = parameterDict.get( 'Path', '' )
    spaceToken = parameterDict.get( 'SpaceToken', '' )
    wsPath = parameterDict.get( 'WSUrl', '' )

    return self.__generateStorageObject( storageName, protocolName, protocol, path, host, port, spaceToken, wsPath, parameterDict )


  def getStorages( self, storageName, protocolList = None ):
    """ Get an instance of a Storage based on the DIRAC SE name based on the CS entries CS

        'storageName' is the DIRAC SE name i.e. 'CERN-RAW'
        'protocolList' is an optional list of protocols if a sub-set is desired i.e ['SRM2','SRM1'];
                       all the configured protocols are used when it is empty or not given

        Returns S_OK with a dictionary with the keys 'StorageName', 'StorageOptions', 'StorageObjects',
        'LocalProtocols', 'RemoteProtocols', 'ProtocolOptions' and 'TurlProtocols', or S_ERROR if
        the configuration can't be read or no storage object could be instantiated.
    """
    self.remoteProtocols = []
    self.localProtocols = []
    self.name = ''
    self.options = {}
    self.protocolDetails = []
    self.storages = []

    # Get the name of the storage provided
    res = self._getConfigStorageName( storageName )
    if not res['OK']:
      self.valid = False
      return res
    storageName = res['Value']
    self.name = storageName

    # Get the options defined in the CS for this storage
    res = self._getConfigStorageOptions( storageName )
    if not res['OK']:
      self.valid = False
      return res
    self.options = res['Value']

    # Get the protocol specific details
    res = self._getConfigStorageProtocols( storageName )
    if not res['OK']:
      self.valid = False
      return res
    self.protocolDetails = res['Value']

    requestedLocalProtocols = []
    requestedRemoteProtocols = []
    requestedProtocolDetails = []
    turlProtocols = []
    # Generate the protocol specific plug-ins
    for protocolDict in self.protocolDetails:
      protocolName = protocolDict['ProtocolName']
      # Skip the protocols which were not asked for
      if protocolList and protocolName not in protocolList:
        continue

      protocol = protocolDict['Protocol']
      host = protocolDict['Host']
      path = protocolDict['Path']
      port = protocolDict['Port']
      spaceToken = protocolDict['SpaceToken']
      wsUrl = protocolDict['WSUrl']
      res = self.__generateStorageObject( storageName, protocolName, protocol,
                                          path = path, host = host, port = port,
                                          spaceToken = spaceToken, wsUrl = wsUrl,
                                          parameters = protocolDict )
      if not res['OK']:
        # A plug-in which can't be created is not fatal, the others are still tried
        gLogger.info( res['Message'] )
        continue

      self.storages.append( res['Value'] )
      if protocolName in self.localProtocols:
        turlProtocols.append( protocol )
        requestedLocalProtocols.append( protocolName )
      if protocolName in self.remoteProtocols:
        requestedRemoteProtocols.append( protocolName )
      requestedProtocolDetails.append( protocolDict )

    if self.storages:
      resDict = {}
      resDict['StorageName'] = self.name
      resDict['StorageOptions'] = self.options
      resDict['StorageObjects'] = self.storages
      resDict['LocalProtocols'] = requestedLocalProtocols
      resDict['RemoteProtocols'] = requestedRemoteProtocols
      resDict['ProtocolOptions'] = requestedProtocolDetails
      resDict['TurlProtocols'] = turlProtocols
      return S_OK( resDict )

    errStr = "StorageFactory.getStorages: Failed to instantiate any storage protocols."
    gLogger.error( errStr, self.name )
    return S_ERROR( errStr )
  ###########################################################################################
  #
  # Below are internal methods for obtaining section/option/value configuration
  #

  def _getConfigStorageName( self, storageName ):
    """
      Get the name of the storage from the configuration service.
      If the storage section has an 'Alias' option, the value of that option is returned
      instead of the supplied name.

      'storageName' is the storage section to check in the CS

      Returns S_OK( resolved name ) or S_ERROR if the options can't be read or the section
      has no options.
    """
    storagePath = '%s/%s' % ( self.rootConfigPath, storageName )
    optionsRes = gConfig.getOptions( storagePath )
    if not optionsRes['OK']:
      errStr = "StorageFactory._getConfigStorageName: Failed to get storage options"
      gLogger.error( errStr, optionsRes['Message'] )
      return S_ERROR( errStr )
    if not optionsRes['Value']:
      errStr = "StorageFactory._getConfigStorageName: Supplied storage doesn't exist."
      gLogger.error( errStr, storagePath )
      return S_ERROR( errStr )

    # Not an alias, the name is already the final one
    if 'Alias' not in optionsRes['Value']:
      return S_OK( storageName )

    aliasPath = '%s/Alias' % storagePath
    return S_OK( gConfig.getValue( aliasPath ) )

  def _getConfigStorageOptions( self, storageName ):
    """ Get the options associated to the StorageElement as defined in the CS

        The 'ReadAccess', 'WriteAccess', 'CheckAccess' and 'RemoveAccess' options are not copied from
        the CS; the dictionary reported for this storage by self.resourceStatus is merged in instead.

        Returns S_OK( options dictionary ) or S_ERROR if the options or the status can't be obtained.
    """
    storageConfigPath = '%s/%s' % ( self.rootConfigPath, storageName )
    optionsRes = gConfig.getOptions( storageConfigPath )
    if not optionsRes['OK']:
      errStr = "StorageFactory._getStorageOptions: Failed to get storage options."
      gLogger.error( errStr, "%s: %s" % ( storageName, optionsRes['Message'] ) )
      return S_ERROR( errStr )

    accessOptions = ( 'ReadAccess', 'WriteAccess', 'CheckAccess', 'RemoveAccess' )
    optionsDict = {}
    for option in optionsRes['Value']:
      if option not in accessOptions:
        optionsDict[option] = gConfig.getValue( '%s/%s' % ( storageConfigPath, option ), '' )

    statusRes = self.resourceStatus.getStorageElementStatus( storageName )
    if not statusRes[ 'OK' ]:
      errStr = "StorageFactory._getStorageOptions: Failed to get storage status"
      gLogger.error( errStr, "%s: %s" % ( storageName, statusRes['Message'] ) )
      return S_ERROR( errStr )

    # For safety, the ${statusType}Access keys are not added,
    # this would require modifications in the StorageElement class.
    # What is added is the dictionary with the statusTypes and values
    # { 'statusType1' : 'status1', 'statusType2' : 'status2' ... }
    seStatus = statusRes[ 'Value' ][ storageName ]
    optionsDict.update( seStatus )

    return S_OK( optionsDict )

  def _getConfigStorageProtocols( self, storageName ):
    """ Protocol specific information is present as sections in the Storage configuration

        The sections are processed in the order given by sortList and the first section which
        can't be parsed stops the processing. On success self.protocols is set to the local
        protocols followed by the remote ones.

        Returns S_OK( list of protocol dictionaries ) or S_ERROR.
    """
    storageConfigPath = '%s/%s' % ( self.rootConfigPath, storageName )
    sectionsRes = gConfig.getSections( storageConfigPath )
    if not sectionsRes['OK']:
      errStr = "StorageFactory._getConfigStorageProtocols: Failed to get storage sections"
      gLogger.error( errStr, "%s: %s" % ( storageName, sectionsRes['Message'] ) )
      return S_ERROR( errStr )

    protocolDetails = []
    for protocolSection in sortList( sectionsRes['Value'] ):
      detailsRes = self._getConfigStorageProtocolDetails( storageName, protocolSection )
      if not detailsRes['OK']:
        return detailsRes
      protocolDetails.append( detailsRes['Value'] )

    self.protocols = self.localProtocols + self.remoteProtocols
    return S_OK( protocolDetails )

  def _getConfigStorageProtocolDetails( self, storageName, protocol ):
    """ Parse the contents of one protocol block of a storage element.

        Every option found under <rootConfigPath>/<storageName>/<protocol> is
        read into a dictionary that always contains the keys 'Access', 'Host',
        'Path', 'Port', 'Protocol', 'ProtocolName', 'SpaceToken' and 'WSUrl'
        (empty string when the option is not configured).

        On success the protocol name is also appended to self.remoteProtocols or
        self.localProtocols according to the 'Access' option. The 'ProtocolName'
        option is validated before those lists are touched, so a broken section
        never leaves an empty name behind in the factory state.

        :param storageName: name of the storage section below self.rootConfigPath
        :param protocol: name of the protocol sub-section to parse
        :return: S_OK( protocol dictionary ) or S_ERROR( message )
    """
    # First obtain the options that are available
    protocolConfigPath = '%s/%s/%s' % ( self.rootConfigPath, storageName, protocol )
    res = gConfig.getOptions( protocolConfigPath )
    if not res['OK']:
      errStr = "StorageFactory.__getProtocolDetails: Failed to get protocol options."
      gLogger.error( errStr, "%s: %s" % ( storageName, protocol ) )
      return S_ERROR( errStr )

    # We must have certain values internally even if not supplied in CS
    protocolDict = {'Access':'', 'Host':'', 'Path':'', 'Port':'', 'Protocol':'', 'ProtocolName':'', 'SpaceToken':'', 'WSUrl':''}
    for option in res['Value']:
      protocolDict[option] = gConfig.getValue( '%s/%s' % ( protocolConfigPath, option ), '' )

    # The ProtocolName option must be defined; check it before mutating any state
    protocolName = protocolDict['ProtocolName']
    if not protocolName:
      errStr = "StorageFactory.__getProtocolDetails: 'ProtocolName' option is not defined."
      gLogger.error( errStr, "%s: %s" % ( storageName, protocol ) )
      return S_ERROR( errStr )

    # Now update the local and remote protocol lists.
    # A warning is given if the Access option is neither 'local' nor 'remote'.
    access = protocolDict['Access']
    if access == 'remote':
      self.remoteProtocols.append( protocolName )
    elif access == 'local':
      self.localProtocols.append( protocolName )
    else:
      errStr = "StorageFactory.__getProtocolDetails: The 'Access' option for %s:%s is neither 'local' or 'remote'." % ( storageName, protocol )
      gLogger.warn( errStr )

    return S_OK( protocolDict )

  ###########################################################################################
  #
  # Below is the method for obtaining the object instantiated for a provided storage configuration
  #

  def __generateStorageObject( self, storageName, protocolName, protocol, path = None,
                              host = None, port = None, spaceToken = None, wsUrl = None, parameters = None ):
    """ Load the storage plug-in module and instantiate the storage object.

        The plug-in type is protocolName, or 'Proxy' when self.proxy is set. Each
        installed extension is searched in turn for the file
        Resources/Storage/<type>Storage.py; the first one found is imported and
        its class <type>Storage is instantiated.

        :param storageName: name of the storage, passed to the plug-in constructor
        :param protocolName: plug-in name, also restored on the object in proxy mode
        :param protocol: protocol passed to the plug-in constructor
        :param path: base path; trailing slashes are stripped, None or empty means '/'
        :param host: host passed to the plug-in constructor
        :param port: port passed to the plug-in constructor
        :param spaceToken: space token passed to the plug-in constructor
        :param wsUrl: web service URL passed to the plug-in constructor
        :param parameters: optional extra parameters handed to storage.setParameters()
        :return: S_OK( storage object ) or S_ERROR( message ). An error is also
                 returned when no installed extension provides the plug-in module.
    """
    storageType = protocolName
    if self.proxy:
      storageType = 'Proxy'

    # path defaults to None, so normalise it before stripping the trailing slashes
    path = ( path or '' ).rstrip( '/' )
    if not path:
      path = '/'

    # This enforces the convention that the plug in must be named after the protocol
    moduleName = "%sStorage" % ( storageType )

    for moduleRootPath in getInstalledExtensions():
      gLogger.verbose( "Trying to load from root path %s" % moduleRootPath )
      moduleFile = os.path.join( rootPath, moduleRootPath, "Resources", "Storage", "%s.py" % moduleName )
      gLogger.verbose( "Looking for file %s" % moduleFile )
      if not os.path.isfile( moduleFile ):
        continue

      try:
        storageModule = __import__( '%s.Resources.Storage.%s' % ( moduleRootPath, moduleName ),
                                    globals(), locals(), [moduleName] )
      except Exception as x:
        errStr = "StorageFactory._generateStorageObject: Failed to import %s: %s" % ( storageName, x )
        gLogger.exception( errStr )
        return S_ERROR( errStr )

      try:
        # Look the class up by name instead of evaluating a constructed string
        storageClass = getattr( storageModule, moduleName )
        storage = storageClass( storageName, protocol, path, host, port, spaceToken, wsUrl )
        if not storage.isOK():
          errStr = "StorageFactory._generateStorageObject: Failed to instantiate storage plug in."
          gLogger.error( errStr, "%s" % ( moduleName ) )
          return S_ERROR( errStr )
      except Exception as x:
        errStr = "StorageFactory._generateStorageObject: Failed to instantiate %s(): %s" % ( moduleName, x )
        gLogger.exception( errStr )
        return S_ERROR( errStr )

      # Set extra parameters if any
      if parameters:
        result = storage.setParameters( parameters )
        if not result['OK']:
          return result

      # If use proxy, keep the original protocol name
      if self.proxy:
        storage.protocolName = protocolName
      return S_OK( storage )

    # No extension ships the requested plug-in: report it instead of returning None
    errStr = "StorageFactory._generateStorageObject: Failed to find storage plug in module."
    gLogger.error( errStr, "%s" % ( moduleName ) )
    return S_ERROR( errStr )
コード例 #15
0
ファイル: InputDataAgent.py プロジェクト: ptakha/DIRAC-1
class InputDataAgent(OptimizerModule):
    """
      The specific Optimizer must provide the following methods:
      - initializeOptimizer() before each execution cycle
      - checkJob() - the main method called for each job
  """

    #############################################################################
    def initializeOptimizer(self):
        """Read the agent options and create the clients and caches it uses.

        :return: S_OK()
        """
        # Agent options
        defaultMinorStatus = 'Input Data Not Available'
        self.failedMinorStatus = self.am_getOption('/FailedJobStatus',
                                                   defaultMinorStatus)
        # When enabled, __resolveInputData also queries the catalog for the
        # file metadata and merges it with the replica information.
        self.checkFileMetadata = self.am_getOption('CheckFileMetadata', True)

        # Service clients
        self.dataManager = DataManager()
        self.resourceStatus = ResourceStatus()
        self.fc = FileCatalog()

        # SE -> sites cache used by __getSitesForSE; it is emptied once it is
        # older than cacheLength seconds.
        self.seToSiteMapping = {}
        self.lastCScheck = 0
        self.cacheLength = 600

        return S_OK()

    #############################################################################
    def checkJob(self, job, classAdJob):
        """Resolve the input data of a job and pass it to the next optimizer.

        Called for each job by the Optimizer framework. Jobs without input
        data, and jobs for which this optimizer has already stored its
        information, go straight to the next optimizer.

        :param job: job identifier
        :param classAdJob: job description (not used by this method)
        :return: the result of setNextOptimizer(), or the failing result of
                 the input data lookup / resolution
        """
        inputDataResult = self.jobDB.getInputData(job)
        if not inputDataResult['OK']:
            self.log.warn('Failed to get input data from JobdB for %s' % (job))
            self.log.warn(inputDataResult['Message'])
            return inputDataResult
        if not inputDataResult['Value']:
            self.log.verbose('Job %s has no input data requirement' % (job))
            return self.setNextOptimizer(job)

        # Check if we already executed this Optimizer and the input data is resolved
        optimizerName = self.am_getModuleParam('optimizerName')
        storedInfo = self.getOptimizerJobInfo(job, optimizerName)
        alreadyResolved = storedInfo['OK'] and len(storedInfo['Value'])
        if not alreadyResolved:
            self.log.verbose(
                'Job %s has an input data requirement and will be processed' %
                (job))
            resolution = self.__resolveInputData(job, inputDataResult['Value'])
            if not resolution['OK']:
                self.log.warn(resolution['Message'])
                return resolution

        return self.setNextOptimizer(job)

    #############################################################################
    def __resolveInputData(self, job, inputData):
        """Look up the replicas of the input files and store the resolved data.

        The active replicas are fetched, checked with __checkReplicas and,
        when self.checkFileMetadata is set, merged into the file metadata
        returned by the catalog. If the metadata lookup fails, or reports
        failed files, the plain replica information is kept instead.

        :param job: job identifier
        :param inputData: iterable of input file names, optionally prefixed
                          with 'LFN:'
        :return: S_OK( {'Value': ..., 'SiteCandidates': ...} ) or the failing
                 result of the step that went wrong
        """
        lfns = [fname.replace('LFN:', '') for fname in inputData]

        start = time.time()
        # In order to place jobs on Hold if a certain SE is banned we first
        # need to check whether the replicas are really available
        replicas = self.dataManager.getActiveReplicas(lfns)
        timing = time.time() - start
        self.log.verbose('Catalog Replicas Lookup Time: %.2f seconds ' %
                         (timing))
        if not replicas['OK']:
            self.log.warn(replicas['Message'])
            return replicas

        replicaDict = replicas['Value']

        siteCandidates = self.__checkReplicas(job, replicaDict)
        if not siteCandidates['OK']:
            self.log.warn(siteCandidates['Message'])
            return siteCandidates

        if self.checkFileMetadata:
            start = time.time()
            guidDict = self.fc.getFileMetadata(lfns)
            timing = time.time() - start
            self.log.info('Catalog Metadata Lookup Time: %.2f seconds ' %
                          (timing))

            guids = True
            if not guidDict['OK']:
                # A failed call carries no 'Value', so it must not be inspected
                self.log.warn(guidDict['Message'])
                guids = False
            else:
                failed = guidDict['Value']['Failed']
                if failed:
                    self.log.warn('Failed to establish some GUIDs')
                    self.log.warn(failed)
                    guids = False

            if guids:
                # Merge the replica information into the per-file metadata
                for lfn, reps in replicaDict['Successful'].items():
                    guidDict['Value']['Successful'][lfn].update(reps)
                replicas = guidDict

        resolvedData = {
            'Value': replicas,
            'SiteCandidates': siteCandidates['Value'],
        }
        result = self.setOptimizerJobInfo(
            job, self.am_getModuleParam('optimizerName'), resolvedData)
        if not result['OK']:
            self.log.warn(result['Message'])
            return result
        return S_OK(resolvedData)

    #############################################################################
    def __checkReplicas(self, job, replicaDict):
        """Check that every input LFN has replicas and find the candidate sites.

        Files listed under 'Successful' with no replicas, and every file
        listed under 'Failed', are reported as problematic. When there is at
        least one of them, the list is stored as a job parameter and an error
        is returned.

        :param job: job identifier
        :param replicaDict: mapping with a 'Successful' entry (lfn -> replicas)
                            and optionally a 'Failed' entry (lfn -> cause)
        :return: the result of __getSiteCandidates() on the successful
                 replicas, or S_ERROR( message )
        """
        if 'Successful' not in replicaDict:
            return S_ERROR('No replica Info available')

        badLFNs = [
            'LFN:%s Problem: No replicas available' % (lfn)
            for lfn, reps in replicaDict['Successful'].items() if not reps
        ]

        if 'Failed' in replicaDict:
            for lfn, cause in replicaDict['Failed'].items():
                badLFNs.append('LFN:%s Problem: %s' % (lfn, cause))

        if badLFNs:
            self.log.info('Found %s problematic LFN(s) for job %s' %
                          (len(badLFNs), job))
            param = '\n'.join(badLFNs)
            self.log.info(param)
            result = self.setJobParam(job,
                                      self.am_getModuleParam('optimizerName'),
                                      param)
            # Failing to record the parameter is logged but does not change
            # the outcome: the input data is unavailable either way.
            if not result['OK']:
                self.log.error(result['Message'])
            return S_ERROR('Input Data Not Available')

        return self.__getSiteCandidates(replicaDict['Successful'])

    #############################################################################
    # FIXME: right now this is unused...
    def __checkActiveSEs(self, job, replicaDict):
        """Keep only the active replicas and work out the candidate sites.

        The replicas are filtered with dataManager.checkActiveReplicas() and
        re-checked with __checkReplicas(); the outcome is stored as optimizer
        job information.

        :param job: job identifier
        :param replicaDict: replica information to be filtered
        :return: S_OK( {'Value': ..., 'SiteCandidates': ...} ), an "On Hold"
                 S_ERROR when a check fails, or the failing result of
                 setOptimizerJobInfo()
        """
        # Now let's check if some replicas might not be available due to banned SE's
        activeReplicas = self.dataManager.checkActiveReplicas(replicaDict)
        if not activeReplicas['OK']:
            # due to banned SE's input data might not be available
            holdMsg = "On Hold: Missing replicas due to banned SE"
            self.log.info(holdMsg)
            self.log.warn(activeReplicas['Message'])
            return S_ERROR(holdMsg)

        siteCandidates = self.__checkReplicas(job, activeReplicas['Value'])
        if not siteCandidates['OK']:
            # due to banned SE's input data is not available at a single site
            holdMsg = "On Hold: Input data not Available due to banned SE"
            self.log.info(holdMsg)
            self.log.warn(siteCandidates['Message'])
            return S_ERROR(holdMsg)

        resolvedData = {
            'Value': activeReplicas,
            'SiteCandidates': siteCandidates['Value'],
        }
        optimizerName = self.am_getModuleParam('optimizerName')
        stored = self.setOptimizerJobInfo(job, optimizerName, resolvedData)
        if stored['OK']:
            return S_OK(resolvedData)
        self.log.warn(stored['Message'])
        return stored

    #############################################################################
    def __getSitesForSE(self, se):
        """Return the sites that have the given SE as a local one.

        Answers come from the local SE -> sites cache when possible. The
        whole cache is dropped once it is older than self.cacheLength
        seconds, and only successful lookups are cached.

        :param se: storage element name
        :return: S_OK( list of sites ) or the failing result of getSitesForSE()
        """
        # Empty the cache if too old
        cacheAge = time.time() - self.lastCScheck
        if cacheAge > self.cacheLength:
            self.log.verbose('Resetting the SE to site mapping cache')
            self.seToSiteMapping = {}
            self.lastCScheck = time.time()

        if se in self.seToSiteMapping:
            return S_OK(self.seToSiteMapping[se])

        lookup = getSitesForSE(se)
        if lookup['OK']:
            self.seToSiteMapping[se] = list(lookup['Value'])
        return lookup

    #############################################################################
    def __getSiteCandidates(self, inputData):
        """Return the candidate sites for a job based on its input data.

        A site is a candidate when every input file has a replica on a
        storage element associated with it. For each candidate the number of
        files readable from disk and from tape is counted; a file available
        on disk at a site is never counted as tape for that site.

        :param inputData: mapping lfn -> replicas, where the keys of each
                          replicas mapping are storage element names
        :return: S_OK( {site: {'disk': count, 'tape': count}} ) or
                 S_ERROR( 'No candidate sites available' )
        """
        # Sites holding a replica of each file
        fileSEs = {}
        for lfn, replicas in inputData.items():
            siteList = []
            for se in replicas.keys():
                sites = self.__getSitesForSE(se)
                if sites['OK']:
                    siteList += sites['Value']
            fileSEs[lfn] = uniqueElements(siteList)

        # Candidate sites are those common to all the files
        siteCandidates = None
        for sites in fileSEs.values():
            if siteCandidates is None:
                siteCandidates = sites
            else:
                siteCandidates = [
                    site for site in siteCandidates if site in sites
                ]

        if not siteCandidates:
            return S_ERROR('No candidate sites available')

        # In addition, check number of files on tape and disk for each site
        # for optimizations during scheduling
        siteResult = {}
        for site in siteCandidates:
            siteResult[site] = {'disk': [], 'tape': []}

        seDict = {}
        for lfn, replicas in inputData.items():
            for se in replicas.keys():
                if se not in seDict:
                    sites = self.__getSitesForSE(se)
                    if not sites['OK']:
                        continue
                    try:
                        result = self.resourceStatus.getStorageElementStatus(
                            se, statusType='ReadAccess')
                        if not result['OK']:
                            continue
                        # Work on a copy so that the dictionary returned by
                        # the status client is not modified in place
                        seParams = dict(result['Value'][se])
                        result = getStorageElementOptions(se)
                        if not result['OK']:
                            continue
                        seParams.update(result['Value'])
                    except Exception:
                        self.log.exception(
                            'Failed to instantiate StorageElement( %s )' % se)
                        continue
                    # Only register the SE once all its parameters are known,
                    # so a failed lookup never leaves a partial entry behind
                    seDict[se] = {'Sites': sites['Value'], 'SEParams': seParams}

                seParams = seDict[se]['SEParams']
                for site in seDict[se]['Sites']:
                    if site not in siteResult:
                        continue
                    onDisk = siteResult[site]['disk']
                    onTape = siteResult[site]['tape']
                    # NOTE(review): 'ReadAccess' is only tested for truthiness;
                    # confirm that a banned status is reported as a false value.
                    if seParams['ReadAccess'] and seParams['DiskSE']:
                        if lfn not in onDisk:
                            onDisk.append(lfn)
                            # A disk replica supersedes a tape one at this site
                            if lfn in onTape:
                                onTape.remove(lfn)
                    if seParams['ReadAccess'] and seParams['TapeSE']:
                        if lfn not in onTape and lfn not in onDisk:
                            onTape.append(lfn)

        # Replace the file lists by their sizes
        for site in siteResult:
            siteResult[site]['disk'] = len(siteResult[site]['disk'])
            siteResult[site]['tape'] = len(siteResult[site]['tape'])
        return S_OK(siteResult)