Python TransformationClient.getCounters Examples

Programming Language: Python

Namespace/Package Name: DIRAC.TransformationSystem.Client.TransformationClient

Method/Function: getCounters

Examples at hotexamples.com: 5

Python TransformationClient.getCounters - 5 examples found. These are the top rated real world Python examples of DIRAC.TransformationSystem.Client.TransformationClient.TransformationClient.getCounters extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

getTransformations(30)

getTransformation(21)

getTransformationFiles(19)

addFilesToTransformation(19)

setTransformationParameter(17)

TransformationClient(16)

createTransformationInputDataQuery(11)

getTransformationTasks(10)

getTransformationParameters(10)

setFileStatusForTransformation(10)

setServer(9)

addTransformation(8)

getTransformationSummaryWeb(7)

cleanTransformation(6)

getTransformationInputDataQuery(6)

getTransformationStats(5)

getServer(5)

getTransformationLogging(5)

createTransformationMetaQuery(4)

setTaskStatusAndWmsID(4)

getTransformationRuns(4)

getTransformationMetaQuery(4)

getTransformationTaskStats(3)

getCounters(3)

extendTransformation(3)

getTransformationFilesCount(3)

addFile(2)

moveFilesToDerivedTransformation(2)

removeFile(2)

removeReplica(2)

setReplicaStatus(2)

addTaskForTransformation(2)

addReplica(2)

addDirectory(2)

getReplicas(2)

getBookkeepingQuery(2)

getAdditionalParameters(2)

deleteTransformation(2)

setTransformationRunStatus(1)

setTaskStatus(1)

addRunsMetadata(1)

setParameterToTransformationFiles(1)

getTransformationRunStats(1)

__init__(1)

getTransformationCountersStatuses(1)

getTasksToSubmit(1)

getRunsMetadata(1)

getDestinationForRun(1)

updateTransformationCounters(1)

Example #1

Show file

File: Utilities.py Project: DIRACGrid/DIRAC

class PluginUtilities(object):
  """
  Utility class used by plugins
  """

  def __init__(self, plugin='Standard', transClient=None, dataManager=None, fc=None,
               debug=False, transInThread=None, transID=None):
    """
    Constructor: set up the clients and the default state

    :param plugin: plugin name, also used as name of the sub-logger
    :param transClient: object used instead of a new TransformationClient()
    :param dataManager: object used instead of a new DataManager()
    :param fc: object used instead of a new FileCatalog()
    :param debug: if true, logVerbose() messages are sent at info level
    :param transInThread: dictionary looked up by transformation ID in setParameters() to get
                          the prefix of log messages; a new empty dictionary if None
    :param transID: transformation ID, replaced when setParameters() is called
    """
    # Clients: only instantiate those that were not handed over by the caller
    self.transClient = TransformationClient() if transClient is None else transClient
    self.dm = DataManager() if dataManager is None else dataManager
    self.fc = FileCatalog() if fc is None else fc
    self.dmsHelper = DMSHelpers()

    # Plugin and transformation identification
    self.plugin = plugin
    self.transID = transID
    self.params = {}
    self.transString = ''
    self.debug = debug
    self.transInThread = {} if transInThread is None else transInThread

    # Grouping settings (0 means "not read from the configuration yet") and size cache
    self.groupSize = 0
    self.maxFiles = 0
    self.cachedLFNSize = {}

    self.log = gLogger.getSubLogger(plugin)

  def logVerbose(self, message, param=''):
    """ logger helper """
    if self.debug:
      self.log.info('(V)' + self.transString + message, param)
    else:
      self.log.verbose(self.transString + message, param)

  def logDebug(self, message, param=''):
    """ logger helper """
    self.log.debug(self.transString + message, param)

  def logInfo(self, message, param=''):
    """ logger helper """
    self.log.info(self.transString + message, param)

  def logWarn(self, message, param=''):
    """ logger helper """
    self.log.warn(self.transString + message, param)

  def logError(self, message, param=''):
    """ logger helper """
    self.log.error(self.transString + message, param)

  def logException(self, message, param='', lException=False):
    """ logger helper """
    self.log.exception(self.transString + message, param, lException)

  def setParameters(self, params):
    """ Set the transformation parameters and extract transID """
    self.params = params
    self.transID = params['TransformationID']
    self.transString = self.transInThread.get(self.transID, ' [NoThread] [%d] ' % self.transID)

  # @timeThis
  def groupByReplicas(self, files, status):
    """
    Generates tasks based on the location of the input data

    :param dict files: replicas of the files to be grouped, e.g.
              {'/this/is/at.1': ['SE1'],
               '/this/is/at.12': ['SE1', 'SE2'],
               '/this/is/at.2': ['SE2'],
               '/this/is/at_123': ['SE1', 'SE2', 'SE3'],
               '/this/is/at_23': ['SE2', 'SE3'],
               '/this/is/at_4': ['SE4']}
    :param str status: if 'Flush', groups smaller than the group size also make a task
    :returns: S_OK with a list of (SE, [lfns]) tuples

    """
    tasks = []
    nTasks = 0

    if not files:
      return S_OK(tasks)

    # Work on a copy: files are removed as they get included in tasks
    files = dict(files)

    # Parameters
    if not self.groupSize:
      self.groupSize = self.getPluginParam('GroupSize', 10)
    flush = (status == 'Flush')
    self.logVerbose(
        "groupByReplicas: %d files, groupSize %d, flush %s" %
        (len(files), self.groupSize, flush))

    # Consider files by groups of SEs, a file is only in one group
    # Then consider files site by site, but a file can now be at more than one site
    for groupSE in (True, False):
      if not files:
        break
      seFiles = getFileGroups(files, groupSE=groupSE)
      self.logDebug("fileGroups set: ", seFiles)

      for replicaSE in sortSEs(seFiles):
        lfns = seFiles[replicaSE]
        if not lfns:
          continue
        lfnsInTasks = []
        for taskLfns in breakListIntoChunks(lfns, self.groupSize):
          # Incomplete chunks only make a task when flushing
          if flush or (len(taskLfns) >= self.groupSize):
            tasks.append((replicaSE, taskLfns))
            lfnsInTasks += taskLfns
        # Remove files from global list
        for lfn in lfnsInTasks:
          files.pop(lfn)
        if not groupSE:
          # In case the file was at more than one site, remove it from the other sites' list
          # A set makes each membership test constant-time
          usedLfns = set(lfnsInTasks)
          for se in [se for se in seFiles if se != replicaSE]:
            seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in usedLfns]
      self.logVerbose(
          "groupByReplicas: %d tasks created (groupSE %s)" %
          (len(tasks) - nTasks, str(groupSE)), "%d files not included in tasks" %
          len(files))
      nTasks = len(tasks)

    return S_OK(tasks)

  def createTasksBySize(self, lfns, replicaSE, fileSizes=None, flush=False):
    """
    Split files in groups according to the size and create tasks for a given SE
    """
    tasks = []
    if fileSizes is None:
      fileSizes = self._getFileSize(lfns).get('Value')
    if fileSizes is None:
      self.logWarn('Error getting file sizes, no tasks created')
      return tasks
    taskLfns = []
    taskSize = 0
    if not self.groupSize:
      # input size in GB converted to bytes
      self.groupSize = float(self.getPluginParam('GroupSize', 1.)) * 1000 * 1000 * 1000
    if not self.maxFiles:
      # FIXME: prepare for chaging the name of the ambiguoug  CS option
      self.maxFiles = self.getPluginParam('MaxFilesPerTask', self.getPluginParam('MaxFiles', 100))
    lfns = sorted(lfns, key=fileSizes.get)
    for lfn in lfns:
      size = fileSizes.get(lfn, 0)
      if size:
        if size > self.groupSize:
          tasks.append((replicaSE, [lfn]))
        else:
          taskSize += size
          taskLfns.append(lfn)
          if (taskSize > self.groupSize) or (len(taskLfns) >= self.maxFiles):
            tasks.append((replicaSE, taskLfns))
            taskLfns = []
            taskSize = 0
    if flush and taskLfns:
      tasks.append((replicaSE, taskLfns))
    if not tasks and not flush and taskLfns:
      self.logVerbose(
          'Not enough data to create a task, and flush not set (%d bytes for groupSize %d)' %
          (taskSize, self.groupSize))
    return tasks

  # @timeThis
  def groupBySize(self, files, status):
    """
    Generate a task for a given amount of data

    :param files: replicas of the files to be grouped, anything accepted by dict() with LFNs as keys
    :param str status: if 'Flush', files left over in a group also make a task
    :returns: S_OK with a list of (SE, [lfns]) tuples, or the failed result of _getFileSize()
    """
    tasks = []
    nTasks = 0

    if not files:
      return S_OK(tasks)

    # Work on a copy: files are removed as they get included in tasks
    files = dict(files)
    nFiles = len(files)
    # Parameters
    if not self.groupSize:
      # input size in GB converted to bytes
      self.groupSize = float(self.getPluginParam('GroupSize', 1)) * 1000 * 1000 * 1000
    flush = (status == 'Flush')
    self.logVerbose(
        "groupBySize: %d files, groupSize: %d, flush: %s" %
        (len(files), self.groupSize, flush))

    # Get the file sizes
    res = self._getFileSize(list(files))
    if not res['OK']:
      return res
    fileSizes = res['Value']

    # Same two passes as in groupByReplicas: first with groupSE=True, then with groupSE=False
    for groupSE in (True, False):
      if not files:
        break
      seFiles = getFileGroups(files, groupSE=groupSE)

      for replicaSE in sorted(seFiles) if groupSE else sortSEs(seFiles):
        lfns = seFiles[replicaSE]
        newTasks = self.createTasksBySize(lfns, replicaSE, fileSizes=fileSizes, flush=flush)
        lfnsInTasks = []
        for task in newTasks:
          lfnsInTasks += task[1]
        tasks += newTasks

        # Remove the selected files from the size cache
        self.clearCachedFileSize(lfnsInTasks)
        if not groupSE:
          # Remove files from other SEs; a set makes each membership test constant-time
          usedLfns = set(lfnsInTasks)
          for se in [se for se in seFiles if se != replicaSE]:
            seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in usedLfns]
        # Remove files from global list
        for lfn in lfnsInTasks:
          files.pop(lfn)

      self.logVerbose(
          "groupBySize: %d tasks created with groupSE %s" %
          (len(tasks) - nTasks, str(groupSE)))
      self.logVerbose("groupBySize: %d files have not been included in tasks" % len(files))
      nTasks = len(tasks)

    # files only holds what is left over: report what was actually put in tasks
    self.logVerbose("Grouped %d files by size" % (nFiles - len(files)))
    return S_OK(tasks)

  def getExistingCounters(self, normalise=False, requestedSites=[]):
    res = self.transClient.getCounters('TransformationFiles', ['UsedSE'],
                                       {'TransformationID': self.params['TransformationID']})
    if not res['OK']:
      return res
    usageDict = {}
    for usedDict, count in res['Value']:
      usedSE = usedDict['UsedSE']
      if usedSE != 'Unknown':
        usageDict[usedSE] = count
    if requestedSites:
      siteDict = {}
      for se, count in usageDict.items():
        res = getSitesForSE(se)
        if not res['OK']:
          return res
        for site in res['Value']:
          if site in requestedSites:
            siteDict[site] = count
      usageDict = siteDict.copy()
    if normalise:
      usageDict = self._normaliseShares(usageDict)
    return S_OK(usageDict)

  # @timeThis
  def _getFileSize(self, lfns):
    """ Get the size of files, from the local cache when available, else from the catalog

        #FIXME: have to fill the cachedLFNSize!

        :param lfns: LFNs for which the size is requested
        :returns: S_OK with a dictionary {lfn: size}, or the failed result of
                  _getFileSizeFromCatalog()
    """
    requested = list(lfns)
    cache = dict(self.cachedLFNSize)

    # Sizes that are already known
    sizes = dict((lfn, cache[lfn]) for lfn in requested if lfn in cache)
    self.logDebug(
        "Found cache hit for File size for %d files out of %d" %
        (len(sizes), len(requested)))

    # Ask the catalog for the other ones, if any
    missing = [lfn for lfn in requested if lfn not in cache]
    if not missing:
      return S_OK(sizes)
    res = self._getFileSizeFromCatalog(missing, sizes)
    if not res['OK']:
      self.logError(res['Message'])
      return res
    return S_OK(res['Value'])

  # @timeThis
  def _getFileSizeFromCatalog(self, lfns, fileSizes):
    """
    Get the size of files from the catalog and keep it in the local cache

    :param lfns: LFNs to look up in the catalog
    :param dict fileSizes: sizes already known; it is copied, not modified
    :returns: S_OK with fileSizes completed with the sizes found, S_ERROR if the catalog call failed
    """
    lfnList = list(lfns)
    allSizes = dict(fileSizes)

    res = self.fc.getFileSize(lfnList)
    if not res['OK']:
      return S_ERROR("Failed to get sizes for all files: %s" % res['Message'])
    result = res['Value']
    failed = result['Failed']
    if failed:
      # Only report each distinct reason once
      reasons = sorted(set(failed.values()))
      self.logWarn("Failed to get sizes for %d files:" % len(failed), reasons)
    found = result['Successful']
    allSizes.update(found)
    self.cachedLFNSize.update(found)
    self.logVerbose("Got size of %d files from catalog" % len(lfnList))
    return S_OK(allSizes)

  def clearCachedFileSize(self, lfns):
    """ Utility function
    """
    for lfn in [lfn for lfn in lfns if lfn in self.cachedLFNSize]:
      self.cachedLFNSize.pop(lfn)

  def getPluginParam(self, name, default=None):
    """ Get plugin parameters using specific settings or settings defined in the CS

        The value is looked up, by increasing priority, in the generic option
        TransformationPlugins/<name>, in the plugin-specific option
        TransformationPlugins/<plugin>/<name>, and in the transformation parameters
        (self.params).

        Caution: the value is converted to the type of the default value only when that
        type is list, int, float or bool. For any other type a value of a different type
        is returned as it was found (with a warning, unless the default is a str), and no
        conversion is attempted when default is None.

        :param str name: name of the parameter
        :param default: value returned if the parameter is not defined anywhere
        :returns: the value of the parameter
    """
    # get the value of a parameter looking 1st in the CS
    # The type of the default drives the conversion at the end
    if default is not None:
      valueType = type(default)
    else:
      valueType = None
    # First look at a generic value...
    optionPath = "TransformationPlugins/%s" % (name)
    value = Operations().getValue(optionPath, None)
    self.logVerbose("Default plugin param %s: '%s'" % (optionPath, value))
    # Then look at a plugin-specific value
    # The generic value is passed as default - presumably it is returned when the
    # specific option is not defined; verify against Operations.getValue
    optionPath = "TransformationPlugins/%s/%s" % (self.plugin, name)
    value = Operations().getValue(optionPath, value)
    self.logVerbose("Specific plugin param %s: '%s'" % (optionPath, value))
    if value is not None:
      default = value
    # Finally look at a transformation-specific parameter
    value = self.params.get(name, default)
    self.logVerbose(
        "Transformation plugin param %s: '%s'. Convert to %s" %
        (name, value, str(valueType)))
    if valueType and not isinstance(value, valueType):
      if valueType is list:
        # An empty value or the string 'None' gives an empty list
        try:
          value = ast.literal_eval(value) if value and value != 'None' else []
        # literal_eval('SE-DST') -> ValueError
        # literal_eval('SE_MC-DST') -> SyntaxError
        # Don't ask...
        # Not a python literal: handle it as a comma-separated list, ignoring spaces
        except (ValueError, SyntaxError):
          value = [val for val in value.replace(' ', '').split(',') if val]

      elif valueType is int:
        value = int(value)
      elif valueType is float:
        value = float(value)
      elif valueType is bool:
        # These values explicitly mean False, anything else follows python truthiness
        if value in ('False', 'No', 'None', None, 0):
          value = False
        else:
          value = bool(value)
      elif valueType is not str:
        # No conversion is done here: the value is returned as it was found
        self.logWarn(
            "Unknown parameter type (%s) for %s, passed as string" %
            (str(valueType), name))
    self.logVerbose("Final plugin param %s: '%s'" % (name, value))
    return value

  @staticmethod
  def _normaliseShares(originalShares):
    """ Normalize shares to 1 """
    total = sum(float(share) for share in originalShares.values())
    return dict([(site, 100. * float(share) / total if total else 0.)
                 for site, share in originalShares.items()])

  def uniqueSEs(self, ses):
    """ return a list of SEs that are not physically the same """
    newSEs = []
    for se in ses:
      if not self.isSameSEInList(se, newSEs):
        newSEs.append(se)
    return newSEs

  def isSameSE(self, se1, se2):
    """ Check if 2 SEs are indeed the same.

        :param se1: name of the first StorageElement
        :param se2: name of the second StorageElement

        :returns: True/False if they are considered the same.
                  See :py:mod:`~DIRAC.Resources.Storage.StorageElement.StorageElementItem.isSameSE`
    """
    if se1 == se2:
      return True

    return StorageElement(se1).isSameSE(StorageElement(se2))

  def isSameSEInList(self, se1, seList):
    """ Check if an SE is the same as any in a list """
    if se1 in seList:
      return True
    for se in seList:
      if self.isSameSE(se1, se):
        return True
    return False

  def closerSEs(self, existingSEs, targetSEs, local=False):
    """ Order the targetSEs such that the first ones are closer to existingSEs. Keep all elements in targetSEs
    """
    setTarget = set(targetSEs)
    sameSEs = set([se1 for se1 in setTarget for se2 in existingSEs if self.isSameSE(se1, se2)])
    targetSEs = setTarget - set(sameSEs)
    if targetSEs:
      # Some SEs are left, look for sites
      existingSites = [self.dmsHelper.getLocalSiteForSE(se).get('Value')
                       for se in existingSEs]
      existingSites = set([site for site in existingSites if site])
      closeSEs = set([se for se in targetSEs
                      if self.dmsHelper.getLocalSiteForSE(se).get('Value') in existingSites])
      # print existingSEs, existingSites, targetSEs, closeSEs
      otherSEs = targetSEs - closeSEs
      targetSEs = list(closeSEs)
      random.shuffle(targetSEs)
      if not local and otherSEs:
        otherSEs = list(otherSEs)
        random.shuffle(otherSEs)
        targetSEs += otherSEs
    else:
      targetSEs = []
    return (targetSEs + list(sameSEs)) if not local else targetSEs

Example #2

Show file

File: Utilities.py Project: pmusset/DIRAC

class PluginUtilities(object):
  """
  Utility class used by plugins
  """

  def __init__(self, plugin='Standard', transClient=None, dataManager=None, fc=None,
               debug=False, transInThread=None, transID=None):
    """
    Constructor: set up the clients and the default state

    :param plugin: plugin name, also used in the name of the sub-logger
    :param transClient: object used instead of a new TransformationClient()
    :param dataManager: object used instead of a new DataManager()
    :param fc: object used instead of a new FileCatalog()
    :param debug: if true, logVerbose() messages are sent at info level
    :param transInThread: dictionary looked up by transformation ID to get the string
                          appended to the logger name; a new empty dictionary if None
    :param transID: transformation ID, replaced when setParameters() is called
    """
    # Clients: only instantiate those that were not handed over by the caller
    self.transClient = TransformationClient() if transClient is None else transClient
    self.dm = DataManager() if dataManager is None else dataManager
    self.fc = FileCatalog() if fc is None else fc
    self.dmsHelper = DMSHelpers()

    # Plugin and transformation identification
    self.plugin = plugin
    self.transID = transID
    self.params = {}
    self.transString = ''
    self.debug = debug
    self.transInThread = {} if transInThread is None else transInThread

    # Grouping settings (0 means "not read from the configuration yet") and size cache
    self.groupSize = 0
    self.maxFiles = 0
    self.cachedLFNSize = {}

    # The logger name carries the thread string, or a default one with the transformation ID
    threadTag = self.transInThread.get(self.transID, ' [NoThread] [%s] ' % self.transID)
    self.log = gLogger.getSubLogger(self.plugin + threadTag)
    # FIXME: This doesn't work (yet) but should soon, will allow scripts to get the context
    self.log.showHeaders(True)

  def logVerbose(self, message, param=''):
    """ logger helper """
    if self.debug:
      log = gLogger.getSubLogger(self.plugin + ' (V)' +
                                 self.transInThread.get(self.transID, ' [NoThread] [%d] ' % self.transID))
      log.info(message, param)
    else:
      self.log.verbose(message, param)

  def logDebug(self, message, param=''):
    """ logger helper """
    self.log.debug(message, param)

  def logInfo(self, message, param=''):
    """ logger helper """
    self.log.info(message, param)

  def logWarn(self, message, param=''):
    """ logger helper """
    self.log.warn(message, param)

  def logError(self, message, param=''):
    """ logger helper """
    self.log.error(message, param)

  def logException(self, message, param='', lException=False):
    """ logger helper """
    self.log.exception(message, param, lException)

  def setParameters(self, params):
    """ Store the transformation parameters, derive the transformation ID and renew the logger

        :param dict params: transformation parameters, must contain 'TransformationID'
    """
    self.params = params
    transID = params['TransformationID']
    self.transID = transID
    # The logger name carries the thread string, or a default one with the transformation ID
    threadTag = self.transInThread.get(transID, ' [NoThread] [%d] ' % transID)
    self.log = gLogger.getSubLogger(self.plugin + threadTag)

  # @timeThis
  def groupByReplicas(self, files, status):
    """
    Generates tasks based on the location of the input data

    :param dict files: replicas of the files to be grouped, e.g.
              {'/this/is/at.1': ['SE1'],
               '/this/is/at.12': ['SE1', 'SE2'],
               '/this/is/at.2': ['SE2'],
               '/this/is/at_123': ['SE1', 'SE2', 'SE3'],
               '/this/is/at_23': ['SE2', 'SE3'],
               '/this/is/at_4': ['SE4']}
    :param str status: if 'Flush', groups smaller than the group size also make a task
    :returns: S_OK with a list of (SE, [lfns]) tuples

    """
    tasks = []
    nTasks = 0

    if not files:
      return S_OK(tasks)

    # Work on a copy: files are removed as they get included in tasks
    files = dict(files)

    # Parameters
    if not self.groupSize:
      self.groupSize = self.getPluginParam('GroupSize', 10)
    flush = (status == 'Flush')
    self.logVerbose(
        "groupByReplicas: %d files, groupSize %d, flush %s" %
        (len(files), self.groupSize, flush))

    # Consider files by groups of SEs, a file is only in one group
    # Then consider files site by site, but a file can now be at more than one site
    for groupSE in (True, False):
      if not files:
        break
      seFiles = getFileGroups(files, groupSE=groupSE)
      self.logDebug("fileGroups set: ", seFiles)

      for replicaSE in sortSEs(seFiles):
        lfns = seFiles[replicaSE]
        if not lfns:
          continue
        lfnsInTasks = []
        for taskLfns in breakListIntoChunks(lfns, self.groupSize):
          # Incomplete chunks only make a task when flushing
          if flush or (len(taskLfns) >= self.groupSize):
            tasks.append((replicaSE, taskLfns))
            lfnsInTasks += taskLfns
        # Remove files from global list
        for lfn in lfnsInTasks:
          files.pop(lfn)
        if not groupSE:
          # In case the file was at more than one site, remove it from the other sites' list
          # A set makes each membership test constant-time
          usedLfns = set(lfnsInTasks)
          for se in [se for se in seFiles if se != replicaSE]:
            seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in usedLfns]
      self.logVerbose(
          "groupByReplicas: %d tasks created (groupSE %s)" %
          (len(tasks) - nTasks, str(groupSE)), "%d files not included in tasks" %
          len(files))
      nTasks = len(tasks)

    return S_OK(tasks)

  def createTasksBySize(self, lfns, replicaSE, fileSizes=None, flush=False):
    """
    Split files in groups according to the size and create tasks for a given SE
    """
    tasks = []
    if fileSizes is None:
      fileSizes = self._getFileSize(lfns).get('Value')
    if fileSizes is None:
      self.logWarn('Error getting file sizes, no tasks created')
      return tasks
    taskLfns = []
    taskSize = 0
    if not self.groupSize:
      # input size in GB converted to bytes
      self.groupSize = float(self.getPluginParam('GroupSize', 1.)) * 1000 * 1000 * 1000
    if not self.maxFiles:
      # FIXME: prepare for chaging the name of the ambiguoug  CS option
      self.maxFiles = self.getPluginParam('MaxFilesPerTask', self.getPluginParam('MaxFiles', 100))
    lfns = sorted(lfns, key=fileSizes.get)
    for lfn in lfns:
      size = fileSizes.get(lfn, 0)
      if size:
        if size > self.groupSize:
          tasks.append((replicaSE, [lfn]))
        else:
          taskSize += size
          taskLfns.append(lfn)
          if (taskSize > self.groupSize) or (len(taskLfns) >= self.maxFiles):
            tasks.append((replicaSE, taskLfns))
            taskLfns = []
            taskSize = 0
    if flush and taskLfns:
      tasks.append((replicaSE, taskLfns))
    if not tasks and not flush and taskLfns:
      self.logVerbose(
          'Not enough data to create a task, and flush not set (%d bytes for groupSize %d)' %
          (taskSize, self.groupSize))
    return tasks

  # @timeThis
  def groupBySize(self, files, status):
    """
    Generate a task for a given amount of data

    :param files: replicas of the files to be grouped, anything accepted by dict() with LFNs as keys
    :param str status: if 'Flush', files left over in a group also make a task
    :returns: S_OK with a list of (SE, [lfns]) tuples, or the failed result of _getFileSize()
    """
    tasks = []
    nTasks = 0

    if not files:
      return S_OK(tasks)

    # Work on a copy: files are removed as they get included in tasks
    files = dict(files)
    nFiles = len(files)
    # Parameters
    if not self.groupSize:
      # input size in GB converted to bytes
      self.groupSize = float(self.getPluginParam('GroupSize', 1)) * 1000 * 1000 * 1000
    flush = (status == 'Flush')
    self.logVerbose(
        "groupBySize: %d files, groupSize: %d, flush: %s" %
        (len(files), self.groupSize, flush))

    # Get the file sizes
    res = self._getFileSize(list(files))
    if not res['OK']:
      return res
    fileSizes = res['Value']

    # Same two passes as in groupByReplicas: first with groupSE=True, then with groupSE=False
    for groupSE in (True, False):
      if not files:
        break
      seFiles = getFileGroups(files, groupSE=groupSE)

      for replicaSE in sorted(seFiles) if groupSE else sortSEs(seFiles):
        lfns = seFiles[replicaSE]
        newTasks = self.createTasksBySize(lfns, replicaSE, fileSizes=fileSizes, flush=flush)
        lfnsInTasks = []
        for task in newTasks:
          lfnsInTasks += task[1]
        tasks += newTasks

        # Remove the selected files from the size cache
        self.clearCachedFileSize(lfnsInTasks)
        if not groupSE:
          # Remove files from other SEs; a set makes each membership test constant-time
          usedLfns = set(lfnsInTasks)
          for se in [se for se in seFiles if se != replicaSE]:
            seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in usedLfns]
        # Remove files from global list
        for lfn in lfnsInTasks:
          files.pop(lfn)

      self.logVerbose(
          "groupBySize: %d tasks created with groupSE %s" %
          (len(tasks) - nTasks, str(groupSE)))
      self.logVerbose("groupBySize: %d files have not been included in tasks" % len(files))
      nTasks = len(tasks)

    # files only holds what is left over: report what was actually put in tasks
    self.logVerbose("Grouped %d files by size" % (nFiles - len(files)))
    return S_OK(tasks)

  def getExistingCounters(self, normalise=False, requestedSites=[]):
    res = self.transClient.getCounters('TransformationFiles', ['UsedSE'],
                                       {'TransformationID': self.params['TransformationID']})
    if not res['OK']:
      return res
    usageDict = {}
    for usedDict, count in res['Value']:
      usedSE = usedDict['UsedSE']
      if usedSE != 'Unknown':
        usageDict[usedSE] = count
    if requestedSites:
      siteDict = {}
      for se, count in usageDict.items():
        res = getSitesForSE(se)
        if not res['OK']:
          return res
        for site in res['Value']:
          if site in requestedSites:
            siteDict[site] = count
      usageDict = siteDict.copy()
    if normalise:
      usageDict = self._normaliseShares(usageDict)
    return S_OK(usageDict)

  # @timeThis
  def _getFileSize(self, lfns):
    """ Get the size of files, from the local cache when available, else from the catalog

        #FIXME: have to fill the cachedLFNSize!

        :param lfns: LFNs for which the size is requested
        :returns: S_OK with a dictionary {lfn: size}, or the failed result of
                  _getFileSizeFromCatalog()
    """
    requested = list(lfns)
    cache = dict(self.cachedLFNSize)

    # Sizes that are already known
    sizes = dict((lfn, cache[lfn]) for lfn in requested if lfn in cache)
    self.logDebug(
        "Found cache hit for File size for %d files out of %d" %
        (len(sizes), len(requested)))

    # Ask the catalog for the other ones, if any
    missing = [lfn for lfn in requested if lfn not in cache]
    if not missing:
      return S_OK(sizes)
    res = self._getFileSizeFromCatalog(missing, sizes)
    if not res['OK']:
      self.logError(res['Message'])
      return res
    return S_OK(res['Value'])

  # @timeThis
  def _getFileSizeFromCatalog(self, lfns, fileSizes):
    """
    Get the size of files from the catalog and keep it in the local cache

    :param lfns: LFNs to look up in the catalog
    :param dict fileSizes: sizes already known; it is copied, not modified
    :returns: S_OK with fileSizes completed with the sizes found, S_ERROR if the catalog call failed
    """
    lfnList = list(lfns)
    allSizes = dict(fileSizes)

    res = self.fc.getFileSize(lfnList)
    if not res['OK']:
      return S_ERROR("Failed to get sizes for all files: %s" % res['Message'])
    result = res['Value']
    failed = result['Failed']
    if failed:
      # Only report each distinct reason once
      reasons = sorted(set(failed.values()))
      self.logWarn("Failed to get sizes for %d files:" % len(failed), reasons)
    found = result['Successful']
    allSizes.update(found)
    self.cachedLFNSize.update(found)
    self.logVerbose("Got size of %d files from catalog" % len(lfnList))
    return S_OK(allSizes)

  def clearCachedFileSize(self, lfns):
    """ Utility function
    """
    for lfn in [lfn for lfn in lfns if lfn in self.cachedLFNSize]:
      self.cachedLFNSize.pop(lfn)

  def getPluginParam(self, name, default=None):
    """ Get plugin parameters using specific settings or settings defined in the CS

        The value is looked up, by increasing priority, in the generic option
        TransformationPlugins/<name>, in the plugin-specific option
        TransformationPlugins/<plugin>/<name>, and in the transformation parameters
        (self.params).

        Caution: the value is converted to the type of the default value only when that
        type is list, int, float or bool. For any other type a value of a different type
        is returned as it was found (with a warning, unless the default is a str), and no
        conversion is attempted when default is None.

        :param str name: name of the parameter
        :param default: value returned if the parameter is not defined anywhere
        :returns: the value of the parameter
    """
    # get the value of a parameter looking 1st in the CS
    # The type of the default drives the conversion at the end
    if default is not None:
      valueType = type(default)
    else:
      valueType = None
    # First look at a generic value...
    optionPath = "TransformationPlugins/%s" % (name)
    value = Operations().getValue(optionPath, None)
    self.logVerbose("Default plugin param %s: '%s'" % (optionPath, value))
    # Then look at a plugin-specific value
    # The generic value is passed as default - presumably it is returned when the
    # specific option is not defined; verify against Operations.getValue
    optionPath = "TransformationPlugins/%s/%s" % (self.plugin, name)
    value = Operations().getValue(optionPath, value)
    self.logVerbose("Specific plugin param %s: '%s'" % (optionPath, value))
    if value is not None:
      default = value
    # Finally look at a transformation-specific parameter
    value = self.params.get(name, default)
    self.logVerbose(
        "Transformation plugin param %s: '%s'. Convert to %s" %
        (name, value, str(valueType)))
    if valueType and not isinstance(value, valueType):
      if valueType is list:
        # An empty value or the string 'None' gives an empty list
        try:
          value = ast.literal_eval(value) if value and value != 'None' else []
        # literal_eval('SE-DST') -> ValueError
        # literal_eval('SE_MC-DST') -> SyntaxError
        # Don't ask...
        # Not a python literal: handle it as a comma-separated list, ignoring spaces
        except (ValueError, SyntaxError):
          value = [val for val in value.replace(' ', '').split(',') if val]

      elif valueType is int:
        value = int(value)
      elif valueType is float:
        value = float(value)
      elif valueType is bool:
        # These values explicitly mean False, anything else follows python truthiness
        if value in ('False', 'No', 'None', None, 0):
          value = False
        else:
          value = bool(value)
      elif valueType is not str:
        # No conversion is done here: the value is returned as it was found
        self.logWarn(
            "Unknown parameter type (%s) for %s, passed as string" %
            (str(valueType), name))
    self.logVerbose("Final plugin param %s: '%s'" % (name, value))
    return value

  @staticmethod
  def _normaliseShares(originalShares):
    """ Normalize shares to 1 """
    total = sum(float(share) for share in originalShares.values())
    return dict([(site, 100. * float(share) / total if total else 0.)
                 for site, share in originalShares.items()])

  def uniqueSEs(self, ses):
    """ return a list of SEs that are not physically the same """
    newSEs = []
    for se in ses:
      if not self.isSameSEInList(se, newSEs):
        newSEs.append(se)
    return newSEs

  def isSameSE(self, se1, se2):
    """ Check if 2 SEs are indeed the same.

        :param se1: name of the first StorageElement
        :param se2: name of the second StorageElement

        :returns: True/False if they are considered the same.
                  See :py:mod:`~DIRAC.Resources.Storage.StorageElement.StorageElementItem.isSameSE`
    """
    if se1 == se2:
      return True

    return StorageElement(se1).isSameSE(StorageElement(se2))

  def isSameSEInList(self, se1, seList):
    """ Check if an SE is the same as any in a list """
    if se1 in seList:
      return True
    for se in seList:
      if self.isSameSE(se1, se):
        return True
    return False

  def closerSEs(self, existingSEs, targetSEs, local=False):
    """ Order the targetSEs such that the first ones are closer to existingSEs. Keep all elements in targetSEs
    """
    setTarget = set(targetSEs)
    sameSEs = set([se1 for se1 in setTarget for se2 in existingSEs if self.isSameSE(se1, se2)])
    targetSEs = setTarget - set(sameSEs)
    if targetSEs:
      # Some SEs are left, look for sites
      existingSites = [self.dmsHelper.getLocalSiteForSE(se).get('Value')
                       for se in existingSEs]
      existingSites = set([site for site in existingSites if site])
      closeSEs = set([se for se in targetSEs
                      if self.dmsHelper.getLocalSiteForSE(se).get('Value') in existingSites])
      # print existingSEs, existingSites, targetSEs, closeSEs
      otherSEs = targetSEs - closeSEs
      targetSEs = list(closeSEs)
      random.shuffle(targetSEs)
      if not local and otherSEs:
        otherSEs = list(otherSEs)
        random.shuffle(otherSEs)
        targetSEs += otherSEs
    else:
      targetSEs = []
    return (targetSEs + list(sameSEs)) if not local else targetSEs

  @staticmethod
  def seParamtoList(inputParam):
    """Transform ``inputParam`` to list.

    :param inputParam: can be string, list, or string representation of list
    :returns: list
    """
    if not inputParam:
      return []
    if inputParam.count('['):
      return eval(inputParam)  # pylint: disable=eval-used
    elif isinstance(inputParam, list):
      return inputParam
    return [inputParam]

Example #3

Show file

class PluginUtilities(object):
    """
  Utility class used by plugins
  """
    def __init__(self,
                 plugin='Standard',
                 transClient=None,
                 dataManager=None,
                 fc=None,
                 debug=False,
                 transInThread=None,
                 transID=None):
        """
        c'tor: store the settings and create default clients for those not supplied

        :param plugin: plugin name, also used to name the sub-logger
        :param transClient: transformation client, a TransformationClient is created when None
        :param dataManager: data manager, a DataManager is created when None
        :param fc: file catalog, a FileCatalog is created when None
        :param debug: if set, logVerbose messages are printed at info level
        :param transInThread: dictionary transID -> log prefix, a new one is created when None
        :param transID: transformation ID
        """
        # Clients: keep the ones passed by the caller, otherwise build defaults
        self.transClient = TransformationClient() if transClient is None else transClient
        self.dm = DataManager() if dataManager is None else dataManager
        self.fc = FileCatalog() if fc is None else fc

        self.dmsHelper = DMSHelpers()

        # Identification and logging settings
        self.plugin = plugin
        self.transID = transID
        self.debug = debug
        self.transString = ''
        self.transInThread = {} if transInThread is None else transInThread

        # Working data, filled in later by the other methods
        self.params = {}
        self.groupSize = 0
        self.maxFiles = 0
        self.cachedLFNSize = {}
        self.seConfig = {}

        self.log = gLogger.getSubLogger("%s/PluginUtilities" % plugin)

    def logVerbose(self, message, param=''):
        """ Log at verbose level, or at info level with a '(V)' marker when debug is set """
        if not self.debug:
            self.log.verbose(self.transString + message, param)
            return
        self.log.info('(V)' + self.transString + message, param)

    def logDebug(self, message, param=''):
        """ Log at debug level, the transformation prefix being prepended to the message """
        prefixed = self.transString + message
        self.log.debug(prefixed, param)

    def logInfo(self, message, param=''):
        """ Log at info level, the transformation prefix being prepended to the message """
        prefixed = self.transString + message
        self.log.info(prefixed, param)

    def logWarn(self, message, param=''):
        """ Log at warning level, the transformation prefix being prepended to the message """
        prefixed = self.transString + message
        self.log.warn(prefixed, param)

    def logError(self, message, param=''):
        """ Log at error level, the transformation prefix being prepended to the message """
        prefixed = self.transString + message
        self.log.error(prefixed, param)

    def logException(self, message, param='', lException=False):
        """ Forward the prefixed message, ``param`` and ``lException`` to the logger's exception() """
        prefixed = self.transString + message
        self.log.exception(prefixed, param, lException)

    def setParameters(self, params):
        """ Store the transformation parameters and rebuild the log prefix

        :param dict params: transformation parameters, 'TransformationID' is mandatory
        """
        self.params = params
        self.transID = params['TransformationID']
        # Prefix registered for this transformation, or a default one mentioning its ID
        fallback = ' [NoThread] [%d] ' % self.transID
        threadPrefix = self.transInThread.get(self.transID, fallback)
        self.transString = threadPrefix + '%s: ' % self.plugin

    @timeThis
    def groupByReplicas(self, files, status):
        """
        Generates tasks based on the location of the input data

        :param dict files: replicas of the files to group, e.g.
                  {'/this/is/at.1': ['SE1'],
                   '/this/is/at.12': ['SE1', 'SE2'],
                   '/this/is/at.2': ['SE2'],
                   '/this/is/at_123': ['SE1', 'SE2', 'SE3'],
                   '/this/is/at_23': ['SE2', 'SE3'],
                   '/this/is/at_4': ['SE4']}
        :param str status: transformation status, 'Flush' allows tasks smaller than
                  the group size (in the second pass only)
        :returns: S_OK(list of (replicaSE, list of LFNs) tuples)
        """
        tasks = []
        nTasks = 0

        if not files:
            return S_OK(tasks)

        # Work on a copy, files are removed from it as soon as they are in a task
        files = dict(files)

        # Parameters
        if not self.groupSize:
            self.groupSize = self.getPluginParam('GroupSize', 10)
        flush = (status == 'Flush')
        self.logVerbose("groupByReplicas: %d files, groupSize %d, flush %s" %
                        (len(files), self.groupSize, flush))

        # Consider files by groups of SEs, a file is only in one group
        # Then consider files site by site, but a file can now be at more than one site
        for groupSE in (True, False):
            if not files:
                break
            seFiles = getFileGroups(files, groupSE=groupSE)
            self.logDebug("fileGroups set: ", seFiles)

            for replicaSE in sortSEs(seFiles):
                lfns = seFiles[replicaSE]
                if not lfns:
                    continue
                lfnsInTasks = []
                for taskLfns in breakListIntoChunks(lfns, self.groupSize):
                    # Incomplete chunks are only accepted when flushing, in the second pass
                    if (flush and not groupSE) or len(taskLfns) >= self.groupSize:
                        tasks.append((replicaSE, taskLfns))
                        lfnsInTasks += taskLfns
                # Remove files from global list
                for lfn in lfnsInTasks:
                    files.pop(lfn)
                if not groupSE:
                    # In case the file was at more than one site, remove it from the other
                    # sites' list. A set makes each membership test O(1)
                    usedLfns = set(lfnsInTasks)
                    for se in [se for se in seFiles if se != replicaSE]:
                        seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in usedLfns]
            self.logVerbose(
                "groupByReplicas: %d tasks created (groupSE %s), %d files not included in tasks"
                % (len(tasks) - nTasks, str(groupSE), len(files)))
            nTasks = len(tasks)

        return S_OK(tasks)

    def createTasksBySize(self, lfns, replicaSE, fileSizes=None, flush=False):
        """
        Split files in groups according to the size and create tasks for a given SE

        :param lfns: LFNs to group
        :param replicaSE: SE (or group of SEs) attached to every task created
        :param dict fileSizes: LFN -> size, obtained with self._getFileSize when None
        :param bool flush: if True the files left over also make a (smaller) task
        :returns: list of (replicaSE, list of LFNs) tuples, empty when the sizes
                  cannot be obtained
        """
        tasks = []
        if fileSizes is None:
            fileSizes = self._getFileSize(lfns).get('Value')
        if fileSizes is None:
            self.logWarn('Error getting file sizes, no tasks created')
            return tasks
        if not self.groupSize:
            # input size in GB converted to bytes
            self.groupSize = float(self.getPluginParam('GroupSize', 1.)) * 1000 * 1000 * 1000
        if not self.maxFiles:
            self.maxFiles = self.getPluginParam('MaxFiles', 100)

        taskLfns = []
        taskSize = 0
        # Files whose size is unknown sort as 0: a None key cannot be compared with
        # numbers in Python 3, and these files are skipped below anyway
        for lfn in sorted(lfns, key=lambda name: fileSizes.get(name) or 0):
            size = fileSizes.get(lfn, 0)
            if not size:
                continue
            if size > self.groupSize:
                # A file bigger than the group size makes a task on its own
                tasks.append((replicaSE, [lfn]))
                continue
            taskSize += size
            taskLfns.append(lfn)
            if taskSize > self.groupSize or len(taskLfns) >= self.maxFiles:
                tasks.append((replicaSE, taskLfns))
                taskLfns = []
                taskSize = 0
        if flush and taskLfns:
            tasks.append((replicaSE, taskLfns))
        return tasks

    @timeThis
    def groupBySize(self, files, status):
        """
        Generate a task for a given amount of data

        :param dict files: files to group, converted with dict() and passed to getFileGroups
        :param str status: transformation status, 'Flush' allows incomplete tasks
        :returns: S_OK(list of (replicaSE, list of LFNs) tuples) or the error returned
                  when getting the file sizes
        """
        tasks = []
        nTasks = 0

        if not files:
            return S_OK(tasks)

        # Work on a copy, files are removed from it as soon as they are in a task
        files = dict(files)
        nFiles = len(files)
        # Parameters
        if not self.groupSize:
            # input size in GB converted to bytes
            self.groupSize = float(self.getPluginParam('GroupSize', 1)) * 1000 * 1000 * 1000
        flush = (status == 'Flush')
        self.logVerbose("groupBySize: %d files, groupSize: %d, flush: %s" %
                        (len(files), self.groupSize, flush))

        # Get the file sizes
        res = self._getFileSize(files.keys())
        if not res['OK']:
            return res
        fileSizes = res['Value']

        for groupSE in (True, False):
            if not files:
                break
            seFiles = getFileGroups(files, groupSE=groupSE)

            for replicaSE in sorted(seFiles) if groupSE else sortSEs(seFiles):
                lfns = seFiles[replicaSE]
                newTasks = self.createTasksBySize(lfns,
                                                  replicaSE,
                                                  fileSizes=fileSizes,
                                                  flush=flush)
                lfnsInTasks = [lfn for task in newTasks for lfn in task[1]]
                tasks += newTasks

                # Remove the selected files from the size cache
                self.clearCachedFileSize(lfnsInTasks)
                if not groupSE:
                    # Remove files from other SEs. A set makes each membership test O(1)
                    usedLfns = set(lfnsInTasks)
                    for se in [se for se in seFiles if se != replicaSE]:
                        seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in usedLfns]
                # Remove files from global list
                for lfn in lfnsInTasks:
                    files.pop(lfn)

            self.logVerbose("groupBySize: %d tasks created with groupSE %s" %
                            (len(tasks) - nTasks, str(groupSE)))
            self.logVerbose(
                "groupBySize: %d files have not been included in tasks" %
                len(files))
            nTasks = len(tasks)

        # 'files' now only holds what is left over: report what was actually grouped
        self.logVerbose("Grouped %d files by size" % (nFiles - len(files)))
        return S_OK(tasks)

    def getExistingCounters(self, normalise=False, requestedSites=[]):
        """ Get the number of files of the transformation per UsedSE, or per site

        :param bool normalise: if True the counters are passed through self._normaliseShares
        :param requestedSites: when not empty, the counters are given for these sites
                               instead of SEs
        :returns: S_OK(dict name -> counter) or the error of the failing call
        """
        res = self.transClient.getCounters(
            'TransformationFiles', ['UsedSE'],
            {'TransformationID': self.params['TransformationID']})
        if not res['OK']:
            return res
        # Files not yet assigned are reported with the 'Unknown' SE: ignore them
        usageDict = {}
        for usedDict, count in res['Value']:
            if usedDict['UsedSE'] != 'Unknown':
                usageDict[usedDict['UsedSE']] = count
        if requestedSites:
            perSite = {}
            for se, count in usageDict.items():
                sitesRes = getSitesForSE(se)
                if not sitesRes['OK']:
                    return sitesRes
                # NOTE(review): when several SEs belong to the same site the last
                # counter wins, counters are not summed - confirm this is intended
                perSite.update((site, count) for site in sitesRes['Value'] if site in requestedSites)
            usageDict = perSite
        if normalise:
            usageDict = self._normaliseShares(usageDict)
        return S_OK(usageDict)

    @timeThis
    def _getFileSize(self, lfns):
        """ Get file size from a cache, if not from the catalog

        :param lfns: LFNs whose size is requested
        :returns: S_OK(dict LFN -> size) or the error returned by _getFileSizeFromCatalog
        """
        requested = list(lfns)
        cache = dict(self.cachedLFNSize)

        # Sizes already known from the cache
        knownSizes = dict((lfn, cache[lfn]) for lfn in requested if lfn in cache)
        self.logDebug("Found cache hit for File size for %d files out of %d" %
                      (len(knownSizes), len(requested)))
        missing = [lfn for lfn in requested if lfn not in cache]
        if not missing:
            return S_OK(knownSizes)

        # The other ones are looked for in the catalog
        res = self._getFileSizeFromCatalog(missing, knownSizes)
        if not res['OK']:
            self.logError(res['Message'])
            return res
        return S_OK(res['Value'])

    @timeThis
    def _getFileSizeFromCatalog(self, lfns, fileSizes):
        """
        Get file size from the catalog

        :param lfns: LFNs to look for in the catalog
        :param dict fileSizes: sizes already known; not modified, a completed copy is returned
        :returns: S_OK(dict LFN -> size) or S_ERROR if the catalog call fails.
                  The sizes found are also stored in self.cachedLFNSize
        """
        wanted = list(lfns)
        allSizes = dict(fileSizes)

        res = self.fc.getFileSize(wanted)
        if not res['OK']:
            return S_ERROR("Failed to get sizes for all files: %s" %
                           res['Message'])
        failed = res['Value']['Failed']
        if failed:
            # Only report the distinct reasons, not one line per file
            self.logWarn("Failed to get sizes for %d files:" % len(failed),
                         sorted(set(failed.values())))
        found = res['Value']['Successful']
        allSizes.update(found)
        self.cachedLFNSize.update(found)
        self.logVerbose("Got size of %d files from catalog" % len(wanted))
        return S_OK(allSizes)

    def clearCachedFileSize(self, lfns):
        """ Utility function: remove the given LFNs from the file size cache

        :param lfns: LFNs to forget; those not in the cache, or given twice, are ignored
        """
        for lfn in lfns:
            # pop() with a default: no KeyError if the LFN is absent or already removed
            self.cachedLFNSize.pop(lfn, None)

    def getPluginParam(self, name, default=None):
        """ Get plugin parameters using specific settings or settings defined in the CS
        Caution: the type returned is that of the default value

        The value is looked for, each level overriding the previous one, in:
          - the ``default`` argument
          - the option TransformationPlugins/<name>
          - the option TransformationPlugins/<plugin>/<name>
          - the transformation parameters (self.params)

        :param str name: name of the parameter
        :param default: default value; when not None the result is converted to its type
                        (list, int, float and bool are handled)
        :returns: the value of the parameter
        """
        # get the value of a parameter looking 1st in the CS
        valueType = type(default) if default is not None else None
        # First look at a generic value...
        optionPath = "TransformationPlugins/%s" % (name)
        value = Operations().getValue(optionPath, None)
        self.logVerbose("Default plugin param %s: '%s'" % (optionPath, value))
        # Then look at a plugin-specific value
        optionPath = "TransformationPlugins/%s/%s" % (self.plugin, name)
        value = Operations().getValue(optionPath, value)
        self.logVerbose("Specific plugin param %s: '%s'" % (optionPath, value))
        if value is not None:
            default = value
        # Finally look at a transformation-specific parameter
        value = self.params.get(name, default)
        self.logVerbose("Transformation plugin param %s: '%s'. Convert to %s" %
                        (name, value, str(valueType)))
        if valueType and type(value) is not valueType:
            if valueType is list:
                try:
                    value = ast.literal_eval(
                        value) if value and value != 'None' else []
                except Exception:
                    # Not a python literal: take it as a comma separated list
                    value = [
                        val for val in value.replace(' ', '').split(',') if val
                    ]
            elif valueType is int:
                value = int(value)
            elif valueType is float:
                value = float(value)
            elif valueType is bool:
                # Strings that mean "no" must not be taken as True
                if value in ('False', 'No', 'None', None, 0):
                    value = False
                else:
                    value = bool(value)
            elif valueType is not str:
                self.logWarn(
                    "Unknown parameter type (%s) for %s, passed as string" %
                    (str(valueType), name))
        self.logVerbose("Final plugin param %s: '%s'" % (name, value))
        return value

    @staticmethod
    def _normaliseShares(originalShares):
        shares = originalShares.copy()
        total = 0.0
        for site in shares.keys():
            share = float(shares[site])
            shares[site] = share
            total += share
        for site in shares.keys():
            share = 100.0 * (shares[site] / total)
            shares[site] = share
        return shares

    def uniqueSEs(self, ses):
        """ Remove from a list the SEs that are the same as a previous one

        :param ses: SE names
        :returns: list of SEs, in the original order, for which self.isSameSEInList
                  found no match among those already selected
        """
        distinct = []
        for candidate in ses:
            if self.isSameSEInList(candidate, distinct):
                continue
            distinct.append(candidate)
        return distinct

    def isSameSE(self, se1, se2):
        """ Check if 2 SEs are the same, i.e. have the same name or the same
        'Host' and 'Path' storage parameters ('t1d1' being taken as 't0d1')

        The parameters are cached in self.seConfig. An SE whose parameters cannot
        be obtained is never considered the same as another one.

        :param se1: name of the first SE
        :param se2: name of the second SE
        :returns: True / False
        """
        if se1 == se2:
            return True
        for se in (se1, se2):
            if se not in self.seConfig:
                self.seConfig[se] = {}
                res = StorageElement(se).getStorageParameters('SRM2')
                if res['OK']:
                    params = res['Value']
                    for item in ('Host', 'Path'):
                        self.seConfig[se][item] = params[item].replace(
                            't1d1', 't0d1')
                else:
                    self.logError(
                        "Error getting StorageElement parameters for %s" % se,
                        res['Message'])

        config1 = self.seConfig[se1]
        # An empty configuration means that the parameters are unknown: two such
        # SEs would otherwise compare equal and be taken as the same storage
        return bool(config1) and config1 == self.seConfig[se2]

    def isSameSEInList(self, se1, seList):
        """ Tell whether ``se1`` belongs to ``seList``, by name or as an equivalent SE

        :param se1: name of the SE to look for
        :param seList: SE names to compare with
        :returns: True if ``se1`` is in ``seList`` or if self.isSameSE matches one of its elements
        """
        # Cheap membership test first, pairwise comparison only when needed
        return se1 in seList or any(self.isSameSE(se1, candidate) for candidate in seList)

    def closerSEs(self, existingSEs, targetSEs, local=False):
        """ Order the target SEs so that those closest to the existing SEs come first

        :param existingSEs: SEs to which the proximity is evaluated (archive SEs are
                            not used to define the sites)
        :param targetSEs: candidate SEs (duplicates are dropped)
        :param bool local: if True only return the SEs sharing a site with an existing SE
        :returns: list made of the SEs at the same site as an existing SE (shuffled), then,
                  unless ``local``, the other SEs (shuffled) and finally the SEs that are
                  the same as an existing one
        """
        candidates = set(targetSEs)
        # Targets that are just another name for an SE we already have
        sameSEs = {
            target for target in candidates for existing in existingSEs
            if self.isSameSE(target, existing)
        }
        remaining = candidates - sameSEs
        if not remaining:
            return [] if local else list(sameSEs)

        # Some SEs are left, compare the sites they are local to
        sitesOfExisting = [
            self.dmsHelper.getLocalSiteForSE(se).get('Value')
            for se in existingSEs if not self.dmsHelper.isSEArchive(se)
        ]
        sitesOfExisting = {site for site in sitesOfExisting if site}
        closeSEs = {
            se for se in remaining
            if self.dmsHelper.getLocalSiteForSE(se).get('Value') in sitesOfExisting
        }
        farSEs = remaining - closeSEs
        ordered = list(closeSEs)
        random.shuffle(ordered)
        if local:
            return ordered
        if farSEs:
            farSEs = list(farSEs)
            random.shuffle(farSEs)
            ordered += farSEs
        return ordered + list(sameSEs)

Example #4

Show file

File: TransformationPlugin.py Project: Kiyoshi-Hayasaka/DIRAC

class TransformationPlugin( PluginBase ):
  """ A TransformationPlugin object should be instantiated by every transformation.
  """

  def __init__( self, plugin, transClient = None, dataManager = None ):
    """ plugin name has to be passed in: it will then be executed as one of the functions below, e.g.
        plugin = 'BySize' will execute TransformationPlugin('BySize')._BySize()

    :param plugin: name of the plugin, passed to the base class
    :param transClient: transformation client, a TransformationClient is created when None
    :param dataManager: data manager, a DataManager is created when None
    """
    super( TransformationPlugin, self ).__init__( plugin )

    # Input of the plugin, set later with setInputData / setTransformationFiles
    self.data = {}
    self.files = False

    # Clients: keep the ones passed by the caller, otherwise build defaults
    self.transClient = TransformationClient() if transClient is None else transClient
    self.dm = DataManager() if dataManager is None else dataManager
    self.fc = FileCatalog()


  def isOK( self ):
    """ Check that both the input data and the parameters are set

    :returns: True / False, also stored in self.valid
    """
    self.valid = bool( self.data and self.params )
    return self.valid

  def setInputData( self, data ):
    """ Set the input data of the plugin (self.data), which the grouping methods
        pass to _getFileGroups, i.e. a dictionary { lfn : [SE1, SE2, ...], ... }
    """
    self.data = data

  def setTransformationFiles( self, files ): #TODO ADDED
    """ Store the transformation files in self.files (False until this is called)
    """
    self.files = files

  def _Standard( self ):
    """ Simply group by replica location

    :returns: S_OK( list of ( '', lfns ) ): the tasks of _groupByReplicas with no
              target SE, or the error returned by _groupByReplicas
    """
    grouped = self._groupByReplicas()
    if not grouped['OK']:
      return grouped
    # The SE of each group is dropped, only the grouping of the files matters here
    return S_OK( [( '', lfns ) for _se, lfns in grouped['Value']] )

  def _BySize( self ):
    """ 'BySize' plugin: alias for _groupBySize, whose result is returned unchanged
    """
    return self._groupBySize()

  def _Broadcast( self ):
    """ This plug-in takes files found at the sourceSE and broadcasts to all (or a selection of) targetSEs.

    Parameters used (self.params): 'SourceSE', 'TargetSE', 'Destinations' (optional),
    'Status' and 'GroupSize'.

    :returns: S_OK( list of ( comma separated target SEs, lfns ) ) or S_ERROR if no parameters are set
    """
    if not self.params:
      return S_ERROR( "TransformationPlugin._Broadcast: The 'Broadcast' plugin requires additional parameters." )

    targetseParam = self.params['TargetSE']
    # NOTE(review): eval() runs arbitrary code if the parameters cannot be trusted;
    # ast.literal_eval would be enough if only literal lists are expected - confirm
    sourceSEs = eval( self.params['SourceSE'] )
    if isinstance( targetseParam, list ):
      # Work on a copy: the list is shuffled below and the parameters must stay untouched
      targetSEs = list( targetseParam )
    elif targetseParam.count( '[' ):
      targetSEs = eval( targetseParam )
    else:
      targetSEs = [targetseParam]
    destinations = int( self.params.get( 'Destinations', 0 ) )
    if destinations and ( destinations >= len( targetSEs ) ):
      # No need for a limit if it cannot be reached
      destinations = 0

    status = self.params['Status']
    groupSize = self.params['GroupSize']  # Number of files per tasks

    fileGroups = self._getFileGroups( self.data )  # groups by SE
    targetSELfns = {}
    for replicaSE, lfns in fileGroups.items():
      ses = replicaSE.split( ',' )
      # Only consider the files that have a replica at one of the source SEs
      if not any( se in sourceSEs for se in ses ):
        continue

      for lfn in lfns:
        targets = []
        sources = self._getSitesForSEs( ses )
        random.shuffle( targetSEs )
        for targetSE in targetSEs:
          site = self._getSiteForSE( targetSE )['Value']
          if site not in sources:
            if destinations and ( len( targets ) >= destinations ):
              continue
            sources.append( site )
          targets.append( targetSE )  # after all, if someone wants to copy to the source, it's his choice
        strTargetSEs = str.join( ',', sorted( targets ) )
        # setdefault instead of dict.has_key(), which no longer exists in Python 3
        targetSELfns.setdefault( strTargetSEs, [] ).append( lfn )
    tasks = []
    for ses, lfns in targetSELfns.items():
      for taskLfns in breakListIntoChunks( lfns, groupSize ):
        if ( status == 'Flush' ) or ( len( taskLfns ) >= int( groupSize ) ):
          # do not allow groups smaller than the groupSize, except if transformation is in flush state
          tasks.append( ( ses, taskLfns ) )
    return S_OK( tasks )

  def _ByShare( self, shareType = 'CPU' ):
    """ first get the shares from the CS, and then makes the grouping looking at the history

    :param str shareType: type of shares, passed to _getShares
    :returns: S_OK( list of ( SE, lfns ) ) or the error of the step that failed
    """
    res = self._getShares( shareType, normalise = True )
    if not res['OK']:
      return res
    cpuShares = res['Value']
    gLogger.info( "Obtained the following target shares (%):" )
    for site in sorted( cpuShares.keys() ):
      gLogger.info( "%s: %.1f" % ( site.ljust( 15 ), cpuShares[site] ) )

    # Get the existing destinations from the transformationDB
    res = self._getExistingCounters( requestedSites = cpuShares.keys() )
    if not res['OK']:
      gLogger.error( "Failed to get existing file share", res['Message'] )
      return res
    existingCount = res['Value']
    if existingCount:
      gLogger.info( "Existing site utilization (%):" )
      normalisedExistingCount = self._normaliseShares( existingCount.copy() )
      for se in sorted( normalisedExistingCount.keys() ):
        gLogger.info( "%s: %.1f" % ( se.ljust( 15 ), normalisedExistingCount[se] ) )

    # Group the input files by their existing replicas
    res = self._groupByReplicas()
    if not res['OK']:
      return res
    replicaGroups = res['Value']

    tasks = []
    # For the replica groups
    for replicaSE, lfns in replicaGroups:
      possibleSEs = replicaSE.split( ',' )
      # Determine the next site based on requested shares, existing usage and candidate sites
      res = self._getNextSite( existingCount, cpuShares, candidates = self._getSitesForSEs( possibleSEs ) )
      if not res['OK']:
        gLogger.error( "Failed to get next destination SE", res['Message'] )
        continue
      targetSite = res['Value']
      # Resolve the ses for the target site
      res = getSEsForSite( targetSite )
      if not res['OK']:
        continue
      ses = res['Value']
      # Determine the selected SE and create the task
      # NOTE(review): a task is created for every SE of the site holding the files,
      # not only for the first one - confirm this is intended
      for chosenSE in ses:
        if chosenSE in possibleSEs:
          tasks.append( ( chosenSE, lfns ) )
          # dict.get() instead of dict.has_key(), which no longer exists in Python 3
          existingCount[targetSite] = existingCount.get( targetSite, 0 ) + len( lfns )
    return S_OK( tasks )

  def _getShares( self, shareType, normalise = False ):
    """ Get the shares defined in the CS section /Resources/Shares/<shareType>

    :param str shareType: name of the CS section with the shares
    :param bool normalise: if True the shares are passed through _normaliseShares
    :returns: S_OK( dict site -> share as float ), or an error if the section cannot
              be read or defines no shares
    """
    res = gConfig.getOptionsDict( '/Resources/Shares/%s' % shareType )
    if not res['OK']:
      return res
    shares = res['Value']
    if not shares:
      return S_ERROR( "/Resources/Shares/%s option contains no shares" % shareType )
    # Option values are converted in place to numbers
    for site in list( shares ):
      shares[site] = float( shares[site] )
    if normalise:
      shares = self._normaliseShares( shares )
    if not shares:
      return S_ERROR( "No non-zero shares defined" )
    return S_OK( shares )

  def _getExistingCounters( self, normalise = False, requestedSites = [] ):
    """ Get the number of files of the transformation per UsedSE, or per site

    :param bool normalise: if True the counters are passed through _normaliseShares
    :param requestedSites: when not empty, the counters are given for these sites instead of SEs
    :returns: S_OK( dict name -> counter ) or the error of the failing call
    """
    res = self.transClient.getCounters( 'TransformationFiles', ['UsedSE'],
                                        {'TransformationID':self.params['TransformationID']} )
    if not res['OK']:
      return res
    # Files not yet assigned are reported with the 'Unknown' SE: ignore them
    usageDict = {}
    for usedDict, count in res['Value']:
      if usedDict['UsedSE'] != 'Unknown':
        usageDict[usedDict['UsedSE']] = count
    if requestedSites:
      perSite = {}
      for se, count in usageDict.items():
        sitesRes = getSitesForSE( se, gridName = 'LCG' )
        if not sitesRes['OK']:
          return sitesRes
        # NOTE(review): when several SEs belong to the same site the last counter
        # wins, counters are not summed - confirm this is intended
        perSite.update( ( site, count ) for site in sitesRes['Value'] if site in requestedSites )
      usageDict = perSite
    if normalise:
      usageDict = self._normaliseShares( usageDict )
    return S_OK( usageDict )

  @classmethod
  def _normaliseShares( self, originalShares ):
    shares = originalShares.copy()
    total = 0.0
    for site in shares.keys():
      share = float( shares[site] )
      shares[site] = share
      total += share
    for site in shares.keys():
      share = 100.0 * ( shares[site] / total )
      shares[site] = share
    return shares

  def _getNextSite( self, existingCount, cpuShares, candidates = [] ):
    """ Choose the site whose current usage is the furthest below its share

    :param dict existingCount: site -> number of files already assigned
    :param dict cpuShares: site -> requested share; sites with no share are never chosen
    :param candidates: if not empty, only these sites can be chosen
    :returns: S_OK( site name ), the name being '' if no site qualifies
    """
    # normalise the shares
    siteShare = self._normaliseShares( existingCount )
    # then fill the missing share values to 0
    # (setdefault instead of dict.has_key(), which no longer exists in Python 3)
    for site in cpuShares:
      siteShare.setdefault( site, 0.0 )
    # determine which site is furthest from its share
    chosenSite = ''
    largestShortFall = -float( "inf" )
    for site, cpuShare in cpuShares.items():
      if candidates and site not in candidates:
        continue
      if not cpuShare:
        continue
      shareShortFall = cpuShare - siteShare[site]
      if shareShortFall > largestShortFall:
        largestShortFall = shareShortFall
        chosenSite = site
    return S_OK( chosenSite )

  def _groupByReplicas( self ):
    """ Generates a job based on the location of the input data

    The files in self.data are grouped by set of SEs holding them, then split in
    chunks of 'GroupSize' files; incomplete chunks are only kept if 'Status' is 'Flush'.

    :returns: S_OK( list of ( comma separated SEs, lfns ) ) or S_ERROR if no parameters are set
    """
    if not self.params:
      return S_ERROR( "TransformationPlugin._Standard: The 'Standard' plug-in requires parameters." )
    flush = self.params['Status'] == 'Flush'
    groupSize = self.params['GroupSize']
    # Group files by SE
    fileGroups = self._getFileGroups( self.data )
    # Create tasks based on the group size
    tasks = []
    for replicaSE in sorted( fileGroups ):
      tasks.extend( ( replicaSE, chunk )
                    for chunk in breakListIntoChunks( fileGroups[replicaSE], groupSize )
                    if flush or len( chunk ) >= int( groupSize ) )
    return S_OK( tasks )

  def _groupBySize( self ):
    """ Generate a task for a given amount of data

    Parameters used (self.params): 'Status', 'GroupSize' (in GB) and 'MaxFiles'
    (optional, 100 by default). A task is closed when its size exceeds 'GroupSize'
    or when it holds 'MaxFiles' files; what is left is only used if 'Status' is 'Flush'.

    :returns: S_OK( list of ( comma separated SEs, lfns ) ) or S_ERROR
    """
    if not self.params:
      return S_ERROR( "TransformationPlugin._BySize: The 'BySize' plug-in requires parameters." )
    status = self.params['Status']
    requestedSize = float( self.params['GroupSize'] ) * 1000 * 1000 * 1000 # input size in GB converted to bytes
    # Converted like GroupSize: a string would never compare correctly with a number of files
    maxFiles = int( self.params.get( 'MaxFiles', 100 ) )
    # Group files by SE
    fileGroups = self._getFileGroups( self.data )
    # Get the file sizes
    res = self.fc.getFileSize( self.data )
    if not res['OK']:
      return S_ERROR( "Failed to get sizes for files" )
    if res['Value']['Failed']:
      return S_ERROR( "Failed to get sizes for all files" )
    fileSizes = res['Value']['Successful']
    tasks = []
    for replicaSE, lfns in fileGroups.items():
      taskLfns = []
      taskSize = 0
      for lfn in lfns:
        taskSize += fileSizes[lfn]
        taskLfns.append( lfn )
        if ( taskSize > requestedSize ) or ( len( taskLfns ) >= maxFiles ):
          tasks.append( ( replicaSE, taskLfns ) )
          taskLfns = []
          taskSize = 0
      if ( status == 'Flush' ) and taskLfns:
        tasks.append( ( replicaSE, taskLfns ) )
    return S_OK( tasks )

  @classmethod
  def _getFileGroups( cls, fileReplicas ):
    """ get file groups dictionary { "SE1,SE2,SE3" : [ lfn1, lfn2 ], ... }
    
    :param dict fileReplicas: { lfn : [SE1, SE2, SE3], ... }
    """
    fileGroups = {}
    for lfn, replicas in fileReplicas.items():
      replicaSEs = ",".join( sorted( list( set( replicas ) ) ) )
      if replicaSEs not in fileGroups:
        fileGroups[replicaSEs] = []
      fileGroups[replicaSEs].append( lfn )
    return fileGroups

  @classmethod
  def _getSiteForSE( cls, se ):
    """ Get site name for the given SE

    :param se: name of the SE
    :returns: S_OK( first site returned by getSitesForSE, or '' if there is none ),
              or the error returned by getSitesForSE
    """
    result = getSitesForSE( se, gridName = 'LCG' )
    if not result['OK']:
      return result
    sites = result['Value']
    return S_OK( sites[0] if sites else '' )

  @classmethod
  def _getSitesForSEs( cls, seList ):
    """ Get all the sites for the given SE list
    """
    sites = []
    for se in seList:
      result = getSitesForSE( se, gridName = 'LCG' )
      if result['OK']:
        sites += result['Value']
    return sites

Example #5

Show file

File: dirac-test-plugin.py Project: antolu/LHCbDIRAC

class fakeClient:
    def __init__(self, trans, transID, lfns, asIfProd):
        """Set up the fake client and pre-compute the plugin input data.

        :param trans: transformation object, queried through getType() and
                      getBkQuery() by the other methods
        :param transID: ID of the transformation that is being faked
        :param lfns: LFNs handed to prepareForPlugin
        :param asIfProd: when set, requests for transID are redirected to this
                         transformation ID on the real client
        """
        # Imports are kept local to the constructor, as in the original code
        from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
        from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
        from DIRAC.DataManagementSystem.Client.DataManager import DataManager

        self.trans = trans
        self.transID = transID
        self.asIfProd = asIfProd
        self.transClient = TransformationClient()
        self.bk = BookkeepingClient()
        self.dm = DataManager()

        # prepareForPlugin needs self.trans, self.bk and self.dm to be set
        prepared = self.prepareForPlugin(lfns)
        self.transFiles, self.transReplicas = prepared

    def addFilesToTransformation(self, transID, lfns):
        """Report every LFN as 'Added' without storing anything.

        :param transID: ignored
        :param lfns: iterable of LFNs
        :return: S_OK({'Failed': {}, 'Successful': {lfn: 'Added', ...}})
        """
        addedLfns = dict.fromkeys(lfns, 'Added')
        return S_OK({'Failed': {}, 'Successful': addedLfns})

    def getTransformation(self, transID, extraParams=False):
        """Return the transformation description.

        Requests for the faked transformation are redirected to self.asIfProd
        when it is set. Any ID other than the faked one is served by the real
        TransformationClient; the faked one only reports its 'Type'.

        :param transID: transformation ID
        :param extraParams: forwarded to the real client when delegating
        :return: S_OK({'Type': ...}) for the faked transformation, otherwise
                 the real client's result; a failed getType() result is
                 returned as is
        """
        if transID == self.transID and self.asIfProd:
            transID = self.asIfProd
        if transID != self.transID:
            # Forward extraParams instead of silently dropping it
            return self.transClient.getTransformation(
                transID, extraParams=extraParams)
        res = self.trans.getType()
        if not res['OK']:
            # Propagate the error rather than failing on the missing 'Value'
            return res
        return DIRAC.S_OK({'Type': res['Value']})

    def getReplicas(self):
        """Return self.transReplicas, the replica mapping stored on this client."""
        return self.transReplicas

    def getFiles(self):
        """Return self.transFiles, the file dictionaries stored on this client."""
        return self.transFiles

    def getCounters(self, table, attrList, condDict):
        """Return counters for the faked transformation or delegate.

        Requests for the faked transformation are redirected to self.asIfProd
        when it is set. Any ID other than the faked one is served by the real
        TransformationClient. For the faked transformation, a zero counter is
        returned for each SE of a hard-coded list of RAW SEs; table and
        attrList are then ignored.

        :param table: forwarded to the real client when delegating
        :param attrList: forwarded to the real client when delegating
        :param dict condDict: must contain 'TransformationID'; never modified
        :return: S_OK([({'UsedSE': se}, 0), ...]) or the real client's result
        """
        if condDict['TransformationID'] == self.transID and self.asIfProd:
            # Redirect through a copy so the caller's dictionary stays intact
            condDict = dict(condDict, TransformationID=self.asIfProd)
        if condDict['TransformationID'] != self.transID:
            return self.transClient.getCounters(table, attrList, condDict)
        possibleTargets = [
            'CERN-RAW', 'CNAF-RAW', 'GRIDKA-RAW', 'IN2P3-RAW', 'SARA-RAW',
            'PIC-RAW', 'RAL-RAW', 'RRCKI-RAW'
        ]
        counters = [({'UsedSE': se}, 0) for se in possibleTargets]
        return DIRAC.S_OK(counters)

    def getBookkeepingQuery(self, transID):
        """Return the bookkeeping query of the transformation.

        When the faked transformation is requested and self.asIfProd is set,
        the query of that transformation is fetched from the real client;
        otherwise the query of the local transformation object is returned.

        :param transID: transformation ID
        :return: result of the real client's getBookkeepingQuery or of
                 self.trans.getBkQuery()
        """
        if transID == self.transID and self.asIfProd:
            # asIfProd is an instance attribute; the bare name raised NameError
            return self.transClient.getBookkeepingQuery(self.asIfProd)
        return self.trans.getBkQuery()

    def insertTransformationRun(self, transID, runID, xx):
        """No-op stand-in: ignores its arguments and returns DIRAC.S_OK()."""
        return DIRAC.S_OK()

    def getTransformationRuns(self, condDict):
        """Return the runs of the faked transformation or delegate.

        Requests for the faked transformation are redirected to self.asIfProd
        when it is set. Any ID other than the faked one is served by the real
        TransformationClient. For the faked transformation, the runs are taken
        from condDict['RunNumber'] or, failing that, from the bookkeeping
        metadata of the prepared files; every run is reported as 'Active' with
        no selected site.

        :param dict condDict: must contain 'TransformationID', may contain
                              'RunNumber' (single run or collection); never
                              modified
        :return: S_OK(list of run dictionaries), the real client's result, or
                 the failed bookkeeping result
        """
        transID = condDict['TransformationID']
        if transID == self.transID and self.asIfProd:
            # Redirect through a copy so the caller's dictionary stays intact
            condDict = dict(condDict, TransformationID=self.asIfProd)
            transID = self.asIfProd
        if transID != self.transID:
            return self.transClient.getTransformationRuns(condDict)

        runs = condDict.get('RunNumber', [])
        if runs and not isinstance(runs, (list, tuple, set)):
            # A single run number is accepted as well as a collection
            runs = [runs]
        if not runs and self.transFiles:
            res = self.bk.getFileMetadata(
                [fileDict['LFN'] for fileDict in self.transFiles])
            if not res['OK']:
                return res
            runs = list(
                set(meta['RunNumber']
                    for meta in res['Value']['Successful'].values()))
        transRuns = [{
            'RunNumber': run,
            'Status': "Active",
            "SelectedSite": None
        } for run in runs]
        return DIRAC.S_OK(transRuns)

    def getTransformationFiles(self, condDict=None):
        """Return the files of the faked transformation or delegate.

        Requests for the faked transformation are redirected to self.asIfProd
        when it is set. Any ID other than the faked one is served by the real
        TransformationClient. All files of the faked transformation are
        reported as 'Unused', so a 'Status' condition that does not include
        'Unused' yields an empty list.

        :param dict condDict: optional selection; the keys read here are
                              'TransformationID', 'Status' and 'RunNumber';
                              never modified
        :return: S_OK(list of {'LFN', 'Status', 'RunNumber'} dictionaries) or
                 the real client's result
        """
        if condDict is None:
            # The default value used to crash on condDict.get
            condDict = {}
        transID = condDict.get('TransformationID')
        if transID == self.transID and self.asIfProd:
            # Redirect through a copy so the caller's dictionary stays intact
            condDict = dict(condDict, TransformationID=self.asIfProd)
            transID = self.asIfProd
        if transID != self.transID:
            return self.transClient.getTransformationFiles(condDict=condDict)

        if 'Status' in condDict and 'Unused' not in condDict['Status']:
            return DIRAC.S_OK([])
        runs = None
        if 'RunNumber' in condDict:
            runs = condDict['RunNumber']
            if not isinstance(runs, list):
                runs = [runs]
        # self.transFiles is None when the client was built without LFNs
        transFiles = [{
            'LFN': fileDict['LFN'],
            'Status': 'Unused',
            'RunNumber': fileDict['RunNumber']
        } for fileDict in self.transFiles or []
            if not runs or fileDict['RunNumber'] in runs]
        return DIRAC.S_OK(transFiles)

    def setParameterToTransformationFiles(self, transID, lfnDict):
        """Update the prepared file dictionaries with some parameters.

        For the faked transformation, each file dictionary is updated in place
        with the parameters found for its LFN; any other ID is served by the
        real TransformationClient.

        :param transID: transformation ID
        :param dict lfnDict: { lfn : { parameter : value, ... }, ... }
        :return: S_OK() or the real client's result
        """
        if transID != self.transID:
            return self.transClient.setParameterToTransformationFiles(
                transID, lfnDict)
        # self.transFiles is None when the client was built without LFNs
        for fileDict in self.transFiles or []:
            fileDict.update(lfnDict.get(fileDict['LFN'], {}))
        return S_OK()

    def getTransformationFilesCount(self, transID, field, selection=None):
        """Count the files of the faked transformation or delegate.

        The faked transformation is recognised either from transID or from
        selection['TransformationID']; anything else is served by the real
        TransformationClient. Only the fields 'Status' (all files count as
        'Unused') and 'RunNumber' are supported. A 'Total' entry holding the
        sum of all counters is added to the result.

        :param transID: transformation ID
        :param str field: 'Status' or 'RunNumber'
        :param dict selection: optional; 'TransformationID' and 'RunNumber'
                               (single run or list) are read here
        :return: S_OK(counters), S_ERROR for an unsupported field, or the real
                 client's result
        """
        if selection is None:
            selection = {}
        isFaked = (transID == self.transID
                   or selection.get('TransformationID') == self.transID)
        if not isFaked:
            return self.transClient.getTransformationFilesCount(
                transID, field, selection=selection)
        if field not in ('Status', 'RunNumber'):
            return DIRAC.S_ERROR('Not implemented for field ' + field)

        runs = selection.get('RunNumber')
        if runs and not isinstance(runs, list):
            runs = [runs]
        # self.transFiles is None when the client was built without LFNs
        selectedRuns = [fileDict['RunNumber']
                        for fileDict in self.transFiles or []
                        if not runs or fileDict['RunNumber'] in runs]
        if field == 'Status':
            counters = {'Unused': len(selectedRuns)}
        else:
            counters = {}
            for runID in selectedRuns:
                counters[runID] = counters.get(runID, 0) + 1
        counters['Total'] = sum(counters.values())
        return DIRAC.S_OK(counters)

    def getTransformationRunStats(self, transIDs):
        """Return per-run file statistics for the given transformations.

        For the faked transformation every prepared file counts as 'Unused',
        and 'Total' equals 'Unused' for each run. Each other ID is looked up
        individually on the real TransformationClient.

        :param transIDs: iterable of transformation IDs
        :return: S_OK({ transID : { runID : {'Unused': n, 'Total': n} } }),
                 merged with the real client's values, or the first failed
                 result of the real client
        """
        counters = {}
        for transID in transIDs:
            if transID == self.transID:
                # Create the entry up front so that a transformation without
                # files yields an empty dictionary instead of a KeyError
                runStats = counters.setdefault(transID, {})
                # self.transFiles is None when the client was built without LFNs
                for fileDict in self.transFiles or []:
                    stats = runStats.setdefault(fileDict['RunNumber'],
                                                {'Unused': 0})
                    stats['Unused'] += 1
                for stats in runStats.values():
                    stats['Total'] = stats['Unused']
            else:
                # Query this ID only: passing the whole list repeated the same
                # request and could overwrite the faked counters
                res = self.transClient.getTransformationRunStats([transID])
                if not res['OK']:
                    return res
                counters.update(res['Value'])
        return DIRAC.S_OK(counters)

    def addRunsMetadata(self, runID, val):
        """Forward the call unchanged to self.transClient and return its result."""
        return self.transClient.addRunsMetadata(runID, val)

    def getRunsMetadata(self, runID):
        """Forward the call unchanged to self.transClient and return its result."""
        return self.transClient.getRunsMetadata(runID)

    def setTransformationRunStatus(self, transID, runID, status):
        """No-op stand-in: ignores its arguments and returns DIRAC.S_OK()."""
        return DIRAC.S_OK()

    def setTransformationRunsSite(self, transID, runID, site):
        """No-op stand-in: ignores its arguments and returns DIRAC.S_OK()."""
        return DIRAC.S_OK()

    def setFileStatusForTransformation(self, transID, status, lfns):
        """No-op stand-in: ignores its arguments and returns DIRAC.S_OK()."""
        return DIRAC.S_OK()

    def addTransformationRunFiles(self, transID, run, lfns):
        """No-op stand-in: ignores its arguments and returns DIRAC.S_OK()."""
        return DIRAC.S_OK()

    def setDestinationForRun(self, runID, site):
        """No-op stand-in: ignores its arguments and returns DIRAC.S_OK()."""
        return DIRAC.S_OK()

    def getDestinationForRun(self, runID):
        """Forward the call unchanged to self.transClient and return its result."""
        return self.transClient.getDestinationForRun(runID)

    def prepareForPlugin(self, lfns):
        """Build the file and replica information handed to the plugin.

        The run number of each LFN is read from the bookkeeping metadata
        (0 when absent). Replicas are then fetched in chunks of 200 LFNs,
        with DataManager.getReplicas for 'replication' and 'removal'
        transformations and DataManager.getReplicasForJobs for all others.
        Progress and errors are printed.

        :param lfns: list of LFNs
        :return: tuple (files, replicas) where files is a list of
                 {'RunNumber', 'LFN'} dictionaries and replicas maps each LFN
                 that has replicas to its sorted SEs; (None, None) when lfns
                 is empty; ([], {}) when the bookkeeping query fails
        """
        import time
        # Single-argument print calls behave identically on Python 2 and 3
        print("Preparing the plugin input data (%d files)" % len(lfns))
        transType = self.trans.getType()['Value']
        if not lfns:
            return (None, None)
        res = self.bk.getFileMetadata(lfns)
        if not res['OK']:
            print("Error getting BK metadata %s" % res['Message'])
            return ([], {})
        files = [{"RunNumber": metadata.get('RunNumber', 0), "LFN": lfn}
                 for lfn, metadata in res['Value']['Successful'].items()]

        # The transformation type decides which replica lookup is used
        if transType.lower() in ("replication", "removal"):
            getReplicas = self.dm.getReplicas
        else:
            getReplicas = self.dm.getReplicasForJobs
        replicas = {}
        startTime = time.time()
        from DIRAC.Core.Utilities.List import breakListIntoChunks
        for lfnChunk in breakListIntoChunks(lfns, 200):
            res = getReplicas(lfnChunk, getUrl=False)
            if res['OK']:
                for lfn, ses in res['Value']['Successful'].items():
                    if ses:
                        replicas[lfn] = sorted(ses)
            else:
                # Report the size of the chunk that failed, not of the whole list
                print("Error getting replicas of %d files: %s" % (
                    len(lfnChunk), res['Message']))
        print("Obtained replicas of %d files in %.3f seconds" % (
            len(lfns), time.time() - startTime))
        return (files, replicas)