Example #1
0
def getFilesToStage(lfnList,
                    jobState=None,
                    checkOnlyTapeSEs=None,
                    jobLog=None):
    """ Utility that returns out of a list of LFNs those files that are offline,
        and those for which at least one copy is online

        :param lfnList: a single LFN (string) or a list of LFNs
        :param jobState: optional object; when given, its 'Owner' and 'OwnerGroup'
                         attributes are forwarded as proxy user name and group
        :param checkOnlyTapeSEs: forwarded unchanged to _checkFilesToStage
        :param jobLog: forwarded unchanged to _checkFilesToStage

        :return: S_OK(dict) / S_ERROR. On the normal path the dict has the keys

                 * 'onlineLFNs': list of LFNs reported online
                 * 'offlineLFNs': dict {SE: [LFNs to stage at that SE]}
                 * 'failedLFNs': list of requested LFNs that ended up neither online,
                   offline nor absent
                 * 'absentLFNs': dict {LFN: message}
                 * 'onlineSites': set of sites hosting the online SEs

                 The two early returns (empty input, files absent from the catalog)
                 do not carry the 'onlineSites' key.
    """
    if not lfnList:
        return S_OK({
            'onlineLFNs': [],
            'offlineLFNs': {},
            'failedLFNs': [],
            'absentLFNs': {}
        })

    dm = DataManager()
    if isinstance(lfnList, six.string_types):
        lfnList = [lfnList]

    lfnListReplicas = dm.getReplicasForJobs(lfnList, getUrl=False)
    if not lfnListReplicas['OK']:
        return lfnListReplicas

    offlineLFNsDict = {}
    onlineLFNs = {}
    offlineLFNs = {}
    absentLFNs = {}
    failedLFNs = set()
    # Bound here so that the final return is valid even when no replica is found at
    # any SE (previously 'onlineSites' was only assigned inside "if seToLFNs:",
    # which made the last statement raise a NameError in that case)
    onlineSites = set()
    if lfnListReplicas['Value']['Failed']:
        # Check if files are not existing
        for lfn, reason in lfnListReplicas['Value']['Failed'].items():
            # FIXME: awful check until FC returns a proper error
            if cmpError(reason, errno.ENOENT) or 'No such file' in reason:
                # The file doesn't exist, job must be Failed
                # FIXME: it is not possible to return here an S_ERROR(), return the message only
                absentLFNs[lfn] = S_ERROR(errno.ENOENT,
                                          'File not in FC')['Message']
        if absentLFNs:
            return S_OK({
                'onlineLFNs': list(onlineLFNs),
                'offlineLFNs': offlineLFNsDict,
                'failedLFNs': list(failedLFNs),
                'absentLFNs': absentLFNs
            })
        return S_ERROR("Failures in getting replicas")

    lfnListReplicas = lfnListReplicas['Value']['Successful']
    # If a file is reported here at a tape SE, it is not at a disk SE as we use disk in priority
    # We shall check all file anyway in order to make sure they exist
    seToLFNs = dict()
    for lfn, ses in lfnListReplicas.items():
        for se in ses:
            seToLFNs.setdefault(se, list()).append(lfn)

    if seToLFNs:
        if jobState:
            # Get user name and group from the job state
            userName = jobState.getAttribute('Owner')
            if not userName['OK']:
                return userName
            userName = userName['Value']

            userGroup = jobState.getAttribute('OwnerGroup')
            if not userGroup['OK']:
                return userGroup
            userGroup = userGroup['Value']
        else:
            userName = None
            userGroup = None
        # Check whether files are Online or Offline, or missing at SE.
        # onlineLFNs, offlineLFNs and absentLFNs are passed empty and read back
        # below, i.e. the helper is expected to fill them in place.
        result = _checkFilesToStage(
            seToLFNs,
            onlineLFNs,
            offlineLFNs,
            absentLFNs,  # pylint: disable=unexpected-keyword-arg
            checkOnlyTapeSEs=checkOnlyTapeSEs,
            jobLog=jobLog,
            proxyUserName=userName,
            proxyUserGroup=userGroup,
            executionLock=True)

        if not result['OK']:
            return result
        # Whatever was requested but not classified by the check is considered failed
        failedLFNs = set(lfnList) - set(onlineLFNs) - set(offlineLFNs) - set(
            absentLFNs)

        # Get the online SEs
        dmsHelper = DMSHelpers()
        onlineSEs = set(se for ses in onlineLFNs.values() for se in ses)
        # SEs whose site lookup fails yield None through .get('Value'); drop it
        onlineSites = set(
            dmsHelper.getLocalSiteForSE(se).get('Value')
            for se in onlineSEs) - {None}
        for lfn in offlineLFNs:
            ses = offlineLFNs[lfn]
            if len(ses) == 1:
                # No choice, let's go
                offlineLFNsDict.setdefault(ses[0], list()).append(lfn)
                continue
            # Try and get an SE at a site already with online files
            found = False
            if onlineSites:
                # If there is at least one online site, select one
                for se in ses:
                    site = dmsHelper.getLocalSiteForSE(se)
                    if site['OK']:
                        if site['Value'] in onlineSites:
                            offlineLFNsDict.setdefault(se, list()).append(lfn)
                            found = True
                            break
            # No online site found in common, select randomly
            if not found:
                offlineLFNsDict.setdefault(random.choice(ses),
                                           list()).append(lfn)

    return S_OK({
        'onlineLFNs': list(onlineLFNs),
        'offlineLFNs': offlineLFNsDict,
        'failedLFNs': list(failedLFNs),
        'absentLFNs': absentLFNs,
        'onlineSites': onlineSites
    })
Example #2
0
def getFilesToStage(lfnList, jobState=None):
    """ Utility that returns out of a list of LFNs those files that are offline,
        and those for which at least one copy is online

        :param lfnList: list of LFNs
        :param jobState: optional object; when given, its 'Owner' and 'OwnerGroup'
                         attributes are forwarded as proxy user name and group

        :return: S_OK(dict) / S_ERROR, the dict having the keys

                 * 'onlineLFNs': list of LFNs reported online
                 * 'offlineLFNs': dict {SE: [LFNs to stage at that SE]}
                 * 'failedLFNs': list of requested LFNs that ended up neither online,
                   offline nor absent
                 * 'absentLFNs': dict filled by _checkFilesToStage
    """
    if not lfnList:
        return S_OK({
            'onlineLFNs': [],
            'offlineLFNs': {},
            'failedLFNs': [],
            'absentLFNs': {}
        })

    dm = DataManager()

    lfnListReplicas = dm.getReplicasForJobs(lfnList, getUrl=False)
    if not lfnListReplicas['OK']:
        return lfnListReplicas

    if lfnListReplicas['Value']['Failed']:
        return S_ERROR("Failures in getting replicas")

    lfnListReplicas = lfnListReplicas['Value']['Successful']
    # If a file is reported here at a tape SE, it is not at a disk SE as we use disk in priority
    # We shall check all file anyway in order to make sure they exist
    seToLFNs = dict()
    for lfn, ses in lfnListReplicas.iteritems():
        for se in ses:
            seToLFNs.setdefault(se, list()).append(lfn)

    offlineLFNsDict = {}
    onlineLFNs = set()
    offlineLFNs = {}
    absentLFNs = {}
    # Bound here so that the final return is valid even when no replica is found at
    # any SE (previously 'failedLFNs' was only assigned inside "if seToLFNs:",
    # which made the last statement raise a NameError in that case)
    failedLFNs = set()
    if seToLFNs:
        if jobState:
            # Get user name and group from the job state
            userName = jobState.getAttribute('Owner')
            if not userName['OK']:
                return userName
            userName = userName['Value']

            userGroup = jobState.getAttribute('OwnerGroup')
            if not userGroup['OK']:
                return userGroup
            userGroup = userGroup['Value']
        else:
            userName = None
            userGroup = None
        # Check whether files are Online or Offline, or missing at SE.
        # onlineLFNs, offlineLFNs and absentLFNs are passed empty and read back
        # below, i.e. the helper is expected to fill them in place.
        result = _checkFilesToStage(
            seToLFNs,
            onlineLFNs,
            offlineLFNs,
            absentLFNs,  # pylint: disable=unexpected-keyword-arg
            proxyUserName=userName,
            proxyUserGroup=userGroup,
            executionLock=True)

        if not result['OK']:
            return result
        # Whatever was requested but not classified by the check is considered failed
        failedLFNs = set(lfnList) - onlineLFNs - set(offlineLFNs) - set(
            absentLFNs)

        # Stage each offline file at one randomly chosen SE among those reported for it
        for lfn in offlineLFNs:
            ses = offlineLFNs[lfn]
            if ses:
                offlineLFNsDict.setdefault(random.choice(ses),
                                           list()).append(lfn)

    return S_OK({
        'onlineLFNs': list(onlineLFNs),
        'offlineLFNs': offlineLFNsDict,
        'failedLFNs': list(failedLFNs),
        'absentLFNs': absentLFNs
    })
Example #3
0
def getFilesToStage(lfnList, jobState=None):
    """ Split a list of LFNs into the files having an online copy and the files to stage.

        A file with a replica at an SE whose status reports 'DiskSE' is taken as online
        straight away. The remaining replicas are handed to _checkFilesToStage, which
        is expected to add to the online set the files it finds online.

        :param lfnList: list of LFNs
        :param jobState: optional object; when given, its 'Owner' and 'OwnerGroup'
                         attributes are forwarded as proxy user name and group

        :return: S_OK({'onlineLFNs': [LFNs], 'offlineLFNs': {SE: [LFNs]}}) / S_ERROR
    """
    if not lfnList:
        return S_OK({'onlineLFNs': [], 'offlineLFNs': {}})

    dataManager = DataManager()
    replicaResult = dataManager.getReplicasForJobs(lfnList, getUrl=False)
    if not replicaResult['OK']:
        return replicaResult
    if replicaResult['Value']['Failed']:
        return S_ERROR("Failures in getting replicas")
    replicasByLFN = replicaResult['Value']['Successful']

    # Check whether there is any file that is only at a tape SE
    # If a file is reported here at a tape SE, it is not at a disk SE as we use disk in priority
    nonDiskSEToLFNs = {}
    onlineLFNs = set()
    for lfn, replicaSEs in replicasByLFN.iteritems():
        for seName in replicaSEs:
            seStatus = StorageElement(seName).getStatus()
            if not seStatus['OK']:
                gLogger.error("Could not get SE status",
                              "%s - %s" % (seName, seStatus['Message']))
                return seStatus
            if seStatus['Value']['DiskSE']:
                # A disk copy exists: nothing to stage, skip the remaining SEs
                onlineLFNs.add(lfn)
                break
            nonDiskSEToLFNs.setdefault(seName, []).append(lfn)

    stagingPlan = {}
    if nonDiskSEToLFNs:
        # Some replicas are not on disk SEs: find out whether they are online or offline
        ownerName = None
        ownerGroup = None
        if jobState:
            # Take the credentials from the job state
            ownerResult = jobState.getAttribute('Owner')
            if not ownerResult['OK']:
                return ownerResult
            ownerName = ownerResult['Value']

            groupResult = jobState.getAttribute('OwnerGroup')
            if not groupResult['OK']:
                return groupResult
            ownerGroup = groupResult['Value']

        checkResult = _checkFilesToStage(
            nonDiskSEToLFNs,
            onlineLFNs,  # pylint: disable=unexpected-keyword-arg
            proxyUserName=ownerName,
            proxyUserGroup=ownerGroup,
            executionLock=True)
        if not checkResult['OK']:
            return checkResult

        # Everything requested that is not online has to be staged, at a random SE
        for lfn in set(lfnList) - onlineLFNs:
            candidateSEs = replicasByLFN[lfn].keys()
            if candidateSEs:
                stagingPlan.setdefault(random.choice(candidateSEs),
                                       []).append(lfn)

    return S_OK({
        'onlineLFNs': list(onlineLFNs),
        'offlineLFNs': stagingPlan
    })
Example #4
0
class fakeClient:
    """ Stand-in for a TransformationClient that serves one transformation locally.

        Requests concerning ``transID`` are answered from the local transformation
        object and from the files/replicas prepared by :meth:`prepareForPlugin`.
        Requests for any other transformation ID are delegated to a real
        TransformationClient. When ``asIfProd`` is set, requests for ``transID`` are
        redirected to that transformation ID instead.

        The methods that only return ``DIRAC.S_OK()`` are deliberate no-ops.
    """

    def __init__(self, trans, transID, lfns, asIfProd):
        """
        :param trans: local transformation object (must provide getType() and getBkQuery())
        :param transID: ID under which the local transformation is known
        :param lfns: list of LFNs used as input files of the local transformation
        :param asIfProd: transformation ID to impersonate, or a false value
        """
        self.trans = trans
        self.transID = transID
        from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
        self.transClient = TransformationClient()
        from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
        self.bk = BookkeepingClient()
        from DIRAC.DataManagementSystem.Client.DataManager import DataManager
        self.dm = DataManager()
        self.asIfProd = asIfProd

        (self.transFiles, self.transReplicas) = self.prepareForPlugin(lfns)

    def addFilesToTransformation(self, transID, lfns):
        """ Pretend that all LFNs were added: nothing is stored """
        return DIRAC.S_OK({
            'Failed': {},
            'Successful': {lfn: 'Added' for lfn in lfns}
        })

    def getTransformation(self, transID, extraParams=False):
        """ Return {'Type': ...} for the local transformation, else ask the real client.

            ``extraParams`` is accepted for interface compatibility and not used.
        """
        if transID == self.transID and self.asIfProd:
            transID = self.asIfProd
        if transID != self.transID:
            return self.transClient.getTransformation(transID)
        res = self.trans.getType()
        if not res['OK']:
            # Propagate the error rather than failing on the missing 'Value' key
            return res
        return DIRAC.S_OK({'Type': res['Value']})

    def getReplicas(self):
        """ Return the replicas prepared by prepareForPlugin: {LFN: sorted list of SEs} """
        return self.transReplicas

    def getFiles(self):
        """ Return the file dictionaries prepared by prepareForPlugin """
        return self.transFiles

    def getCounters(self, table, attrList, condDict):
        """ Return zeroed 'UsedSE' counters for the local transformation.

            Note that condDict['TransformationID'] is overwritten in place when
            ``asIfProd`` is set.
        """
        if condDict['TransformationID'] == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict['TransformationID'] != self.transID:
            return self.transClient.getCounters(table, attrList, condDict)
        possibleTargets = [
            'CERN-RAW', 'CNAF-RAW', 'GRIDKA-RAW', 'IN2P3-RAW', 'SARA-RAW',
            'PIC-RAW', 'RAL-RAW', 'RRCKI-RAW'
        ]
        counters = [({'UsedSE': se}, 0) for se in possibleTargets]
        return DIRAC.S_OK(counters)

    def getBookkeepingQuery(self, transID):
        """ Return the BK query of the impersonated production, else of the local transformation """
        if transID == self.transID and self.asIfProd:
            # 'asIfProd' used to be referenced as a bare name here, raising a NameError
            return self.transClient.getBookkeepingQuery(self.asIfProd)
        return self.trans.getBkQuery()

    def insertTransformationRun(self, transID, runID, xx):
        return DIRAC.S_OK()

    def getTransformationRuns(self, condDict):
        """ Return one 'Active' run entry per run of the local transformation.

            Runs are taken from condDict['RunNumber'] if given, otherwise from the
            BK metadata of the local files.
        """
        if condDict['TransformationID'] == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict['TransformationID'] != self.transID:
            return self.transClient.getTransformationRuns(condDict)

        runs = condDict.get('RunNumber', [])
        if not runs and self.transFiles:
            res = self.bk.getFileMetadata(
                [fileDict['LFN'] for fileDict in self.transFiles])
            if not res['OK']:
                return res
            runs = list(
                set(meta['RunNumber']
                    for meta in res['Value']['Successful'].values()))
        transRuns = [{
            'RunNumber': run,
            'Status': "Active",
            "SelectedSite": None
        } for run in runs]
        return DIRAC.S_OK(transRuns)

    def getTransformationFiles(self, condDict=None):
        """ Return the local files as 'Unused', optionally restricted to condDict['RunNumber'] """
        if condDict is None:
            # The default value used to crash on condDict.get()
            condDict = {}
        if condDict.get('TransformationID') == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict.get('TransformationID') != self.transID:
            return self.transClient.getTransformationFiles(condDict=condDict)

        transFiles = []
        # All local files are 'Unused': any other requested status matches nothing
        if 'Status' in condDict and 'Unused' not in condDict['Status']:
            return DIRAC.S_OK(transFiles)
        runs = None
        if 'RunNumber' in condDict:
            runs = condDict['RunNumber']
            if not isinstance(runs, list):
                runs = [runs]
        for fileDict in self.transFiles:
            if not runs or fileDict['RunNumber'] in runs:
                transFiles.append({
                    'LFN': fileDict['LFN'],
                    'Status': 'Unused',
                    'RunNumber': fileDict['RunNumber']
                })
        return DIRAC.S_OK(transFiles)

    def setParameterToTransformationFiles(self, transID, lfnDict):
        """
        Update the transFiles with some parameters
        """
        if transID == self.transID:
            for fileDict in self.transFiles:
                fileDict.update(lfnDict.get(fileDict['LFN'], {}))
            return DIRAC.S_OK()
        return self.transClient.setParameterToTransformationFiles(
            transID, lfnDict)

    def getTransformationFilesCount(self, transID, field, selection=None):
        """ Count the local files per 'Status' or per 'RunNumber', plus a 'Total' entry """
        if selection is None:
            selection = {}
        if transID != self.transID and selection.get(
                'TransformationID') != self.transID:
            return self.transClient.getTransformationFilesCount(
                transID, field, selection=selection)

        runs = selection.get('RunNumber')
        if runs and not isinstance(runs, list):
            runs = [runs]
        if field == 'Status':
            counters = {'Unused': 0}
            for fileDict in self.transFiles:
                if not runs or fileDict['RunNumber'] in runs:
                    counters['Unused'] += 1
        elif field == 'RunNumber':
            counters = {}
            for fileDict in self.transFiles:
                runID = fileDict['RunNumber']
                if not runs or runID in runs:
                    counters.setdefault(runID, 0)
                    counters[runID] += 1
        else:
            return DIRAC.S_ERROR('Not implemented for field ' + field)
        counters['Total'] = sum(counters.values())
        return DIRAC.S_OK(counters)

    def getTransformationRunStats(self, transIDs):
        """ Return {transID: {runID: {'Unused': n, 'Total': n}}} for the requested IDs """
        counters = {}
        for transID in transIDs:
            if transID == self.transID:
                # Create the entry up front: with no local files the former code
                # failed with a KeyError on counters[transID]
                runCounters = counters.setdefault(transID, {})
                for fileDict in self.transFiles:
                    runStats = runCounters.setdefault(fileDict['RunNumber'],
                                                      {'Unused': 0})
                    runStats['Unused'] += 1
                for runStats in runCounters.values():
                    runStats['Total'] = runStats['Unused']
            else:
                # Only ask the real client about this ID: passing the whole list
                # repeated the same query for each foreign ID and also queried
                # the local transformation ID
                res = self.transClient.getTransformationRunStats([transID])
                if res['OK']:
                    counters.update(res['Value'])
                else:
                    return res
        return DIRAC.S_OK(counters)

    def addRunsMetadata(self, runID, val):
        return self.transClient.addRunsMetadata(runID, val)

    def getRunsMetadata(self, runID):
        return self.transClient.getRunsMetadata(runID)

    def setTransformationRunStatus(self, transID, runID, status):
        return DIRAC.S_OK()

    def setTransformationRunsSite(self, transID, runID, site):
        return DIRAC.S_OK()

    def setFileStatusForTransformation(self, transID, status, lfns):
        return DIRAC.S_OK()

    def addTransformationRunFiles(self, transID, run, lfns):
        return DIRAC.S_OK()

    def setDestinationForRun(self, runID, site):
        return DIRAC.S_OK()

    def getDestinationForRun(self, runID):
        return self.transClient.getDestinationForRun(runID)

    def prepareForPlugin(self, lfns):
        """ Collect run numbers and replicas of the input files.

            :param lfns: list of LFNs
            :return: tuple (files, replicas): files is a list of
                     {'RunNumber': run, 'LFN': lfn} built from the BK metadata
                     (run 0 when the metadata has no 'RunNumber'), replicas is
                     {lfn: sorted list of SEs}. ([], {}) is returned when there is
                     no LFN or when the BK query fails.
        """
        import time
        # Single-argument print calls: same output on python 2, valid on python 3
        print("Preparing the plugin input data (%d files)" % len(lfns))
        transType = self.trans.getType()['Value']
        if not lfns:
            # Empty containers rather than None: other methods iterate over transFiles
            return ([], {})
        res = self.bk.getFileMetadata(lfns)
        if not res['OK']:
            print("Error getting BK metadata %s" % res['Message'])
            return ([], {})
        files = [{
            "RunNumber": metadata.get('RunNumber', 0),
            "LFN": lfn
        } for lfn, metadata in res['Value']['Successful'].items()]

        replicas = {}
        startTime = time.time()
        from DIRAC.Core.Utilities.List import breakListIntoChunks
        # Replication and removal act on all replicas, other types only on those usable by jobs
        allReplicas = transType.lower() in ("replication", "removal")
        for lfnChunk in breakListIntoChunks(lfns, 200):
            if allReplicas:
                res = self.dm.getReplicas(lfnChunk, getUrl=False)
            else:
                res = self.dm.getReplicasForJobs(lfnChunk, getUrl=False)
            if res['OK']:
                for lfn, ses in res['Value']['Successful'].items():
                    if ses:
                        replicas[lfn] = sorted(ses)
            else:
                print("Error getting replicas of %d files: %s" %
                      (len(lfns), res['Message']))
        print("Obtained replicas of %d files in %.3f seconds" %
              (len(lfns), time.time() - startTime))
        return (files, replicas)
Example #5
0
def getFilesToStage( lfnList, jobState = None, checkOnlyTapeSEs = None, jobLog = None ):
  """ Utility that returns out of a list of LFNs those files that are offline,
      and those for which at least one copy is online

      :param lfnList: a single LFN (string) or a list of LFNs
      :param jobState: optional object; when given, its 'Owner' and 'OwnerGroup'
                       attributes are forwarded as proxy user name and group
      :param checkOnlyTapeSEs: forwarded unchanged to _checkFilesToStage
      :param jobLog: forwarded unchanged to _checkFilesToStage

      :return: S_OK(dict) / S_ERROR. On the normal path the dict has the keys

               * 'onlineLFNs': list of LFNs reported online
               * 'offlineLFNs': dict {SE: [LFNs to stage at that SE]}
               * 'failedLFNs': list of requested LFNs that ended up neither online,
                 offline nor absent
               * 'absentLFNs': dict {LFN: message}
               * 'onlineSites': set of sites hosting the online SEs

               The two early returns (empty input, files absent from the catalog)
               do not carry the 'onlineSites' key.
  """
  if not lfnList:
    return S_OK( {'onlineLFNs':[], 'offlineLFNs': {}, 'failedLFNs':[], 'absentLFNs':{}} )

  dm = DataManager()
  if isinstance( lfnList, basestring ):
    lfnList = [lfnList]

  lfnListReplicas = dm.getReplicasForJobs( lfnList, getUrl = False )
  if not lfnListReplicas['OK']:
    return lfnListReplicas

  offlineLFNsDict = {}
  onlineLFNs = {}
  offlineLFNs = {}
  absentLFNs = {}
  failedLFNs = set()
  # Bound here so that the final return is valid even when no replica is found at
  # any SE (previously 'onlineSites' was only assigned inside "if seToLFNs:",
  # which made the last statement raise a NameError in that case)
  onlineSites = set()
  if lfnListReplicas['Value']['Failed']:
    # Check if files are not existing
    for lfn, reason in lfnListReplicas['Value']['Failed'].iteritems():
      # FIXME: awful check until FC returns a proper error
      if cmpError( reason, errno.ENOENT ) or 'No such file' in reason:
        # The file doesn't exist, job must be Failed
        # FIXME: it is not possible to return here an S_ERROR(), return the message only
        absentLFNs[lfn] = S_ERROR( errno.ENOENT, 'File not in FC' )['Message']
    if absentLFNs:
      return S_OK({'onlineLFNs': list(onlineLFNs),
                   'offlineLFNs': offlineLFNsDict,
                   'failedLFNs': list(failedLFNs),
                   'absentLFNs': absentLFNs})
    return S_ERROR( "Failures in getting replicas" )

  lfnListReplicas = lfnListReplicas['Value']['Successful']
  # If a file is reported here at a tape SE, it is not at a disk SE as we use disk in priority
  # We shall check all file anyway in order to make sure they exist
  seToLFNs = dict()
  for lfn, ses in lfnListReplicas.iteritems():
    for se in ses:
      seToLFNs.setdefault( se, list() ).append( lfn )

  if seToLFNs:
    if jobState:
      # Get user name and group from the job state
      userName = jobState.getAttribute( 'Owner' )
      if not userName[ 'OK' ]:
        return userName
      userName = userName['Value']

      userGroup = jobState.getAttribute( 'OwnerGroup' )
      if not userGroup[ 'OK' ]:
        return userGroup
      userGroup = userGroup['Value']
    else:
      userName = None
      userGroup = None
    # Check whether files are Online or Offline, or missing at SE.
    # onlineLFNs, offlineLFNs and absentLFNs are passed empty and read back
    # below, i.e. the helper is expected to fill them in place.
    result = _checkFilesToStage( seToLFNs, onlineLFNs, offlineLFNs, absentLFNs,  # pylint: disable=unexpected-keyword-arg
                                 checkOnlyTapeSEs = checkOnlyTapeSEs, jobLog = jobLog,
                                 proxyUserName = userName,
                                 proxyUserGroup = userGroup,
                                 executionLock = True )

    if not result['OK']:
      return result
    # Whatever was requested but not classified by the check is considered failed
    failedLFNs = set( lfnList ) - set( onlineLFNs ) - set( offlineLFNs ) - set( absentLFNs )

    # Get the online SEs
    dmsHelper = DMSHelpers()
    onlineSEs = set( se for ses in onlineLFNs.values() for se in ses )
    # SEs whose site lookup fails yield None through .get( 'Value' ); drop it
    onlineSites = set( dmsHelper.getLocalSiteForSE( se ).get( 'Value' ) for se in onlineSEs ) - {None}
    for lfn in offlineLFNs:
      ses = offlineLFNs[lfn]
      if len( ses ) == 1:
        # No choice, let's go
        offlineLFNsDict.setdefault( ses[0], list() ).append( lfn )
        continue
      # Try and get an SE at a site already with online files
      found = False
      if onlineSites:
        # If there is at least one online site, select one
        for se in ses:
          site = dmsHelper.getLocalSiteForSE( se )
          if site['OK']:
            if site['Value'] in onlineSites:
              offlineLFNsDict.setdefault( se, list() ).append( lfn )
              found = True
              break
      # No online site found in common, select randomly
      if not found:
        offlineLFNsDict.setdefault( random.choice( ses ), list() ).append( lfn )

  return S_OK({'onlineLFNs': list(onlineLFNs),
               'offlineLFNs': offlineLFNsDict,
               'failedLFNs': list(failedLFNs),
               'absentLFNs': absentLFNs,
               'onlineSites': onlineSites})