def getFilesToStage(lfnList, jobState=None, checkOnlyTapeSEs=None, jobLog=None):
    """
    Utility that returns out of a list of LFNs those files that are offline,
    and those for which at least one copy is online

    :param lfnList: a single LFN (string) or a list of LFNs
    :param jobState: optional job state object; when given, its 'Owner' and 'OwnerGroup'
                     attributes are forwarded as proxy user name and group
    :param checkOnlyTapeSEs: forwarded unchanged to _checkFilesToStage
    :param jobLog: forwarded unchanged to _checkFilesToStage

    :return: S_ERROR, or S_OK with a dictionary holding:

             * 'onlineLFNs': list of LFNs reported online
             * 'offlineLFNs': dictionary {SE: [LFNs to be staged at that SE]}
             * 'failedLFNs': list of LFNs that are neither online, offline nor absent
             * 'absentLFNs': dictionary {LFN: error message}
             * 'onlineSites': set of sites hosting the SEs of the online files
    """
    if not lfnList:
        # Same keys as the nominal return, so that callers can always read 'onlineSites'
        return S_OK({
            'onlineLFNs': [],
            'offlineLFNs': {},
            'failedLFNs': [],
            'absentLFNs': {},
            'onlineSites': set()
        })

    dm = DataManager()
    if isinstance(lfnList, six.string_types):
        lfnList = [lfnList]

    lfnListReplicas = dm.getReplicasForJobs(lfnList, getUrl=False)
    if not lfnListReplicas['OK']:
        return lfnListReplicas

    offlineLFNsDict = {}
    onlineLFNs = {}
    offlineLFNs = {}
    absentLFNs = {}
    failedLFNs = set()
    if lfnListReplicas['Value']['Failed']:
        # Check if files are not existing
        for lfn, reason in lfnListReplicas['Value']['Failed'].items():
            # FIXME: awful check until FC returns a proper error
            if cmpError(reason, errno.ENOENT) or 'No such file' in reason:
                # The file doesn't exist, job must be Failed
                # FIXME: it is not possible to return here an S_ERROR(), return the message only
                absentLFNs[lfn] = S_ERROR(errno.ENOENT, 'File not in FC')['Message']
        if absentLFNs:
            # NOTE(review): failures for other reasons than a missing file are not reported here
            return S_OK({
                'onlineLFNs': list(onlineLFNs),
                'offlineLFNs': offlineLFNsDict,
                'failedLFNs': list(failedLFNs),
                'absentLFNs': absentLFNs,
                'onlineSites': set()
            })
        return S_ERROR("Failures in getting replicas")

    lfnListReplicas = lfnListReplicas['Value']['Successful']
    # If a file is reported here at a tape SE, it is not at a disk SE as we use disk in priority
    # We shall check all file anyway in order to make sure they exist
    seToLFNs = dict()
    for lfn, ses in lfnListReplicas.items():
        for se in ses:
            seToLFNs.setdefault(se, list()).append(lfn)

    if seToLFNs:
        if jobState:
            # Get user name and group from the job state
            userName = jobState.getAttribute('Owner')
            if not userName['OK']:
                return userName
            userName = userName['Value']

            userGroup = jobState.getAttribute('OwnerGroup')
            if not userGroup['OK']:
                return userGroup
            userGroup = userGroup['Value']
        else:
            userName = None
            userGroup = None
        # Check whether files are Online or Offline, or missing at SE
        # (the three dictionaries are expected to be filled in place by the call)
        result = _checkFilesToStage(seToLFNs, onlineLFNs, offlineLFNs, absentLFNs,  # pylint: disable=unexpected-keyword-arg
                                    checkOnlyTapeSEs=checkOnlyTapeSEs, jobLog=jobLog,
                                    proxyUserName=userName,
                                    proxyUserGroup=userGroup,
                                    executionLock=True)
        if not result['OK']:
            return result

    # Whatever could not be classified (including files without any replica) is failed
    failedLFNs = set(lfnList) - set(onlineLFNs) - set(offlineLFNs) - set(absentLFNs)

    # Get the online SEs
    dmsHelper = DMSHelpers()
    onlineSEs = set(se for ses in onlineLFNs.values() for se in ses)
    onlineSites = set(dmsHelper.getLocalSiteForSE(se).get('Value') for se in onlineSEs) - {None}

    # Site of each candidate SE, resolved once instead of once per offline LFN
    siteOfSE = {}
    for lfn, ses in offlineLFNs.items():
        if not ses:
            # No SE to stage from: random.choice() would raise IndexError, report the file as failed
            failedLFNs.add(lfn)
            continue
        if len(ses) == 1:
            # No choice, let's go
            offlineLFNsDict.setdefault(ses[0], list()).append(lfn)
            continue
        # Try and get an SE at a site already with online files
        found = False
        if onlineSites:
            # If there is at least one online site, select one
            for se in ses:
                if se not in siteOfSE:
                    site = dmsHelper.getLocalSiteForSE(se)
                    # None is never in onlineSites, so a failed lookup never matches
                    siteOfSE[se] = site['Value'] if site['OK'] else None
                if siteOfSE[se] in onlineSites:
                    offlineLFNsDict.setdefault(se, list()).append(lfn)
                    found = True
                    break
        # No online site found in common, select randomly
        if not found:
            offlineLFNsDict.setdefault(random.choice(ses), list()).append(lfn)

    return S_OK({
        'onlineLFNs': list(onlineLFNs),
        'offlineLFNs': offlineLFNsDict,
        'failedLFNs': list(failedLFNs),
        'absentLFNs': absentLFNs,
        'onlineSites': onlineSites
    })
def getFilesToStage(lfnList, jobState=None):
    """
    Sort a list of LFNs into online files and files that have to be staged.

    :param lfnList: list of LFNs to check
    :param jobState: optional job state object; when given, its 'Owner' and 'OwnerGroup'
                     attributes are forwarded as proxy user name and group

    :return: S_ERROR, or S_OK with a dictionary holding 'onlineLFNs' (list),
             'offlineLFNs' ({SE: [LFNs]}), 'failedLFNs' (list) and 'absentLFNs' (dictionary)
    """
    if not lfnList:
        return S_OK({'onlineLFNs': [], 'offlineLFNs': {}, 'failedLFNs': [], 'absentLFNs': {}})

    replicaQuery = DataManager().getReplicasForJobs(lfnList, getUrl=False)
    if not replicaQuery['OK']:
        return replicaQuery
    if replicaQuery['Value']['Failed']:
        return S_ERROR("Failures in getting replicas")

    # A file reported at a tape SE is not at a disk SE, as disk is used in priority.
    # All files are checked anyway in order to make sure they exist.
    lfnsPerSE = {}
    for lfn, replicaSEs in replicaQuery['Value']['Successful'].iteritems():
        for seName in replicaSEs:
            lfnsPerSE.setdefault(seName, []).append(lfn)

    online = set()
    offline = {}
    absent = {}
    if lfnsPerSE:
        # User name and group come from the job state, when there is one
        ownerName = ownerGroup = None
        if jobState:
            attribute = jobState.getAttribute('Owner')
            if not attribute['OK']:
                return attribute
            ownerName = attribute['Value']

            attribute = jobState.getAttribute('OwnerGroup')
            if not attribute['OK']:
                return attribute
            ownerGroup = attribute['Value']

        # Find out which files are Online, Offline, or missing at SE
        checked = _checkFilesToStage(lfnsPerSE, online, offline, absent,  # pylint: disable=unexpected-keyword-arg
                                     proxyUserName=ownerName,
                                     proxyUserGroup=ownerGroup,
                                     executionLock=True)
        if not checked['OK']:
            return checked

    failed = set(lfnList) - online - set(offline) - set(absent)

    # Pick randomly the SE from which each offline file will be staged
    toStagePerSE = {}
    for lfn in offline:
        candidateSEs = offline[lfn]
        if not candidateSEs:
            continue
        toStagePerSE.setdefault(random.choice(candidateSEs), []).append(lfn)

    return S_OK({
        'onlineLFNs': list(online),
        'offlineLFNs': toStagePerSE,
        'failedLFNs': list(failed),
        'absentLFNs': absent
    })
def getFilesToStage(lfnList, jobState=None):
    """
    Sort a list of LFNs into online files and files that have to be staged.

    :param lfnList: list of LFNs to check
    :param jobState: optional job state object; when given, its 'Owner' and 'OwnerGroup'
                     attributes are forwarded as proxy user name and group

    :return: S_ERROR, or S_OK with a dictionary holding 'onlineLFNs' (list)
             and 'offlineLFNs' ({SE: [LFNs]})
    """
    if not lfnList:
        return S_OK({'onlineLFNs': [], 'offlineLFNs': {}})

    replicaQuery = DataManager().getReplicasForJobs(lfnList, getUrl=False)
    if not replicaQuery['OK']:
        return replicaQuery
    if replicaQuery['Value']['Failed']:
        return S_ERROR("Failures in getting replicas")
    replicasPerLFN = replicaQuery['Value']['Successful']

    # Look for files that are only at a tape SE.
    # A file reported at a tape SE is not at a disk SE, as disk is used in priority.
    online = set()
    lfnsPerSE = {}
    for lfn, replicaSEs in replicasPerLFN.iteritems():
        for se in replicaSEs:
            status = StorageElement(se).getStatus()
            if not status['OK']:
                gLogger.error("Could not get SE status", "%s - %s" % (se, status['Message']))
                return status
            if status['Value']['DiskSE']:
                # A disk replica exists: nothing to stage for this file
                online.add(lfn)
                break
            lfnsPerSE.setdefault(se, []).append(lfn)

    toStagePerSE = {}
    if not lfnsPerSE:
        # No replica at a tape SE: nothing else to check
        return S_OK({'onlineLFNs': list(online), 'offlineLFNs': toStagePerSE})

    # Some files are on tape SEs: find out whether they are online or offline.
    # User name and group come from the job state, when there is one.
    ownerName = ownerGroup = None
    if jobState:
        attribute = jobState.getAttribute('Owner')
        if not attribute['OK']:
            return attribute
        ownerName = attribute['Value']

        attribute = jobState.getAttribute('OwnerGroup')
        if not attribute['OK']:
            return attribute
        ownerGroup = attribute['Value']

    checked = _checkFilesToStage(lfnsPerSE, online,  # pylint: disable=unexpected-keyword-arg
                                 proxyUserName=ownerName,
                                 proxyUserGroup=ownerGroup,
                                 executionLock=True)
    if not checked['OK']:
        return checked

    # Everything that is not online has to be staged, from a randomly chosen SE
    for lfn in set(lfnList) - online:
        candidateSEs = replicasPerLFN[lfn].keys()
        if not candidateSEs:
            continue
        toStagePerSE.setdefault(random.choice(candidateSEs), []).append(lfn)

    return S_OK({'onlineLFNs': list(online), 'offlineLFNs': toStagePerSE})
class fakeClient:
    """
    Stand-in for the transformation client, used to run a plugin on a fake transformation.

    Requests about the fake transformation (``transID``) are answered from the in-memory
    file list built by :meth:`prepareForPlugin`. Requests about any other transformation
    are forwarded to a real TransformationClient. When ``asIfProd`` is set, requests
    about the fake transformation are redirected to that transformation ID.
    """

    def __init__(self, trans, transID, lfns, asIfProd):
        """
        :param trans: transformation object; getType() and getBkQuery() are called on it
        :param transID: ID of the fake transformation
        :param lfns: list of LFNs used as input files of the fake transformation
        :param asIfProd: ID of a transformation to impersonate, or a false value
        """
        self.trans = trans
        self.transID = transID
        from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
        self.transClient = TransformationClient()
        from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
        self.bk = BookkeepingClient()
        from DIRAC.DataManagementSystem.Client.DataManager import DataManager
        self.dm = DataManager()
        self.asIfProd = asIfProd

        (self.transFiles, self.transReplicas) = self.prepareForPlugin(lfns)

    def _fakeFiles(self):
        """ Return the in-memory file list, or an empty list when none was prepared """
        return self.transFiles or []

    def addFilesToTransformation(self, transID, lfns):
        """ Pretend that all files were added: nothing is stored """
        return S_OK({'Failed': {}, 'Successful': {lfn: 'Added' for lfn in lfns}})

    def getTransformation(self, transID, extraParams=False):
        """ Return only the type for the fake transformation, forward the request otherwise """
        if transID == self.transID and self.asIfProd:
            transID = self.asIfProd
        if transID != self.transID:
            return self.transClient.getTransformation(transID)
        res = self.trans.getType()
        return DIRAC.S_OK({'Type': res['Value']})

    def getReplicas(self):
        """ Return the replicas dictionary built by prepareForPlugin """
        return self.transReplicas

    def getFiles(self):
        """ Return the file list built by prepareForPlugin """
        return self.transFiles

    def getCounters(self, table, attrList, condDict):
        """ Return zero 'UsedSE' counters for the fake transformation, forward the request otherwise """
        if condDict['TransformationID'] == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict['TransformationID'] != self.transID:
            return self.transClient.getCounters(table, attrList, condDict)
        possibleTargets = ['CERN-RAW', 'CNAF-RAW', 'GRIDKA-RAW', 'IN2P3-RAW',
                           'SARA-RAW', 'PIC-RAW', 'RAL-RAW', 'RRCKI-RAW']
        counters = [({'UsedSE': se}, 0) for se in possibleTargets]
        return DIRAC.S_OK(counters)

    def getBookkeepingQuery(self, transID):
        """ Return the BK query of the impersonated transformation if any, else the one of the fake one """
        if transID == self.transID and self.asIfProd:
            # The original used the bare name asIfProd here, which raised a NameError
            return self.transClient.getBookkeepingQuery(self.asIfProd)
        return self.trans.getBkQuery()

    def insertTransformationRun(self, transID, runID, xx):
        """ Nothing is stored """
        return DIRAC.S_OK()

    def getTransformationRuns(self, condDict):
        """
        Return one "Active" run entry per run of the fake transformation,
        forward the request for any other transformation.

        When condDict has no 'RunNumber', the runs are taken from the BK metadata of the files.
        """
        if condDict['TransformationID'] == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict['TransformationID'] != self.transID:
            return self.transClient.getTransformationRuns(condDict)

        runs = condDict.get('RunNumber', [])
        if not runs and self.transFiles:
            res = self.bk.getFileMetadata([fileDict['LFN'] for fileDict in self.transFiles])
            if not res['OK']:
                return res
            runs = list(set(meta['RunNumber'] for meta in res['Value']['Successful'].values()))
        transRuns = [{'RunNumber': run, 'Status': "Active", "SelectedSite": None} for run in runs]
        return DIRAC.S_OK(transRuns)

    def getTransformationFiles(self, condDict=None):
        """
        Return the files of the fake transformation, all in status 'Unused',
        forward the request for any other transformation.

        An optional 'RunNumber' (single value or list) in condDict restricts the selection.
        """
        if condDict is None:
            # Nothing can match the fake transformation: forward the request untouched
            return self.transClient.getTransformationFiles(condDict=condDict)
        if condDict.get('TransformationID') == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict.get('TransformationID') != self.transID:
            return self.transClient.getTransformationFiles(condDict=condDict)

        transFiles = []
        # All fake files are 'Unused': any other requested status selects nothing
        if 'Status' in condDict and 'Unused' not in condDict['Status']:
            return DIRAC.S_OK(transFiles)
        runs = None
        if 'RunNumber' in condDict:
            runs = condDict['RunNumber']
            if not isinstance(runs, list):
                runs = [runs]
        for fileDict in self._fakeFiles():
            if not runs or fileDict['RunNumber'] in runs:
                transFiles.append({'LFN': fileDict['LFN'],
                                   'Status': 'Unused',
                                   'RunNumber': fileDict['RunNumber']})
        return DIRAC.S_OK(transFiles)

    def setParameterToTransformationFiles(self, transID, lfnDict):
        """
        Update the transFiles with some parameters
        """
        if transID != self.transID:
            return self.transClient.setParameterToTransformationFiles(transID, lfnDict)
        for fileDict in self._fakeFiles():
            fileDict.update(lfnDict.get(fileDict['LFN'], {}))
        return S_OK()

    def getTransformationFilesCount(self, transID, field, selection=None):
        """
        Count the files of the fake transformation per 'Status' or per 'RunNumber',
        forward the request for any other transformation.

        The returned counters also hold a 'Total' entry. Other fields return an S_ERROR.
        """
        if selection is None:
            selection = {}
        if transID != self.transID and selection.get('TransformationID') != self.transID:
            return self.transClient.getTransformationFilesCount(transID, field, selection=selection)

        runs = selection.get('RunNumber')
        if runs and not isinstance(runs, list):
            runs = [runs]
        if field == 'Status':
            counters = {'Unused': 0}
            for fileDict in self._fakeFiles():
                if not runs or fileDict['RunNumber'] in runs:
                    counters['Unused'] += 1
        elif field == 'RunNumber':
            counters = {}
            for fileDict in self._fakeFiles():
                runID = fileDict['RunNumber']
                if not runs or runID in runs:
                    counters[runID] = counters.get(runID, 0) + 1
        else:
            return DIRAC.S_ERROR('Not implemented for field ' + field)
        counters['Total'] = sum(counters.values())
        return DIRAC.S_OK(counters)

    def getTransformationRunStats(self, transIDs):
        """
        Return {transID: {runID: {'Unused': n, 'Total': n}}}, computed from the fake files
        for the fake transformation and obtained from the real client for the others
        """
        counters = {}
        for transID in transIDs:
            if transID == self.transID:
                # Create the entry first, so that a fake transformation without files
                # gives an empty dictionary rather than a KeyError
                runCounters = counters.setdefault(transID, {})
                for fileDict in self._fakeFiles():
                    runStats = runCounters.setdefault(fileDict['RunNumber'], {})
                    runStats['Unused'] = runStats.get('Unused', 0) + 1
                for runStats in runCounters.values():
                    runStats['Total'] = runStats['Unused']
            else:
                # NOTE(review): the full transIDs list is forwarded for each foreign ID - confirm this is intended
                res = self.transClient.getTransformationRunStats(transIDs)
                if not res['OK']:
                    return res
                counters.update(res['Value'])
        return DIRAC.S_OK(counters)

    def addRunsMetadata(self, runID, val):
        """ Forwarded to the real client """
        return self.transClient.addRunsMetadata(runID, val)

    def getRunsMetadata(self, runID):
        """ Forwarded to the real client """
        return self.transClient.getRunsMetadata(runID)

    def setTransformationRunStatus(self, transID, runID, status):
        """ Nothing is stored """
        return DIRAC.S_OK()

    def setTransformationRunsSite(self, transID, runID, site):
        """ Nothing is stored """
        return DIRAC.S_OK()

    def setFileStatusForTransformation(self, transID, status, lfns):
        """ Nothing is stored """
        return DIRAC.S_OK()

    def addTransformationRunFiles(self, transID, run, lfns):
        """ Nothing is stored """
        return DIRAC.S_OK()

    def setDestinationForRun(self, runID, site):
        """ Nothing is stored """
        return DIRAC.S_OK()

    def getDestinationForRun(self, runID):
        """ Forwarded to the real client """
        return self.transClient.getDestinationForRun(runID)

    def prepareForPlugin(self, lfns):
        """
        Build the input of the plugin from a list of LFNs

        :param lfns: list of LFNs
        :return: tuple (files, replicas) where files is a list of {'RunNumber': .., 'LFN': ..}
                 built from the BK metadata and replicas is {LFN: sorted list of SEs};
                 (None, None) if lfns is empty, ([], {}) if the BK metadata cannot be obtained
        """
        import time
        # Single-argument print calls give the same output on Python 2 and Python 3
        print("Preparing the plugin input data (%d files)" % len(lfns))
        transType = self.trans.getType()['Value']
        if not lfns:
            return (None, None)
        res = self.bk.getFileMetadata(lfns)
        if not res['OK']:
            print("Error getting BK metadata %s" % res['Message'])
            return ([], {})
        files = [{"RunNumber": metadata.get('RunNumber', 0), "LFN": lfn}
                 for lfn, metadata in res['Value']['Successful'].items()]

        replicas = {}
        startTime = time.time()
        from DIRAC.Core.Utilities.List import breakListIntoChunks
        # getReplicas() is used for these two types, getReplicasForJobs() for all others
        useAllReplicas = transType.lower() in ("replication", "removal")
        for lfnChunk in breakListIntoChunks(lfns, 200):
            if useAllReplicas:
                res = self.dm.getReplicas(lfnChunk, getUrl=False)
            else:
                res = self.dm.getReplicasForJobs(lfnChunk, getUrl=False)
            if res['OK']:
                for lfn, ses in res['Value']['Successful'].items():
                    if ses:
                        replicas[lfn] = sorted(ses)
            else:
                # Only the current chunk failed: report its size, not the size of the full list
                print("Error getting replicas of %d files: %s" % (len(lfnChunk), res['Message']))
        print("Obtained replicas of %d files in %.3f seconds" % (len(lfns), time.time() - startTime))
        return (files, replicas)
def getFilesToStage(lfnList, jobState=None, checkOnlyTapeSEs=None, jobLog=None):
    """
    Utility that returns out of a list of LFNs those files that are offline,
    and those for which at least one copy is online

    :param lfnList: a single LFN (string) or a list of LFNs
    :param jobState: optional job state object; when given, its 'Owner' and 'OwnerGroup'
                     attributes are forwarded as proxy user name and group
    :param checkOnlyTapeSEs: forwarded unchanged to _checkFilesToStage
    :param jobLog: forwarded unchanged to _checkFilesToStage

    :return: S_ERROR, or S_OK with a dictionary holding:

             * 'onlineLFNs': list of LFNs reported online
             * 'offlineLFNs': dictionary {SE: [LFNs to be staged at that SE]}
             * 'failedLFNs': list of LFNs that are neither online, offline nor absent
             * 'absentLFNs': dictionary {LFN: error message}
             * 'onlineSites': set of sites hosting the SEs of the online files
    """
    if not lfnList:
        # Same keys as the nominal return, so that callers can always read 'onlineSites'
        return S_OK({'onlineLFNs': [],
                     'offlineLFNs': {},
                     'failedLFNs': [],
                     'absentLFNs': {},
                     'onlineSites': set()})

    dm = DataManager()
    if isinstance(lfnList, basestring):
        lfnList = [lfnList]

    lfnListReplicas = dm.getReplicasForJobs(lfnList, getUrl=False)
    if not lfnListReplicas['OK']:
        return lfnListReplicas

    offlineLFNsDict = {}
    onlineLFNs = {}
    offlineLFNs = {}
    absentLFNs = {}
    failedLFNs = set()
    if lfnListReplicas['Value']['Failed']:
        # Check if files are not existing
        for lfn, reason in lfnListReplicas['Value']['Failed'].items():
            # FIXME: awful check until FC returns a proper error
            if cmpError(reason, errno.ENOENT) or 'No such file' in reason:
                # The file doesn't exist, job must be Failed
                # FIXME: it is not possible to return here an S_ERROR(), return the message only
                absentLFNs[lfn] = S_ERROR(errno.ENOENT, 'File not in FC')['Message']
        if absentLFNs:
            # NOTE(review): failures for other reasons than a missing file are not reported here
            return S_OK({'onlineLFNs': list(onlineLFNs),
                         'offlineLFNs': offlineLFNsDict,
                         'failedLFNs': list(failedLFNs),
                         'absentLFNs': absentLFNs,
                         'onlineSites': set()})
        return S_ERROR("Failures in getting replicas")

    lfnListReplicas = lfnListReplicas['Value']['Successful']
    # If a file is reported here at a tape SE, it is not at a disk SE as we use disk in priority
    # We shall check all file anyway in order to make sure they exist
    seToLFNs = dict()
    for lfn, ses in lfnListReplicas.items():
        for se in ses:
            seToLFNs.setdefault(se, list()).append(lfn)

    if seToLFNs:
        if jobState:
            # Get user name and group from the job state
            userName = jobState.getAttribute('Owner')
            if not userName['OK']:
                return userName
            userName = userName['Value']

            userGroup = jobState.getAttribute('OwnerGroup')
            if not userGroup['OK']:
                return userGroup
            userGroup = userGroup['Value']
        else:
            userName = None
            userGroup = None
        # Check whether files are Online or Offline, or missing at SE
        # (the three dictionaries are expected to be filled in place by the call)
        result = _checkFilesToStage(seToLFNs, onlineLFNs, offlineLFNs, absentLFNs,  # pylint: disable=unexpected-keyword-arg
                                    checkOnlyTapeSEs=checkOnlyTapeSEs, jobLog=jobLog,
                                    proxyUserName=userName,
                                    proxyUserGroup=userGroup,
                                    executionLock=True)
        if not result['OK']:
            return result

    # Whatever could not be classified (including files without any replica) is failed
    failedLFNs = set(lfnList) - set(onlineLFNs) - set(offlineLFNs) - set(absentLFNs)

    # Get the online SEs
    dmsHelper = DMSHelpers()
    onlineSEs = set(se for ses in onlineLFNs.values() for se in ses)
    onlineSites = set(dmsHelper.getLocalSiteForSE(se).get('Value') for se in onlineSEs) - {None}

    # Site of each candidate SE, resolved once instead of once per offline LFN
    siteOfSE = {}
    for lfn, ses in offlineLFNs.items():
        if not ses:
            # No SE to stage from: random.choice() would raise IndexError, report the file as failed
            failedLFNs.add(lfn)
            continue
        if len(ses) == 1:
            # No choice, let's go
            offlineLFNsDict.setdefault(ses[0], list()).append(lfn)
            continue
        # Try and get an SE at a site already with online files
        found = False
        if onlineSites:
            # If there is at least one online site, select one
            for se in ses:
                if se not in siteOfSE:
                    site = dmsHelper.getLocalSiteForSE(se)
                    # None is never in onlineSites, so a failed lookup never matches
                    siteOfSE[se] = site['Value'] if site['OK'] else None
                if siteOfSE[se] in onlineSites:
                    offlineLFNsDict.setdefault(se, list()).append(lfn)
                    found = True
                    break
        # No online site found in common, select randomly
        if not found:
            offlineLFNsDict.setdefault(random.choice(ses), list()).append(lfn)

    return S_OK({'onlineLFNs': list(onlineLFNs),
                 'offlineLFNs': offlineLFNsDict,
                 'failedLFNs': list(failedLFNs),
                 'absentLFNs': absentLFNs,
                 'onlineSites': onlineSites})