def isSameSiteSE(se1, se2):
    """ Tell whether two storage elements are located at the same site.

    Resolves each SE to its local site via DMSHelpers; the result is truthy
    only when both SEs resolve to a site and the two sites match.
    """
    helper = DMSHelpers()
    siteOfFirst = helper.getLocalSiteForSE(se1).get('Value')
    siteOfSecond = helper.getLocalSiteForSE(se2).get('Value')
    return siteOfFirst and siteOfSecond and siteOfFirst == siteOfSecond
def isSameSiteSE(se1, se2):
    """ Check if the 2 SEs are at the same site.

    :param se1: name of the first StorageElement
    :param se2: name of the second StorageElement
    :returns: truthy if both SEs resolve to the same (known) site, falsy otherwise
    """
    dmsHelper = DMSHelpers()
    site1 = dmsHelper.getLocalSiteForSE(se1).get('Value')
    site2 = dmsHelper.getLocalSiteForSE(se2).get('Value')
    return site1 and site2 and site1 == site2
class PluginUtilities(object):
    """ Utility class used by transformation plugins.

    Provides task-grouping helpers (by replica location or by size), cached
    file-size lookups, CS parameter resolution and SE/site proximity helpers.
    """

    def __init__(self, plugin='Standard', transClient=None, dataManager=None, fc=None,
                 debug=False, transInThread=None, transID=None):
        """ c'tor

        Setting defaults; each client argument defaults to a freshly
        constructed client when not supplied.
        """
        # clients
        if transClient is None:
            self.transClient = TransformationClient()
        else:
            self.transClient = transClient
        if dataManager is None:
            self.dm = DataManager()
        else:
            self.dm = dataManager
        if fc is None:
            self.fc = FileCatalog()
        else:
            self.fc = fc
        self.dmsHelper = DMSHelpers()
        self.plugin = plugin
        self.transID = transID
        self.params = {}
        self.groupSize = 0       # task grouping threshold (files or bytes, plugin-dependent)
        self.maxFiles = 0        # max number of files per task
        self.cachedLFNSize = {}  # lfn -> size cache filled by _getFileSizeFromCatalog
        self.transString = ''    # per-transformation prefix for log messages
        self.debug = debug
        if transInThread is None:
            self.transInThread = {}
        else:
            self.transInThread = transInThread
        self.log = gLogger.getSubLogger(plugin)

    def logVerbose(self, message, param=''):
        """ logger helper: promote verbose messages to info when debugging """
        if self.debug:
            self.log.info('(V)' + self.transString + message, param)
        else:
            self.log.verbose(self.transString + message, param)

    def logDebug(self, message, param=''):
        """ logger helper """
        self.log.debug(self.transString + message, param)

    def logInfo(self, message, param=''):
        """ logger helper """
        self.log.info(self.transString + message, param)

    def logWarn(self, message, param=''):
        """ logger helper """
        self.log.warn(self.transString + message, param)

    def logError(self, message, param=''):
        """ logger helper """
        self.log.error(self.transString + message, param)

    def logException(self, message, param='', lException=False):
        """ logger helper """
        self.log.exception(self.transString + message, param, lException)

    def setParameters(self, params):
        """ Set the transformation parameters and extract transID """
        self.params = params
        self.transID = params['TransformationID']
        self.transString = self.transInThread.get(self.transID, ' [NoThread] [%d] ' % self.transID)

    # @timeThis
    def groupByReplicas(self, files, status):
        """ Generates tasks based on the location of the input data.

        First groups files by their exact set of SEs, then (second pass) site
        by site, where a file may appear at more than one site.

        :param dict fileReplicas: {'/this/is/at.1': ['SE1'],
                                   '/this/is/at.12': ['SE1', 'SE2'],
                                   '/this/is/at.2': ['SE2'],
                                   '/this/is/at_123': ['SE1', 'SE2', 'SE3'],
                                   '/this/is/at_23': ['SE2', 'SE3'],
                                   '/this/is/at_4': ['SE4']}
        """
        tasks = []
        nTasks = 0
        if not files:
            return S_OK(tasks)
        files = dict(files)
        # Parameters
        if not self.groupSize:
            self.groupSize = self.getPluginParam('GroupSize', 10)
        flush = (status == 'Flush')
        self.logVerbose(
            "groupByReplicas: %d files, groupSize %d, flush %s" %
            (len(files), self.groupSize, flush))
        # Consider files by groups of SEs, a file is only in one group
        # Then consider files site by site, but a file can now be at more than one site
        for groupSE in (True, False):
            if not files:
                break
            seFiles = getFileGroups(files, groupSE=groupSE)
            self.logDebug("fileGroups set: ", seFiles)
            for replicaSE in sortSEs(seFiles):
                lfns = seFiles[replicaSE]
                if lfns:
                    tasksLfns = breakListIntoChunks(lfns, self.groupSize)
                    lfnsInTasks = []
                    for taskLfns in tasksLfns:
                        # only a full chunk becomes a task, unless we are flushing
                        if flush or (len(taskLfns) >= self.groupSize):
                            tasks.append((replicaSE, taskLfns))
                            lfnsInTasks += taskLfns
                    # In case the file was at more than one site, remove it from the other sites' list
                    # Remove files from global list
                    for lfn in lfnsInTasks:
                        files.pop(lfn)
                    if not groupSE:
                        # Remove files from other SEs
                        for se in [se for se in seFiles if se != replicaSE]:
                            seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in lfnsInTasks]
            self.logVerbose(
                "groupByReplicas: %d tasks created (groupSE %s)" % (len(tasks) - nTasks, str(groupSE)),
                "%d files not included in tasks" % len(files))
            nTasks = len(tasks)
        return S_OK(tasks)

    def createTasksBySize(self, lfns, replicaSE, fileSizes=None, flush=False):
        """ Split files in groups according to the size and create tasks for a given SE.

        :param lfns: LFNs to group
        :param replicaSE: SE (or SE group string) the tasks are created for
        :param fileSizes: optional lfn -> size mapping; fetched from the catalog if None
        :param flush: if True, also emit the last, incomplete group
        :returns: list of (replicaSE, lfnList) tuples (NOT an S_OK structure)
        """
        tasks = []
        if fileSizes is None:
            fileSizes = self._getFileSize(lfns).get('Value')
        if fileSizes is None:
            self.logWarn('Error getting file sizes, no tasks created')
            return tasks
        taskLfns = []
        taskSize = 0
        if not self.groupSize:
            # input size in GB converted to bytes
            self.groupSize = float(self.getPluginParam('GroupSize', 1.)) * 1000 * 1000 * 1000
        if not self.maxFiles:
            # FIXME: prepare for changing the name of the ambiguous CS option
            self.maxFiles = self.getPluginParam('MaxFilesPerTask', self.getPluginParam('MaxFiles', 100))
        # smallest files first, so large files don't block small ones from grouping
        lfns = sorted(lfns, key=fileSizes.get)
        for lfn in lfns:
            size = fileSizes.get(lfn, 0)
            if size:
                if size > self.groupSize:
                    # a single file already exceeds the group size: its own task
                    tasks.append((replicaSE, [lfn]))
                else:
                    taskSize += size
                    taskLfns.append(lfn)
                    if (taskSize > self.groupSize) or (len(taskLfns) >= self.maxFiles):
                        tasks.append((replicaSE, taskLfns))
                        taskLfns = []
                        taskSize = 0
        if flush and taskLfns:
            tasks.append((replicaSE, taskLfns))
        if not tasks and not flush and taskLfns:
            self.logVerbose(
                'Not enough data to create a task, and flush not set (%d bytes for groupSize %d)' %
                (taskSize, self.groupSize))
        return tasks

    # @timeThis
    def groupBySize(self, files, status):
        """ Generate a task for a given amount of data """
        tasks = []
        nTasks = 0
        if not len(files):
            return S_OK(tasks)
        files = dict(files)
        # Parameters
        if not self.groupSize:
            # input size in GB converted to bytes
            self.groupSize = float(self.getPluginParam('GroupSize', 1)) * 1000 * 1000 * 1000
        flush = (status == 'Flush')
        self.logVerbose(
            "groupBySize: %d files, groupSize: %d, flush: %s" %
            (len(files), self.groupSize, flush))
        # Get the file sizes
        res = self._getFileSize(files.keys())
        if not res['OK']:
            return res
        fileSizes = res['Value']
        for groupSE in (True, False):
            if not files:
                break
            seFiles = getFileGroups(files, groupSE=groupSE)
            for replicaSE in sorted(seFiles) if groupSE else sortSEs(seFiles):
                lfns = seFiles[replicaSE]
                newTasks = self.createTasksBySize(lfns, replicaSE, fileSizes=fileSizes, flush=flush)
                lfnsInTasks = []
                for task in newTasks:
                    lfnsInTasks += task[1]
                tasks += newTasks
                # Remove the selected files from the size cache
                self.clearCachedFileSize(lfnsInTasks)
                if not groupSE:
                    # Remove files from other SEs
                    for se in [se for se in seFiles if se != replicaSE]:
                        seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in lfnsInTasks]
                # Remove files from global list
                for lfn in lfnsInTasks:
                    files.pop(lfn)
            self.logVerbose(
                "groupBySize: %d tasks created with groupSE %s" % (len(tasks) - nTasks, str(groupSE)))
            self.logVerbose("groupBySize: %d files have not been included in tasks" % len(files))
            nTasks = len(tasks)
        # NOTE(review): at this point 'files' only holds the LEFTOVER files, so
        # this message reports the non-grouped count, not the grouped one
        self.logVerbose("Grouped %d files by size" % len(files))
        return S_OK(tasks)

    def getExistingCounters(self, normalise=False, requestedSites=None):
        """ Get the number of transformation files already assigned per UsedSE,
        optionally aggregated per site and/or normalised to percentages.

        :param normalise: if True, return percentage shares instead of raw counts
        :param requestedSites: optional list of sites to restrict the result to
        """
        # default changed from a mutable [] to None; an empty/absent list behaves identically
        res = self.transClient.getCounters('TransformationFiles', ['UsedSE'],
                                           {'TransformationID': self.params['TransformationID']})
        if not res['OK']:
            return res
        usageDict = {}
        for usedDict, count in res['Value']:
            usedSE = usedDict['UsedSE']
            if usedSE != 'Unknown':
                usageDict[usedSE] = count
        if requestedSites:
            siteDict = {}
            for se, count in usageDict.items():
                res = getSitesForSE(se)
                if not res['OK']:
                    return res
                for site in res['Value']:
                    if site in requestedSites:
                        siteDict[site] = count
            usageDict = siteDict.copy()
        if normalise:
            usageDict = self._normaliseShares(usageDict)
        return S_OK(usageDict)

    # @timeThis
    def _getFileSize(self, lfns):
        """ Get file size from a cache, if not from the catalog
        #FIXME: have to fill the cachedLFNSize!
        """
        lfns = list(lfns)
        cachedLFNSize = dict(self.cachedLFNSize)
        fileSizes = {}
        for lfn in [lfn for lfn in lfns if lfn in cachedLFNSize]:
            fileSizes[lfn] = cachedLFNSize[lfn]
        self.logDebug(
            "Found cache hit for File size for %d files out of %d" % (len(fileSizes), len(lfns)))
        lfns = [lfn for lfn in lfns if lfn not in cachedLFNSize]
        if lfns:
            fileSizes = self._getFileSizeFromCatalog(lfns, fileSizes)
            if not fileSizes['OK']:
                self.logError(fileSizes['Message'])
                return fileSizes
            fileSizes = fileSizes['Value']
        return S_OK(fileSizes)

    # @timeThis
    def _getFileSizeFromCatalog(self, lfns, fileSizes):
        """ Get file size from the catalog, merging into (a copy of) fileSizes
        and feeding the instance-level cache """
        lfns = list(lfns)
        fileSizes = dict(fileSizes)
        res = self.fc.getFileSize(lfns)
        if not res['OK']:
            return S_ERROR("Failed to get sizes for all files: %s" % res['Message'])
        if res['Value']['Failed']:
            errorReason = sorted(set(res['Value']['Failed'].values()))
            self.logWarn("Failed to get sizes for %d files:" % len(res['Value']['Failed']), errorReason)
        fileSizes.update(res['Value']['Successful'])
        self.cachedLFNSize.update(res['Value']['Successful'])
        self.logVerbose("Got size of %d files from catalog" % len(lfns))
        return S_OK(fileSizes)

    def clearCachedFileSize(self, lfns):
        """ Utility function: drop the given LFNs from the size cache """
        for lfn in [lfn for lfn in lfns if lfn in self.cachedLFNSize]:
            self.cachedLFNSize.pop(lfn)

    def getPluginParam(self, name, default=None):
        """ Get plugin parameters using specific settings or settings defined in the CS.

        Lookup order: generic CS value, plugin-specific CS value, then a
        transformation-specific parameter.
        Caution: the type returned is that of the default value
        """
        # get the value of a parameter looking 1st in the CS
        if default is not None:
            valueType = type(default)
        else:
            valueType = None
        # First look at a generic value...
        optionPath = "TransformationPlugins/%s" % (name)
        value = Operations().getValue(optionPath, None)
        self.logVerbose("Default plugin param %s: '%s'" % (optionPath, value))
        # Then look at a plugin-specific value
        optionPath = "TransformationPlugins/%s/%s" % (self.plugin, name)
        value = Operations().getValue(optionPath, value)
        self.logVerbose("Specific plugin param %s: '%s'" % (optionPath, value))
        if value is not None:
            default = value
        # Finally look at a transformation-specific parameter
        value = self.params.get(name, default)
        self.logVerbose(
            "Transformation plugin param %s: '%s'. Convert to %s" % (name, value, str(valueType)))
        if valueType and not isinstance(value, valueType):
            if valueType is list:
                try:
                    value = ast.literal_eval(value) if value and value != 'None' else []
                # literal_eval('SE-DST') -> ValueError
                # literal_eval('SE_MC-DST') -> SyntaxError
                # Don't ask...
                except (ValueError, SyntaxError):
                    value = [val for val in value.replace(' ', '').split(',') if val]
            elif valueType is int:
                value = int(value)
            elif valueType is float:
                value = float(value)
            elif valueType is bool:
                if value in ('False', 'No', 'None', None, 0):
                    value = False
                else:
                    value = bool(value)
            elif valueType is not str:
                self.logWarn(
                    "Unknown parameter type (%s) for %s, passed as string" % (str(valueType), name))
        self.logVerbose("Final plugin param %s: '%s'" % (name, value))
        return value

    @staticmethod
    def _normaliseShares(originalShares):
        """ Normalise shares so that they sum up to 100 (percentages);
        all shares are 0 when the total is 0 """
        total = sum(float(share) for share in originalShares.values())
        return dict([(site, 100. * float(share) / total if total else 0.)
                     for site, share in originalShares.items()])

    def uniqueSEs(self, ses):
        """ return a list of SEs that are not physically the same """
        newSEs = []
        for se in ses:
            if not self.isSameSEInList(se, newSEs):
                newSEs.append(se)
        return newSEs

    def isSameSE(self, se1, se2):
        """ Check if 2 SEs are indeed the same.

        :param se1: name of the first StorageElement
        :param se2: name of the second StorageElement
        :returns: True/False if they are considered the same.
            See :py:mod:`~DIRAC.Resources.Storage.StorageElement.StorageElementItem.isSameSE`
        """
        if se1 == se2:
            return True
        return StorageElement(se1).isSameSE(StorageElement(se2))

    def isSameSEInList(self, se1, seList):
        """ Check if an SE is the same as any in a list """
        if se1 in seList:
            return True
        for se in seList:
            if self.isSameSE(se1, se):
                return True
        return False

    def closerSEs(self, existingSEs, targetSEs, local=False):
        """ Order the targetSEs such that the first ones are closer to existingSEs.
        Keep all elements in targetSEs (unless local is True, in which case the
        SEs physically identical to an existing one are dropped). """
        setTarget = set(targetSEs)
        sameSEs = set([se1 for se1 in setTarget for se2 in existingSEs if self.isSameSE(se1, se2)])
        targetSEs = setTarget - set(sameSEs)
        if targetSEs:
            # Some SEs are left, look for sites
            existingSites = [self.dmsHelper.getLocalSiteForSE(se).get('Value') for se in existingSEs]
            existingSites = set([site for site in existingSites if site])
            closeSEs = set([se for se in targetSEs
                            if self.dmsHelper.getLocalSiteForSE(se).get('Value') in existingSites])
            otherSEs = targetSEs - closeSEs
            # shuffle within each proximity class so the load is spread
            targetSEs = list(closeSEs)
            random.shuffle(targetSEs)
            if not local and otherSEs:
                otherSEs = list(otherSEs)
                random.shuffle(otherSEs)
                targetSEs += otherSEs
        else:
            targetSEs = []
        return (targetSEs + list(sameSEs)) if not local else targetSEs
def getFilesToStage(lfnList, jobState=None, checkOnlyTapeSEs=None, jobLog=None):
    """ Utility that returns out of a list of LFNs those files that are offline,
    and those for which at least one copy is online.

    :param lfnList: LFN or list of LFNs to check
    :param jobState: optional job state object used to get Owner/OwnerGroup for proxied checks
    :param checkOnlyTapeSEs: passed through to _checkFilesToStage
    :param jobLog: optional logger passed through to _checkFilesToStage
    :returns: S_OK with a dict with keys onlineLFNs, offlineLFNs, failedLFNs,
        absentLFNs (and onlineSites when replicas were resolved)
    """
    if not lfnList:
        return S_OK({'onlineLFNs': [], 'offlineLFNs': {}, 'failedLFNs': [], 'absentLFNs': {}})
    dm = DataManager()
    if isinstance(lfnList, six.string_types):
        lfnList = [lfnList]
    lfnListReplicas = dm.getReplicasForJobs(lfnList, getUrl=False)
    if not lfnListReplicas['OK']:
        return lfnListReplicas
    offlineLFNsDict = {}
    onlineLFNs = {}
    offlineLFNs = {}
    absentLFNs = {}
    failedLFNs = set()
    # BUGFIX: initialise here; previously 'onlineSites' was only assigned inside
    # the 'if seToLFNs:' branch, making the final return a NameError when no
    # successful replicas were found
    onlineSites = set()
    if lfnListReplicas['Value']['Failed']:
        # Check if files are not existing
        for lfn, reason in lfnListReplicas['Value']['Failed'].items():
            # FIXME: awful check until FC returns a proper error
            if cmpError(reason, errno.ENOENT) or 'No such file' in reason:
                # The file doesn't exist, job must be Failed
                # FIXME: it is not possible to return here an S_ERROR(), return the message only
                absentLFNs[lfn] = S_ERROR(errno.ENOENT, 'File not in FC')['Message']
        if absentLFNs:
            return S_OK({'onlineLFNs': list(onlineLFNs),
                         'offlineLFNs': offlineLFNsDict,
                         'failedLFNs': list(failedLFNs),
                         'absentLFNs': absentLFNs})
        return S_ERROR("Failures in getting replicas")
    lfnListReplicas = lfnListReplicas['Value']['Successful']
    # If a file is reported here at a tape SE, it is not at a disk SE as we use disk in priority
    # We shall check all file anyway in order to make sure they exist
    seToLFNs = dict()
    for lfn, ses in lfnListReplicas.items():
        for se in ses:
            seToLFNs.setdefault(se, list()).append(lfn)
    if seToLFNs:
        if jobState:
            # Get user name and group from the job state
            userName = jobState.getAttribute('Owner')
            if not userName['OK']:
                return userName
            userName = userName['Value']
            userGroup = jobState.getAttribute('OwnerGroup')
            if not userGroup['OK']:
                return userGroup
            userGroup = userGroup['Value']
        else:
            userName = None
            userGroup = None
        # Check whether files are Online or Offline, or missing at SE
        result = _checkFilesToStage(seToLFNs, onlineLFNs, offlineLFNs, absentLFNs,  # pylint: disable=unexpected-keyword-arg
                                    checkOnlyTapeSEs=checkOnlyTapeSEs, jobLog=jobLog,
                                    proxyUserName=userName,
                                    proxyUserGroup=userGroup,
                                    executionLock=True)
        if not result['OK']:
            return result
        failedLFNs = set(lfnList) - set(onlineLFNs) - set(offlineLFNs) - set(absentLFNs)
        # Get the online SEs
        dmsHelper = DMSHelpers()
        onlineSEs = set(se for ses in onlineLFNs.values() for se in ses)
        onlineSites = set(
            dmsHelper.getLocalSiteForSE(se).get('Value') for se in onlineSEs) - {None}
        for lfn in offlineLFNs:
            ses = offlineLFNs[lfn]
            if len(ses) == 1:
                # No choice, let's go
                offlineLFNsDict.setdefault(ses[0], list()).append(lfn)
                continue
            # Try and get an SE at a site already with online files
            found = False
            if onlineSites:
                # If there is at least one online site, select one
                for se in ses:
                    site = dmsHelper.getLocalSiteForSE(se)
                    if site['OK']:
                        if site['Value'] in onlineSites:
                            offlineLFNsDict.setdefault(se, list()).append(lfn)
                            found = True
                            break
            # No online site found in common, select randomly
            if not found:
                offlineLFNsDict.setdefault(random.choice(ses), list()).append(lfn)
    return S_OK({'onlineLFNs': list(onlineLFNs),
                 'offlineLFNs': offlineLFNsDict,
                 'failedLFNs': list(failedLFNs),
                 'absentLFNs': absentLFNs,
                 'onlineSites': onlineSites})
class PluginUtilities(object):
    """ Utility class used by transformation plugins.

    Provides task-grouping helpers (by replica location or by size), cached
    file-size lookups, CS parameter resolution and SE/site proximity helpers.
    """

    def __init__(self, plugin='Standard', transClient=None, dataManager=None, fc=None,
                 debug=False, transInThread=None, transID=None):
        """ c'tor

        Setting defaults; each client argument defaults to a freshly
        constructed client when not supplied.
        """
        # clients
        if transClient is None:
            self.transClient = TransformationClient()
        else:
            self.transClient = transClient
        if dataManager is None:
            self.dm = DataManager()
        else:
            self.dm = dataManager
        if fc is None:
            self.fc = FileCatalog()
        else:
            self.fc = fc
        self.dmsHelper = DMSHelpers()
        self.plugin = plugin
        self.transID = transID
        self.params = {}
        self.groupSize = 0       # task grouping threshold (files or bytes, plugin-dependent)
        self.maxFiles = 0        # max number of files per task
        self.cachedLFNSize = {}  # lfn -> size cache filled by _getFileSizeFromCatalog
        self.transString = ''
        self.debug = debug
        if transInThread is None:
            self.transInThread = {}
        else:
            self.transInThread = transInThread
        self.log = gLogger.getSubLogger(self.plugin +
                                        self.transInThread.get(self.transID, ' [NoThread] [%s] ' % self.transID))
        # FIXME: This doesn't work (yet) but should soon, will allow scripts to get the context
        self.log.showHeaders(True)

    def logVerbose(self, message, param=''):
        """ logger helper: promote verbose messages to info when debugging """
        if self.debug:
            # BUGFIX: use %s (as in __init__): the dict.get default is evaluated
            # eagerly, and '%d' raised TypeError while transID was still None
            log = gLogger.getSubLogger(self.plugin + ' (V)' +
                                       self.transInThread.get(self.transID, ' [NoThread] [%s] ' % self.transID))
            log.info(message, param)
        else:
            self.log.verbose(message, param)

    def logDebug(self, message, param=''):
        """ logger helper """
        self.log.debug(message, param)

    def logInfo(self, message, param=''):
        """ logger helper """
        self.log.info(message, param)

    def logWarn(self, message, param=''):
        """ logger helper """
        self.log.warn(message, param)

    def logError(self, message, param=''):
        """ logger helper """
        self.log.error(message, param)

    def logException(self, message, param='', lException=False):
        """ logger helper """
        self.log.exception(message, param, lException)

    def setParameters(self, params):
        """ Set the transformation parameters and extract transID """
        self.params = params
        self.transID = params['TransformationID']
        # use %s for consistency with __init__ (works for int transIDs too)
        self.log = gLogger.getSubLogger(self.plugin +
                                        self.transInThread.get(self.transID, ' [NoThread] [%s] ' % self.transID))

    # @timeThis
    def groupByReplicas(self, files, status):
        """ Generates tasks based on the location of the input data.

        First groups files by their exact set of SEs, then (second pass) site
        by site, where a file may appear at more than one site.

        :param dict fileReplicas: {'/this/is/at.1': ['SE1'],
                                   '/this/is/at.12': ['SE1', 'SE2'],
                                   '/this/is/at.2': ['SE2'],
                                   '/this/is/at_123': ['SE1', 'SE2', 'SE3'],
                                   '/this/is/at_23': ['SE2', 'SE3'],
                                   '/this/is/at_4': ['SE4']}
        """
        tasks = []
        nTasks = 0
        if not files:
            return S_OK(tasks)
        files = dict(files)
        # Parameters
        if not self.groupSize:
            self.groupSize = self.getPluginParam('GroupSize', 10)
        flush = (status == 'Flush')
        self.logVerbose(
            "groupByReplicas: %d files, groupSize %d, flush %s" %
            (len(files), self.groupSize, flush))
        # Consider files by groups of SEs, a file is only in one group
        # Then consider files site by site, but a file can now be at more than one site
        for groupSE in (True, False):
            if not files:
                break
            seFiles = getFileGroups(files, groupSE=groupSE)
            self.logDebug("fileGroups set: ", seFiles)
            for replicaSE in sortSEs(seFiles):
                lfns = seFiles[replicaSE]
                if lfns:
                    tasksLfns = breakListIntoChunks(lfns, self.groupSize)
                    lfnsInTasks = []
                    for taskLfns in tasksLfns:
                        # only a full chunk becomes a task, unless we are flushing
                        if flush or (len(taskLfns) >= self.groupSize):
                            tasks.append((replicaSE, taskLfns))
                            lfnsInTasks += taskLfns
                    # In case the file was at more than one site, remove it from the other sites' list
                    # Remove files from global list
                    for lfn in lfnsInTasks:
                        files.pop(lfn)
                    if not groupSE:
                        # Remove files from other SEs
                        for se in [se for se in seFiles if se != replicaSE]:
                            seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in lfnsInTasks]
            self.logVerbose(
                "groupByReplicas: %d tasks created (groupSE %s)" % (len(tasks) - nTasks, str(groupSE)),
                "%d files not included in tasks" % len(files))
            nTasks = len(tasks)
        return S_OK(tasks)

    def createTasksBySize(self, lfns, replicaSE, fileSizes=None, flush=False):
        """ Split files in groups according to the size and create tasks for a given SE.

        :param lfns: LFNs to group
        :param replicaSE: SE (or SE group string) the tasks are created for
        :param fileSizes: optional lfn -> size mapping; fetched from the catalog if None
        :param flush: if True, also emit the last, incomplete group
        :returns: list of (replicaSE, lfnList) tuples (NOT an S_OK structure)
        """
        tasks = []
        if fileSizes is None:
            fileSizes = self._getFileSize(lfns).get('Value')
        if fileSizes is None:
            self.logWarn('Error getting file sizes, no tasks created')
            return tasks
        taskLfns = []
        taskSize = 0
        if not self.groupSize:
            # input size in GB converted to bytes
            self.groupSize = float(self.getPluginParam('GroupSize', 1.)) * 1000 * 1000 * 1000
        if not self.maxFiles:
            # FIXME: prepare for changing the name of the ambiguous CS option
            self.maxFiles = self.getPluginParam('MaxFilesPerTask', self.getPluginParam('MaxFiles', 100))
        # smallest files first, so large files don't block small ones from grouping
        lfns = sorted(lfns, key=fileSizes.get)
        for lfn in lfns:
            size = fileSizes.get(lfn, 0)
            if size:
                if size > self.groupSize:
                    # a single file already exceeds the group size: its own task
                    tasks.append((replicaSE, [lfn]))
                else:
                    taskSize += size
                    taskLfns.append(lfn)
                    if (taskSize > self.groupSize) or (len(taskLfns) >= self.maxFiles):
                        tasks.append((replicaSE, taskLfns))
                        taskLfns = []
                        taskSize = 0
        if flush and taskLfns:
            tasks.append((replicaSE, taskLfns))
        if not tasks and not flush and taskLfns:
            self.logVerbose(
                'Not enough data to create a task, and flush not set (%d bytes for groupSize %d)' %
                (taskSize, self.groupSize))
        return tasks

    # @timeThis
    def groupBySize(self, files, status):
        """ Generate a task for a given amount of data """
        tasks = []
        nTasks = 0
        if not len(files):
            return S_OK(tasks)
        files = dict(files)
        # Parameters
        if not self.groupSize:
            # input size in GB converted to bytes
            self.groupSize = float(self.getPluginParam('GroupSize', 1)) * 1000 * 1000 * 1000
        flush = (status == 'Flush')
        self.logVerbose(
            "groupBySize: %d files, groupSize: %d, flush: %s" %
            (len(files), self.groupSize, flush))
        # Get the file sizes
        res = self._getFileSize(list(files))
        if not res['OK']:
            return res
        fileSizes = res['Value']
        for groupSE in (True, False):
            if not files:
                break
            seFiles = getFileGroups(files, groupSE=groupSE)
            for replicaSE in sorted(seFiles) if groupSE else sortSEs(seFiles):
                lfns = seFiles[replicaSE]
                newTasks = self.createTasksBySize(lfns, replicaSE, fileSizes=fileSizes, flush=flush)
                lfnsInTasks = []
                for task in newTasks:
                    lfnsInTasks += task[1]
                tasks += newTasks
                # Remove the selected files from the size cache
                self.clearCachedFileSize(lfnsInTasks)
                if not groupSE:
                    # Remove files from other SEs
                    for se in [se for se in seFiles if se != replicaSE]:
                        seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in lfnsInTasks]
                # Remove files from global list
                for lfn in lfnsInTasks:
                    files.pop(lfn)
            self.logVerbose(
                "groupBySize: %d tasks created with groupSE %s" % (len(tasks) - nTasks, str(groupSE)))
            self.logVerbose("groupBySize: %d files have not been included in tasks" % len(files))
            nTasks = len(tasks)
        # NOTE(review): at this point 'files' only holds the LEFTOVER files, so
        # this message reports the non-grouped count, not the grouped one
        self.logVerbose("Grouped %d files by size" % len(files))
        return S_OK(tasks)

    def getExistingCounters(self, normalise=False, requestedSites=None):
        """ Get the number of transformation files already assigned per UsedSE,
        optionally aggregated per site and/or normalised to percentages.

        :param normalise: if True, return percentage shares instead of raw counts
        :param requestedSites: optional list of sites to restrict the result to
        """
        # default changed from a mutable [] to None; an empty/absent list behaves identically
        res = self.transClient.getCounters('TransformationFiles', ['UsedSE'],
                                           {'TransformationID': self.params['TransformationID']})
        if not res['OK']:
            return res
        usageDict = {}
        for usedDict, count in res['Value']:
            usedSE = usedDict['UsedSE']
            if usedSE != 'Unknown':
                usageDict[usedSE] = count
        if requestedSites:
            siteDict = {}
            for se, count in usageDict.items():
                res = getSitesForSE(se)
                if not res['OK']:
                    return res
                for site in res['Value']:
                    if site in requestedSites:
                        siteDict[site] = count
            usageDict = siteDict.copy()
        if normalise:
            usageDict = self._normaliseShares(usageDict)
        return S_OK(usageDict)

    # @timeThis
    def _getFileSize(self, lfns):
        """ Get file size from a cache, if not from the catalog
        #FIXME: have to fill the cachedLFNSize!
        """
        lfns = list(lfns)
        cachedLFNSize = dict(self.cachedLFNSize)
        fileSizes = {}
        for lfn in [lfn for lfn in lfns if lfn in cachedLFNSize]:
            fileSizes[lfn] = cachedLFNSize[lfn]
        self.logDebug(
            "Found cache hit for File size for %d files out of %d" % (len(fileSizes), len(lfns)))
        lfns = [lfn for lfn in lfns if lfn not in cachedLFNSize]
        if lfns:
            fileSizes = self._getFileSizeFromCatalog(lfns, fileSizes)
            if not fileSizes['OK']:
                self.logError(fileSizes['Message'])
                return fileSizes
            fileSizes = fileSizes['Value']
        return S_OK(fileSizes)

    # @timeThis
    def _getFileSizeFromCatalog(self, lfns, fileSizes):
        """ Get file size from the catalog, merging into (a copy of) fileSizes
        and feeding the instance-level cache """
        lfns = list(lfns)
        fileSizes = dict(fileSizes)
        res = self.fc.getFileSize(lfns)
        if not res['OK']:
            return S_ERROR("Failed to get sizes for all files: %s" % res['Message'])
        if res['Value']['Failed']:
            errorReason = sorted(set(res['Value']['Failed'].values()))
            self.logWarn("Failed to get sizes for %d files:" % len(res['Value']['Failed']), errorReason)
        fileSizes.update(res['Value']['Successful'])
        self.cachedLFNSize.update(res['Value']['Successful'])
        self.logVerbose("Got size of %d files from catalog" % len(lfns))
        return S_OK(fileSizes)

    def clearCachedFileSize(self, lfns):
        """ Utility function: drop the given LFNs from the size cache """
        for lfn in [lfn for lfn in lfns if lfn in self.cachedLFNSize]:
            self.cachedLFNSize.pop(lfn)

    def getPluginParam(self, name, default=None):
        """ Get plugin parameters using specific settings or settings defined in the CS.

        Lookup order: generic CS value, plugin-specific CS value, then a
        transformation-specific parameter.
        Caution: the type returned is that of the default value
        """
        # get the value of a parameter looking 1st in the CS
        if default is not None:
            valueType = type(default)
        else:
            valueType = None
        # First look at a generic value...
        optionPath = "TransformationPlugins/%s" % (name)
        value = Operations().getValue(optionPath, None)
        self.logVerbose("Default plugin param %s: '%s'" % (optionPath, value))
        # Then look at a plugin-specific value
        optionPath = "TransformationPlugins/%s/%s" % (self.plugin, name)
        value = Operations().getValue(optionPath, value)
        self.logVerbose("Specific plugin param %s: '%s'" % (optionPath, value))
        if value is not None:
            default = value
        # Finally look at a transformation-specific parameter
        value = self.params.get(name, default)
        self.logVerbose(
            "Transformation plugin param %s: '%s'. Convert to %s" % (name, value, str(valueType)))
        if valueType and not isinstance(value, valueType):
            if valueType is list:
                try:
                    value = ast.literal_eval(value) if value and value != 'None' else []
                # literal_eval('SE-DST') -> ValueError
                # literal_eval('SE_MC-DST') -> SyntaxError
                # Don't ask...
                except (ValueError, SyntaxError):
                    value = [val for val in value.replace(' ', '').split(',') if val]
            elif valueType is int:
                value = int(value)
            elif valueType is float:
                value = float(value)
            elif valueType is bool:
                if value in ('False', 'No', 'None', None, 0):
                    value = False
                else:
                    value = bool(value)
            elif valueType is not str:
                self.logWarn(
                    "Unknown parameter type (%s) for %s, passed as string" % (str(valueType), name))
        self.logVerbose("Final plugin param %s: '%s'" % (name, value))
        return value

    @staticmethod
    def _normaliseShares(originalShares):
        """ Normalise shares so that they sum up to 100 (percentages);
        all shares are 0 when the total is 0 """
        total = sum(float(share) for share in originalShares.values())
        return dict([(site, 100. * float(share) / total if total else 0.)
                     for site, share in originalShares.items()])

    def uniqueSEs(self, ses):
        """ return a list of SEs that are not physically the same """
        newSEs = []
        for se in ses:
            if not self.isSameSEInList(se, newSEs):
                newSEs.append(se)
        return newSEs

    def isSameSE(self, se1, se2):
        """ Check if 2 SEs are indeed the same.

        :param se1: name of the first StorageElement
        :param se2: name of the second StorageElement
        :returns: True/False if they are considered the same.
            See :py:mod:`~DIRAC.Resources.Storage.StorageElement.StorageElementItem.isSameSE`
        """
        if se1 == se2:
            return True
        return StorageElement(se1).isSameSE(StorageElement(se2))

    def isSameSEInList(self, se1, seList):
        """ Check if an SE is the same as any in a list """
        if se1 in seList:
            return True
        for se in seList:
            if self.isSameSE(se1, se):
                return True
        return False

    def closerSEs(self, existingSEs, targetSEs, local=False):
        """ Order the targetSEs such that the first ones are closer to existingSEs.
        Keep all elements in targetSEs (unless local is True, in which case the
        SEs physically identical to an existing one are dropped). """
        setTarget = set(targetSEs)
        sameSEs = set([se1 for se1 in setTarget for se2 in existingSEs if self.isSameSE(se1, se2)])
        targetSEs = setTarget - set(sameSEs)
        if targetSEs:
            # Some SEs are left, look for sites
            existingSites = [self.dmsHelper.getLocalSiteForSE(se).get('Value') for se in existingSEs]
            existingSites = set([site for site in existingSites if site])
            closeSEs = set([se for se in targetSEs
                            if self.dmsHelper.getLocalSiteForSE(se).get('Value') in existingSites])
            otherSEs = targetSEs - closeSEs
            # shuffle within each proximity class so the load is spread
            targetSEs = list(closeSEs)
            random.shuffle(targetSEs)
            if not local and otherSEs:
                otherSEs = list(otherSEs)
                random.shuffle(otherSEs)
                targetSEs += otherSEs
        else:
            targetSEs = []
        return (targetSEs + list(sameSEs)) if not local else targetSEs

    @staticmethod
    def seParamtoList(inputParam):
        """Transform ``inputParam`` to list.

        :param inputParam: can be string, list, or string representation of list
        :returns: list
        """
        if not inputParam:
            return []
        if inputParam.count('['):
            # SECURITY: was eval(); literal_eval parses the same "[...]" CS
            # strings without executing arbitrary code
            return ast.literal_eval(inputParam)
        elif isinstance(inputParam, list):
            return inputParam
        return [inputParam]
class PluginUtilities(object):
    """
    Utility class used by plugins: holds the transformation/data-management
    clients and provides the file-grouping helpers shared by the plugins.
    """

    def __init__(self, plugin='Standard', transClient=None, dataManager=None, fc=None,
                 debug=False, transInThread=None, transID=None):
        """ c'tor

        Setting defaults; clients are instantiated only when not supplied.

        :param str plugin: plugin name, used for the logger and CS lookups
        :param transClient: TransformationClient (created if None)
        :param dataManager: DataManager (created if None)
        :param fc: FileCatalog (created if None)
        :param bool debug: if True, verbose messages are promoted to info level
        :param dict transInThread: transID -> thread-tag string, for log prefixes
        :param transID: transformation ID
        """
        # clients
        if transClient is None:
            self.transClient = TransformationClient()
        else:
            self.transClient = transClient
        if dataManager is None:
            self.dm = DataManager()
        else:
            self.dm = dataManager
        if fc is None:
            self.fc = FileCatalog()
        else:
            self.fc = fc
        self.dmsHelper = DMSHelpers()

        self.plugin = plugin
        self.transID = transID
        self.params = {}
        self.groupSize = 0
        self.maxFiles = 0
        self.cachedLFNSize = {}
        self.transString = ''
        self.debug = debug
        # Cache of per-SE storage parameters used by isSameSE()
        self.seConfig = {}
        if transInThread is None:
            self.transInThread = {}
        else:
            self.transInThread = transInThread

        self.log = gLogger.getSubLogger("%s/PluginUtilities" % plugin)

    def logVerbose(self, message, param=''):
        """ Logger helper: verbose, promoted to info when debug is on """
        if self.debug:
            self.log.info('(V)' + self.transString + message, param)
        else:
            self.log.verbose(self.transString + message, param)

    def logDebug(self, message, param=''):
        """ Logger helper """
        self.log.debug(self.transString + message, param)

    def logInfo(self, message, param=''):
        """ Logger helper """
        self.log.info(self.transString + message, param)

    def logWarn(self, message, param=''):
        """ Logger helper """
        self.log.warn(self.transString + message, param)

    def logError(self, message, param=''):
        """ Logger helper """
        self.log.error(self.transString + message, param)

    def logException(self, message, param='', lException=False):
        """ Logger helper """
        self.log.exception(self.transString + message, param, lException)

    def setParameters(self, params):
        """ Set the transformation parameters and extract transID """
        self.params = params
        self.transID = params['TransformationID']
        self.transString = self.transInThread.get(
            self.transID, ' [NoThread] [%d] ' % self.transID) + '%s: ' % self.plugin

    @timeThis
    def groupByReplicas(self, files, status):
        """
        Generates tasks based on the location of the input data

        :param dict files: {lfn: [SE, ...]} replica map
        :param str status: transformation status; 'Flush' forces incomplete groups out
        :returns: S_OK(list of (replicaSE, [lfns]) tasks)
        """
        tasks = []
        nTasks = 0

        if not len(files):
            return S_OK(tasks)

        files = dict(files)

        # Parameters
        if not self.groupSize:
            self.groupSize = self.getPluginParam('GroupSize', 10)
        flush = (status == 'Flush')
        self.logVerbose("groupByReplicas: %d files, groupSize %d, flush %s" %
                        (len(files), self.groupSize, flush))

        # Consider files by groups of SEs, a file is only in one group
        # Then consider files site by site, but a file can now be at more than one site
        for groupSE in (True, False):
            if not files:
                break
            seFiles = getFileGroups(files, groupSE=groupSE)
            self.logDebug("fileGroups set: ", seFiles)

            for replicaSE in sortSEs(seFiles):
                lfns = seFiles[replicaSE]
                if lfns:
                    tasksLfns = breakListIntoChunks(lfns, self.groupSize)
                    lfnsInTasks = []
                    for taskLfns in tasksLfns:
                        if (flush and not groupSE) or (len(taskLfns) >= self.groupSize):
                            tasks.append((replicaSE, taskLfns))
                            lfnsInTasks += taskLfns
                    # In case the file was at more than one site, remove it from the other sites' list
                    # Remove files from global list
                    for lfn in lfnsInTasks:
                        files.pop(lfn)
                    if not groupSE:
                        # Remove files from other SEs
                        for se in [se for se in seFiles if se != replicaSE]:
                            seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in lfnsInTasks]
            self.logVerbose(
                "groupByReplicas: %d tasks created (groupSE %s), %d files not included in tasks" %
                (len(tasks) - nTasks, str(groupSE), len(files)))
            nTasks = len(tasks)

        return S_OK(tasks)

    def createTasksBySize(self, lfns, replicaSE, fileSizes=None, flush=False):
        """
        Split files in groups according to the size and create tasks for a given SE

        :param list lfns: files to group
        :param replicaSE: SE (or SE tuple) the tasks are created for
        :param dict fileSizes: {lfn: size}; fetched from catalog/cache if None
        :param bool flush: if True, also emit the last, incomplete group
        :returns: list of (replicaSE, [lfns]) tasks (plain list, not S_OK)
        """
        tasks = []
        if fileSizes is None:
            fileSizes = self._getFileSize(lfns).get('Value')
        if fileSizes is None:
            self.logWarn('Error getting file sizes, no tasks created')
            return tasks
        taskLfns = []
        taskSize = 0
        if not self.groupSize:
            # input size in GB converted to bytes
            self.groupSize = float(self.getPluginParam('GroupSize', 1.)) * 1000 * 1000 * 1000
        if not self.maxFiles:
            self.maxFiles = self.getPluginParam('MaxFiles', 100)
        # Process smallest files first so groups fill up evenly
        lfns = sorted(lfns, key=fileSizes.get)
        for lfn in lfns:
            size = fileSizes.get(lfn, 0)
            if size:
                if size > self.groupSize:
                    # File is larger than the group size: gets a task of its own
                    tasks.append((replicaSE, [lfn]))
                else:
                    taskSize += size
                    taskLfns.append(lfn)
                    if (taskSize > self.groupSize) or (len(taskLfns) >= self.maxFiles):
                        tasks.append((replicaSE, taskLfns))
                        taskLfns = []
                        taskSize = 0
        if flush and taskLfns:
            tasks.append((replicaSE, taskLfns))
        return tasks

    @timeThis
    def groupBySize(self, files, status):
        """
        Generate a task for a given amount of data

        :param dict files: {lfn: [SE, ...]} replica map
        :param str status: transformation status; 'Flush' forces incomplete groups out
        :returns: S_OK(list of (replicaSE, [lfns]) tasks) or S_ERROR
        """
        tasks = []
        nTasks = 0

        if not len(files):
            return S_OK(tasks)

        files = dict(files)

        # Parameters
        if not self.groupSize:
            # input size in GB converted to bytes
            self.groupSize = float(self.getPluginParam('GroupSize', 1)) * 1000 * 1000 * 1000
        flush = (status == 'Flush')
        self.logVerbose("groupBySize: %d files, groupSize: %d, flush: %s" %
                        (len(files), self.groupSize, flush))

        # Get the file sizes
        res = self._getFileSize(files.keys())
        if not res['OK']:
            return res
        fileSizes = res['Value']

        for groupSE in (True, False):
            if not files:
                break
            seFiles = getFileGroups(files, groupSE=groupSE)

            for replicaSE in sorted(seFiles) if groupSE else sortSEs(seFiles):
                lfns = seFiles[replicaSE]
                newTasks = self.createTasksBySize(lfns, replicaSE, fileSizes=fileSizes, flush=flush)
                lfnsInTasks = []
                for task in newTasks:
                    lfnsInTasks += task[1]
                tasks += newTasks

                # Remove the selected files from the size cache
                self.clearCachedFileSize(lfnsInTasks)
                if not groupSE:
                    # Remove files from other SEs
                    for se in [se for se in seFiles if se != replicaSE]:
                        seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in lfnsInTasks]
                # Remove files from global list
                for lfn in lfnsInTasks:
                    files.pop(lfn)

            self.logVerbose("groupBySize: %d tasks created with groupSE %s" %
                            (len(tasks) - nTasks, str(groupSE)))
            self.logVerbose("groupBySize: %d files have not been included in tasks" % len(files))
            nTasks = len(tasks)

        self.logVerbose("Grouped %d files by size" % len(files))
        return S_OK(tasks)

    def getExistingCounters(self, normalise=False, requestedSites=None):
        """ Get the number of TransformationFiles per UsedSE, optionally mapped to
            sites and/or normalised to percentages.

        :param bool normalise: if True, normalise the counts with _normaliseShares
        :param list requestedSites: if given, only these sites are reported
        :returns: S_OK({SE or site: count}) or S_ERROR
        """
        # FIX: default was a shared mutable list ([]); same semantics for callers
        if requestedSites is None:
            requestedSites = []
        res = self.transClient.getCounters(
            'TransformationFiles', ['UsedSE'],
            {'TransformationID': self.params['TransformationID']})
        if not res['OK']:
            return res
        usageDict = {}
        for usedDict, count in res['Value']:
            usedSE = usedDict['UsedSE']
            if usedSE != 'Unknown':
                usageDict[usedSE] = count
        if requestedSites:
            siteDict = {}
            for se, count in usageDict.items():
                res = getSitesForSE(se)
                if not res['OK']:
                    return res
                for site in res['Value']:
                    if site in requestedSites:
                        siteDict[site] = count
            usageDict = siteDict.copy()
        if normalise:
            usageDict = self._normaliseShares(usageDict)
        return S_OK(usageDict)

    @timeThis
    def _getFileSize(self, lfns):
        """ Get file size from a cache, if not from the catalog
        #FIXME: have to fill the cachedLFNSize!
        """
        lfns = list(lfns)
        cachedLFNSize = dict(self.cachedLFNSize)

        fileSizes = {}
        for lfn in [lfn for lfn in lfns if lfn in cachedLFNSize]:
            fileSizes[lfn] = cachedLFNSize[lfn]
        self.logDebug("Found cache hit for File size for %d files out of %d" %
                      (len(fileSizes), len(lfns)))
        lfns = [lfn for lfn in lfns if lfn not in cachedLFNSize]
        if lfns:
            fileSizes = self._getFileSizeFromCatalog(lfns, fileSizes)
            if not fileSizes['OK']:
                self.logError(fileSizes['Message'])
                return fileSizes
            fileSizes = fileSizes['Value']
        return S_OK(fileSizes)

    @timeThis
    def _getFileSizeFromCatalog(self, lfns, fileSizes):
        """ Get file size from the catalog

        :param list lfns: files to look up
        :param dict fileSizes: already-known sizes, extended with the catalog results
        :returns: S_OK({lfn: size}) or S_ERROR
        """
        lfns = list(lfns)
        fileSizes = dict(fileSizes)

        res = self.fc.getFileSize(lfns)
        if not res['OK']:
            return S_ERROR("Failed to get sizes for all files: %s" % res['Message'])
        if res['Value']['Failed']:
            errorReason = sorted(set(res['Value']['Failed'].values()))
            self.logWarn("Failed to get sizes for %d files:" % len(res['Value']['Failed']),
                         errorReason)
        fileSizes.update(res['Value']['Successful'])
        self.cachedLFNSize.update((res['Value']['Successful']))
        self.logVerbose("Got size of %d files from catalog" % len(lfns))
        return S_OK(fileSizes)

    def clearCachedFileSize(self, lfns):
        """ Utility function: drop the given LFNs from the size cache """
        for lfn in [lfn for lfn in lfns if lfn in self.cachedLFNSize]:
            self.cachedLFNSize.pop(lfn)

    def getPluginParam(self, name, default=None):
        """ Get plugin parameters using specific settings or settings defined in the CS
            Caution: the type returned is that of the default value
        """
        # get the value of a parameter looking 1st in the CS
        if default is not None:
            valueType = type(default)
        else:
            valueType = None
        # First look at a generic value...
        optionPath = "TransformationPlugins/%s" % (name)
        value = Operations().getValue(optionPath, None)
        self.logVerbose("Default plugin param %s: '%s'" % (optionPath, value))
        # Then look at a plugin-specific value
        optionPath = "TransformationPlugins/%s/%s" % (self.plugin, name)
        value = Operations().getValue(optionPath, value)
        self.logVerbose("Specific plugin param %s: '%s'" % (optionPath, value))
        if value is not None:
            default = value
        # Finally look at a transformation-specific parameter
        value = self.params.get(name, default)
        self.logVerbose("Transformation plugin param %s: '%s'. Convert to %s" %
                        (name, value, str(valueType)))
        if valueType and type(value) is not valueType:
            if valueType is list:
                try:
                    value = ast.literal_eval(value) if value and value != 'None' else []
                # FIX: narrowed from bare 'except Exception', consistent with the
                # other getPluginParam implementation in this file:
                # literal_eval('SE-DST') -> ValueError, literal_eval('SE_MC-DST') -> SyntaxError
                except (ValueError, SyntaxError):
                    value = [val for val in value.replace(' ', '').split(',') if val]
            elif valueType is int:
                value = int(value)
            elif valueType is float:
                value = float(value)
            elif valueType is bool:
                if value in ('False', 'No', 'None', None, 0):
                    value = False
                else:
                    value = bool(value)
            elif valueType is not str:
                self.logWarn("Unknown parameter type (%s) for %s, passed as string" %
                             (str(valueType), name))
        self.logVerbose("Final plugin param %s: '%s'" % (name, value))
        return value

    @staticmethod
    def _normaliseShares(originalShares):
        """ Normalise shares to percentages; an all-zero input yields all zeros """
        shares = originalShares.copy()
        total = 0.0
        for site in shares.keys():
            share = float(shares[site])
            shares[site] = share
            total += share
        for site in shares.keys():
            # FIX: guard against total == 0 (was ZeroDivisionError), consistent
            # with the other _normaliseShares implementation in this file
            shares[site] = 100.0 * (shares[site] / total) if total else 0.0
        return shares

    def uniqueSEs(self, ses):
        """ Return a list of SEs that are not physically the same """
        newSEs = []
        for se in ses:
            if not self.isSameSEInList(se, newSEs):
                newSEs.append(se)
        return newSEs

    def isSameSE(self, se1, se2):
        """ Check if 2 SEs are indeed the same, comparing their SRM Host and Path """
        if se1 == se2:
            return True
        for se in (se1, se2):
            if se not in self.seConfig:
                self.seConfig[se] = {}
                res = StorageElement(se).getStorageParameters('SRM2')
                if res['OK']:
                    params = res['Value']
                    for item in ('Host', 'Path'):
                        # t1d1 and t0d1 paths are considered equivalent
                        self.seConfig[se][item] = params[item].replace('t1d1', 't0d1')
                else:
                    self.logError("Error getting StorageElement parameters for %s" % se,
                                  res['Message'])
                    # NOTE(review): on failure the cached config stays {}, so two
                    # failing SEs compare as identical — kept as-is (best effort)
        return self.seConfig[se1] == self.seConfig[se2]

    def isSameSEInList(self, se1, seList):
        """ Check if an SE is the same as any in a list """
        if se1 in seList:
            return True
        for se in seList:
            if self.isSameSE(se1, se):
                return True
        return False

    def closerSEs(self, existingSEs, targetSEs, local=False):
        """ Order the targetSEs such that the first ones are closer to existingSEs.
            Keep all elements in targetSEs
        """
        setTarget = set(targetSEs)
        sameSEs = set([se1 for se1 in setTarget
                       for se2 in existingSEs if self.isSameSE(se1, se2)])
        targetSEs = setTarget - set(sameSEs)
        if targetSEs:
            # Some SEs are left, look for sites (archive SEs are not considered)
            existingSites = [self.dmsHelper.getLocalSiteForSE(se).get('Value')
                             for se in existingSEs if not self.dmsHelper.isSEArchive(se)]
            existingSites = set([site for site in existingSites if site])
            closeSEs = set([se for se in targetSEs
                            if self.dmsHelper.getLocalSiteForSE(se).get('Value') in existingSites])
            otherSEs = targetSEs - closeSEs
            # Randomise the order within each group so the load is spread
            targetSEs = list(closeSEs)
            random.shuffle(targetSEs)
            if not local and otherSEs:
                otherSEs = list(otherSEs)
                random.shuffle(otherSEs)
                targetSEs += otherSEs
        else:
            targetSEs = []
        # When local is requested, SEs identical to existing ones are excluded
        return (targetSEs + list(sameSEs)) if not local else targetSEs
def execute():
    """ Parse the options and execute the script: for each run of a reconstruction
        BK query, pick the Tier1-RDST SE holding most of the run's files and set it
        as the run destination in the Transformation System.
    """
    bkQuery = dmScript.getBKQuery()
    fileType = bkQuery.getFileTypeList()
    if not set(fileType) & {'FULL.DST', 'RDST', 'SDST'}:
        gLogger.error("Please provide a reconstruction BK path")
        DIRAC.exit(1)

    from LHCbDIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
    from DIRAC.DataManagementSystem.Client.DataManager import DataManager
    from DIRAC.Core.Utilities.List import breakListIntoChunks
    from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
    from DIRAC.DataManagementSystem.Utilities.DMSHelpers import DMSHelpers, resolveSEGroup

    bk = BookkeepingClient()
    tr = TransformationClient()
    dm = DataManager()
    dmsHelper = DMSHelpers()

    bkQueryDict = bkQuery.getQueryDict()
    gLogger.notice("For BK Query:", str(bkQueryDict))
    progressBar = ProgressBar(1, title="Running BK query...", step=1)
    res = bk.getFilesWithMetadata(bkQueryDict)
    if not res['OK']:
        gLogger.error("Error getting files from BK", res['Message'])
        DIRAC.exit(2)

    if 'ParameterNames' in res.get('Value', {}):
        parameterNames = res['Value']['ParameterNames']
        info = res['Value']['Records']
        progressBar.endLoop("Obtained %d files" % len(info))
    else:
        gLogger.error('\nNo metadata found')
        DIRAC.exit(3)

    # Build the full LFN list and the per-run LFN map
    lfns = []
    runLFNs = {}
    for item in info:
        metadata = dict(zip(parameterNames, item))
        lfn = metadata['FileName']
        lfns.append(lfn)
        runLFNs.setdefault(metadata['RunNumber'], []).append(lfn)

    # Get the replicas, by chunks
    chunkSize = 1000
    progressBar = ProgressBar(len(lfns),
                              title='Getting replicas of %d files' % len(lfns),
                              chunk=chunkSize)
    replicas = {}
    errors = {}
    for lfnChunk in breakListIntoChunks(lfns, chunkSize):
        progressBar.loop()
        res = dm.getReplicas(lfnChunk, getUrl=False)
        if not res['OK']:
            errors.setdefault(res['Message'], []).extend(lfnChunk)
        else:
            replicas.update(res['Value']['Successful'])
            for lfn, error in res['Value']['Failed'].iteritems():
                errors.setdefault(error, []).append(lfn)
    progressBar.endLoop()
    for error, lfns in errors.iteritems():
        gLogger.error(error, 'for %d files' % len(lfns))

    tier1RDST = set(resolveSEGroup('Tier1-RDST'))
    setOK = 0
    errors = {}
    progressBar = ProgressBar(len(runLFNs),
                              title='Defining destination for %d runs' % len(runLFNs),
                              step=10)
    for run, lfns in runLFNs.iteritems():
        progressBar.loop()
        res = tr.getDestinationForRun(run)
        if res.get('Value'):
            errors.setdefault('Destination already set', []).append(str(run))
            continue
        # Count the run's files per Tier1-RDST SE
        seCounts = {}
        for lfn in lfns:
            for se in tier1RDST.intersection(replicas.get(lfn, [])):
                seCounts[se] = seCounts.setdefault(se, 0) + 1
        # Select the SE holding most files of the run
        maxi = 0
        seMax = None
        for se, count in seCounts.iteritems():
            if count > maxi:
                seMax = se
                maxi = count
        if not seMax:
            errors.setdefault('No SE found, use CERN-RDST', []).append(str(run))
            seMax = 'CERN-RDST'
        # SE found, get its site
        res = dmsHelper.getLocalSiteForSE(seMax)
        if res['OK']:
            site = res['Value']
            res = tr.setDestinationForRun(run, site)
            if not res['OK']:
                errors.setdefault(res['Message'], []).append(str(run))
            else:
                setOK += 1
        else:
            # FIX: failures to resolve the SE's site were silently dropped;
            # record them like any other per-run error
            errors.setdefault(res['Message'], []).append(str(run))
    progressBar.endLoop('Successfully set destination for %d runs' % setOK)
    for error, runs in errors.iteritems():
        gLogger.error(error, 'for runs %s' % ','.join(runs))
def getFilesToStage( lfnList, jobState = None, checkOnlyTapeSEs = None, jobLog = None ):
  """ Utility that returns out of a list of LFNs those files that are offline,
      and those for which at least one copy is online

  :param lfnList: LFN or list of LFNs to check
  :param jobState: optional job state object, used to get Owner/OwnerGroup for the proxied check
  :param checkOnlyTapeSEs: passed through to _checkFilesToStage
  :param jobLog: optional logger passed through to _checkFilesToStage
  :returns: S_OK with keys 'onlineLFNs', 'offlineLFNs' (per selected SE),
            'failedLFNs', 'absentLFNs' and (when replicas were checked) 'onlineSites'
  """
  if not lfnList:
    return S_OK( {'onlineLFNs':[], 'offlineLFNs': {}, 'failedLFNs':[], 'absentLFNs':{}} )

  dm = DataManager()
  if isinstance( lfnList, basestring ):
    lfnList = [lfnList]

  lfnListReplicas = dm.getReplicasForJobs( lfnList, getUrl = False )
  if not lfnListReplicas['OK']:
    return lfnListReplicas

  offlineLFNsDict = {}
  onlineLFNs = {}
  offlineLFNs = {}
  absentLFNs = {}
  failedLFNs = set()
  # FIX: onlineSites was only assigned inside the 'if seToLFNs' branch below but is
  # referenced in the final return -> NameError when there are no replicas at all
  onlineSites = set()
  if lfnListReplicas['Value']['Failed']:
    # Check if files are not existing
    for lfn, reason in lfnListReplicas['Value']['Failed'].iteritems():
      # FIXME: awful check until FC returns a proper error
      if cmpError( reason, errno.ENOENT ) or 'No such file' in reason:
        # The file doesn't exist, job must be Failed
        # FIXME: it is not possible to return here an S_ERROR(), return the message only
        absentLFNs[lfn] = S_ERROR( errno.ENOENT, 'File not in FC' )['Message']
    if absentLFNs:
      return S_OK({'onlineLFNs': list(onlineLFNs),
                   'offlineLFNs': offlineLFNsDict,
                   'failedLFNs': list(failedLFNs),
                   'absentLFNs': absentLFNs})
    return S_ERROR( "Failures in getting replicas" )

  lfnListReplicas = lfnListReplicas['Value']['Successful']
  # If a file is reported here at a tape SE, it is not at a disk SE as we use disk in priority
  # We shall check all file anyway in order to make sure they exist
  seToLFNs = dict()
  for lfn, ses in lfnListReplicas.iteritems():
    for se in ses:
      seToLFNs.setdefault( se, list() ).append( lfn )

  if seToLFNs:
    if jobState:
      # Get user name and group from the job state
      userName = jobState.getAttribute( 'Owner' )
      if not userName[ 'OK' ]:
        return userName
      userName = userName['Value']
      userGroup = jobState.getAttribute( 'OwnerGroup' )
      if not userGroup[ 'OK' ]:
        return userGroup
      userGroup = userGroup['Value']
    else:
      userName = None
      userGroup = None
    # Check whether files are Online or Offline, or missing at SE
    result = _checkFilesToStage( seToLFNs, onlineLFNs, offlineLFNs, absentLFNs,  # pylint: disable=unexpected-keyword-arg
                                 checkOnlyTapeSEs = checkOnlyTapeSEs, jobLog = jobLog,
                                 proxyUserName = userName,
                                 proxyUserGroup = userGroup,
                                 executionLock = True )

    if not result['OK']:
      return result
    failedLFNs = set( lfnList ) - set( onlineLFNs ) - set( offlineLFNs ) - set( absentLFNs )

    # Get the online SEs
    dmsHelper = DMSHelpers()
    onlineSEs = set( se for ses in onlineLFNs.values() for se in ses )
    onlineSites = set( dmsHelper.getLocalSiteForSE( se ).get( 'Value' ) for se in onlineSEs ) - {None}
    for lfn in offlineLFNs:
      ses = offlineLFNs[lfn]
      if len( ses ) == 1:
        # No choice, let's go
        offlineLFNsDict.setdefault( ses[0], list() ).append( lfn )
        continue
      # Try and get an SE at a site already with online files
      found = False
      if onlineSites:
        # If there is at least one online site, select one
        for se in ses:
          site = dmsHelper.getLocalSiteForSE( se )
          if site['OK']:
            if site['Value'] in onlineSites:
              offlineLFNsDict.setdefault( se, list() ).append( lfn )
              found = True
              break
      # No online site found in common, select randomly
      if not found:
        offlineLFNsDict.setdefault( random.choice( ses ), list() ).append( lfn )

  return S_OK({'onlineLFNs': list(onlineLFNs),
               'offlineLFNs': offlineLFNsDict,
               'failedLFNs': list(failedLFNs),
               'absentLFNs': absentLFNs,
               'onlineSites': onlineSites})