class PluginUtilities(object):
    """
    Utility class used by plugins.

    Holds the clients a transformation plugin needs (transformation client,
    data manager, file catalog), a set of logging helpers that prefix every
    message with the transformation string, and the routines that group files
    into tasks (by replica location or by size).
    """

    def __init__(self, plugin='Standard', transClient=None, dataManager=None, fc=None,
                 debug=False, transInThread=None, transID=None):
        """
        c'tor

        Setting defaults. Every client that is not supplied is instantiated here.

        :param str plugin: plugin name, used for the sub-logger and for CS option paths
        :param transClient: transformation client; a TransformationClient is created if None
        :param dataManager: data manager; a DataManager is created if None
        :param fc: file catalog; a FileCatalog is created if None
        :param bool debug: when True, verbose messages are emitted at info level
        :param dict transInThread: mapping transID -> string used as log prefix
        :param transID: transformation ID (overwritten by :py:meth:`setParameters`)
        """
        # clients
        self.transClient = TransformationClient() if transClient is None else transClient
        self.dm = DataManager() if dataManager is None else dataManager
        self.fc = FileCatalog() if fc is None else fc

        self.dmsHelper = DMSHelpers()

        self.plugin = plugin
        self.transID = transID
        self.params = {}
        # groupSize / maxFiles are lazily read from the plugin parameters on first use
        self.groupSize = 0
        self.maxFiles = 0
        self.cachedLFNSize = {}
        self.transString = ''
        self.debug = debug
        self.transInThread = {} if transInThread is None else transInThread

        self.log = gLogger.getSubLogger(plugin)

    def logVerbose(self, message, param=''):
        """ logger helper: verbose level, promoted to info (tagged '(V)') in debug mode """
        if self.debug:
            self.log.info('(V)' + self.transString + message, param)
        else:
            self.log.verbose(self.transString + message, param)

    def logDebug(self, message, param=''):
        """ logger helper: debug level """
        self.log.debug(self.transString + message, param)

    def logInfo(self, message, param=''):
        """ logger helper: info level """
        self.log.info(self.transString + message, param)

    def logWarn(self, message, param=''):
        """ logger helper: warning level """
        self.log.warn(self.transString + message, param)

    def logError(self, message, param=''):
        """ logger helper: error level """
        self.log.error(self.transString + message, param)

    def logException(self, message, param='', lException=False):
        """ logger helper: exception level """
        self.log.exception(self.transString + message, param, lException)

    def setParameters(self, params):
        """
        Set the transformation parameters and extract transID

        :param dict params: transformation parameters, must contain 'TransformationID'
        """
        self.params = params
        self.transID = params['TransformationID']
        # '%s' rather than '%d': same output for integer IDs, but does not raise
        # when the ID arrives as a string
        self.transString = self.transInThread.get(self.transID,
                                                  ' [NoThread] [%s] ' % self.transID)

    # @timeThis
    def groupByReplicas(self, files, status):
        """
        Generates tasks based on the location of the input data

        :param dict files: mapping LFN -> list of SEs, e.g.
                {'/this/is/at.1': ['SE1'],
                 '/this/is/at.12': ['SE1', 'SE2'],
                 '/this/is/at.2': ['SE2'],
                 '/this/is/at_123': ['SE1', 'SE2', 'SE3'],
                 '/this/is/at_23': ['SE2', 'SE3'],
                 '/this/is/at_4': ['SE4']}
        :param str status: when equal to 'Flush', incomplete chunks also become tasks
        :returns: S_OK(list of (replicaSE, [lfns]) tuples)
        """
        tasks = []
        nTasks = 0

        if not files:
            return S_OK(tasks)

        # Work on a copy: files are popped as they get assigned to tasks
        files = dict(files)

        # Parameters
        if not self.groupSize:
            self.groupSize = self.getPluginParam('GroupSize', 10)
        flush = (status == 'Flush')
        self.logVerbose(
            "groupByReplicas: %d files, groupSize %d, flush %s" % (len(files), self.groupSize, flush))

        # Consider files by groups of SEs, a file is only in one group
        # Then consider files site by site, but a file can now be at more than one site
        for groupSE in (True, False):
            if not files:
                break
            seFiles = getFileGroups(files, groupSE=groupSE)
            self.logDebug("fileGroups set: ", seFiles)

            for replicaSE in sortSEs(seFiles):
                lfns = seFiles[replicaSE]
                if not lfns:
                    continue
                lfnsInTasks = []
                for taskLfns in breakListIntoChunks(lfns, self.groupSize):
                    if flush or (len(taskLfns) >= self.groupSize):
                        tasks.append((replicaSE, taskLfns))
                        lfnsInTasks += taskLfns
                # In case the file was at more than one site, remove it from the other sites' list
                # Remove files from global list
                for lfn in lfnsInTasks:
                    files.pop(lfn)
                if not groupSE:
                    # Remove files from other SEs (set lookup keeps the filtering linear)
                    usedLfns = set(lfnsInTasks)
                    for se in [se for se in seFiles if se != replicaSE]:
                        seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in usedLfns]
            self.logVerbose(
                "groupByReplicas: %d tasks created (groupSE %s)" % (len(tasks) - nTasks, str(groupSE)),
                "%d files not included in tasks" % len(files))
            nTasks = len(tasks)

        return S_OK(tasks)

    def createTasksBySize(self, lfns, replicaSE, fileSizes=None, flush=False):
        """
        Split files in groups according to the size and create tasks for a given SE

        :param list lfns: LFNs to distribute into tasks
        :param replicaSE: SE (or SE group) attached to every created task
        :param dict fileSizes: mapping LFN -> size in bytes; fetched with _getFileSize if None
        :param bool flush: when True, the remaining files form a last (smaller) task
        :returns: list of (replicaSE, [lfns]) tuples; files without a known (non-zero)
                  size are never included
        """
        tasks = []
        if fileSizes is None:
            fileSizes = self._getFileSize(lfns).get('Value')
        if fileSizes is None:
            self.logWarn('Error getting file sizes, no tasks created')
            return tasks
        taskLfns = []
        taskSize = 0
        if not self.groupSize:
            # input size in GB converted to bytes
            self.groupSize = float(self.getPluginParam('GroupSize', 1.)) * 1000 * 1000 * 1000
        if not self.maxFiles:
            # FIXME: prepare for changing the name of the ambiguous CS option
            self.maxFiles = self.getPluginParam('MaxFilesPerTask', self.getPluginParam('MaxFiles', 100))
        # Files whose size is unknown sort as 0: a bare fileSizes.get would return None
        # for them and None cannot be ordered against integers in Python 3
        lfns = sorted(lfns, key=lambda lfn: fileSizes.get(lfn, 0))
        for lfn in lfns:
            size = fileSizes.get(lfn, 0)
            if not size:
                continue
            if size > self.groupSize:
                # A single file above the group size makes a task on its own
                tasks.append((replicaSE, [lfn]))
                continue
            taskSize += size
            taskLfns.append(lfn)
            if (taskSize > self.groupSize) or (len(taskLfns) >= self.maxFiles):
                tasks.append((replicaSE, taskLfns))
                taskLfns = []
                taskSize = 0
        if flush and taskLfns:
            tasks.append((replicaSE, taskLfns))
        if not tasks and not flush and taskLfns:
            self.logVerbose(
                'Not enough data to create a task, and flush not set (%d bytes for groupSize %d)' %
                (taskSize, self.groupSize))
        return tasks

    # @timeThis
    def groupBySize(self, files, status):
        """
        Generate a task for a given amount of data

        :param dict files: mapping LFN -> replica information, passed on to getFileGroups
        :param str status: when equal to 'Flush', leftover files also become tasks
        :returns: S_OK(list of (replicaSE, [lfns]) tuples), or the error structure
                  returned when getting the file sizes failed
        """
        tasks = []
        nTasks = 0

        if not len(files):
            return S_OK(tasks)

        # Work on a copy: files are popped as they get assigned to tasks
        files = dict(files)
        nFiles = len(files)
        # Parameters
        if not self.groupSize:
            # input size in GB converted to bytes
            self.groupSize = float(self.getPluginParam('GroupSize', 1)) * 1000 * 1000 * 1000
        flush = (status == 'Flush')
        self.logVerbose(
            "groupBySize: %d files, groupSize: %d, flush: %s" % (len(files), self.groupSize, flush))

        # Get the file sizes
        res = self._getFileSize(files.keys())
        if not res['OK']:
            return res
        fileSizes = res['Value']

        for groupSE in (True, False):
            if not files:
                break
            seFiles = getFileGroups(files, groupSE=groupSE)

            for replicaSE in sorted(seFiles) if groupSE else sortSEs(seFiles):
                lfns = seFiles[replicaSE]
                newTasks = self.createTasksBySize(lfns, replicaSE, fileSizes=fileSizes, flush=flush)
                lfnsInTasks = []
                for task in newTasks:
                    lfnsInTasks += task[1]
                tasks += newTasks

                # Remove the selected files from the size cache
                self.clearCachedFileSize(lfnsInTasks)
                if not groupSE:
                    # Remove files from other SEs (set lookup keeps the filtering linear)
                    usedLfns = set(lfnsInTasks)
                    for se in [se for se in seFiles if se != replicaSE]:
                        seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in usedLfns]
                # Remove files from global list
                for lfn in lfnsInTasks:
                    files.pop(lfn)

            self.logVerbose(
                "groupBySize: %d tasks created with groupSE %s" % (len(tasks) - nTasks, str(groupSE)))
            self.logVerbose("groupBySize: %d files have not been included in tasks" % len(files))
            nTasks = len(tasks)

        # 'files' only holds the leftovers at this point: report what was actually grouped
        self.logVerbose("Grouped %d files by size" % (nFiles - len(files)))
        return S_OK(tasks)

    def getExistingCounters(self, normalise=False, requestedSites=None):
        """
        Get the number of transformation files per 'UsedSE' for this transformation

        :param bool normalise: when True, counters are converted to percentages
        :param list requestedSites: when not empty, counters are reported per site
                                    (restricted to these sites) rather than per SE
        :returns: S_OK(dict of counters), or the first error structure encountered
        """
        res = self.transClient.getCounters('TransformationFiles', ['UsedSE'],
                                           {'TransformationID': self.params['TransformationID']})
        if not res['OK']:
            return res
        usageDict = {}
        for usedDict, count in res['Value']:
            usedSE = usedDict['UsedSE']
            if usedSE != 'Unknown':
                usageDict[usedSE] = count
        if requestedSites:
            siteDict = {}
            for se, count in usageDict.items():
                res = getSitesForSE(se)
                if not res['OK']:
                    return res
                for site in res['Value']:
                    if site in requestedSites:
                        siteDict[site] = count
            usageDict = siteDict.copy()
        if normalise:
            usageDict = self._normaliseShares(usageDict)
        return S_OK(usageDict)

    # @timeThis
    def _getFileSize(self, lfns):
        """
        Get file size from a cache, if not from the catalog

        #FIXME: have to fill the cachedLFNSize!

        :param lfns: iterable of LFNs
        :returns: S_OK(dict LFN -> size), or the error from the catalog lookup
        """
        lfns = list(lfns)
        cachedLFNSize = dict(self.cachedLFNSize)

        fileSizes = {}
        for lfn in [lfn for lfn in lfns if lfn in cachedLFNSize]:
            fileSizes[lfn] = cachedLFNSize[lfn]
        self.logDebug(
            "Found cache hit for File size for %d files out of %d" % (len(fileSizes), len(lfns)))
        # Only the files missing from the cache are requested from the catalog
        lfns = [lfn for lfn in lfns if lfn not in cachedLFNSize]
        if lfns:
            fileSizes = self._getFileSizeFromCatalog(lfns, fileSizes)
            if not fileSizes['OK']:
                self.logError(fileSizes['Message'])
                return fileSizes
            fileSizes = fileSizes['Value']
        return S_OK(fileSizes)

    # @timeThis
    def _getFileSizeFromCatalog(self, lfns, fileSizes):
        """
        Get file size from the catalog

        :param lfns: iterable of LFNs to look up
        :param dict fileSizes: already known sizes; a copy is completed and returned
        :returns: S_OK(dict LFN -> size) or S_ERROR if the catalog call failed. Files
                  reported as 'Failed' by the catalog are only logged, not returned.
        """
        lfns = list(lfns)
        fileSizes = dict(fileSizes)

        res = self.fc.getFileSize(lfns)
        if not res['OK']:
            return S_ERROR("Failed to get sizes for all files: %s" % res['Message'])
        if res['Value']['Failed']:
            errorReason = sorted(set(res['Value']['Failed'].values()))
            self.logWarn("Failed to get sizes for %d files:" % len(res['Value']['Failed']), errorReason)
        fileSizes.update(res['Value']['Successful'])
        # Remember the sizes for subsequent calls
        self.cachedLFNSize.update((res['Value']['Successful']))
        self.logVerbose("Got size of %d files from catalog" % len(lfns))
        return S_OK(fileSizes)

    def clearCachedFileSize(self, lfns):
        """
        Utility function: drop the given LFNs from the file size cache

        :param lfns: iterable of LFNs; unknown or repeated LFNs are ignored
        """
        for lfn in lfns:
            self.cachedLFNSize.pop(lfn, None)

    def getPluginParam(self, name, default=None):
        """
        Get plugin parameters using specific settings or settings defined in the CS
        Caution: the type returned is that of the default value

        Lookup order (later wins): generic CS option, plugin-specific CS option,
        transformation parameter.

        :param str name: parameter name
        :param default: default value; when not None its type drives the conversion
        :returns: the parameter value
        """
        # get the value of a parameter looking 1st in the CS
        if default is not None:
            valueType = type(default)
        else:
            valueType = None
        # First look at a generic value...
        optionPath = "TransformationPlugins/%s" % (name)
        value = Operations().getValue(optionPath, None)
        self.logVerbose("Default plugin param %s: '%s'" % (optionPath, value))
        # Then look at a plugin-specific value
        optionPath = "TransformationPlugins/%s/%s" % (self.plugin, name)
        value = Operations().getValue(optionPath, value)
        self.logVerbose("Specific plugin param %s: '%s'" % (optionPath, value))
        if value is not None:
            default = value
        # Finally look at a transformation-specific parameter
        value = self.params.get(name, default)
        self.logVerbose(
            "Transformation plugin param %s: '%s'. Convert to %s" % (name, value, str(valueType)))
        if valueType and not isinstance(value, valueType):
            if valueType is list:
                try:
                    value = ast.literal_eval(value) if value and value != 'None' else []
                # literal_eval('SE-DST') -> ValueError
                # literal_eval('SE_MC-DST') -> SyntaxError
                # Don't ask...
                except (ValueError, SyntaxError):
                    # Not a literal: treat it as a comma-separated list
                    value = [val for val in value.replace(' ', '').split(',') if val]
            elif valueType is int:
                value = int(value)
            elif valueType is float:
                value = float(value)
            elif valueType is bool:
                if value in ('False', 'No', 'None', None, 0):
                    value = False
                else:
                    value = bool(value)
            elif valueType is not str:
                self.logWarn(
                    "Unknown parameter type (%s) for %s, passed as string" % (str(valueType), name))
        self.logVerbose("Final plugin param %s: '%s'" % (name, value))
        return value

    @staticmethod
    def _normaliseShares(originalShares):
        """
        Normalize shares so that they sum up to 100

        :param dict originalShares: mapping site -> share (anything float() accepts)
        :returns: dict site -> percentage; all 0. when the total is 0
        """
        total = sum(float(share) for share in originalShares.values())
        return {site: (100. * float(share) / total if total else 0.)
                for site, share in originalShares.items()}

    def uniqueSEs(self, ses):
        """ return a list of SEs that are not physically the same (first occurrence kept) """
        newSEs = []
        for se in ses:
            if not self.isSameSEInList(se, newSEs):
                newSEs.append(se)
        return newSEs

    def isSameSE(self, se1, se2):
        """
        Check if 2 SEs are indeed the same.

        :param se1: name of the first StorageElement
        :param se2: name of the second StorageElement

        :returns: True/False if they are considered the same.
                  See :py:mod:`~DIRAC.Resources.Storage.StorageElement.StorageElementItem.isSameSE`
        """
        if se1 == se2:
            return True

        return StorageElement(se1).isSameSE(StorageElement(se2))

    def isSameSEInList(self, se1, seList):
        """ Check if an SE is the same as any in a list """
        if se1 in seList:
            return True
        for se in seList:
            if self.isSameSE(se1, se):
                return True
        return False

    def closerSEs(self, existingSEs, targetSEs, local=False):
        """
        Order the targetSEs such that the first ones are closer to existingSEs. Keep all elements in targetSEs

        :param existingSEs: SEs where the data already is
        :param targetSEs: candidate SEs
        :param bool local: when True, only SEs at the same site as an existing SE are
                           returned (SEs identical to an existing one are dropped)
        :returns: list of SEs: same-site SEs (shuffled), then the other SEs (shuffled),
                  then the SEs identical to an existing one
        """
        setTarget = set(targetSEs)
        sameSEs = set([se1 for se1 in setTarget for se2 in existingSEs if self.isSameSE(se1, se2)])
        targetSEs = setTarget - set(sameSEs)
        if targetSEs:
            # Some SEs are left, look for sites
            existingSites = [self.dmsHelper.getLocalSiteForSE(se).get('Value')
                             for se in existingSEs]
            existingSites = set([site for site in existingSites if site])
            closeSEs = set([se for se in targetSEs
                            if self.dmsHelper.getLocalSiteForSE(se).get('Value') in existingSites])
            # print existingSEs, existingSites, targetSEs, closeSEs
            otherSEs = targetSEs - closeSEs
            targetSEs = list(closeSEs)
            random.shuffle(targetSEs)
            if not local and otherSEs:
                otherSEs = list(otherSEs)
                random.shuffle(otherSEs)
                targetSEs += otherSEs
        else:
            targetSEs = []
        return (targetSEs + list(sameSEs)) if not local else targetSEs
class PluginUtilities(object):
    """
    Utility class used by plugins.

    Holds the clients a transformation plugin needs (transformation client,
    data manager, file catalog), a set of logging helpers bound to a sub-logger
    named after the plugin and the transformation, and the routines that group
    files into tasks (by replica location or by size).
    """

    def __init__(self, plugin='Standard', transClient=None, dataManager=None, fc=None,
                 debug=False, transInThread=None, transID=None):
        """
        c'tor

        Setting defaults. Every client that is not supplied is instantiated here.

        :param str plugin: plugin name, used for the sub-logger and for CS option paths
        :param transClient: transformation client; a TransformationClient is created if None
        :param dataManager: data manager; a DataManager is created if None
        :param fc: file catalog; a FileCatalog is created if None
        :param bool debug: when True, verbose messages are emitted at info level
        :param dict transInThread: mapping transID -> string appended to the logger name
        :param transID: transformation ID (overwritten by :py:meth:`setParameters`)
        """
        # clients
        self.transClient = TransformationClient() if transClient is None else transClient
        self.dm = DataManager() if dataManager is None else dataManager
        self.fc = FileCatalog() if fc is None else fc

        self.dmsHelper = DMSHelpers()

        self.plugin = plugin
        self.transID = transID
        self.params = {}
        # groupSize / maxFiles are lazily read from the plugin parameters on first use
        self.groupSize = 0
        self.maxFiles = 0
        self.cachedLFNSize = {}
        self.transString = ''
        self.debug = debug
        self.transInThread = {} if transInThread is None else transInThread

        self.log = gLogger.getSubLogger(self._subLoggerName())
        # FIXME: This doesn't work (yet) but should soon, will allow scripts to get the context
        self.log.showHeaders(True)

    def _subLoggerName(self, tag=''):
        """
        Build the sub-logger name: plugin name, optional tag, transformation string.

        '%s' is used for the fallback so that a transID that is still None (or a
        string) does not raise, as '%d' would.
        """
        return self.plugin + tag + self.transInThread.get(self.transID,
                                                          ' [NoThread] [%s] ' % self.transID)

    def logVerbose(self, message, param=''):
        """ logger helper: verbose level, promoted to info on a '(V)' sub-logger in debug mode """
        if self.debug:
            log = gLogger.getSubLogger(self._subLoggerName(' (V)'))
            log.info(message, param)
        else:
            self.log.verbose(message, param)

    def logDebug(self, message, param=''):
        """ logger helper: debug level """
        self.log.debug(message, param)

    def logInfo(self, message, param=''):
        """ logger helper: info level """
        self.log.info(message, param)

    def logWarn(self, message, param=''):
        """ logger helper: warning level """
        self.log.warn(message, param)

    def logError(self, message, param=''):
        """ logger helper: error level """
        self.log.error(message, param)

    def logException(self, message, param='', lException=False):
        """ logger helper: exception level """
        self.log.exception(message, param, lException)

    def setParameters(self, params):
        """
        Set the transformation parameters and extract transID.
        The sub-logger is re-created so that its name carries the new transID.

        :param dict params: transformation parameters, must contain 'TransformationID'
        """
        self.params = params
        self.transID = params['TransformationID']
        self.log = gLogger.getSubLogger(self._subLoggerName())

    # @timeThis
    def groupByReplicas(self, files, status):
        """
        Generates tasks based on the location of the input data

        :param dict files: mapping LFN -> list of SEs, e.g.
                {'/this/is/at.1': ['SE1'],
                 '/this/is/at.12': ['SE1', 'SE2'],
                 '/this/is/at.2': ['SE2'],
                 '/this/is/at_123': ['SE1', 'SE2', 'SE3'],
                 '/this/is/at_23': ['SE2', 'SE3'],
                 '/this/is/at_4': ['SE4']}
        :param str status: when equal to 'Flush', incomplete chunks also become tasks
        :returns: S_OK(list of (replicaSE, [lfns]) tuples)
        """
        tasks = []
        nTasks = 0

        if not files:
            return S_OK(tasks)

        # Work on a copy: files are popped as they get assigned to tasks
        files = dict(files)

        # Parameters
        if not self.groupSize:
            self.groupSize = self.getPluginParam('GroupSize', 10)
        flush = (status == 'Flush')
        self.logVerbose(
            "groupByReplicas: %d files, groupSize %d, flush %s" % (len(files), self.groupSize, flush))

        # Consider files by groups of SEs, a file is only in one group
        # Then consider files site by site, but a file can now be at more than one site
        for groupSE in (True, False):
            if not files:
                break
            seFiles = getFileGroups(files, groupSE=groupSE)
            self.logDebug("fileGroups set: ", seFiles)

            for replicaSE in sortSEs(seFiles):
                lfns = seFiles[replicaSE]
                if not lfns:
                    continue
                lfnsInTasks = []
                for taskLfns in breakListIntoChunks(lfns, self.groupSize):
                    if flush or (len(taskLfns) >= self.groupSize):
                        tasks.append((replicaSE, taskLfns))
                        lfnsInTasks += taskLfns
                # In case the file was at more than one site, remove it from the other sites' list
                # Remove files from global list
                for lfn in lfnsInTasks:
                    files.pop(lfn)
                if not groupSE:
                    # Remove files from other SEs (set lookup keeps the filtering linear)
                    usedLfns = set(lfnsInTasks)
                    for se in [se for se in seFiles if se != replicaSE]:
                        seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in usedLfns]
            self.logVerbose(
                "groupByReplicas: %d tasks created (groupSE %s)" % (len(tasks) - nTasks, str(groupSE)),
                "%d files not included in tasks" % len(files))
            nTasks = len(tasks)

        return S_OK(tasks)

    def createTasksBySize(self, lfns, replicaSE, fileSizes=None, flush=False):
        """
        Split files in groups according to the size and create tasks for a given SE

        :param list lfns: LFNs to distribute into tasks
        :param replicaSE: SE (or SE group) attached to every created task
        :param dict fileSizes: mapping LFN -> size in bytes; fetched with _getFileSize if None
        :param bool flush: when True, the remaining files form a last (smaller) task
        :returns: list of (replicaSE, [lfns]) tuples; files without a known (non-zero)
                  size are never included
        """
        tasks = []
        if fileSizes is None:
            fileSizes = self._getFileSize(lfns).get('Value')
        if fileSizes is None:
            self.logWarn('Error getting file sizes, no tasks created')
            return tasks
        taskLfns = []
        taskSize = 0
        if not self.groupSize:
            # input size in GB converted to bytes
            self.groupSize = float(self.getPluginParam('GroupSize', 1.)) * 1000 * 1000 * 1000
        if not self.maxFiles:
            # FIXME: prepare for changing the name of the ambiguous CS option
            self.maxFiles = self.getPluginParam('MaxFilesPerTask', self.getPluginParam('MaxFiles', 100))
        # Files whose size is unknown sort as 0: a bare fileSizes.get would return None
        # for them and None cannot be ordered against integers in Python 3
        lfns = sorted(lfns, key=lambda lfn: fileSizes.get(lfn, 0))
        for lfn in lfns:
            size = fileSizes.get(lfn, 0)
            if not size:
                continue
            if size > self.groupSize:
                # A single file above the group size makes a task on its own
                tasks.append((replicaSE, [lfn]))
                continue
            taskSize += size
            taskLfns.append(lfn)
            if (taskSize > self.groupSize) or (len(taskLfns) >= self.maxFiles):
                tasks.append((replicaSE, taskLfns))
                taskLfns = []
                taskSize = 0
        if flush and taskLfns:
            tasks.append((replicaSE, taskLfns))
        if not tasks and not flush and taskLfns:
            self.logVerbose(
                'Not enough data to create a task, and flush not set (%d bytes for groupSize %d)' %
                (taskSize, self.groupSize))
        return tasks

    # @timeThis
    def groupBySize(self, files, status):
        """
        Generate a task for a given amount of data

        :param dict files: mapping LFN -> replica information, passed on to getFileGroups
        :param str status: when equal to 'Flush', leftover files also become tasks
        :returns: S_OK(list of (replicaSE, [lfns]) tuples), or the error structure
                  returned when getting the file sizes failed
        """
        tasks = []
        nTasks = 0

        if not len(files):
            return S_OK(tasks)

        # Work on a copy: files are popped as they get assigned to tasks
        files = dict(files)
        nFiles = len(files)
        # Parameters
        if not self.groupSize:
            # input size in GB converted to bytes
            self.groupSize = float(self.getPluginParam('GroupSize', 1)) * 1000 * 1000 * 1000
        flush = (status == 'Flush')
        self.logVerbose(
            "groupBySize: %d files, groupSize: %d, flush: %s" % (len(files), self.groupSize, flush))

        # Get the file sizes
        res = self._getFileSize(list(files))
        if not res['OK']:
            return res
        fileSizes = res['Value']

        for groupSE in (True, False):
            if not files:
                break
            seFiles = getFileGroups(files, groupSE=groupSE)

            for replicaSE in sorted(seFiles) if groupSE else sortSEs(seFiles):
                lfns = seFiles[replicaSE]
                newTasks = self.createTasksBySize(lfns, replicaSE, fileSizes=fileSizes, flush=flush)
                lfnsInTasks = []
                for task in newTasks:
                    lfnsInTasks += task[1]
                tasks += newTasks

                # Remove the selected files from the size cache
                self.clearCachedFileSize(lfnsInTasks)
                if not groupSE:
                    # Remove files from other SEs (set lookup keeps the filtering linear)
                    usedLfns = set(lfnsInTasks)
                    for se in [se for se in seFiles if se != replicaSE]:
                        seFiles[se] = [lfn for lfn in seFiles[se] if lfn not in usedLfns]
                # Remove files from global list
                for lfn in lfnsInTasks:
                    files.pop(lfn)

            self.logVerbose(
                "groupBySize: %d tasks created with groupSE %s" % (len(tasks) - nTasks, str(groupSE)))
            self.logVerbose("groupBySize: %d files have not been included in tasks" % len(files))
            nTasks = len(tasks)

        # 'files' only holds the leftovers at this point: report what was actually grouped
        self.logVerbose("Grouped %d files by size" % (nFiles - len(files)))
        return S_OK(tasks)

    def getExistingCounters(self, normalise=False, requestedSites=None):
        """
        Get the number of transformation files per 'UsedSE' for this transformation

        :param bool normalise: when True, counters are converted to percentages
        :param list requestedSites: when not empty, counters are reported per site
                                    (restricted to these sites) rather than per SE
        :returns: S_OK(dict of counters), or the first error structure encountered
        """
        res = self.transClient.getCounters('TransformationFiles', ['UsedSE'],
                                           {'TransformationID': self.params['TransformationID']})
        if not res['OK']:
            return res
        usageDict = {}
        for usedDict, count in res['Value']:
            usedSE = usedDict['UsedSE']
            if usedSE != 'Unknown':
                usageDict[usedSE] = count
        if requestedSites:
            siteDict = {}
            for se, count in usageDict.items():
                res = getSitesForSE(se)
                if not res['OK']:
                    return res
                for site in res['Value']:
                    if site in requestedSites:
                        siteDict[site] = count
            usageDict = siteDict.copy()
        if normalise:
            usageDict = self._normaliseShares(usageDict)
        return S_OK(usageDict)

    # @timeThis
    def _getFileSize(self, lfns):
        """
        Get file size from a cache, if not from the catalog

        #FIXME: have to fill the cachedLFNSize!

        :param lfns: iterable of LFNs
        :returns: S_OK(dict LFN -> size), or the error from the catalog lookup
        """
        lfns = list(lfns)
        cachedLFNSize = dict(self.cachedLFNSize)

        fileSizes = {}
        for lfn in [lfn for lfn in lfns if lfn in cachedLFNSize]:
            fileSizes[lfn] = cachedLFNSize[lfn]
        self.logDebug(
            "Found cache hit for File size for %d files out of %d" % (len(fileSizes), len(lfns)))
        # Only the files missing from the cache are requested from the catalog
        lfns = [lfn for lfn in lfns if lfn not in cachedLFNSize]
        if lfns:
            fileSizes = self._getFileSizeFromCatalog(lfns, fileSizes)
            if not fileSizes['OK']:
                self.logError(fileSizes['Message'])
                return fileSizes
            fileSizes = fileSizes['Value']
        return S_OK(fileSizes)

    # @timeThis
    def _getFileSizeFromCatalog(self, lfns, fileSizes):
        """
        Get file size from the catalog

        :param lfns: iterable of LFNs to look up
        :param dict fileSizes: already known sizes; a copy is completed and returned
        :returns: S_OK(dict LFN -> size) or S_ERROR if the catalog call failed. Files
                  reported as 'Failed' by the catalog are only logged, not returned.
        """
        lfns = list(lfns)
        fileSizes = dict(fileSizes)

        res = self.fc.getFileSize(lfns)
        if not res['OK']:
            return S_ERROR("Failed to get sizes for all files: %s" % res['Message'])
        if res['Value']['Failed']:
            errorReason = sorted(set(res['Value']['Failed'].values()))
            self.logWarn("Failed to get sizes for %d files:" % len(res['Value']['Failed']), errorReason)
        fileSizes.update(res['Value']['Successful'])
        # Remember the sizes for subsequent calls
        self.cachedLFNSize.update((res['Value']['Successful']))
        self.logVerbose("Got size of %d files from catalog" % len(lfns))
        return S_OK(fileSizes)

    def clearCachedFileSize(self, lfns):
        """
        Utility function: drop the given LFNs from the file size cache

        :param lfns: iterable of LFNs; unknown or repeated LFNs are ignored
        """
        for lfn in lfns:
            self.cachedLFNSize.pop(lfn, None)

    def getPluginParam(self, name, default=None):
        """
        Get plugin parameters using specific settings or settings defined in the CS
        Caution: the type returned is that of the default value

        Lookup order (later wins): generic CS option, plugin-specific CS option,
        transformation parameter.

        :param str name: parameter name
        :param default: default value; when not None its type drives the conversion
        :returns: the parameter value
        """
        # get the value of a parameter looking 1st in the CS
        if default is not None:
            valueType = type(default)
        else:
            valueType = None
        # First look at a generic value...
        optionPath = "TransformationPlugins/%s" % (name)
        value = Operations().getValue(optionPath, None)
        self.logVerbose("Default plugin param %s: '%s'" % (optionPath, value))
        # Then look at a plugin-specific value
        optionPath = "TransformationPlugins/%s/%s" % (self.plugin, name)
        value = Operations().getValue(optionPath, value)
        self.logVerbose("Specific plugin param %s: '%s'" % (optionPath, value))
        if value is not None:
            default = value
        # Finally look at a transformation-specific parameter
        value = self.params.get(name, default)
        self.logVerbose(
            "Transformation plugin param %s: '%s'. Convert to %s" % (name, value, str(valueType)))
        if valueType and not isinstance(value, valueType):
            if valueType is list:
                try:
                    value = ast.literal_eval(value) if value and value != 'None' else []
                # literal_eval('SE-DST') -> ValueError
                # literal_eval('SE_MC-DST') -> SyntaxError
                # Don't ask...
                except (ValueError, SyntaxError):
                    # Not a literal: treat it as a comma-separated list
                    value = [val for val in value.replace(' ', '').split(',') if val]
            elif valueType is int:
                value = int(value)
            elif valueType is float:
                value = float(value)
            elif valueType is bool:
                if value in ('False', 'No', 'None', None, 0):
                    value = False
                else:
                    value = bool(value)
            elif valueType is not str:
                self.logWarn(
                    "Unknown parameter type (%s) for %s, passed as string" % (str(valueType), name))
        self.logVerbose("Final plugin param %s: '%s'" % (name, value))
        return value

    @staticmethod
    def _normaliseShares(originalShares):
        """
        Normalize shares so that they sum up to 100

        :param dict originalShares: mapping site -> share (anything float() accepts)
        :returns: dict site -> percentage; all 0. when the total is 0
        """
        total = sum(float(share) for share in originalShares.values())
        return {site: (100. * float(share) / total if total else 0.)
                for site, share in originalShares.items()}

    def uniqueSEs(self, ses):
        """ return a list of SEs that are not physically the same (first occurrence kept) """
        newSEs = []
        for se in ses:
            if not self.isSameSEInList(se, newSEs):
                newSEs.append(se)
        return newSEs

    def isSameSE(self, se1, se2):
        """
        Check if 2 SEs are indeed the same.

        :param se1: name of the first StorageElement
        :param se2: name of the second StorageElement

        :returns: True/False if they are considered the same.
                  See :py:mod:`~DIRAC.Resources.Storage.StorageElement.StorageElementItem.isSameSE`
        """
        if se1 == se2:
            return True

        return StorageElement(se1).isSameSE(StorageElement(se2))

    def isSameSEInList(self, se1, seList):
        """ Check if an SE is the same as any in a list """
        if se1 in seList:
            return True
        for se in seList:
            if self.isSameSE(se1, se):
                return True
        return False

    def closerSEs(self, existingSEs, targetSEs, local=False):
        """
        Order the targetSEs such that the first ones are closer to existingSEs. Keep all elements in targetSEs

        :param existingSEs: SEs where the data already is
        :param targetSEs: candidate SEs
        :param bool local: when True, only SEs at the same site as an existing SE are
                           returned (SEs identical to an existing one are dropped)
        :returns: list of SEs: same-site SEs (shuffled), then the other SEs (shuffled),
                  then the SEs identical to an existing one
        """
        setTarget = set(targetSEs)
        sameSEs = set([se1 for se1 in setTarget for se2 in existingSEs if self.isSameSE(se1, se2)])
        targetSEs = setTarget - set(sameSEs)
        if targetSEs:
            # Some SEs are left, look for sites
            existingSites = [self.dmsHelper.getLocalSiteForSE(se).get('Value')
                             for se in existingSEs]
            existingSites = set([site for site in existingSites if site])
            closeSEs = set([se for se in targetSEs
                            if self.dmsHelper.getLocalSiteForSE(se).get('Value') in existingSites])
            # print existingSEs, existingSites, targetSEs, closeSEs
            otherSEs = targetSEs - closeSEs
            targetSEs = list(closeSEs)
            random.shuffle(targetSEs)
            if not local and otherSEs:
                otherSEs = list(otherSEs)
                random.shuffle(otherSEs)
                targetSEs += otherSEs
        else:
            targetSEs = []
        return (targetSEs + list(sameSEs)) if not local else targetSEs

    @staticmethod
    def seParamtoList(inputParam):
        """Transform ``inputParam`` to list.

        :param inputParam: can be string, list, or string representation of list
        :returns: list
        """
        if not inputParam:
            return []
        # Check for a real list first: list.count('[') would count '[' elements and
        # send the list itself to eval()
        if isinstance(inputParam, list):
            return inputParam
        if inputParam.count('['):
            # NOTE(review): eval() executes arbitrary expressions; if this parameter can
            # ever come from an untrusted source, switch to ast.literal_eval
            return eval(inputParam)  # pylint: disable=eval-used
        return [inputParam]
class PluginUtilities(object): """ Utility class used by plugins """ def __init__(self, plugin='Standard', transClient=None, dataManager=None, fc=None, debug=False, transInThread=None, transID=None): """ c'tor Setting defaults """ # clients if transClient is None: self.transClient = TransformationClient() else: self.transClient = transClient if dataManager is None: self.dm = DataManager() else: self.dm = dataManager if fc is None: self.fc = FileCatalog() else: self.fc = fc self.dmsHelper = DMSHelpers() self.plugin = plugin self.transID = transID self.params = {} self.groupSize = 0 self.maxFiles = 0 self.cachedLFNSize = {} self.transString = '' self.debug = debug self.seConfig = {} if transInThread is None: self.transInThread = {} else: self.transInThread = transInThread self.log = gLogger.getSubLogger("%s/PluginUtilities" % plugin) def logVerbose(self, message, param=''): if self.debug: self.log.info('(V)' + self.transString + message, param) else: self.log.verbose(self.transString + message, param) def logDebug(self, message, param=''): self.log.debug(self.transString + message, param) def logInfo(self, message, param=''): self.log.info(self.transString + message, param) def logWarn(self, message, param=''): self.log.warn(self.transString + message, param) def logError(self, message, param=''): self.log.error(self.transString + message, param) def logException(self, message, param='', lException=False): self.log.exception(self.transString + message, param, lException) def setParameters(self, params): self.params = params self.transID = params['TransformationID'] self.transString = self.transInThread.get( self.transID, ' [NoThread] [%d] ' % self.transID) + '%s: ' % self.plugin @timeThis def groupByReplicas(self, files, status): """ Generates tasks based on the location of the input data :param dict fileReplicas: {'/this/is/at.1': ['SE1'], '/this/is/at.12': ['SE1', 'SE2'], '/this/is/at.2': ['SE2'], '/this/is/at_123': ['SE1', 'SE2', 'SE3'], '/this/is/at_23': ['SE2', 
'SE3'], '/this/is/at_4': ['SE4']} """ tasks = [] nTasks = 0 if not len(files): return S_OK(tasks) files = dict(files) # Parameters if not self.groupSize: self.groupSize = self.getPluginParam('GroupSize', 10) flush = (status == 'Flush') self.logVerbose("groupByReplicas: %d files, groupSize %d, flush %s" % (len(files), self.groupSize, flush)) # Consider files by groups of SEs, a file is only in one group # Then consider files site by site, but a file can now be at more than one site for groupSE in (True, False): if not files: break seFiles = getFileGroups(files, groupSE=groupSE) self.logDebug("fileGroups set: ", seFiles) for replicaSE in sortSEs(seFiles): lfns = seFiles[replicaSE] if lfns: tasksLfns = breakListIntoChunks(lfns, self.groupSize) lfnsInTasks = [] for taskLfns in tasksLfns: if (flush and not groupSE) or (len(taskLfns) >= self.groupSize): tasks.append((replicaSE, taskLfns)) lfnsInTasks += taskLfns # In case the file was at more than one site, remove it from the other sites' list # Remove files from global list for lfn in lfnsInTasks: files.pop(lfn) if not groupSE: # Remove files from other SEs for se in [se for se in seFiles if se != replicaSE]: seFiles[se] = [ lfn for lfn in seFiles[se] if lfn not in lfnsInTasks ] self.logVerbose( "groupByReplicas: %d tasks created (groupSE %s), %d files not included in tasks" % (len(tasks) - nTasks, str(groupSE), len(files))) nTasks = len(tasks) return S_OK(tasks) def createTasksBySize(self, lfns, replicaSE, fileSizes=None, flush=False): """ Split files in groups according to the size and create tasks for a given SE """ tasks = [] if fileSizes is None: fileSizes = self._getFileSize(lfns).get('Value') if fileSizes is None: self.logWarn('Error getting file sizes, no tasks created') return tasks taskLfns = [] taskSize = 0 if not self.groupSize: self.groupSize = float( self.getPluginParam('GroupSize', 1.) 
) * 1000 * 1000 * 1000 # input size in GB converted to bytes if not self.maxFiles: self.maxFiles = self.getPluginParam('MaxFiles', 100) lfns = sorted(lfns, key=fileSizes.get) for lfn in lfns: size = fileSizes.get(lfn, 0) if size: if size > self.groupSize: tasks.append((replicaSE, [lfn])) else: taskSize += size taskLfns.append(lfn) if (taskSize > self.groupSize) or (len(taskLfns) >= self.maxFiles): tasks.append((replicaSE, taskLfns)) taskLfns = [] taskSize = 0 if flush and taskLfns: tasks.append((replicaSE, taskLfns)) return tasks @timeThis def groupBySize(self, files, status): """ Generate a task for a given amount of data """ tasks = [] nTasks = 0 if not len(files): return S_OK(tasks) files = dict(files) # Parameters if not self.groupSize: self.groupSize = float(self.getPluginParam( 'GroupSize', 1)) * 1000 * 1000 * 1000 # input size in GB converted to bytes flush = (status == 'Flush') self.logVerbose("groupBySize: %d files, groupSize: %d, flush: %s" % (len(files), self.groupSize, flush)) # Get the file sizes res = self._getFileSize(files.keys()) if not res['OK']: return res fileSizes = res['Value'] for groupSE in (True, False): if not files: break seFiles = getFileGroups(files, groupSE=groupSE) for replicaSE in sorted(seFiles) if groupSE else sortSEs(seFiles): lfns = seFiles[replicaSE] newTasks = self.createTasksBySize(lfns, replicaSE, fileSizes=fileSizes, flush=flush) lfnsInTasks = [] for task in newTasks: lfnsInTasks += task[1] tasks += newTasks # Remove the selected files from the size cache self.clearCachedFileSize(lfnsInTasks) if not groupSE: # Remove files from other SEs for se in [se for se in seFiles if se != replicaSE]: seFiles[se] = [ lfn for lfn in seFiles[se] if lfn not in lfnsInTasks ] # Remove files from global list for lfn in lfnsInTasks: files.pop(lfn) self.logVerbose("groupBySize: %d tasks created with groupSE %s" % (len(tasks) - nTasks, str(groupSE))) self.logVerbose( "groupBySize: %d files have not been included in tasks" % len(files)) nTasks = 
len(tasks) self.logVerbose("Grouped %d files by size" % len(files)) return S_OK(tasks) def getExistingCounters(self, normalise=False, requestedSites=[]): res = self.transClient.getCounters( 'TransformationFiles', ['UsedSE'], {'TransformationID': self.params['TransformationID']}) if not res['OK']: return res usageDict = {} for usedDict, count in res['Value']: usedSE = usedDict['UsedSE'] if usedSE != 'Unknown': usageDict[usedSE] = count if requestedSites: siteDict = {} for se, count in usageDict.items(): res = getSitesForSE(se) if not res['OK']: return res for site in res['Value']: if site in requestedSites: siteDict[site] = count usageDict = siteDict.copy() if normalise: usageDict = self._normaliseShares(usageDict) return S_OK(usageDict) @timeThis def _getFileSize(self, lfns): """ Get file size from a cache, if not from the catalog #FIXME: have to fill the cachedLFNSize! """ lfns = list(lfns) cachedLFNSize = dict(self.cachedLFNSize) fileSizes = {} for lfn in [lfn for lfn in lfns if lfn in cachedLFNSize]: fileSizes[lfn] = cachedLFNSize[lfn] self.logDebug("Found cache hit for File size for %d files out of %d" % (len(fileSizes), len(lfns))) lfns = [lfn for lfn in lfns if lfn not in cachedLFNSize] if lfns: fileSizes = self._getFileSizeFromCatalog(lfns, fileSizes) if not fileSizes['OK']: self.logError(fileSizes['Message']) return fileSizes fileSizes = fileSizes['Value'] return S_OK(fileSizes) @timeThis def _getFileSizeFromCatalog(self, lfns, fileSizes): """ Get file size from the catalog """ lfns = list(lfns) fileSizes = dict(fileSizes) res = self.fc.getFileSize(lfns) if not res['OK']: return S_ERROR("Failed to get sizes for all files: %s" % res['Message']) if res['Value']['Failed']: errorReason = sorted(set(res['Value']['Failed'].values())) self.logWarn( "Failed to get sizes for %d files:" % len(res['Value']['Failed']), errorReason) fileSizes.update(res['Value']['Successful']) self.cachedLFNSize.update((res['Value']['Successful'])) self.logVerbose("Got size of %d files 
from catalog" % len(lfns)) return S_OK(fileSizes) def clearCachedFileSize(self, lfns): """ Utility function """ for lfn in [lfn for lfn in lfns if lfn in self.cachedLFNSize]: self.cachedLFNSize.pop(lfn) def getPluginParam(self, name, default=None): """ Get plugin parameters using specific settings or settings defined in the CS Caution: the type returned is that of the default value """ # get the value of a parameter looking 1st in the CS if default != None: valueType = type(default) else: valueType = None # First look at a generic value... optionPath = "TransformationPlugins/%s" % (name) value = Operations().getValue(optionPath, None) self.logVerbose("Default plugin param %s: '%s'" % (optionPath, value)) # Then look at a plugin-specific value optionPath = "TransformationPlugins/%s/%s" % (self.plugin, name) value = Operations().getValue(optionPath, value) self.logVerbose("Specific plugin param %s: '%s'" % (optionPath, value)) if value != None: default = value # Finally look at a transformation-specific parameter value = self.params.get(name, default) self.logVerbose("Transformation plugin param %s: '%s'. 
Convert to %s" % (name, value, str(valueType))) if valueType and type(value) is not valueType: if valueType is list: try: value = ast.literal_eval( value) if value and value != 'None' else [] except Exception: value = [ val for val in value.replace(' ', '').split(',') if val ] elif valueType is int: value = int(value) elif valueType is float: value = float(value) elif valueType is bool: if value in ('False', 'No', 'None', None, 0): value = False else: value = bool(value) elif valueType is not str: self.logWarn( "Unknown parameter type (%s) for %s, passed as string" % (str(valueType), name)) self.logVerbose("Final plugin param %s: '%s'" % (name, value)) return value @staticmethod def _normaliseShares(originalShares): shares = originalShares.copy() total = 0.0 for site in shares.keys(): share = float(shares[site]) shares[site] = share total += share for site in shares.keys(): share = 100.0 * (shares[site] / total) shares[site] = share return shares def uniqueSEs(self, ses): newSEs = [] for se in ses: if not self.isSameSEInList(se, newSEs): newSEs.append(se) return newSEs def isSameSE(self, se1, se2): if se1 == se2: return True for se in (se1, se2): if se not in self.seConfig: self.seConfig[se] = {} res = StorageElement(se).getStorageParameters('SRM2') if res['OK']: params = res['Value'] for item in ('Host', 'Path'): self.seConfig[se][item] = params[item].replace( 't1d1', 't0d1') else: self.logError( "Error getting StorageElement parameters for %s" % se, res['Message']) return self.seConfig[se1] == self.seConfig[se2] def isSameSEInList(self, se1, seList): if se1 in seList: return True for se in seList: if self.isSameSE(se1, se): return True return False def closerSEs(self, existingSEs, targetSEs, local=False): """ Order the targetSEs such that the first ones are closer to existingSEs. 
Keep all elements in targetSEs """ setTarget = set(targetSEs) sameSEs = set([ se1 for se1 in setTarget for se2 in existingSEs if self.isSameSE(se1, se2) ]) targetSEs = setTarget - set(sameSEs) if targetSEs: # Some SEs are left, look for sites existingSites = [ self.dmsHelper.getLocalSiteForSE(se).get('Value') for se in existingSEs if not self.dmsHelper.isSEArchive(se) ] existingSites = set([site for site in existingSites if site]) closeSEs = set([ se for se in targetSEs if self.dmsHelper.getLocalSiteForSE( se).get('Value') in existingSites ]) # print existingSEs, existingSites, targetSEs, closeSEs otherSEs = targetSEs - closeSEs targetSEs = list(closeSEs) random.shuffle(targetSEs) if not local and otherSEs: otherSEs = list(otherSEs) random.shuffle(otherSEs) targetSEs += otherSEs else: targetSEs = [] return (targetSEs + list(sameSEs)) if not local else targetSEs
class TransformationPlugin(PluginBase):
    """
    A TransformationPlugin object should be instantiated by every transformation.

    The methods named after a plugin (_Standard, _BySize, _Broadcast, _ByShare)
    return S_OK with a list of tasks, each task being a tuple (SE(s), list of LFNs).
    """

    def __init__(self, plugin, transClient=None, dataManager=None):
        """
        plugin name has to be passed in: it will then be executed as one of the
        functions below, e.g. plugin = 'BySize' will execute
        TransformationPlugin('BySize')._BySize()

        :param str plugin: name of the plugin
        :param transClient: transformation client, a TransformationClient is
                            created if None
        :param dataManager: data manager, a DataManager is created if None
        """
        super(TransformationPlugin, self).__init__(plugin)
        self.data = {}
        self.files = False
        if transClient is None:
            self.transClient = TransformationClient()
        else:
            self.transClient = transClient
        if dataManager is None:
            self.dm = DataManager()
        else:
            self.dm = dataManager
        self.fc = FileCatalog()

    def isOK(self):
        """ The plugin is valid when both the input data and the parameters are set """
        self.valid = bool(self.data) and bool(self.params)
        return self.valid

    def setInputData(self, data):
        """ Set the input data, used by the plugins as { lfn : [SE1, SE2, ...], ... } """
        self.data = data

    def setTransformationFiles(self, files):
        """ Store the transformation files """
        self.files = files

    def _Standard(self):
        """
        Simply group by replica location; the tasks are returned without target SE
        """
        res = self._groupByReplicas()
        if not res['OK']:
            return res
        newTasks = [('', lfns) for _se, lfns in res['Value']]
        return S_OK(newTasks)

    def _BySize(self):
        """ Alias for groupBySize """
        return self._groupBySize()

    def _Broadcast(self):
        """
        This plug-in takes files found at the sourceSE and broadcasts to all
        (or a selection of) targetSEs.

        Parameters used: 'SourceSE', 'TargetSE', 'Destinations' (optional),
        'Status' and 'GroupSize' (number of files per task).
        """
        if not self.params:
            return S_ERROR(
                "TransformationPlugin._Broadcast: The 'Broadcast' plugin requires additional parameters."
            )

        targetseParam = self.params['TargetSE']
        # NOTE(review): eval() executes arbitrary code; if these parameters can be
        # set by untrusted users it should be replaced by ast.literal_eval
        sourceSEs = eval(self.params['SourceSE'])
        if isinstance(targetseParam, list):
            # Copy: the list is shuffled below and the parameter must stay untouched
            targetSEs = list(targetseParam)
        elif targetseParam.count('['):
            targetSEs = eval(targetseParam)
        else:
            targetSEs = [targetseParam]
        destinations = int(self.params.get('Destinations', 0))
        if destinations and (destinations >= len(targetSEs)):
            # Not more destinations requested than available: no limit needed
            destinations = 0

        status = self.params['Status']
        # Number of files per task
        groupSize = int(self.params['GroupSize'])

        # Group the files by set of SEs
        fileGroups = self._getFileGroups(self.data)
        targetSELfns = {}
        for replicaSE, lfns in fileGroups.items():
            ses = replicaSE.split(',')
            # Only files with a replica at one of the source SEs are considered
            if not any(se in sourceSEs for se in ses):
                continue

            for lfn in lfns:
                targets = []
                sources = self._getSitesForSEs(ses)
                random.shuffle(targetSEs)
                for targetSE in targetSEs:
                    res = self._getSiteForSE(targetSE)
                    if not res['OK']:
                        gLogger.error("Failed to get site for SE %s" % targetSE, res['Message'])
                        continue
                    site = res['Value']
                    if site not in sources:
                        if destinations and (len(targets) >= destinations):
                            continue
                        sources.append(site)
                    # after all, if someone wants to copy to the source, it's his choice
                    targets.append(targetSE)
                strTargetSEs = ','.join(sorted(targets))
                targetSELfns.setdefault(strTargetSEs, []).append(lfn)

        tasks = []
        for ses, lfns in targetSELfns.items():
            for taskLfns in breakListIntoChunks(lfns, groupSize):
                # do not allow groups smaller than the groupSize, except if
                # transformation is in flush state
                if (status == 'Flush') or (len(taskLfns) >= groupSize):
                    tasks.append((ses, taskLfns))
        return S_OK(tasks)

    def _ByShare(self, shareType='CPU'):
        """
        first get the shares from the CS, and then makes the grouping looking at the history

        :param str shareType: name of the section of /Resources/Shares to use
        """
        res = self._getShares(shareType, normalise=True)
        if not res['OK']:
            return res
        cpuShares = res['Value']
        gLogger.info("Obtained the following target shares (%):")
        for site in sorted(cpuShares):
            gLogger.info("%s: %.1f" % (site.ljust(15), cpuShares[site]))

        # Get the existing destinations from the transformationDB
        res = self._getExistingCounters(requestedSites=list(cpuShares))
        if not res['OK']:
            gLogger.error("Failed to get existing file share", res['Message'])
            return res
        existingCount = res['Value']
        if existingCount:
            gLogger.info("Existing site utilization (%):")
            normalisedExistingCount = self._normaliseShares(existingCount.copy())
            for se in sorted(normalisedExistingCount):
                gLogger.info("%s: %.1f" % (se.ljust(15), normalisedExistingCount[se]))

        # Group the input files by their existing replicas
        res = self._groupByReplicas()
        if not res['OK']:
            return res
        replicaGroups = res['Value']

        tasks = []
        # For the replica groups
        for replicaSE, lfns in replicaGroups:
            possibleSEs = replicaSE.split(',')
            # Determine the next site based on requested shares, existing usage
            # and candidate sites
            res = self._getNextSite(
                existingCount, cpuShares,
                candidates=self._getSitesForSEs(possibleSEs))
            if not res['OK']:
                gLogger.error("Failed to get next destination SE", res['Message'])
                continue
            targetSite = res['Value']
            # Resolve the ses for the target site
            res = getSEsForSite(targetSite)
            if not res['OK']:
                gLogger.error("Failed to get SEs for site %s" % targetSite, res['Message'])
                continue
            ses = res['Value']
            # Determine the selected SE and create the task
            for chosenSE in ses:
                if chosenSE in possibleSEs:
                    tasks.append((chosenSE, lfns))
                    # Account for the new files so that the next choice sees them
                    existingCount[targetSite] = existingCount.get(targetSite, 0) + len(lfns)
        return S_OK(tasks)

    def _getShares(self, shareType, normalise=False):
        """
        Takes share from the CS, eventually normalize them

        :param str shareType: name of the section of /Resources/Shares to read
        :param bool normalise: if True the shares are converted to percentages
        :return: S_OK({site: share}); S_ERROR if the section cannot be read,
                 is empty or contains only null shares
        """
        res = gConfig.getOptionsDict('/Resources/Shares/%s' % shareType)
        if not res['OK']:
            return res
        if not res['Value']:
            return S_ERROR("/Resources/Shares/%s option contains no shares" % shareType)
        shares = dict((site, float(value)) for site, value in res['Value'].items())
        if not any(shares.values()):
            return S_ERROR("No non-zero shares defined")
        if normalise:
            shares = self._normaliseShares(shares)
        return S_OK(shares)

    def _getExistingCounters(self, normalise=False, requestedSites=None):
        """
        Get the 'UsedSE' counters of the TransformationFiles of this transformation.

        :param bool normalise: if True the counts are converted to percentages
        :param requestedSites: optional collection of sites; when given the counts
                               are re-keyed by site (getSitesForSE with
                               gridName='LCG') and only those sites are kept
        :return: S_OK({SE or site: count}) or the failed result of the underlying call
        """
        res = self.transClient.getCounters(
            'TransformationFiles', ['UsedSE'],
            {'TransformationID': self.params['TransformationID']})
        if not res['OK']:
            return res
        usageDict = {}
        for usedDict, count in res['Value']:
            usedSE = usedDict['UsedSE']
            if usedSE != 'Unknown':
                usageDict[usedSE] = count
        if requestedSites:
            siteDict = {}
            for se, count in usageDict.items():
                res = getSitesForSE(se, gridName='LCG')
                if not res['OK']:
                    return res
                for site in res['Value']:
                    if site in requestedSites:
                        # NOTE(review): with several SEs at one site the last
                        # count wins (no sum) - confirm this is intended
                        siteDict[site] = count
            usageDict = siteDict.copy()
        if normalise:
            usageDict = self._normaliseShares(usageDict)
        return S_OK(usageDict)

    @classmethod
    def _normaliseShares(cls, originalShares):
        """
        Convert shares to percentages of their sum.

        :param dict originalShares: {site: share}; it is not modified
        :return: copy of the input with float values adding up to 100; if the sum
                 of the shares is 0 all values are 0.0
        """
        shares = originalShares.copy()
        total = 0.0
        for site in shares:
            share = float(shares[site])
            shares[site] = share
            total += share
        if not total:
            # Nothing to normalise, and dividing would raise ZeroDivisionError
            return shares
        for site in shares:
            shares[site] = 100.0 * (shares[site] / total)
        return shares

    def _getNextSite(self, existingCount, cpuShares, candidates=None):
        """
        Choose the site which is the furthest below its share.

        :param dict existingCount: {site: number of files already assigned}
        :param dict cpuShares: {site: requested share}
        :param candidates: if not empty, only these sites can be chosen
        :return: S_OK(site name), the name being '' if no site qualifies
        """
        # normalise the shares
        siteShare = self._normaliseShares(existingCount)
        # then fill the missing share values to 0
        for site in cpuShares:
            siteShare.setdefault(site, 0.0)
        # determine which site is furthest from its share
        chosenSite = ''
        minShareShortFall = -float("inf")
        for site, cpuShare in cpuShares.items():
            if candidates and site not in candidates:
                continue
            if not cpuShare:
                continue
            shareShortFall = cpuShare - siteShare[site]
            if shareShortFall > minShareShortFall:
                minShareShortFall = shareShortFall
                chosenSite = site
        return S_OK(chosenSite)

    def _groupByReplicas(self):
        """
        Generates a job based on the location of the input data

        Parameters used: 'Status' and 'GroupSize' (number of files per task).
        Incomplete groups only give a task when the status is 'Flush'.
        """
        if not self.params:
            return S_ERROR(
                "TransformationPlugin._Standard: The 'Standard' plug-in requires parameters."
            )
        status = self.params['Status']
        groupSize = int(self.params['GroupSize'])
        # Group files by SE
        fileGroups = self._getFileGroups(self.data)
        # Create tasks based on the group size
        tasks = []
        for replicaSE in sorted(fileGroups):
            for taskLfns in breakListIntoChunks(fileGroups[replicaSE], groupSize):
                if (status == 'Flush') or (len(taskLfns) >= groupSize):
                    tasks.append((replicaSE, taskLfns))
        return S_OK(tasks)

    def _groupBySize(self):
        """
        Generate a task for a given amount of data

        Parameters used: 'Status', 'GroupSize' (in GB) and 'MaxFiles' (default 100).
        A task is closed when its size exceeds GroupSize or when it holds MaxFiles
        files; the remaining files only give a task when the status is 'Flush'.
        """
        if not self.params:
            return S_ERROR(
                "TransformationPlugin._BySize: The 'BySize' plug-in requires parameters."
            )
        status = self.params['Status']
        # input size in GB converted to bytes
        requestedSize = float(self.params['GroupSize']) * 1000 * 1000 * 1000
        maxFiles = int(self.params.get('MaxFiles', 100))
        # Group files by SE
        fileGroups = self._getFileGroups(self.data)
        # Get the file sizes
        res = self.fc.getFileSize(self.data)
        if not res['OK']:
            return S_ERROR("Failed to get sizes for files")
        if res['Value']['Failed']:
            return S_ERROR("Failed to get sizes for all files")
        fileSizes = res['Value']['Successful']
        tasks = []
        for replicaSE, lfns in fileGroups.items():
            taskLfns = []
            taskSize = 0
            for lfn in lfns:
                taskSize += fileSizes[lfn]
                taskLfns.append(lfn)
                if (taskSize > requestedSize) or (len(taskLfns) >= maxFiles):
                    tasks.append((replicaSE, taskLfns))
                    taskLfns = []
                    taskSize = 0
            if (status == 'Flush') and taskLfns:
                tasks.append((replicaSE, taskLfns))
        return S_OK(tasks)

    @classmethod
    def _getFileGroups(cls, fileReplicas):
        """
        get file groups dictionary { "SE1,SE2,SE3" : [ lfn1, lfn2 ], ... }

        :param dict fileReplicas: { lfn : [SE1, SE2, SE3], ... }
        """
        fileGroups = {}
        for lfn, replicas in fileReplicas.items():
            # Sorted and without duplicates, so that the key is unique per set of SEs
            replicaSEs = ",".join(sorted(set(replicas)))
            fileGroups.setdefault(replicaSEs, []).append(lfn)
        return fileGroups

    @classmethod
    def _getSiteForSE(cls, se):
        """
        Get site name for the given SE

        :return: S_OK(first site returned by getSitesForSE, or '' if there is none)
                 or the failed result of getSitesForSE
        """
        result = getSitesForSE(se, gridName='LCG')
        if not result['OK']:
            return result
        if result['Value']:
            return S_OK(result['Value'][0])
        return S_OK('')

    @classmethod
    def _getSitesForSEs(cls, seList):
        """
        Get all the sites for the given SE list; SEs whose lookup fails are ignored
        """
        sites = []
        for se in seList:
            result = getSitesForSE(se, gridName='LCG')
            if result['OK']:
                sites += result['Value']
        return sites
class fakeClient:
    """
    Stand-in for the transformation client, built around a single transformation.

    Requests concerning self.transID are answered from the files and replicas
    prepared by prepareForPlugin, requests that would modify the transformation
    just return OK, and the other requests are forwarded to a real
    TransformationClient. When asIfProd is set, requests for self.transID are
    redirected to that transformation.
    """

    def __init__(self, trans, transID, lfns, asIfProd):
        """
        :param trans: transformation object; its getType() and getBkQuery() are used
        :param transID: ID of the transformation that is faked
        :param lfns: list of LFNs to use as files of the transformation
        :param asIfProd: ID of a transformation to impersonate, or a false value
        """
        self.trans = trans
        self.transID = transID
        from DIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
        self.transClient = TransformationClient()
        from LHCbDIRAC.BookkeepingSystem.Client.BookkeepingClient import BookkeepingClient
        self.bk = BookkeepingClient()
        from DIRAC.DataManagementSystem.Client.DataManager import DataManager
        self.dm = DataManager()
        self.asIfProd = asIfProd
        (self.transFiles, self.transReplicas) = self.prepareForPlugin(lfns)

    def addFilesToTransformation(self, transID, lfns):
        """ Nothing is added, all LFNs are reported as 'Added' """
        return S_OK({
            'Failed': {},
            'Successful': dict([(lfn, 'Added') for lfn in lfns])
        })

    def getTransformation(self, transID, extraParams=False):
        """ Return only the type for the faked transformation, else ask the real client """
        if transID == self.transID and self.asIfProd:
            transID = self.asIfProd
        if transID != self.transID:
            return self.transClient.getTransformation(transID)
        res = self.trans.getType()
        return DIRAC.S_OK({'Type': res['Value']})

    def getReplicas(self):
        """ Replicas prepared by prepareForPlugin """
        return self.transReplicas

    def getFiles(self):
        """ Files prepared by prepareForPlugin """
        return self.transFiles

    def getCounters(self, table, attrList, condDict):
        """
        For the faked transformation return a null 'UsedSE' counter for a fixed
        list of RAW SEs, else ask the real client
        """
        # Work on a copy: the dictionary of the caller must not be modified
        condDict = dict(condDict)
        if condDict['TransformationID'] == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict['TransformationID'] != self.transID:
            return self.transClient.getCounters(table, attrList, condDict)
        possibleTargets = [
            'CERN-RAW', 'CNAF-RAW', 'GRIDKA-RAW', 'IN2P3-RAW', 'SARA-RAW',
            'PIC-RAW', 'RAL-RAW', 'RRCKI-RAW'
        ]
        counters = [({'UsedSE': se}, 0) for se in possibleTargets]
        return DIRAC.S_OK(counters)

    def getBookkeepingQuery(self, transID):
        """
        Return the BK query of the impersonated transformation if any, else the
        one of the transformation object
        """
        if transID == self.transID and self.asIfProd:
            return self.transClient.getBookkeepingQuery(self.asIfProd)
        return self.trans.getBkQuery()

    def insertTransformationRun(self, transID, runID, xx):
        """ Nothing is done """
        return DIRAC.S_OK()

    def getTransformationRuns(self, condDict):
        """
        For the faked transformation return one 'Active' run, without selected
        site, per requested run (or per run of the files if no run is requested),
        else ask the real client
        """
        condDict = dict(condDict)
        if condDict['TransformationID'] == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict['TransformationID'] != self.transID:
            return self.transClient.getTransformationRuns(condDict)
        runs = condDict.get('RunNumber', [])
        if runs and not isinstance(runs, list):
            runs = [runs]
        if not runs and self.transFiles:
            res = self.bk.getFileMetadata(
                [fileDict['LFN'] for fileDict in self.transFiles])
            if not res['OK']:
                return res
            runs = list(
                set(meta['RunNumber'] for meta in res['Value']['Successful'].values()))
        transRuns = [{'RunNumber': run, 'Status': "Active", "SelectedSite": None}
                     for run in runs]
        return DIRAC.S_OK(transRuns)

    def getTransformationFiles(self, condDict=None):
        """
        For the faked transformation return its files, all in status 'Unused' and
        possibly restricted to the requested runs, else ask the real client
        """
        condDict = {} if condDict is None else dict(condDict)
        if condDict.get('TransformationID') == self.transID and self.asIfProd:
            condDict['TransformationID'] = self.asIfProd
        if condDict.get('TransformationID') != self.transID:
            return self.transClient.getTransformationFiles(condDict=condDict)
        transFiles = []
        # All files are Unused: any other status selects nothing
        if 'Status' in condDict and 'Unused' not in condDict['Status']:
            return DIRAC.S_OK(transFiles)
        runs = None
        if 'RunNumber' in condDict:
            runs = condDict['RunNumber']
            if not isinstance(runs, list):
                runs = [runs]
        for fileDict in self.transFiles or []:
            if not runs or fileDict['RunNumber'] in runs:
                transFiles.append({
                    'LFN': fileDict['LFN'],
                    'Status': 'Unused',
                    'RunNumber': fileDict['RunNumber']
                })
        return DIRAC.S_OK(transFiles)

    def setParameterToTransformationFiles(self, transID, lfnDict):
        """
        Update the transFiles with some parameters
        """
        if transID != self.transID:
            return self.transClient.setParameterToTransformationFiles(transID, lfnDict)
        for fileDict in self.transFiles or []:
            fileDict.update(lfnDict.get(fileDict['LFN'], {}))
        return S_OK()

    def getTransformationFilesCount(self, transID, field, selection=None):
        """
        For the faked transformation count its files per 'Status' or per
        'RunNumber' (plus a 'Total'), else ask the real client
        """
        if selection is None:
            selection = {}
        if transID != self.transID and selection.get('TransformationID') != self.transID:
            return self.transClient.getTransformationFilesCount(
                transID, field, selection=selection)
        runs = selection.get('RunNumber')
        if runs and not isinstance(runs, list):
            runs = [runs]
        selected = [fileDict for fileDict in self.transFiles or []
                    if not runs or fileDict['RunNumber'] in runs]
        if field == 'Status':
            counters = {'Unused': len(selected)}
        elif field == 'RunNumber':
            counters = {}
            for fileDict in selected:
                runID = fileDict['RunNumber']
                counters[runID] = counters.get(runID, 0) + 1
        else:
            return DIRAC.S_ERROR('Not implemented for field ' + field)
        counters['Total'] = sum(counters.values())
        return DIRAC.S_OK(counters)

    def getTransformationRunStats(self, transIDs):
        """
        Return {transID: {runID: {'Unused': n, 'Total': n}}} for the faked
        transformation, and what the real client returns for the other ones
        """
        transIDs = list(transIDs)
        counters = {}
        # A single request for all the real transformations
        otherIDs = [transID for transID in transIDs if transID != self.transID]
        if otherIDs:
            res = self.transClient.getTransformationRunStats(otherIDs)
            if not res['OK']:
                return res
            counters.update(res['Value'])
        if self.transID in transIDs:
            runCounters = {}
            for fileDict in self.transFiles or []:
                runStats = runCounters.setdefault(fileDict['RunNumber'], {'Unused': 0})
                runStats['Unused'] += 1
            for runStats in runCounters.values():
                runStats['Total'] = runStats['Unused']
            counters[self.transID] = runCounters
        return DIRAC.S_OK(counters)

    def addRunsMetadata(self, runID, val):
        """ Forwarded to the real client """
        return self.transClient.addRunsMetadata(runID, val)

    def getRunsMetadata(self, runID):
        """ Forwarded to the real client """
        return self.transClient.getRunsMetadata(runID)

    def setTransformationRunStatus(self, transID, runID, status):
        """ Nothing is done """
        return DIRAC.S_OK()

    def setTransformationRunsSite(self, transID, runID, site):
        """ Nothing is done """
        return DIRAC.S_OK()

    def setFileStatusForTransformation(self, transID, status, lfns):
        """ Nothing is done """
        return DIRAC.S_OK()

    def addTransformationRunFiles(self, transID, run, lfns):
        """ Nothing is done """
        return DIRAC.S_OK()

    def setDestinationForRun(self, runID, site):
        """ Nothing is done """
        return DIRAC.S_OK()

    def getDestinationForRun(self, runID):
        """ Forwarded to the real client """
        return self.transClient.getDestinationForRun(runID)

    def prepareForPlugin(self, lfns):
        """
        Get the run number and the replicas of the files.

        Replicas are obtained with getReplicas for the 'replication' and 'removal'
        transformation types, with getReplicasForJobs otherwise.

        :param list lfns: LFNs to prepare
        :return: tuple ([{'RunNumber': run, 'LFN': lfn}, ...], {lfn: sorted SEs});
                 (None, None) if there is no LFN and ([], {}) if the bookkeeping
                 metadata cannot be obtained
        """
        import time
        print("Preparing the plugin input data (%d files)" % len(lfns))
        transType = self.trans.getType()['Value']
        if not lfns:
            return (None, None)
        res = self.bk.getFileMetadata(lfns)
        if not res['OK']:
            print("Error getting BK metadata %s" % res['Message'])
            return ([], {})
        files = [{"RunNumber": metadata.get('RunNumber', 0), "LFN": lfn}
                 for lfn, metadata in res['Value']['Successful'].items()]
        replicas = {}
        startTime = time.time()
        from DIRAC.Core.Utilities.List import breakListIntoChunks
        forJobs = transType.lower() not in ("replication", "removal")
        for lfnChunk in breakListIntoChunks(lfns, 200):
            if forJobs:
                res = self.dm.getReplicasForJobs(lfnChunk, getUrl=False)
            else:
                res = self.dm.getReplicas(lfnChunk, getUrl=False)
            if res['OK']:
                for lfn, ses in res['Value']['Successful'].items():
                    if ses:
                        replicas[lfn] = sorted(ses)
            else:
                # Only this chunk failed, carry on with the next ones
                print("Error getting replicas of %d files: %s" % (len(lfnChunk), res['Message']))
        print("Obtained replicas of %d files in %.3f seconds" %
              (len(lfns), time.time() - startTime))
        return (files, replicas)