def getBKProductions(self, visible=None):
    """ Returns a list of productions matching the BK query """
    if visible is None:
        visible = self.isVisible()
    prodList = self.__bkQueryDict.get('Production')
    if prodList:
        if not isinstance(prodList, list):
            prodList = [prodList]
        return sorted(prodList)
    if not self.getProcessingPass():
        gLogger.fatal('Cannot get a list of productions without the Processing Pass')
        return []
    eventTypes = self.__bkQueryDict.get('EventType')
    if not isinstance(eventTypes, list):
        eventTypes = [eventTypes]
    fullList = set()
    for eventType in eventTypes:
        bkQ = BKQuery(self.__bkQueryDict)
        bkQ.setVisible(visible)
        bkDict = bkQ.setEventType(eventType)
        # gLogger.notice('Get productions for BK query', str(bkDict))
        res = self.__bkClient.getProductions(bkDict)
        if not res['OK']:
            gLogger.error('Error getting productions from BK', res['Message'])
            return []
        if self.getProcessingPass().replace('/', '') != 'Real Data':
            fileTypes = self.getFileTypeList()
            prodList = set(prod for prods in res['Value']['Records'] for prod in prods
                           if self.__getProdStatus(prod) != 'Deleted')
            pList = set()
            if fileTypes:
                transClient = TransformationClient()
                for prod in prodList:
                    res = transClient.getBookkeepingQuery(prod)
                    if res['OK'] and res['Value']['FileType'] in fileTypes:
                        pList.add(prod)
            if not pList:
                pList = prodList
        else:
            # For Real Data, the records contain (negative) run numbers
            runList = sorted([-run for r in res['Value']['Records'] for run in r])
            startRun = int(self.__bkQueryDict.get('StartRun', 0))
            endRun = int(self.__bkQueryDict.get('EndRun', sys.maxsize))
            pList = set(run for run in runList if startRun <= run <= endRun)
        fullList.update(pList)
    return sorted(fullList)
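
# getBKProductions() above relies on a private helper __getProdStatus() that is
# not included in this excerpt. A minimal sketch of the idea, assuming it looks
# up the transformation status via the TransformationClient (hypothetical
# implementation, shown as a standalone function; the real helper may cache
# results):
def _getProdStatusSketch(prod):
    """ Return the status of production `prod`, or 'Unknown' if it cannot be determined. """
    res = TransformationClient().getTransformation(prod, extraParams=False)
    if not res['OK']:
        # On error, treat the production as being of unknown status rather than failing
        return 'Unknown'
    return res['Value'].get('Status', 'Unknown')
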
def execute():
    tr = TransformationClient()
    for switch in Script.getUnprocessedSwitches():
        pass  # no extra switches to handle
    bkQuery = dmScript.getBKQuery()
    if not bkQuery:
        gLogger.notice("No BKQuery given...")
        exit(1)
    startTime = time.time()
    prods = bkQuery.getBKProductions()  # visible = 'All'
    parents = {}
    productions = {}
    for prod in prods:
        prodType = tr.getTransformation(prod).get('Value', {}).get('Type', 'Unknown')
        productions[prod] = prodType
        parent = tr.getBookkeepingQuery(prod).get('Value', {}).get('ProductionID', '')
        if parent:
            parentType = tr.getTransformation(parent).get('Value', {}).get('Type', 'Unknown')
            parents[parent] = parentType
    gLogger.notice("For BK path %s:" % bkQuery.getPath())
    if not prods:
        gLogger.notice('No productions found!')
    else:
        printProds('Productions found', productions)
        if parents:
            printProds('Parent productions', parents)
    gLogger.notice('Completed in %.1f seconds' % (time.time() - startTime))
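
# printProds() is called above but not defined in this excerpt. A minimal sketch
# of what such a helper could look like, assuming it simply pretty-prints the
# {productionID: type} mapping built in execute() (hypothetical implementation):
def printProds(title, prods):
    """ Print a one-line summary per production: ID and transformation type. """
    gLogger.notice("%s (%d):" % (title, len(prods)))
    for prod in sorted(prods):
        gLogger.notice("    %s : %s" % (prod, prods[prod]))
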
class BookkeepingWatchAgent(AgentModule, TransformationAgentsUtilities):
  """ LHCbDIRAC only agent. A threaded agent.
  """

  def __init__(self, *args, **kwargs):
    """ c'tor
    """
    AgentModule.__init__(self, *args, **kwargs)
    TransformationAgentsUtilities.__init__(self)

    self.bkQueriesToBeChecked = Queue.Queue()
    self.bkQueriesInCheck = []

    self.fullUpdatePeriod = 86400
    self.bkUpdateLatency = 7200
    self.debug = False

    self.transInThread = {}

    self.pickleFile = 'BookkeepingWatchAgent.pkl'
    self.chunkSize = 1000

    self.pluginsWithNoRunInfo = ['LHCbStandard', 'ReplicateDataset', 'ArchiveDataset',
                                 'LHCbMCDSTBroadcastRandom', 'ReplicateToLocalSE',
                                 'RemoveReplicas', 'RemoveReplicasWhenProcessed',
                                 'RemoveReplicasWithAncestors', 'ReplicateWithAncestors',
                                 'ReduceReplicas', 'RemoveDatasetFromDisk',
                                 'DestroyDataset', 'DestroyDatasetWhenProcessed',
                                 'BySize', 'Standard']

    self.timeLog = {}
    self.fullTimeLog = {}
    self.bkQueries = {}

    self.transClient = None
    self.bkClient = None

  def initialize(self):
    """ Make the necessary initializations.
        The ThreadPool is created here, the _execute() method is what each thread will execute.
    """
    self.fullUpdatePeriod = self.am_getOption('FullUpdatePeriod', self.fullUpdatePeriod)
    self.bkUpdateLatency = self.am_getOption('BKUpdateLatency', self.bkUpdateLatency)
    self.debug = self.am_getOption('verbose', self.debug)

    self.pickleFile = os.path.join(self.am_getWorkDirectory(), self.pickleFile)
    self.chunkSize = self.am_getOption('maxFilesPerChunk', self.chunkSize)

    self.pluginsWithNoRunInfo = Operations().getValue('TransformationPlugins/PluginsWithNoRunInfo',
                                                      self.pluginsWithNoRunInfo)

    self._logInfo('Full Update Period: %d seconds' % self.fullUpdatePeriod)
    self._logInfo('BK update latency : %d seconds' % self.bkUpdateLatency)
    self._logInfo('Plugins with no run info: %s' % ', '.join(self.pluginsWithNoRunInfo))

    self.transClient = TransformationClient()
    self.bkClient = BookkeepingClient()

    try:
      with open(self.pickleFile, 'r') as pf:
        self.timeLog = pickle.load(pf)
        self.fullTimeLog = pickle.load(pf)
        self.bkQueries = pickle.load(pf)
      self._logInfo("successfully loaded Log from", self.pickleFile, "initialize")
    except (EOFError, IOError):
      self._logInfo("failed loading Log from", self.pickleFile, "initialize")
      self.timeLog = {}
      self.fullTimeLog = {}
      self.bkQueries = {}

    maxNumberOfThreads = self.am_getOption('maxThreadsInPool', 1)
    threadPool = ThreadPool(maxNumberOfThreads, maxNumberOfThreads)
    for i in xrange(maxNumberOfThreads):
      threadPool.generateJobAndQueueIt(self._execute, [i])

    gMonitor.registerActivity("Iteration", "Agent Loops", AGENT_NAME, "Loops/min", gMonitor.OP_SUM)

    return S_OK()

  @gSynchro
  def __dumpLog(self):
    """ dump the log in the pickle file
    """
    if self.pickleFile:
      try:
        with open(self.pickleFile, 'w') as pf:
          pickle.dump(self.timeLog, pf)
          pickle.dump(self.fullTimeLog, pf)
          pickle.dump(self.bkQueries, pf)
        self._logVerbose("successfully dumped Log into %s" % self.pickleFile)
      except IOError as e:
        self._logError("fail to open %s: %s" % (self.pickleFile, e))
      except pickle.PickleError as e:
        self._logError("fail to dump %s: %s" % (self.pickleFile, e))
      except ValueError as e:
        self._logError("fail to close %s: %s" % (self.pickleFile, e))

  ################################################################################
  def execute(self):
    """ Main execution method. Just fills a list, and a queue, with BK query IDs.
    """
    gMonitor.addMark('Iteration', 1)
    # Get all the transformations
    result = self.transClient.getTransformations(condDict={'Status': ['Active', 'Idle']})
    if not result['OK']:
      self._logError("Failed to get transformations.", result['Message'])
      return S_OK()
    transIDsList = [long(transDict['TransformationID']) for transDict in result['Value']]
    res = self.transClient.getTransformationsWithBkQueries(transIDsList)
    if not res['OK']:
      self._logError("Failed to get transformations with BK queries.", res['Message'])
      return S_OK()
    transIDsWithBkQueriesList = res['Value']

    _count = 0
    # Process each transformation
    for transID in transIDsWithBkQueriesList:
      if transID in self.bkQueriesInCheck:
        continue
      self.bkQueriesInCheck.append(transID)
      self.bkQueriesToBeChecked.put(transID)
      _count += 1

    self._logInfo("Out of %d transformations, %d put in thread queue" % (len(result['Value']), _count))

    self.__dumpLog()
    return S_OK()

  def _execute(self, threadID):
    """ Real executor. This is what is executed by the single threads - so do not return here! Just continue
    """
    while True:  # not self.bkQueriesToBeChecked.empty():
      transID = None
      try:
        transID = self.bkQueriesToBeChecked.get()
        self.transInThread[transID] = ' [Thread%d] [%s] ' % (threadID, str(transID))

        startTime = time.time()
        self._logInfo("Processing transformation %s." % transID, transID=transID)

        res = self.transClient.getTransformation(transID, extraParams=False)
        if not res['OK']:
          self._logError("Failed to get transformation", res['Message'], transID=transID)
          continue
        transPlugin = res['Value']['Plugin']

        res = self.transClient.getBookkeepingQuery(transID)
        if not res['OK']:
          self._logError("Failed to get BkQuery", res['Message'], transID=transID)
          continue
        bkQuery = res['Value']

        # Determine the correct time stamp to use for this transformation
        now = datetime.datetime.utcnow()
        self.__timeStampForTransformation(transID, bkQuery, now)

        try:
          files = self.__getFiles(transID, bkQuery, now)
        except RuntimeError as e:
          # In case a full query failed, retry the full query until successful
          if 'StartDate' not in bkQuery:
            self.bkQueries.pop(transID, None)
          self._logError("Failed to get response from the Bookkeeping: %s" % e, "", "__getFiles", transID)
          continue

        runDict = {}
        filesMetadata = {}
        # Get the files metadata
        for lfnChunk in breakListIntoChunks(files, self.chunkSize):
          start = time.time()
          res = self.bkClient.getFileMetadata(lfnChunk)
          self._logVerbose("Got metadata from BK for %d files" % len(lfnChunk), transID=transID, reftime=start)
          if not res['OK']:
            self._logError("Failed to get BK metadata for %d files" % len(lfnChunk),
                           res['Message'], transID=transID)
            # No need to return as we only consider files that are successful...
          else:
            filesMetadata.update(res['Value']['Successful'])

        # There is no need to add the run information for a transformation that doesn't need it
        if transPlugin not in self.pluginsWithNoRunInfo:
          for lfn, metadata in filesMetadata.iteritems():
            runID = metadata.get('RunNumber', None)
            if isinstance(runID, (basestring, int, long)):
              runDict.setdefault(int(runID), []).append(lfn)
          try:
            self.__addRunsMetadata(transID, runDict.keys())
          except RuntimeError as e:
            self._logException("Failure adding runs metadata",
                               method="__addRunsMetadata",
                               lException=e,
                               transID=transID)
        else:
          runDict[None] = filesMetadata.keys()

        # Add all new files to the transformation
        for runID in sorted(runDict):
          lfnList = runDict[runID]
          # We enter all files of a run at once, otherwise do it by chunks
          lfnChunks = [lfnList] if runID else breakListIntoChunks(lfnList, self.chunkSize)
          for lfnChunk in lfnChunks:
            # Add the files to the transformation
            self._logVerbose('Adding %d lfns for transformation' % len(lfnChunk), transID=transID)
            result = self.transClient.addFilesToTransformation(transID, lfnChunk)
            if not result['OK']:
              self._logError("Failed to add %d lfns to transformation" % len(lfnChunk),
                             result['Message'], transID=transID)
              return result
            else:
              # Handle errors
              errors = {}
              for lfn, error in result['Value']['Failed'].iteritems():
                errors.setdefault(error, []).append(lfn)
              for error, lfns in errors.iteritems():
                self._logWarn("Failed to add files to transformation", error, transID=transID)
                self._logVerbose("\n\t".join([''] + lfns))
              # Add the metadata and RunNumber to the newly inserted files
              addedLfns = [lfn for (lfn, status) in result['Value']['Successful'].iteritems()
                           if status == 'Added']
              if addedLfns:
                # Add files metadata: size and file type
                lfnDict = dict((lfn, {'Size': filesMetadata[lfn]['FileSize'],
                                      'FileType': filesMetadata[lfn]['FileType']})
                               for lfn in addedLfns)
                res = self.transClient.setParameterToTransformationFiles(transID, lfnDict)
                if not res['OK']:
                  self._logError("Failed to set transformation files metadata", res['Message'])
                  return res
                # Add run information if it exists
                if runID:
                  self._logInfo("Added %d files to transformation for run %d, now including run information"
                                % (len(addedLfns), runID), transID=transID)
                  self._logVerbose("Associating %d files to run %d" % (len(addedLfns), runID), transID=transID)
                  res = self.transClient.addTransformationRunFiles(transID, runID, addedLfns)
                  if not res['OK']:
                    self._logError("Failed to associate %d files to run %d" % (len(addedLfns), runID),
                                   res['Message'], transID=transID)
                    return res
                else:
                  self._logInfo("Added %d files to transformation" % len(addedLfns), transID=transID)

      except Exception as x:  # pylint: disable=broad-except
        self._logException('Exception while adding files to transformation', lException=x,
                           method='_execute', transID=transID)
      finally:
        self._logInfo("Processed transformation", transID=transID, reftime=startTime)
        if transID in self.bkQueriesInCheck:
          self.bkQueriesInCheck.remove(transID)
        self.transInThread.pop(transID, None)

    return S_OK()

  @gSynchro
  def __timeStampForTransformation(self, transID, bkQuery, now):
    """ Determine the correct time stamp to use for this transformation
    """
    fullTimeLog = self.fullTimeLog.setdefault(transID, now)
    bkQueryLog = self.bkQueries.setdefault(transID, {})

    bkQueryLog.pop('StartDate', None)
    self.bkQueries[transID] = bkQuery.copy()
    if transID in self.timeLog \
        and bkQueryLog == bkQuery \
        and (now - fullTimeLog) < datetime.timedelta(seconds=self.fullUpdatePeriod):
      # If it is less than a day (fullUpdatePeriod) since the last full query, a reduced
      # query is enough; otherwise make a full query just in case
      timeStamp = self.timeLog[transID]
      delta = datetime.timedelta(seconds=self.bkUpdateLatency)
      bkQuery['StartDate'] = (timeStamp - delta).strftime('%Y-%m-%d %H:%M:%S')
    if 'StartDate' not in bkQuery:
      self.fullTimeLog[transID] = now

  def __getFiles(self, transID, bkQuery, now):
    """ Perform the query to the Bookkeeping
    """
    self._logInfo("Using BK query for transformation: %s" % str(bkQuery), transID=transID)
    start = time.time()
    result = self.bkClient.getFiles(bkQuery)
    self._logVerbose("BK query time: %.2f seconds." % (time.time() - start), transID=transID)
    if not result['OK']:
      raise RuntimeError(result['Message'])
    self.__updateTimeStamp(transID, now)
    if result['Value']:
      self._logInfo("Obtained %d files from BK" % len(result['Value']), transID=transID)
    return result['Value']

  @gSynchro
  def __updateTimeStamp(self, transID, now):
    """ Update the time stamp for the current transformation to now
    """
    self.timeLog[transID] = now

  def __addRunsMetadata(self, transID, runsList):
    """ Add the run metadata
    """
    runsInCache = self.transClient.getRunsInCache({'Name': ['TCK', 'CondDb', 'DDDB']})
    if not runsInCache['OK']:
      raise RuntimeError(runsInCache['Message'])
    newRuns = list(set(runsList) - set(runsInCache['Value']))
    if newRuns:
      self._logVerbose("Associating run metadata to %d runs" % len(newRuns), transID=transID)
      res = self.bkClient.getRunInformation({'RunNumber': newRuns, 'Fields': ['TCK', 'CondDb', 'DDDB']})
      if not res['OK']:
        raise RuntimeError(res['Message'])
      for run, runMeta in res['Value'].iteritems():
        res = self.transClient.addRunsMetadata(run, runMeta)
        if not res['OK']:
          raise RuntimeError(res['Message'])
    # Add run duration to the metadata
    runsInCache = self.transClient.getRunsInCache({'Name': ['Duration']})
    if not runsInCache['OK']:
      raise RuntimeError(runsInCache['Message'])
    newRuns = list(set(runsList) - set(runsInCache['Value']))
    if newRuns:
      self._logVerbose("Associating run duration to %d runs" % len(newRuns), transID=transID)
      res = self.bkClient.getRunInformation({'RunNumber': newRuns, 'Fields': ['JobStart', 'JobEnd']})
      if not res['OK']:
        raise RuntimeError(res['Message'])
      for run, runMeta in res['Value'].iteritems():
        duration = (runMeta['JobEnd'] - runMeta['JobStart']).seconds
        res = self.transClient.addRunsMetadata(run, {'Duration': duration})
        if not res['OK']:
          raise RuntimeError(res['Message'])

  def finalize(self):
    """ Graceful finalization
    """
    if self.bkQueriesInCheck:
      self._logInfo("Waiting for the queue to empty before terminating the agent (%d tasks)"
                    % len(self.transInThread))
      self.bkQueriesInCheck = []
      while self.transInThread:
        time.sleep(2)
      self.log.info("Threads are empty, terminating the agent...")
    return S_OK()
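
# Illustration of the time-stamp logic implemented by __timeStampForTransformation()
# above: as long as the last full BK query is younger than fullUpdatePeriod, the agent
# only asks for files registered after (last check - bkUpdateLatency); the overlap of
# bkUpdateLatency absorbs late BK registrations. A minimal standalone sketch, using
# the default settings from __init__ above (the variable names here are illustrative):
#
# import datetime
#
# fullUpdatePeriod = 86400  # seconds: redo a full query once a day
# bkUpdateLatency = 7200    # seconds: overlap to catch late BK registrations
#
# now = datetime.datetime.utcnow()
# lastCheck = now - datetime.timedelta(seconds=600)       # last reduced query, 10 min ago
# lastFullQuery = now - datetime.timedelta(seconds=3600)  # last full query, 1 hour ago
#
# if (now - lastFullQuery) < datetime.timedelta(seconds=fullUpdatePeriod):
#     # Reduced query: only files registered since lastCheck - latency
#     startDate = (lastCheck - datetime.timedelta(seconds=bkUpdateLatency)).strftime('%Y-%m-%d %H:%M:%S')
#     print "Reduced BK query with StartDate =", startDate
# else:
#     # Full query: no StartDate restriction
#     print "Full BK query (no StartDate)"
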
  elif opt == 'AddRuns':
    try:
      settings[opt] = [int(runID) for runID in val.split(',')]
    except (ValueError, TypeError):
      gLogger.error("Invalid run list", str(val))
      DIRAC.exit(1)
  elif opt == 'List':
    settings[opt] = True

if 'AddRuns' in settings and ('StartRun' in settings or 'EndRun' in settings):
  gLogger.error('Incompatible requests, cannot set a run list and start/end run')
  DIRAC.exit(1)

from LHCbDIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
client = TransformationClient()

res = client.getBookkeepingQuery(prodId)
if not res['OK']:
  gLogger.error("Error retrieving BKQuery for transformation %s" % prodId, res['Message'])
  DIRAC.exit(2)
bkDict = res['Value']
startRun = bkDict.get('StartRun', 0)
endRun = bkDict.get('EndRun', 0)
runNumbers = bkDict.get('RunNumbers', 'All')

if ('StartRun' in settings or 'EndRun' in settings) and runNumbers and runNumbers != 'All':
  gLogger.notice("Transformation %d has a RunNumbers key" % prodId)
  settings = {'List': True}

if 'AddRuns' in settings and (startRun or endRun):
class ProcessingProgress( object ):

  def __init__( self, cacheFile = None ):
    if not cacheFile:
      self.prodStatFile = os.path.join( os.environ['HOME'], ".dirac/work", "dirac-production-stats.pkl" )
    else:
      self.prodStatFile = cacheFile
    self.cacheVersion = '0.0'
    self.clearCache = []
    self.cachedInfo = {}

    # Recover the previously cached information
    self.readCache()

    self.bk = BookkeepingClient()
    self.transClient = TransformationClient()

  def setClearCache( self, clearCache ):
    self.clearCache = clearCache

  def __getProdBkDict( self, prod ):
    res = self.transClient.getBookkeepingQuery( prod )
    if not res['OK']:
      gLogger.error( "Couldn't get BK query on production %d" % prod )
      return {}
    prodBKDict = res['Value']
    return prodBKDict

  def getFullStats( self, bkQuery, printResult = False ):
    processingPass = bkQuery.getProcessingPass()
    if printResult:
      gLogger.info( "\nStatistics for processing %s, condition %s\n" % ( processingPass, bkQuery.getConditions() ) )
    prodStats = []
    processingPass = processingPass.split( '/' )
    if len( processingPass ) != 4 or processingPass[1] != "Real Data":
      gLogger.error( "Processing pass should be /Real Data/<Reco>/<Stripping>" )
      return []

    # Get production numbers for the Reco
    recoBKQuery = BKQuery( bkQuery )
    recoBKQuery.setProcessingPass( '/'.join( processingPass[0:3] ) )
    recoList = recoBKQuery.getBKProductions( visible = False )
    recoRunRanges = {}
    recoDQFlags = []
    for prod in recoList:
      prodBKDict = self.__getProdBkDict( prod )
      if prodBKDict:
        recoRunRanges[prod] = [prodBKDict.get( "StartRun", 0 ), prodBKDict.get( "EndRun", sys.maxint )]
        dqFlags = prodBKDict.get( "DataQualityFlag", ['UNCHECKED', 'EXPRESS_OK', 'OK'] )
        if isinstance( dqFlags, basestring ):
          dqFlags = dqFlags.split( ',' )
        recoDQFlags += [fl for fl in dqFlags if fl not in recoDQFlags]
      else:
        recoRunRanges[prod] = [0, 0]
    # Sort productions by runs
    try:
      recoList.sort( cmp = ( lambda p1, p2: int( recoRunRanges[p1][0] - recoRunRanges[p2][1] ) ) )
    except Exception:
      print "Exception in sorting productions:"
      for p in recoList:
        print p, recoRunRanges[p]
    gLogger.verbose( "Reconstruction productions found (%d): %s" % ( len( recoList ), str( sorted( recoList ) ) ) )
    gLogger.verbose( "Reconstruction DQ flags: %s" % str( recoDQFlags ) )

    # Get productions for merging
    mergeList = []
    mergeStripProds = {}
    # Get stripping productions as parents of merging productions
    stripList = []
    for prod in bkQuery.getBKProductions( visible = False ):
      prodBKDict = self.__getProdBkDict( prod )
      gLogger.verbose( "BK query for production %s: %s" % ( prod, str( prodBKDict ) ) )
      mergedTypes = prodBKDict.get( 'FileType' )
      if not isinstance( mergedTypes, list ):
        mergedTypes = [mergedTypes]
      if [ft for ft in bkQuery.getFileTypeList() if ft in mergedTypes] and 'ProductionID' in prodBKDict:
        mergeList.append( prod )
        prods = prodBKDict['ProductionID']
        if not isinstance( prods, list ):
          prods = [prods]
        stripList += prods
        mergeStripProds[prod] = [str( p ) for p in prods]
      else:
        _msgTuple = ( str( bkQuery.getFileTypeList() ), prod, str( prodBKDict ) )
        gLogger.verbose( "Could not find production or file type %s in BK query of production %d (%s)" % _msgTuple )
    mergeList.sort( cmp = ( lambda p1, p2: int( mergeStripProds[p1][0] ) - int( mergeStripProds[p2][0] ) ) )
    gLogger.verbose( "Merging productions found: %s" % str( mergeList ) )

    # Get the list of stripping productions (from merging)
    stripRunRanges = {}
    for prod in stripList:
      prodBKDict = self.__getProdBkDict( prod )
      if prodBKDict:
        stripRunRanges[prod] = [prodBKDict.get( "StartRun", 0 ), prodBKDict.get( "EndRun", sys.maxint )]
      else:
        stripRunRanges[prod] = [0, 0]
    # Sort productions by runs
    try:
      stripList.sort( cmp = ( lambda p1, p2: int( stripRunRanges[p1][0] - stripRunRanges[p2][1] ) ) )
    except Exception:
      print "Error when sorting stripping productions:"
      for prodStrip in stripList:
        print prodStrip, stripRunRanges[prodStrip]
    gLogger.verbose( "Stripping productions found (%d): %s" % ( len( stripList ), str( sorted( stripList ) ) ) )

    # Get all runs corresponding to the run range used by the Reco productions
    rawBKQuery = BKQuery( bkQuery )
    rawBKQuery.setProcessingPass( '/Real Data' )
    rawBKQuery.setFileType( "RAW" )
    # Get the list of runs (-prodNum)
    fullRunList = rawBKQuery.getBKRuns()
    gLogger.verbose( "Initial list of runs: %s" % str( fullRunList ) )
    recoRunList = []
    openProd = False
    for prod in [p for p in recoList]:
      # Forget fully open productions
      # Don't consider productions without a BK query (these are individual files)
      if recoRunRanges[prod][1] == sys.maxint and recoRunRanges[prod][0] != -sys.maxint:
        openProd = True
        # Try and find if that open production overlaps with a closed one, in which case remove it
        # Do nothing for derived productions
        for p in [p for p in recoList if p != prod and recoRunRanges[prod] != recoRunRanges[p]]:
          if recoRunRanges[prod][0] < recoRunRanges[p][1] and recoRunRanges[p][1] != sys.maxint:
            openProd = False
            gLogger.verbose( "Production %s was removed as redundant..." % str( prod ) )
            recoList.remove( prod )
            break
        if not openProd:
          continue
      recoRunList += [run for run in fullRunList if run not in recoRunList
                      and run >= recoRunRanges[prod][0] and run <= recoRunRanges[prod][1]]
    gLogger.verbose( "List of runs matching Reco (%d): %s" % ( len( recoRunList ), str( sorted( recoRunList ) ) ) )

    restrictToStripping = True
    if restrictToStripping and not openProd and stripList:
      runList = []
      for prod in stripList:
        runList += [run for run in recoRunList if run not in runList
                    and run >= stripRunRanges[prod][0] and run <= stripRunRanges[prod][1]]
    else:
      runList = recoRunList
    gLogger.verbose( "Final list of runs matching Reco and Stripping (%d): %s" % ( len( runList ), str( sorted( runList ) ) ) )

    # Now get statistics from the runs
    info, runInfo = self._getStatsFromRuns( int( bkQuery.getEventTypeList()[0] ), runList, recoDQFlags )
    rawInfo = StatInfo( processingPass[1], info )
    prodStats.append( rawInfo )
    if printResult:
      gLogger.info( "%s - All runs in Reco productions" % processingPass[1] )
      for fileInfo in runInfo:
        if runInfo[fileInfo]:
          gLogger.info( "%s runs (%d): %s" % ( fileInfo, len( runInfo[fileInfo] ), str( runInfo[fileInfo] ) ) )
      summStr = "%s files, " % rawInfo.getItemAsString( 'Files' )
      summStr += "%s events in " % rawInfo.getItemAsString( 'Events' )
      summStr += "%s runs, luminosity (pb-1): All=%s, Bad=%s, OK=%s" % ( rawInfo.getItemAsString( 'Runs' ),
                                                                         rawInfo.getItemAsString( 'Lumi' ),
                                                                         rawInfo.getItemAsString( 'BadLumi' ),
                                                                         rawInfo.getItemAsString( 'OKLumi' ) )
      gLogger.info( summStr )

    # Create the info for the 3 sets of productions
    prodSets = []
    fileType = bkQuery.getFileTypeList()[0]
    prodSets.append( {'Name': processingPass[2], 'FileType': ['SDST', 'FULL.DST'], 'List': recoList,
                      'RunRange': recoRunRanges, 'MotherProds': None, 'AllReplicas': False} )
    prodSets.append( {'Name': processingPass[3], 'FileType': fileType, 'List': stripList,
                      'RunRange': stripRunRanges, 'MotherProds': None, 'AllReplicas': True, 'StatForOK': False} )
    prodSets.append( {'Name': "Merging (%s)" % fileType.split( '.' )[0], 'FileType': fileType, 'List': mergeList,
                      'RunRange': None, 'MotherProds': mergeStripProds, 'AllReplicas': False} )

    prevInfo = rawInfo
    for prodSet in prodSets:
      info = StatInfo( prodSet['Name'], self._getProdInfo( prodSet, runList, printResult = printResult ) )
      info.setRawInfo( rawInfo )
      info.setPrevInfo( prevInfo )
      prevInfo = info
      prodStats.append( info )

    self.saveCache()
    return prodStats

  @staticmethod
  def __sumProdInfo( info, totInfo ):
    for inf in info:
      for flag in info[inf]:
        if inf == 'Runs':
          totInfo[inf][flag] = totInfo.setdefault( inf, {} ).setdefault( flag, [] ) + info[inf][flag]
        else:
          totInfo[inf][flag] = totInfo.setdefault( inf, {} ).setdefault( flag, 0 ) + info[inf][flag]
    return totInfo

  def _getProdInfo( self, prodSet, runList, printResult = False ):
    totInfo = {}
    if printResult:
      gLogger.info( "" )
    for prod in prodSet['List']:
      info, runInfo = self._getStatsFromBK( prod, prodSet['FileType'], runList, prodSet['AllReplicas'] )
      if info['Files'][''] == 0:
        continue
      if not prodSet.get( 'StatForOK', True ):
        for item in info:
          for fl in info[item]:
            if fl == 'OK':
              info[item][fl] = 0 if item != 'Runs' else []
      runRange = prodSet['RunRange']
      if runRange and prod in runRange and runRange[prod][0] == 0 and runRange[prod][1] == 0:
        for flag in info['Runs']:
          info['Runs'][flag] = []
      totInfo = self.__sumProdInfo( info, totInfo )
      if printResult:
        summStr = "%s production %d -" % ( prodSet['Name'], prod )
        if runRange and prod in runRange:
          firstRun = runRange[prod][0]
          lastRun = runRange[prod][1]
          if firstRun:
            summStr += " From run %d" % int( firstRun )
          if lastRun and lastRun != sys.maxint:
            summStr += " Up to run %d" % int( lastRun )
          if firstRun == 0 and lastRun == 0:
            summStr += " No run range specified"
        motherProds = prodSet['MotherProds']
        if motherProds and prod in motherProds:
          summStr += " from productions %s" % motherProds[prod]
        gLogger.info( summStr )
        for inf in runInfo:
          if runInfo[inf]:
            gLogger.info( "%s runs (%d): %s" % ( inf, len( runInfo[inf] ), str( runInfo[inf] ) ) )
        summStr = "%d files, " % info['Files']['']
        if info['Events']:
          summStr += "%d events in " % info['Events']['']
        _msgTuple = ( len( info['Runs'][''] ), info['Lumi'][''], info['Lumi']['Bad'], info['Lumi']['OK'] )
        summStr += "%d runs, luminosity (pb-1): All=%.3f, Bad=%.3f, OK=%.3f" % _msgTuple
        gLogger.info( summStr )
    for flag in totInfo.get( 'Runs', [] ):
      totInfo['Runs'][flag] = len( totInfo['Runs'][flag] )
    return totInfo

  @staticmethod
  def outputResults( conditions, processingPass, prodStats ):
    outputString = ""
    _msgTuple = ( conditions, ",", processingPass, "on", time.ctime( time.time() ) )
    outputString += "\nProduction progress for %s %s %s %s %s\n" % _msgTuple
    if len( prodStats ) < 4:
      outputString += "No statistics found for this BK query"
      return outputString
    for i in xrange( 4 ):
      info = prodStats[i]
      if not info:
        continue
      name = info.getName()
      outputString += "\nSummary for %s\n" % name
      outputString += "%d files, " % info.getItem( 'Files' )
      if info.getItem( 'Events' ):
        outputString += "%d events in " % info.getItem( 'Events' )
      _msgTuple = ( info.getItem( 'Runs' ), info.getItem( 'Lumi' ), info.getItem( 'BadLumi' ), info.getItem( 'OKLumi' ) )
      outputString += "%d runs, luminosity (pb-1): All=%.3f, Bad=%.3f, OK=%.3f\n" % _msgTuple
      prevStats = prodStats[:i]
      prevStats.reverse()
      for prevInfo in prevStats:
        name = prevInfo.getName()
        if prevInfo.getItem( 'Runs' ) == 0:
          outputString += "From %s : - No runs...\n" % name
        else:
          outputString += "From %s : %.1f%% files, " % ( name, 100. * info.getItem( 'Files' ) / prevInfo.getItem( 'Files' ) )
          if info.getItem( 'Events' ) and prevInfo.getItem( 'Events' ):
            outputString += "%.1f%% events\n" % ( 100. * info.getItem( 'Events' ) / prevInfo.getItem( 'Events' ) )
          outputString += "%.1f%% runs, %.1f%% luminosity\n" \
              % ( 100. * info.getItem( 'Runs' ) / prevInfo.getItem( 'Runs' ),
                  100. * info.getItem( 'Lumi' ) / prevInfo.getItem( 'Lumi' ) )
    return outputString

  def __getRunsDQFlag( self, runList, evtType ):
    res = self.bk.getRunFilesDataQuality( runList )
    runFlags = {}
    if res['OK']:
      dqFlags = res['Value']
      for dq in dqFlags:
        if dq[2] == evtType:
          runFlags.setdefault( dq[0], [] ).append( dq[1] )
    runDQFlags = {}
    flags = ( 'BAD', 'OK', 'EXPRESS_OK', 'UNCHECKED' )
    for run in runFlags:
      for fl in flags:
        if fl in runFlags[run]:
          runDQFlags[run] = fl
          break
    return runDQFlags

  def _getStatsFromRuns( self, evtType, runList, recoDQFlags ):
    info = dict.fromkeys( ( 'Events', 'Runs', 'Files', 'Lumi' ), {} )
    for inf in info:
      info[inf] = dict.fromkeys( ( 'Bad', 'OK', '' ), 0 )
    now = datetime.datetime.utcnow()
    # Set to True to renew the cache
    clearCache = 'RAW' in self.clearCache
    newRuns = [run for run in runList
               if clearCache or run not in self.cachedInfo or 'DQFlag' not in self.cachedInfo[run]
               or ( now - self.cachedInfo[run]['Time'] ) < datetime.timedelta( days = 2 )]
    if newRuns:
      runFlags = self.__getRunsDQFlag( newRuns, evtType )
    else:
      runFlags = {}
    runsByDQFlag = {}
    runInfo = {}
    for run in runList:
      cached = self.cachedInfo.get( run, {} )
      cachedTime = cached.get( 'Time', None )
      if run not in newRuns:
        cachedFiles = cached.get( 'Files', 0 )
        cachedEvents = cached.get( 'EventStat', 0 )
        cachedLumi = cached.get( 'Luminosity', 0 )
        dqFlag = cached.get( 'DQFlag', None )
      else:
        res = self.bk.getRunInformations( run )
        if res['OK']:
          val = res['Value']
          ind = val['Stream'].index( 90000000 )
          cachedFiles = val['Number of file'][ind]
          cachedEvents = val['Number of events'][ind]
          cachedLumi = val['luminosity'][ind]
          cachedTime = val['RunStart']
        else:
          gLogger.error( "Unable to get run information for run %s" % str( run ) )
          continue
        dqFlag = runFlags[run]
      self.cachedInfo[run] = {'Time': cachedTime, 'Files': cachedFiles, 'EventStat': cachedEvents,
                              'Luminosity': cachedLumi, 'DQFlag': dqFlag}
      runsByDQFlag[dqFlag] = runsByDQFlag.setdefault( dqFlag, 0 ) + 1
      if dqFlag == "BAD":
        runInfo.setdefault( 'BAD', [] ).append( run )
      elif dqFlag not in recoDQFlags and dqFlag != 'OK':
        runInfo.setdefault( 'Untagged', [] ).append( run )
      # Now count...
      flags = []
      if dqFlag != 'BAD':
        flags.append( '' )
      # OK in recoDQFlags means we take everything that is not BAD (reprocessing or new convention)
      if dqFlag in recoDQFlags or dqFlag == 'OK':
        flags.append( 'OK' )
      else:
        flags.append( 'Bad' )
      for flag in flags:
        info['Runs'][flag] += 1
        info['Files'][flag] += cachedFiles
        info['Events'][flag] += cachedEvents
        info['Lumi'][flag] += cachedLumi

    # Set lumi in pb-1
    for flag in info['Lumi']:
      info['Lumi'][flag] /= 1000000.
    gLogger.info( "Runs per flag:" )
    for key in runsByDQFlag:
      gLogger.info( "%s : %d" % ( key, runsByDQFlag[key] ) )
    for flag in runInfo:
      runInfo[flag].sort()
    return info, runInfo

  def __getLfnsMetadata( self, lfns ):
    lfnDict = {}
    if len( lfns ):
      gLogger.verbose( "Getting metadata for %d files" % len( lfns ) )
      for lfnChunk in breakListIntoChunks( lfns, 1000 ):
        while True:
          res = self.bk.getFileMetadata( lfnChunk )
          if not res['OK']:
            gLogger.error( "Error getting files metadata, retrying...", res['Message'] )
          else:
            break
        metadata = res['Value']['Successful']
        for lfn in lfnChunk:
          lfnDict[lfn] = {}
          for meta in ( 'EventStat', 'Luminosity', 'DQFlag', 'RunNumber' ):
            lfnDict[lfn][meta] = metadata[lfn][meta]
    return lfnDict

  def _getStatsFromBK( self, prod, fileType, runList, allReplicas ):
    bkQueryDict = {"ProductionID": prod, "FileType": fileType}
    bkStr = str( bkQueryDict )
    bkQuery = BKQuery( bkQueryDict, visible = not allReplicas )
    if allReplicas:
      bkQuery.setOption( 'ReplicaFlag', "All" )
    cached = self.cachedInfo.get( bkStr, {} )
    cachedTime = cached.get( 'Time', None )
    cachedLfns = cached.get( 'Lfns', {} )
    if isinstance( fileType, basestring ):
      fileType = [fileType]
    if set( fileType ).intersection( set( self.clearCache ) ):
      cachedTime = None
      cachedLfns = {}
      gLogger.verbose( "Cleared cache for production %s, file type %s" % ( str( prod ), fileType ) )

    # Update if needed the cached information on LFNs
    if cachedLfns:
      lfns = [lfn for lfn in cachedLfns if cachedLfns[lfn].get( 'DQFlag' ) not in ( 'OK', 'BAD' )]
      for lfnChunk in breakListIntoChunks( lfns, 1000 ):
        # Get the DQFlag of files that are not yet OK
        while True:
          res = self.bk.getFileMetadata( lfnChunk )
          if not res['OK']:
            gLogger.error( "Error getting files metadata for cached files, bkQuery %s: %s" % ( bkStr, res['Message'] ) )
          else:
            metadata = res['Value']['Successful']
            for lfn in lfnChunk:
              cachedLfns[lfn]['DQFlag'] = metadata[lfn]['DQFlag']
            break

    # Now get the new files since last time...
    if cachedTime:
      bkQuery.setOption( 'StartDate', cachedTime.strftime( '%Y-%m-%d %H:%M:%S' ) )
    gLogger.verbose( "Getting files for BKQuery %s" % str( bkQuery ) )
    cachedTime = datetime.datetime.utcnow()
    lfns = [lfn for lfn in bkQuery.getLFNs( printOutput = False ) if lfn not in cachedLfns]
    gLogger.verbose( "Returned %d files" % len( lfns ) )
    cachedLfns.update( self.__getLfnsMetadata( lfns ) )

    self.cachedInfo[bkStr] = {'Time': cachedTime, 'Lfns': cachedLfns}

    # Now sum up all information for the files
    info = dict.fromkeys( ( 'Events', 'Runs', 'Files', 'Lumi' ), {} )
    for inf in info:
      if inf == 'Runs':
        for flag in ( 'Bad', 'OK', '' ):
          info[inf][flag] = []
      else:
        info[inf] = dict.fromkeys( ( 'Bad', 'OK', '' ), 0 )
    for lfn in cachedLfns:
      lfnInfo = cachedLfns[lfn]
      run = lfnInfo['RunNumber']
      if run in runList and run in self.cachedInfo and self.cachedInfo[run]['DQFlag'] != 'BAD':
        dqFlag = cachedLfns[lfn]['DQFlag']
        flags = []
        if dqFlag != 'BAD':
          flags.append( '' )
        else:
          flags.append( 'Bad' )
        if dqFlag == 'OK':
          flags.append( 'OK' )
        for flag in flags:
          if run not in info['Runs'][flag]:
            info['Runs'][flag].append( run )
          info['Files'][flag] += 1
          info['Events'][flag] += lfnInfo['EventStat']
          info['Lumi'][flag] += lfnInfo['Luminosity']

    runInfo = {}
    if 'BAD' in info['Runs']:
      runInfo['BAD'] = info['Runs']['BAD']
      runInfo['BAD'].sort()
    else:
      runInfo['BAD'] = []
    if '' in info['Runs'] and 'OK' in info['Runs']:
      runInfo['Untagged'] = [run for run in info['Runs'][''] if run not in info['Runs']['OK']]
      runInfo['Untagged'].sort()
    else:
      runInfo['Untagged'] = []
    # for f in info['Runs']:
    #   info['Runs'][f] = len( info['Runs'][f] )
    for flag in info['Lumi']:
      info['Lumi'][flag] /= 1000000.
    return info, runInfo

  def getPreviousStats( self, processingPass ):
    prevStats = self.cachedInfo.get( 'ProdStats', {} ).get( processingPass )
    if prevStats:
      try:
        _name = prevStats['ProdStats'][0][0].getName()
      except Exception:
        prevStats = None
    return prevStats

  def setPreviousStats( self, processingPass, prevStats ):
    self.cachedInfo.setdefault( 'ProdStats', {} )[processingPass] = prevStats
    self.saveCache()

  def readCache( self ):
    if not os.path.exists( self.prodStatFile ):
      gLogger.info( "Creating cache file %s" % self.prodStatFile )
      self.cachedInfo = {}
      self.saveCache()
      return
    fileRead = False
    while not fileRead:
      try:
        with FileLock( self.prodStatFile ):
          lFile = open( self.prodStatFile, 'r' )
          cachedVersion = pickle.load( lFile )
          startTime = time.time()
          if cachedVersion == self.cacheVersion:
            self.cachedInfo = pickle.load( lFile )
            _msgTuple = ( self.prodStatFile, time.time() - startTime )
            gLogger.info( "Loaded cached information from %s in %.3f seconds" % _msgTuple )
          else:
            _msgTuple = ( cachedVersion, self.cacheVersion )
            gLogger.info( "Incompatible versions of cache, resetting information (found %s, expected %s)" % _msgTuple )
            self.cachedInfo = {}
          lFile.close()
          fileRead = True
      except FileLockException as error:
        gLogger.error( "Lock exception: %s while reading pickle file %s" % ( error, self.prodStatFile ) )
      except: