def readCache(filename): # read the cache file and returns the list of the RunReports for all the cached files runReports = [] runNumbers = [] if os.path.exists(filename): print "reading cache file: " + filename cache = file(filename,"r") data = cache.readlines() for line in data: if line[0] != '#' and line != "": # read the relevant lines # print line items = line.split() # get the run # runCached = int(items[0]) runNumbers.append(runCached) # create the report runReport = RunReport(runCached) #runReport.setRunNumber(runCached) runReport.startTime = RunInfo.getDate(items[1] + " " + items[2]) runReport.stopTime = RunInfo.getDate(items[3] + " " + items[4]) runReport.pclRun = ast.literal_eval(items[5]) runReport.multipleFiles = ast.literal_eval(items[6]) runReport.hasPayload = ast.literal_eval(items[7]) runReport.hasUpload = ast.literal_eval(items[8]) runReport.uploadSucceeded = ast.literal_eval(items[9]) runReport.isOutofOrder = ast.literal_eval(items[10]) latencyJobFromEnd = float(items[11]) latencyStartCached = float(items[12]) latencyEndCached = float(items[13]) runReport.latencyJobFromEnd = latencyJobFromEnd runReport.latencyUploadFromStart = latencyStartCached runReport.latencyUploadFromEnd = latencyEndCached runReports.append(runReport) cache.close() return runNumbers, runReports
def readCache(filename):
    # Read the cache file and return (runNumbers, runReports) for all cached runs.
    # NOTE(review): indentation reconstructed from a collapsed one-line source --
    # nesting inferred from the data flow; confirm against the original file.
    #
    # Expected cache layout: the FIRST line of the file is a header row whose
    # columns from index 4 onwards are record names; every data row has
    # run, startDate, startTime, stopDate, stopTime, then one status per record.
    runReports = []
    runNumbers = []
    if os.path.exists(filename):
        print "reading cache file: " + filename
        cache = file(filename,"r")
        data = cache.readlines()
        for line in data:
            # skip comment lines; NOTE(review): a whitespace-only line would pass
            # this test and crash on items[0] -- presumably never happens in
            # practice, verify the cache writer never emits blank lines
            if line[0] != '#' and line != "":
                items = line.split()
                # run number is the first column
                runCached = int(items[0])
                runNumbers.append(runCached)
                # create the per-run report
                rRep = RunReportTagCheck()
                rRep.setRunNumber(runCached)
                # start/stop are stored as two columns each (date + time)
                startCached = RunInfo.getDate(items[1] + " " + items[2])
                rRep.setStartTime(startCached)
                stopCached = RunInfo.getDate(items[3] + " " + items[4])
                rRep.setStopTime(stopCached)
                # remaining columns are one status value per monitored record;
                # record names come from the header row (data[0]), statuses from
                # this data row -- the two are aligned by column index
                remaining = len(items) - 5
                last = 4
                #print runCached
                while remaining >= 1:
                    record = data[0].split()[last]
                    status = items[last+1]
                    rRep.addRecordAndStatus(record,status)
                    #print data[0].split()
                    #print record, status
                    last += 1
                    remaining -= 1
                runReports.append(rRep)
        cache.close()
    return runNumbers, runReports
deltaTRunH = deltaTRun.seconds/(60.*60.) # FIXME if rRep.runNumber() > lastPromptRecoRun and not nextFound: pageWriter.setNextPromptReco(rRep.runNumber(), rRep.startTime(), rRep.stopTime(), deltaTRunH) nextFound = True producePlots(rcdReports, runReports, nextPromptRecoRun) status = monitorStatus.MonitorStatus('read') status.readJsonFile(webArea + "status.json") pageWriter.setBackendUpdateDate(status.getField('update')) pageWriter.statusSummary(status.getField('status'),status.getField('msg')) #update = datetime.datetime update = RunInfo.getDate(status.getField('update')) deltatfromend = datetime.datetime.today() - update # deltatfromendH = deltatfromend.days*24. + deltatfromend.seconds/(60.*60.) #print deltatfromendH pageWriter.setOldUpdateWarning(deltatfromend) pageWriter.buildPage(webArea) sys.exit(0)
def runBackEnd():
    # Back-end of the PCL (Prompt Calibration Loop) monitor.
    # For each configured PCL workflow (config.pclTasks) it:
    #   1. scans the AFS drop area for new sqlite files and classifies them
    #      (payload present, uploaded, out-of-order, exported to Oracle);
    #   2. rebuilds the per-run reports (cache + RunRegistry + RunInfo);
    #   3. assigns an error/warning status per run and writes cache, log and JSON.
    # Returns (status, message): 0/'OK' on success, non-zero code otherwise.
    #
    # NOTE(review): indentation reconstructed from a collapsed one-line source;
    # the nesting of the per-file checks below is inferred -- confirm against
    # the original script.

    # --- get the prompt reco status from Tier0-DAS
    tier0Das = tier0DasInterface.Tier0DasInterface(config.tier0DasSrc)
    try:
        lastPromptRecoRun = tier0Das.firstConditionSafeRun()
        print "Tier0 DAS last run released for PROMPT: ", lastPromptRecoRun
        #print "Tier0 DAS first run safe for condition update:", tier0Das.firstConditionSafeRun()
    except Exception as error:
        print '*** Error 2: Tier0-DAS query has failed'
        print error
        return 102, "Error: Tier0-DAS query has failed: " + str(error)

    # --------------------------------------------------------------------------------
    # find the file produced by PCL in the afs area
    fileList = os.listdir(config.promptCalibDir)

    # --------------------------------------------------------------------------------
    # create a dictionary to store cached IOV tables for target tags in ORACLE
    # (avoids re-running listIov for tags shared by several files)
    oracleTables = {}

    # --------------------------------------------------------------------------------
    # these will store the global outcome of the script
    tagsJson = pclMonitoringTools.PCLTagsJson('pclMonitor')
    retValues = 0, 'OK'

    for pclTag in config.pclTasks:
        print blue("---------------------------------------------------------------------")
        print "--- Check status for PCL workflow: " + blue(pclTag)

        # --------------------------------------------------------------------------------
        # --- get the overview of the files produced by PCL for this workflow (cache + AFS area)
        print "Look for new files produced for this workflow:"
        pclFileEngine = pclMonitoringTools.PCLFileEngine(config.promptCalibDir, pclTag)
        pclFileEngine.readFromCache()
        # sentinel far in the past so the first uploaded file is never "out of order"
        lastUploadTime = datetime.datetime(1960, 01, 01, 00, 00, 00)
        for pclFileName in fileList:
            if '.db' in pclFileName:
                if pclTag in pclFileName:
                    outFile = pclMonitoringTools.PCLOutputFile(config.promptCalibDir, pclFileName)
                    if pclFileEngine.isNewFile(outFile):
                        print " -- New file found for this workflow: " + pclFileName
                        # 1. check if the file contains a payload
                        if outFile.checkPayload(pclTag):
                            print " file contains paylaods"
                            if outFile.isUploaded:
                                print " file has been uploaded to DropBox"
                                # FIXME: need sorting of the files?
                                # 2. check if the file was uploaded out of order
                                if outFile.checkOutOfOrder(lastUploadTime):
                                    print " file uploaded OOO"
                                # FIXME: where?
                                lastUploadTime = outFile.uploadTime
                                # -------------------------------------------------------------------
                                # 3. check for the IOVs in ORACLE
                                # get the target tag in oracle from the Drop-Box metadata
                                metadatamap = outFile.getDropBoxMetadata()
                                #print metadatamap
                                targetOracleTag = metadatamap['destinationTags'].keys()[0]
                                targetOracleConnect = metadatamap['destinationDatabase']
                                # check for online connection strings: rewrite the
                                # online (orcon_prod) connect string to the offline
                                # replica (orcon_adg) that can actually be queried
                                if 'oracle://cms_orcon_prod' in targetOracleConnect:
                                    targetOracleConnect = 'oracle://cms_orcon_adg/CMS_COND_'+metadatamap['destinationDatabase'].split('CMS_COND_')[1]
                                print " Target tag in Oracle:",targetOracleTag,'in account',targetOracleConnect
                                # list IOV for the target tag (cache the list of IOV by pclTag:
                                # in case has not changed there is no need to re-run listIov)
                                iovtable_oracle = gtTools.IOVTable()
                                if not targetOracleTag in oracleTables:
                                    # IOV for this tag in ORACLE has not yet been cached -> will now list IOV
                                    listiov_oracle = gtTools.listIov(targetOracleConnect, targetOracleTag, config.passwdfile)
                                    print " listing IOV..."
                                    if listiov_oracle[0] == 0:
                                        iovtable_oracle.setFromListIOV(listiov_oracle[1])
                                        oracleTables[targetOracleTag] = iovtable_oracle
                                else:
                                    # get the IOV from the cache dictionary
                                    print " getting IOV list from cache..."
                                    iovtable_oracle = oracleTables[targetOracleTag]
                                if outFile.checkOracleExport(iovtable_oracle):
                                    print " file correctly exported to Oracle"
                        print " file status: " + outFile.status()
                        pclFileEngine.addNewFile(outFile)
        pclFileEngine.writeToCache()

        tagReport = pclMonitoringTools.PclTagReport(pclTag)
        cacheFileName = 'pclMonitor_' + pclTag + '.cache'

        # --------------------------------------------------------------------------------
        # --- read the cache
        cachedRuns, runReports = pclMonitoringTools.readCache(cacheFileName)
        unknownRun = False  # this is set to true only if one run can not be processed
        unknownRunMsg = ''

        # --------------------------------------------------------------------------------
        # --- get the last cached run
        if len(cachedRuns) != 0:
            cachedRuns.sort()
        else:
            # NOTE(review): bare `firstRunToMonitor` here vs. `config.firstRunToMonitor`
            # a few lines below -- presumably a module-level alias exists; verify
            cachedRuns.append(firstRunToMonitor)

        # get the list of runs to be refreshed (< config.refreshDays old):
        # recent runs are re-processed because their status may still change
        runsToBeRefreshed = []
        reportsToBeRefreshed = []
        last2days = datetime.timedelta(days=config.refreshDays)
        twdaysago = datetime.datetime.today() - last2days
        for rep in runReports:
            if rep.startTime >= twdaysago:
                runsToBeRefreshed.append(rep.runNumber)
                reportsToBeRefreshed.append(rep)
                #print rep.runNumber()
                #print "start: " + str(rep.startTime()) + " less than " + str(twdaysago)
        # remove the list of reports and runs to be refreshed from the cached ones
        for rep in reportsToBeRefreshed:
            cachedRuns.remove(rep.runNumber)
            runReports.remove(rep)
        lastCachedRun = config.firstRunToMonitor
        if(len(cachedRuns) != 0):
            lastCachedRun = cachedRuns[len(cachedRuns)-1]
        runsToBeRefreshed.sort(reverse=True)
        print "last cached run #: " + str(lastCachedRun)
        print "runs to be refreshed: " + str(runsToBeRefreshed)

        # --------------------------------------------------------------------------------
        # --- get the list of collision runs from RR (only for the runs not yet cached)
        runList = []
        try:
            #runList = RunRegistryTools.getRunList(lastCachedRun+1)
            #runList2 = RunRegistryTools.getRunListRR3(lastCachedRun+1, "Express", "Collisions12")
            # FIXME: the run-types can be specialized "by tag"
            runList = RunRegistryTools.getRunListRR3(lastCachedRun+1, config.rrDatasetName, config.rrRunClassName)
            #print runList
            #print runList2
            runList.sort(reverse=True)
        except Exception as error:
            print '*** Error 1: RR query has failed'
            print error
            return 101, "Error: failed to get collision runs from RunRegistry: " + str(error)
        print "run list from RR: " + str(runList)
        # sanity check: RR must at least return the runs we wanted to refresh
        if len(runList) < len(runsToBeRefreshed):
            print "Warning: list from RR is fishy...using the previous one!"
            retValues = 1, 'Warning: list from RR is fishy...using the previous one!'
            runList = runsToBeRefreshed

        # --------------------------------------------------------------------------------
        # --- Get the run-info information for the interesting run-range
        if len(runList) > 0:
            print "Accessing runInfo...may take a while..."
            runInfoList = None
            try:
                # runList is sorted in reverse order: last element is the lowest run
                runInfoList = RunInfo.getRunInfoStartAndStopTime(config.runInfoTag_stop, config.runInfoConnect, runList[len(runList)-1], runList[0])
            except Exception as error:
                print "*** Error can not query run-info for runs between: " + str(runList[0]) + " and " + str(runList[len(runList)-1]) + " error: " + str(error)
                raise Exception("Error can not query run-info for runs between: " + str(runList[0]) + " and " + str(runList[len(runList)-1]) + " error: " + str(error))

        # --------------------------------------------------------------------------------
        # run on runs not yet cached
        # FIXME: remove?
        lastUploadDate = None
        isLastProcessed = True
        for run in runList:
            statusValues = 0, 'OK'
            runInfo = None
            # look for the RunInfo corresponding to this run
            matches = [runI for runI in runInfoList if int(runI.run()) == int(run)]
            if len(matches) == 0:
                # try to get the payload from runinfo_start: this run might still be ongoing
                try:
                    runInfo = RunInfo.getRunInfoStartAndStopTime(config.runInfoTag_start, config.runInfoConnect, run, run)
                except Exception as error:
                    print "*** Error can not query run-info for run: " + str(run) + " error: " + str(error)
                    raise Exception("Error can not query run-info for run: " + str(run) + " error: " + str(error))
            elif len(matches) == 1:
                runInfo = matches[0]
            else:
                print "***Error: more than one match (" + str(len(matches)) + " in run-info for run: " + str(run)
                raise Exception("***Error: more than one match (" + str(len(matches)) + " in run-info for run: " + str(run))

            # get the run report for all the records run in PCL (dictionary)
            rRep = None
            try:
                rRep = pclMonitoringTools.getRunReport(pclTag, run, runInfo, fileList, oracleTables, lastUploadDate)
            except pclMonitoringTools.OngoingRunExcept as error:
                # run still ongoing: not an error, just skip it for now
                print error
            except Exception as error:
                unknownRun = True
                unknownRunMsg = "Error: can not get report for run: " + str(run) + ", reason: " + str(error)
                print unknownRunMsg
            else:
                runReports.append(rRep)
                # --- Assign the status for this run based on the RunReport
                # NOTE: the 'isLastProcessed' flag is meant not to flag as problematic
                # runs that are just waiting to be processed.
                # as soon as a run is uploaded or Tier0 tries to upload it all the
                # following runs should have the flag set to false; the same is true
                # if the "age" of the run is > than 12h (time-out on the Tier0 side
                # to launch PCL). Here the logic is based on the reverse order of
                # the runs in the list.
                if rRep.hasUpload or rRep.stopTimeAge() > 12:
                    isLastProcessed = False
                #print run, isLastProcessed

                # these are assigned before any other since they are only warnings and
                # they could in principle be overwritten by error statuses
                if rRep.multipleFiles:
                    statusValues = 998, "PCL run multiple times for run: " + str(rRep.runNumber)
                if rRep.isOutOfOrder:
                    statusValues = 999, "PCL run out of order for run: " + str(rRep.runNumber)

                # assign the status to this run
                if not rRep.pclRun:
                    if not isLastProcessed:
                        statusValues = 1001, "PCL not run for run: " + str(rRep.runNumber)
                elif not rRep.hasPayload:
                    statusValues = 1002, "PCL produced no paylaod for run: " + str(rRep.runNumber)
                elif not rRep.hasUpload:
                    if not isLastProcessed:
                        statusValues = 1003, "PCL did not upload paylaod for run: " + str(rRep.runNumber)
                elif not rRep.uploadSucceeded:
                    statusValues = 1004, "Upload to DB failed for run: " + str(rRep.runNumber)

                # store the status of the problematic runs only
                if statusValues[0] != 0:
                    tagReport.addRunStatus(rRep.runNumber, statusValues)
                #print statusValues

        runReports.sort(key=lambda rr: rr.runNumber)

        # -----------------------------------------------------------------
        # ---- cache the results for runs older than 48h and write the log for the web
        logFileName = '/log_' + pclTag + '.txt'
        pclMonitoringTools.writeCacheAndLog(cacheFileName, config.webArea + logFileName, runReports)
        # --- add the reports for this tag to the Json
        tagsJson.addTag(pclTag, tagReport)

    # --- write the reports for all the tags to a JSON
    tagsJson.writeJsonFile(config.webArea)

    # FIXME: find a logic for many records
    status = retValues[0]
    message = retValues[1]
    if status == 0 and unknownRun:
        status = 10
        message = unknownRunMsg
    return status, message
def getRunReport(runinfoTag, run, promptCalibDir, fileList, iovtableByRun_oracle, iovtableByLumi_oracle):
    # Build and return a RunReport for one run: query RunInfo for start/stop,
    # locate the PCL sqlite file(s) for the run in promptCalibDir, compute the
    # job latencies, check the payload size and verify that the run-based and
    # lumi-based IOVs found in the sqlite are also present in Oracle.
    # Raises Exception when RunInfo cannot be queried or IOVs cannot be listed.
    #
    # NOTE(review): indentation reconstructed from a collapsed one-line source;
    # also `tagRun` and `tagLumi` are not parameters -- presumably module-level
    # globals, verify in the original module.
    #print run
    # input: runInfoTag, run, fileList, iovlist
    runInfo = None
    try:
        runInfo = RunInfo.getRunInfoStartAndStopTime(runinfoTag, '', run)
    except Exception as error:
        print "*** Error can not find run: " + str(run) + " in RunInfo: " + str(error)
        raise Exception("Error can not find run: " + str(run) + " in RunInfo: " + str(error))
    rRep = RunReport()
    rRep.setRunNumber(runInfo.run())
    rRep.setRunInfoContent(runInfo)
    # run length in hours (days + seconds components of the timedelta)
    deltaTRun = runInfo.stopTime() - runInfo.startTime()
    deltaTRunH = deltaTRun.days*24. + deltaTRun.seconds/(60.*60.)
    print "-- run #: " + colorTools.blue(runInfo.run())
    print " start: " + str(runInfo.startTime()) + " stop: " + str(runInfo.stopTime()) + " lenght (h): " + str(deltaTRunH)

    # --- status flags for this run
    isFileFound = False
    emptyPayload = True
    isOutOfOrder = False
    allLumiIOVFound = False

    # --- look for the file on AFS
    fileName = ""
    fileForRun = []
    # find the files associated to this run:
    for dbFile in fileList:
        if str(run) in dbFile:
            fileForRun.append(dbFile)
    if len(fileForRun) == 0:
        print " " + colorTools.warning("***Warning") + ": no sqlite file found!"
        isFileFound = False
    elif len(fileForRun) > 1:
        # multiple files: just report them with their time-stamps
        print " " + colorTools.warning("***Warning") + ": more than one file for this run!"
        for dbFile in fileForRun:
            modifDate = datetime.datetime.fromtimestamp(os.path.getmtime(promptCalibDir + dbFile))
            print ' ',dbFile,'time-stamp:',modifDate

    for dbFile in fileForRun:
        isFileFound = True
        # NOTE(review): `isFileFound` appears twice in this condition; since
        # isFileFound is always True here this reduces to `not emptyPayload`,
        # i.e. stop once a file with a payload has been processed
        if isFileFound and not emptyPayload and isFileFound:
            # in this case the file was already identified
            continue
        print " file: " + dbFile
        # use the file modification time as the PCL job completion time
        modifDate = datetime.datetime.fromtimestamp(os.path.getmtime(promptCalibDir + dbFile))
        rRep.setJobTime(modifDate)
        # # check this is not older than the one for the following run
        # if isFirst or modifDate < lastDate:
        #     lastDate = modifDate
        #     isFirst = False
        #     isOutOfOrder = False
        # else:
        #     print " " + warning("Warning: ") + " this comes after the following run!!!"
        #     isOutOfOrder = True

        # delta-time from begin of run
        deltaTFromBegin = modifDate - runInfo.startTime()
        deltaTFromBeginH = deltaTFromBegin.days*24. + deltaTFromBegin.seconds/(60.*60.)
        # delta-time from end of run (small positive placeholder when the file
        # pre-dates the run stop)
        deltaTFromEndH = 0.01
        if(modifDate > runInfo.stopTime()):
            deltaTFromEnd = modifDate - runInfo.stopTime()
            deltaTFromEndH = deltaTFromEnd.days*24. + deltaTFromEnd.seconds/(60.*60.)
        print " file time: " + str(modifDate) + " Delta_T begin (h): " + str(deltaTFromBeginH) + " Delta_T end (h): " + str(deltaTFromEndH)
        rRep.setLatencyFromBeginning(deltaTFromBeginH)
        rRep.setLatencyFromEnd(deltaTFromEndH)

        # check the file size: 1 byte and 32768 bytes are the signatures of an
        # sqlite file written with no payload in it
        fileSize = os.path.getsize(promptCalibDir + dbFile)
        if fileSize == 1 or fileSize == 32768:
            emptyPayload = True
            print " " + colorTools.warning("***Warning") + ": no payload in sqlite file!"
        else:
            emptyPayload = False
            # list the iov in the tag
            connect = "sqlite_file:" + promptCalibDir + dbFile
            listiov_run_sqlite = gtTools.listIov(connect, tagRun, '')
            if listiov_run_sqlite[0] == 0:
                iovtableByRun_sqlite = gtTools.IOVTable()
                iovtableByRun_sqlite.setFromListIOV(listiov_run_sqlite[1])
                #iovtableByRun_sqlite.printList()
                # every run-based IOV in the sqlite must also be in Oracle
                for iov in iovtableByRun_sqlite._iovList:
                    iovOracle = gtTools.IOVEntry()
                    if iovtableByRun_oracle.search(iov.since(), iovOracle):
                        print " runbased IOV found in Oracle!"
                        #print iovOracle
                    else:
                        print " " + colorTools.warning("Warning:") + " runbased IOV not found in Oracle"
            missingIOV = False
            listiov_lumi_sqlite = gtTools.listIov(connect, tagLumi, '')
            if listiov_lumi_sqlite[0] == 0:
                iovtableByLumi_sqlite = gtTools.IOVTable()
                iovtableByLumi_sqlite.setFromListIOV(listiov_lumi_sqlite[1])
                #iovtableByLumi_sqlite.printList()
                counterbla = 0
                # every lumi-based IOV in the sqlite must also be in Oracle
                for iov in iovtableByLumi_sqlite._iovList:
                    iovOracle = gtTools.IOVEntry()
                    if not iovtableByLumi_oracle.search(iov.since(), iovOracle):
                        #print " Lumi based IOV found in Oracle:"
                        #print iovOracle
                        counterbla += 1
                        print " " + colorTools.warning("Warning:") + " lumibased IOV not found in Oracle for since: " + str(iov.since())
                        missingIOV = True
            else:
                raise Exception("Error can not list IOV for file",connect)
            if not missingIOV:
                allLumiIOVFound = True
                print " All lumibased IOVs found in oracle!"
            else:
                allLumiIOVFound = False
                print " " + colorTools.warning("Warning:") + " not all lumibased IOVs found in Oracle!!!"

    # fill the run-report for this run
    if not isFileFound:
        rRep.sqliteFound(False)
    else:
        rRep.sqliteFound(True)
    if isOutOfOrder:
        rRep.isOutoforder(True)
    else:
        rRep.isOutoforder(False)
    if emptyPayload:
        rRep.payloadFound(False)
    else:
        rRep.payloadFound(True)
    # "uploaded" here means: all lumi-based IOVs were found in Oracle
    if not allLumiIOVFound:
        rRep.isUploaded(False)
    else:
        rRep.isUploaded(True)
    return rRep
def runBackEnd():
    # Back-end of the O2O (online-to-offline) monitor.
    # Steps:
    #   1. query Tier0-DAS for the next prompt-reco run and its GlobalTag;
    #   2. resolve the monitored records to tags of that GT;
    #   3. for each tag: check the last O2O job run, the last O2O write in the
    #      PopCon logs and the last IOV since-date in Oracle;
    #   4. for each collision run from RunRegistry: check whether each record's
    #      last since covers the run (0 = covered, 0.5 = partial, 1 = not covered);
    #   5. write the per-record JSON, the run cache and the web log.
    # Returns (status, message): 0/'OK' on success, non-zero code otherwise.
    #
    # NOTE(review): indentation reconstructed from a collapsed one-line source;
    # several names (tier0DasSrc, referenceDataset, monitoredrecords, passwdfile,
    # cacheFileName, webArea, runinfoTag, thresholds, o2oLogfileList) are
    # presumably module-level config globals -- verify in the original module.

    # this will store the exit status for the json report
    retValues = 0, "OK"
    # flag in case we miss the info for some runs
    unknownRun = False
    unknownRunMsg = ""

    tier0Das = tier0DasInterface.Tier0DasInterface(tier0DasSrc)
    try:
        nextPromptRecoRun = tier0Das.firstConditionSafeRun()
        print "Tier0 DAS next run for prompt reco:",nextPromptRecoRun
        gtFromPrompt = tier0Das.promptGlobalTag(referenceDataset)
        print " GT for dataset: ", referenceDataset, "run:", str(nextPromptRecoRun), ":", gtFromPrompt
    except Exception as error:
        print '*** Error: Tier0-DAS query has failed'
        print error
        return 102, "Error: Tier0-DAS query has failed: " + str(error)
    print len(gtFromPrompt)
    if(len(gtFromPrompt) == 0):
        return 202, "No " + referenceDataset + " datset for run: " + str(nextPromptRecoRun) + " -> failed to get the GT name"
    # GT names come as "NAME::All" -> keep only the name part
    gtName = gtFromPrompt.split('::')[0]
    gtConfFile = gtName + '.conf'
    if not gtTools.confFileFromDB(gtName, gtConfFile, gtconnstring, passwdfile):
        return 201, "GT: " + gtFromPrompt + " could not be found in ORACLE!"

    # create the collection of tags
    tagCollection = gtTools.GTEntryCollection()
    gtTools.fillGTCollection(gtName+'.conf', gtName, tagCollection)

    # resolve the configured "record[:label]" strings to GT entries
    tagsTomonitor = []
    print "Tags to be monitored: "
    for record in monitoredrecords.split(','):
        label = ''
        if ':' in record:
            label = record.split(':')[1]
            record = record.split(':')[0]
        rcdId = gtTools.RcdID([record, label])
        if not tagCollection.hasRcdID(rcdId):
            # NOTE(review): `rcdIdn` looks like a typo for `rcdId` -- would raise
            # NameError if this branch is ever hit; verify
            print "Error: rcd: " + rcdIdn + " not found in GT: " + gtName
        else:
            print ' ', tagCollection.getByRcdID(rcdId)
            tagsTomonitor.append(tagCollection.getByRcdID(rcdId))

    # --------------------------------------------------------------------------------
    # --- read the cache
    allCachedRuns = o2oMonitoringTools.readCache(cacheFileName)
    cachedRuns = allCachedRuns[0]
    runReports = allCachedRuns[1]
    unknownRun = False  # this is set to true only if one run can not be processed
    unknownRunMsg = ''

    # --------------------------------------------------------------------------------
    # --- get the last cached run
    if len(cachedRuns) != 0:
        cachedRuns.sort()
    else:
        cachedRuns.append(1)
    lastCachedRun = cachedRuns[len(cachedRuns)-1]
    #lastCachedRun = 191419
    print "last cached run #: " + str(lastCachedRun)

    # --------------------------------------------------------------------------------
    # --- get the list of collision runs from RR (only for the runs not yet cached)
    runList = []
    try:
        # FIXME: do we need to restrict to Collision12?
        #runList = RunRegistryTools.getRunListRR3(lastCachedRun+1,"Online", "Commissioning12")
        runList = RunRegistryTools.getRunListRR3(lastCachedRun+1, rrDatasetName, rrRunClassName)
    except Exception as error:
        print '*** Error 1: RR query has failed'
        print error
        return 101, "Error: failed to get collision runs from RunRegistry: " + str(error)
    print runList

    # --------------------------------------------------------------------------------
    # --- check O2O and DB tag status for each record
    threshold = datetime.timedelta(hours=int(thresholdLastWrite))
    thresholdSince = datetime.timedelta(hours=int(thresholdLastSince))
    today = datetime.datetime.today()
    # PopCon logs are in UTC; shift to local time (hard-coded +2h offset)
    fromUTCToLocal = datetime.timedelta(hours=2)
    recordandlastsince = {}
    tableTitle = ["# run", "start-time", "end-time"]
    rcdJson = o2oMonitoringTools.O2ORecordJson("o2oMonitor")
    for entry in tagsTomonitor:
        print "- Tag:", entry
        tagName = entry.tagName()
        accountName = entry.account()
        recordName = entry.record()
        tableTitle.append(recordName)
        # create the report for this given record
        rcdRep = o2oMonitoringTools.RecordReport(recordName)
        rcdRep.setTagAndAccount(tagName, accountName)
        nDays = 1
        nSec = nDays*24*60*60
        popLog = popConLog.PopCon_Monitoring_last_updates(interval=nSec)

        # 0. get the last time the O2O run
        # build the record -> logfile map from the "record:logfile,..." config string
        o2oLogfiles = {}
        for rcdEntry in o2oLogfileList.split(','):
            key = rcdEntry.split(':')[0]
            logFileForKey = rcdEntry.split(':')[1]
            o2oLogfiles[key] = logFileForKey
        jobData = popLog.PopConJobRunTime(authfile=passwdfile + "/authentication.xml", logFile=o2oLogfiles[recordName])
        if len(jobData) != 0:
            lastO2ORun = jobData[0][0] + fromUTCToLocal
            previouO2ORun = jobData[0][1] + fromUTCToLocal
            runO2OAge = today - lastO2ORun
            statusForRpt = "OK"
            print " - Last O2O run on: " + str(lastO2ORun) + " (" + str(runO2OAge) + " ago)"
            # flag as error when the O2O has been silent for more than twice its
            # usual period (estimated from the last two executions)
            if runO2OAge > 2*(lastO2ORun - previouO2ORun):
                print " " + colorPrintTools.error("Error") + ": the O2O for rcd " + recordName + " is not running since a while (" + str(runO2OAge) + ")"
                statusForRpt = "ERROR"
                if 2050 > retValues[0]:
                    retValues = 2050, "Error: the O2O for rcd " + recordName + " is not running since a while (" + str(runO2OAge) + ")"
            rcdRep.setLastO2ORun(lastO2ORun, runO2OAge, statusForRpt)
        else:
            print "Error: No O2O job logs for tag: " + tagName + " in account: " + accountName + " could be found in the PopConLogs"
            if 2051 > retValues[0]:
                retValues = 2051, "Error: no O2O job logs found for rcd: " + recordName

        # 1. get the last updates from PopConLogger
        # FIXME: the auth.xml can it be read from a central place?
        logData = popLog.PopConRecentActivityRecorded(authfile=passwdfile + "/authentication.xml", account=accountName, iovtag=tagName)
        if len(logData['data']) != 0:
            datestring = logData['data'][0][1]
            status = logData['data'][0][6]
            token = logData['data'][0][8]
            #datelastupdate = datetime.datetime.strptime(datestring,"%B, %dnd %Y %H:%M:%S") + fromUTCToLocal
            datelastupdate = dateutil.parser.parse(datestring) + fromUTCToLocal
            updateage = today - datelastupdate
            statusForRpt = "OK"
            print " - Last O2O wrote on: " + str(datelastupdate) + " (" + str(updateage) + " ago)"
            print " status:",status, "payload token:", token.split("<br>")[4]
            if updateage > threshold:
                print " " + colorPrintTools.warning("Warning") + ": O2O is not writing since a while!"
                statusForRpt = "OLD"
                if 2001 > retValues[0]:
                    retValues = 2001, "Warning: the O2O for rcd: " + recordName + " is not writing since a while!"
            if status != 'OK':
                print " Warning: O2O status is: " + status + "!"
                statusForRpt = "ERROR"
                if 2002 > retValues[0]:
                    retValues = 2002, "Error: the O2O status for rcd: " + recordName + " is " + status + "!"
            rcdRep.setLastO2OWrite(datelastupdate, updateage, statusForRpt)
        else:
            print "Error: No O2O updates to tag: " + tagName + " in account: " + accountName + " could be found in the PopConLogs"
            if 2010 > retValues[0]:
                retValues = 2010, "Error: no O2O updates logged for rcd: " + recordName
            #rcdRep.setLastO2OWrite(datelastupdate, updateage, "ERROR")

        # 2. check the status of the tag
        outputAndStatus = gtTools.listIov(entry.getOraclePfn(False), tagName, passwdfile)
        iovtable = gtTools.IOVTable()
        iovtable.setFromListIOV(outputAndStatus[1])
        datesince = iovtable.lastIOV().sinceDate()
        sinceage = today - datesince
        print " - Last IOV since:", datesince, "(" + str(sinceage),"ago)"
        print " with token: [" + iovtable.lastIOV().token() +"]"#.split("][")[4]
        # remember the last since per record: used below to classify run coverage
        recordandlastsince[recordName] = datesince
        #print iovtable.lastIOV()
        stat = "OK"
        if sinceage > thresholdSince:
            stat = "OLD"
            if 2101 > retValues[0]:
                retValues = 2101, "Error: the last IOV of rcd: " + recordName + " is OLD (since: " + str(datesince) + ")!"
        rcdRep.setLastSince(datesince, sinceage, stat)
        rcdJson.addRcd(gtTools.RcdID([recordName,""]), rcdRep)

    # --------------------------------------------------------------------------------
    # --- Write the Rcd status to cache
    rcdJson.writeJsonFile(webArea)

    # --------------------------------------------------------------------------------
    # --- check the status for each of the Collision runs
    for run in runList:
        # hard-coded skip of one pathological run
        if run == 167551:continue
        print "-- run #: " + colorPrintTools.blue(str(run))
        #print run
        # get the information from runInfo
        runInfo = None
        try:
            runInfo = RunInfo.getRunInfoStartAndStopTime(runinfoTag, "", run)
        except Exception as error:
            print '*** Error XXX: RunInfo query failed!'
            print error
            unknownRun = True
            unknownRunMsg = "Error: can not get report for run: " + str(run) + ", since run-info query failed: " + str(error)
            print unknownRunMsg
            continue
        rRep = o2oMonitoringTools.RunReportTagCheck()
        rRep.setRunNumber(runInfo.run())
        #rRep.setStartTime(runInfo.startTime())
        rRep.setRunInfoContent(runInfo)
        deltaTRun = runInfo.stopTime() - runInfo.startTime()
        # NOTE(review): unlike elsewhere in this file, the .days component is not
        # included here, so runs longer than 24h under-report their length
        deltaTRunH = deltaTRun.seconds/(60.*60.)
        print " start: " + str(runInfo.startTime()) + " stop: " + str(runInfo.stopTime()) + " lenght (h): " + str(deltaTRunH)
        #pageWriter.setNextPromptReco(runInfo.run(), runInfo.startTime(), runInfo.stopTime(), deltaTRunH)
        # classify each record's coverage of this run against its last since
        for entry in tagsTomonitor:
            recordName = entry.record()
            datesince = recordandlastsince[recordName]
            if runInfo.stopTime() <= datesince:
                print " rcd: ", recordName,":",colorPrintTools.ok("OK")
                rRep.addRecordAndStatus(recordName, 0)
            elif runInfo.startTime() < datesince and runInfo.stopTime() > datesince:
                print " rcd: ", recordName,":",colorPrintTools.warning("partially covered!")
                rRep.addRecordAndStatus(recordName, 0.5)
            else:
                print " rcd: ", recordName,":",colorPrintTools.error("not covered!")
                rRep.addRecordAndStatus(recordName, 1)
        runReports.append(rRep)
        # print "---------------------------------------------------------"
        #print gtEntry,
        #print " # of updates:", len(logData['data'])
        #if gtEntry.updateType() != 1:
        #    listofchanges.append(str(gtEntry) + " # of updates: " + str(len(logData['data'])))
        #else:
        #    listofchangesO2O.append(str(gtEntry) + " # of updates: " + str(len(logData['data'])))

    # --------------------------------------------------------------------------------
    # --- write to cache and to log
    runReports.sort(key=lambda rr: rr._runnumber)
    tableForCache =[]
    tableForCache.append(tableTitle)
    tableForLog =[]
    tableForLog.append(tableTitle)
    for rep in runReports:
        # only runs already released for prompt reco go to the cache;
        # the web log gets everything
        if int(rep.runNumber()) < int(nextPromptRecoRun):
            tableForCache.append(rep.getList())
        tableForLog.append(rep.getList())
    #out = sys.stdout
    print "writing cache file: " + cacheFileName
    cacheFile = file(cacheFileName,"w")
    tableWriter.pprint_table(cacheFile, tableForCache)
    cacheFile.close()
    out = sys.stdout
    logFile = file(webArea + "log.txt","w")
    tableWriter.pprint_table(logFile, tableForLog)
    logFile.close()

    status = retValues[0]
    message = retValues[1]
    if status == 0 and unknownRun:
        status = 10
        message = unknownRunMsg
    return status, message