Example #1
0
        hist.GetYaxis().SetNdivisions(1)
        hist.Draw("COL")
        lineLastPromptRecoEnd.Draw("same")
        newlegend.Draw("same")
        c4.Print(webArea + 'c' + record + '.png')
        #raw_input ("Enter to quit")




    

# Script entry point: reload the run reports previously written to
# webArea + "log.txt", then ask Tier0-DAS for the prompt-reco run boundaries.
if __name__ == "__main__":

    # get the run reports from the file
    # readCache() returns an indexable pair: element 0 is used as the list of
    # cached runs, element 1 as the list of run reports.
    allCachedRuns = o2oMonitoringTools.readCache(webArea + "log.txt")
    cachedRuns = allCachedRuns[0]
    runReports = allCachedRuns[1]



    # Tier0-DAS client built from the module-level tier0DasSrc setting.
    tier0Das = tier0DasInterface.Tier0DasInterface(tier0DasSrc) 
    try:
        # Both queries sit in the same try: a failure of either one is
        # reported by the single handler below.
        nextPromptRecoRun = tier0Das.firstConditionSafeRun()
        lastPromptRecoRun = tier0Das.lastPromptRun()

        print "Tier0 DAS next run for prompt reco:",nextPromptRecoRun
        #gtFromPrompt = tier0Das.promptGlobalTag(nextPromptRecoRun, referenceDataset)
        #print "      GT for dataset: ", referenceDataset, "run:", str(nextPromptRecoRun), ":", gtFromPrompt
    except Exception as error:
        # NOTE(review): the visible handler only prints a message, so
        # nextPromptRecoRun / lastPromptRecoRun stay undefined after a failed
        # query -- confirm that whatever follows copes with that.
        print '*** Error 2: Tier0-DAS query has failed'
Example #2
0
def runBackEnd():

    # this will store the exit status for the json report
    retValues = 0, "OK"
    # flag in case we miss the info for some runs 
    unknownRun = False
    unknownRunMsg = ""

    tier0Das = tier0DasInterface.Tier0DasInterface(tier0DasSrc) 
    try:
        nextPromptRecoRun = tier0Das.firstConditionSafeRun()
        print "Tier0 DAS next run for prompt reco:",nextPromptRecoRun
        gtFromPrompt = tier0Das.promptGlobalTag(referenceDataset)
        print "      GT for dataset: ", referenceDataset, "run:", str(nextPromptRecoRun), ":", gtFromPrompt
    except Exception as error:
        print '*** Error: Tier0-DAS query has failed'
        print error
        return 102, "Error: Tier0-DAS query has failed: " + str(error)

    print len(gtFromPrompt) 
    if(len(gtFromPrompt) == 0):
        return 202, "No " + referenceDataset + " datset for run: " + str(nextPromptRecoRun) + " -> failed to get the GT name"
    gtName = gtFromPrompt.split('::')[0]
    gtConfFile = gtName + '.conf'

    
    if not gtTools.confFileFromDB(gtName, gtConfFile, gtconnstring, passwdfile):
        return 201, "GT: " + gtFromPrompt + " could not be found in ORACLE!"
    

    # create the collection of tags
    tagCollection = gtTools.GTEntryCollection()
    gtTools.fillGTCollection(gtName+'.conf', gtName, tagCollection)
    
    tagsTomonitor = []

    print "Tags to be monitored: "
    for record in monitoredrecords.split(','):
        label = ''
        if ':' in record:
            label = record.split(':')[1]
            record = record.split(':')[0]
        rcdId =  gtTools.RcdID([record, label])
        if not tagCollection.hasRcdID(rcdId):
            print "Error: rcd: " + rcdIdn + " not found in GT: " + gtName
        else:
            print '   ', tagCollection.getByRcdID(rcdId)
            tagsTomonitor.append(tagCollection.getByRcdID(rcdId))


    # --------------------------------------------------------------------------------
    # --- read the cache
    allCachedRuns = o2oMonitoringTools.readCache(cacheFileName)
    cachedRuns = allCachedRuns[0]
    runReports = allCachedRuns[1]

    unknownRun = False # this is set to true only if one run can not be processed
    unknownRunMsg = ''
    # --------------------------------------------------------------------------------
    # --- get the last cached run
    if len(cachedRuns) != 0:
        cachedRuns.sort()
    else:
        cachedRuns.append(1)

    lastCachedRun = cachedRuns[len(cachedRuns)-1]
    #lastCachedRun = 191419
    print "last cached run #: " + str(lastCachedRun)

    # --------------------------------------------------------------------------------
    # --- get the list of collision runs from RR (only for the runs not yet cached)
    runList = []
    try:
        # FIXME: do we need to restrict to Collision12?
        #runList = RunRegistryTools.getRunListRR3(lastCachedRun+1,"Online", "Commissioning12")
        runList = RunRegistryTools.getRunListRR3(lastCachedRun+1, rrDatasetName, rrRunClassName)

    except Exception as error:
        print '*** Error 1: RR query has failed'
        print error
        return 101, "Error: failed to get collision runs from RunRegistry: " + str(error)

    print runList


    # --------------------------------------------------------------------------------
    # --- check O2O and DB tag status for each record
    threshold = datetime.timedelta(hours=int(thresholdLastWrite))
    thresholdSince = datetime.timedelta(hours=int(thresholdLastSince))

    today = datetime.datetime.today()
    fromUTCToLocal = datetime.timedelta(hours=2)
    recordandlastsince = {}

    tableTitle = ["# run", "start-time", "end-time"]

    rcdJson = o2oMonitoringTools.O2ORecordJson("o2oMonitor")

    for entry in tagsTomonitor:
        print "- Tag:", entry
        
        tagName = entry.tagName()
        accountName = entry.account()
        recordName = entry.record()

        tableTitle.append(recordName)

        # create the report for this given record
        rcdRep = o2oMonitoringTools.RecordReport(recordName)
        rcdRep.setTagAndAccount(tagName, accountName)


        nDays = 1
        nSec = nDays*24*60*60
        popLog = popConLog.PopCon_Monitoring_last_updates(interval=nSec)

        # 0. get the last time the O2O run
        o2oLogfiles = {}

        for rcdEntry in o2oLogfileList.split(','):
            key = rcdEntry.split(':')[0]
            logFileForKey = rcdEntry.split(':')[1]
            o2oLogfiles[key] = logFileForKey
        jobData = popLog.PopConJobRunTime(authfile=passwdfile + "/authentication.xml",
                                          logFile=o2oLogfiles[recordName])

        if len(jobData) != 0:
            lastO2ORun = jobData[0][0] + fromUTCToLocal
            previouO2ORun = jobData[0][1] + fromUTCToLocal
            runO2OAge = today - lastO2ORun

            statusForRpt = "OK"

            print "  - Last O2O run on: " + str(lastO2ORun) + " (" + str(runO2OAge) + " ago)"
            if  runO2OAge > 2*(lastO2ORun - previouO2ORun):
                print "      " + colorPrintTools.error("Error") + ": the O2O for rcd " + recordName + " is not running since a while (" + str(runO2OAge) + ")"
                statusForRpt = "ERROR"
                if 2050 > retValues[0]:
                    retValues = 2050, "Error: the O2O for rcd " + recordName + " is not running since a while (" + str(runO2OAge) + ")"

            rcdRep.setLastO2ORun(lastO2ORun, runO2OAge, statusForRpt)
        else:
            print "Error: No O2O job logs for tag: " + tagName + " in account: " + accountName + " could be found in the PopConLogs"
            if 2051 > retValues[0]:
                retValues = 2051, "Error: no O2O job logs found for rcd: " + recordName



        # 1. get the last updates from PopConLogger
        # FIXME: the auth.xml can it be read from a central place?
        logData = popLog.PopConRecentActivityRecorded(authfile=passwdfile + "/authentication.xml",
                                                      account=accountName,
                                                      iovtag=tagName)

        

        if len(logData['data']) != 0:
            datestring = logData['data'][0][1]
            status = logData['data'][0][6]
            token =  logData['data'][0][8]
            #datelastupdate = datetime.datetime.strptime(datestring,"%B, %dnd %Y  %H:%M:%S") + fromUTCToLocal
            datelastupdate = dateutil.parser.parse(datestring) + fromUTCToLocal

            updateage = today - datelastupdate
            statusForRpt = "OK"
            print "  - Last O2O wrote on: " + str(datelastupdate) + " (" +  str(updateage) + " ago)"
            print "    status:",status, "payload token:", token.split("<br>")[4]
            if updateage > threshold:
                print "      " + colorPrintTools.warning("Warning") + ": O2O is not writing since a while!"
                statusForRpt = "OLD"
                if 2001 > retValues[0]:
                    retValues = 2001, "Warning: the O2O for rcd: " + recordName + " is not writing since a while!"
            if status != 'OK':
                print "      Warning: O2O status is: " + status + "!"
                statusForRpt = "ERROR"
                if 2002 > retValues[0]:
                    retValues = 2002, "Error: the O2O status for rcd: " + recordName + " is " + status + "!"
                
            rcdRep.setLastO2OWrite(datelastupdate, updateage, statusForRpt)
        else:
            print "Error: No O2O updates to tag: " + tagName + " in account: " + accountName + " could be found in the PopConLogs"
            if 2010 > retValues[0]:
                retValues = 2010, "Error: no O2O updates logged for rcd: " + recordName

            #rcdRep.setLastO2OWrite(datelastupdate, updateage, "ERROR")

        


            
        # 2. check the status of the tag
        outputAndStatus = gtTools.listIov(entry.getOraclePfn(False), tagName, passwdfile)
        iovtable = gtTools.IOVTable()
        iovtable.setFromListIOV(outputAndStatus[1])
        datesince = iovtable.lastIOV().sinceDate()
        sinceage = today - datesince
        print "  - Last IOV since:", datesince, "(" + str(sinceage),"ago)"
        print "    with token: [" + iovtable.lastIOV().token() +"]"#.split("][")[4]
        recordandlastsince[recordName] = datesince
        #print iovtable.lastIOV()
        stat = "OK"
        if sinceage > thresholdSince:
            stat = "OLD"
            if 2101 > retValues[0]:
                retValues = 2101, "Error: the last IOV of rcd: " + recordName + " is OLD (since: " + str(datesince) + ")!"
        rcdRep.setLastSince(datesince, sinceage, stat)


        rcdJson.addRcd(gtTools.RcdID([recordName,""]), rcdRep)


    # --------------------------------------------------------------------------------
    # --- Write the Rcd status to cache
    rcdJson.writeJsonFile(webArea)
    

    # --------------------------------------------------------------------------------
    # --- check the status for each of the Collision runs
    for run in runList:
        if run == 167551:continue
        print "-- run #: " + colorPrintTools.blue(str(run))            
        #print run
        # get the information from runInfo
        runInfo = None
        try: 
            runInfo = RunInfo.getRunInfoStartAndStopTime(runinfoTag, "", run)
        except Exception as error:
            print '*** Error XXX: RunInfo query failed!'
            print error
            unknownRun = True
            unknownRunMsg = "Error: can not get report for run: " + str(run) + ", since run-info query failed: " + str(error)
            print unknownRunMsg
            continue
            
        rRep = o2oMonitoringTools.RunReportTagCheck()
        rRep.setRunNumber(runInfo.run())
        #rRep.setStartTime(runInfo.startTime())
        rRep.setRunInfoContent(runInfo)
        deltaTRun = runInfo.stopTime() - runInfo.startTime()
        deltaTRunH = deltaTRun.seconds/(60.*60.)


        print "   start: " + str(runInfo.startTime()) + " stop: " + str(runInfo.stopTime()) + " lenght (h): " + str(deltaTRunH)
        #pageWriter.setNextPromptReco(runInfo.run(), runInfo.startTime(), runInfo.stopTime(), deltaTRunH)
        for entry in tagsTomonitor:
            recordName = entry.record()
            datesince = recordandlastsince[recordName]
            if runInfo.stopTime() <= datesince:
                print "   rcd: ", recordName,":",colorPrintTools.ok("OK")
                rRep.addRecordAndStatus(recordName, 0)
            elif runInfo.startTime()  < datesince and runInfo.stopTime() > datesince:
                print "   rcd: ", recordName,":",colorPrintTools.warning("partially covered!")
                rRep.addRecordAndStatus(recordName, 0.5)
            else:
                print "   rcd: ", recordName,":",colorPrintTools.error("not covered!")
                rRep.addRecordAndStatus(recordName, 1)

        runReports.append(rRep)
        # print "---------------------------------------------------------"
        #print gtEntry,
        #print "  # of updates:", len(logData['data'])
        #if gtEntry.updateType() != 1:
        #    listofchanges.append(str(gtEntry) +  "  # of updates: " + str(len(logData['data'])))
        #else:
        #    listofchangesO2O.append(str(gtEntry) +  "  # of updates: " + str(len(logData['data'])))



    # --------------------------------------------------------------------------------
    # --- write to cache and to log
    runReports.sort(key=lambda rr: rr._runnumber)

    tableForCache =[]
    tableForCache.append(tableTitle)
    tableForLog =[]
    tableForLog.append(tableTitle)

    for rep in runReports:
        if int(rep.runNumber()) < int(nextPromptRecoRun):
            tableForCache.append(rep.getList())            
        tableForLog.append(rep.getList()) 


    #out = sys.stdout
    print "writing cache file: " + cacheFileName
    cacheFile = file(cacheFileName,"w")
    tableWriter.pprint_table(cacheFile, tableForCache)
    cacheFile.close()

    out = sys.stdout
    logFile = file(webArea + "log.txt","w")
    tableWriter.pprint_table(logFile, tableForLog)
    logFile.close()


    status = retValues[0]
    message = retValues[1]
    if status == 0 and unknownRun:
        status = 10
        message = unknownRunMsg
    return status, message