Example #1
0
    def __init__(self, ceflavour=None):
        """Set up logging, configuration, database handles and CRIC state.

        :param ceflavour: CE flavours stored on ``self.flavour``. When omitted
            (or ``None``) a new ``['ARC-CE']`` list is created for this
            instance.
        """
        # Build the default per call: a list literal in the signature is
        # created once and would be shared (and mutable) across all instances
        # constructed without an explicit argument.
        if ceflavour is None:
            ceflavour = ['ARC-CE']

        # Get agent name from /path/to/aCTAgent.py (drops the last three
        # characters of the script file name, i.e. ".py")
        self.name = os.path.basename(sys.argv[0])[:-3]

        # logger
        self.logger = aCTLogger.aCTLogger(self.name)
        self.log = self.logger()
        self.criticallogger = aCTLogger.aCTLogger('aCTCritical', arclog=False)
        self.criticallog = self.criticallogger()

        # config
        self.conf = aCTConfig.aCTConfigAPP()
        self.arcconf = aCTConfig.aCTConfigARC()
        self.tmpdir = str(self.arcconf.get(['tmp', 'dir']))
        # database
        self.dbarc = aCTDBArc.aCTDBArc(self.log)
        self.dbcondor = aCTDBCondor.aCTDBCondor(self.log)
        self.dbpanda = aCTDBPanda.aCTDBPanda(self.log)

        # APFMon
        self.apfmon = aCTAPFMon.aCTAPFMon(self.conf)

        # CRIC info (sites, osmap and sitesselect start out empty)
        self.flavour = ceflavour
        self.cricparser = aCTCRICParser.aCTCRICParser(self.log)
        self.sites = {}
        self.osmap = {}
        self.sitesselect = ''

        # start time for periodic restart
        self.starttime = time.time()
        self.log.info("Started %s", self.name)
Example #2
0
    def __init__(self):
        """Create the ARC config, both loggers and the ARC/Panda DB handles."""
        self.conf = aCTConfig.aCTConfigARC()

        # Regular report log
        report_logger = aCTLogger.aCTLogger("aCTReport")
        self.logger = report_logger
        self.log = report_logger()

        # Critical log, created with arclog switched off
        critical_logger = aCTLogger.aCTLogger('aCTCritical', arclog=False)
        self.criticallogger = critical_logger
        self.criticallog = critical_logger()

        # Each DB handle receives the ["db", "file"] config value
        arc_dbfile = self.conf.get(["db", "file"])
        self.db = aCTDBArc.aCTDBArc(self.log, arc_dbfile)
        panda_dbfile = self.conf.get(["db", "file"])
        self.pandadb = aCTDBPanda.aCTDBPanda(self.log, panda_dbfile)
Example #3
0
def bootstrap():
    """Create the Panda DB tables and print a hint if creation fails."""
    act_logger = aCTLogger('aCTBootstrap')
    panda_db = aCTDBPanda(act_logger())
    tables_ok = panda_db.createTables()
    if tables_ok:
        return
    print('Failed to create Panda tables, see aCTBootstrap.log for details')
Example #4
0
    def __init__(self, args):
        """Initialise report state, loggers and the ARC DB handle.

        :param args: object providing ``web`` (stored as ``self.outfile``)
            and ``conffiles`` (stored as ``self.actconfs``).
        """
        self.output = ""
        self.outfile = args.web
        # empty string for default behaviour when no config files are given
        conffiles = args.conffiles
        self.actconfs = conffiles if conffiles else ['']

        # Report log, with its underlying logger set to INFO
        report_logger = aCTLogger.aCTLogger("aCTReport")
        self.logger = report_logger
        self.actlog = report_logger()
        self.actlog.logger.setLevel(logging.INFO)

        # Critical log, created with arclog switched off
        critical_logger = aCTLogger.aCTLogger('aCTCritical', arclog=False)
        self.criticallogger = critical_logger
        self.criticallog = critical_logger()

        # self.log is not assigned in this constructor -- presumably a method
        # of the enclosing class (TODO confirm)
        if self.outfile:
            refresh_header = '<META HTTP-EQUIV="refresh" CONTENT="60"><pre>'
            self.log(refresh_header)
            timestamp = time.asctime() + '\n'
            self.log(timestamp)

        self.db = aCTDBArc.aCTDBArc(self.actlog)
Example #5
0
def bootstrap():
    """Create the client DB tables and print a hint if creation fails."""
    act_logger = aCTLogger('aCTBootstrap')
    bootstrap_log = act_logger()
    client_tables = ClientDB(bootstrap_log)
    if client_tables.createTables():
        return
    print('Error creating client tables, see aCTBootstrap.log for details')
Example #6
0
def bootstrap():
    """Create the LDMX DB tables and print a hint if creation fails."""
    act_logger = aCTLogger('aCTBootstrap')
    ldmx_db = aCTDBLDMX(act_logger())
    creation_failed = not ldmx_db.createTables()
    if creation_failed:
        print('Failed to create LDMX tables, see aCTBootstrap.log for details')
Example #7
0
    def __init__(self, conf, log=None):
        """Store the log and read the monitoring settings from ``conf``.

        :param conf: configuration object queried through ``conf.get``.
        :param log: log object to use; when falsy, a logger named after this
            class is created (with arclog switched off) and its log is used.
        """
        if not log:
            self.logger = aCTLogger(self.__class__.__name__, arclog=False)
            self.log = self.logger()
        else:
            self.log = log

        # attribute name -> config path, read in this order
        settings = (
            ('apfmonurl', ["monitor", "apfmon"]),
            ('sendupdates', ["monitor", "update"]),
            ('acturl', ["joblog", "urlprefix"]),
            ('factory', ["panda", "schedulerid"]),
        )
        for attribute, path in settings:
            setattr(self, attribute, conf.get(path))
Example #8
0
def bootstrap_db():
    '''Create the ARC and Condor DB tables, reporting any failure'''
    act_logger = aCTLogger('aCTBootstrap')
    log = act_logger()
    # Both DB objects are built up front, before any table is set up
    arc_db = aCTDBArc(log)
    condor_db = aCTDBCondor(log)
    steps = (
        (arc_db,
         'Setting up ARC tables...',
         'Error creating arc tables, see aCTBootstrap.log for details'),
        (condor_db,
         'Setting up Condor tables...',
         'Error creating condor tables, see aCTBootstrap.log for details'),
    )
    for database, progress_message, failure_message in steps:
        print(progress_message)
        if not database.createTables():
            print(failure_message)
Example #9
0
    def __init__(self):
        """Set up the agent name, loggers, configuration and DB handles."""
        # Script file name from sys.argv[0] without its last three characters
        # (".py" for /path/to/aCTAgent.py)
        script = os.path.basename(sys.argv[0])
        self.name = script[:-3]

        # Agent log, named after the agent
        agent_logger = aCTLogger.aCTLogger(self.name)
        self.logger = agent_logger
        self.log = agent_logger()

        # Critical log, created with arclog switched off
        critical_logger = aCTLogger.aCTLogger('aCTCritical', arclog=False)
        self.criticallogger = critical_logger
        self.criticallog = critical_logger()

        # Configuration objects
        self.conf = aCTConfig.aCTConfigATLAS()
        self.arcconf = aCTConfig.aCTConfigARC()

        # DB handles, each given the ["db", "file"] config value
        arc_dbfile = self.conf.get(["db", "file"])
        self.dbarc = aCTDBArc.aCTDBArc(self.log, arc_dbfile)
        panda_dbfile = self.conf.get(["db", "file"])
        self.dbpanda = aCTDBPanda.aCTDBPanda(self.log, panda_dbfile)

        # Remember the start time for periodic restart
        self.starttime = time.time()
        self.log.info("Started %s", self.name)
Example #10
0
    def __init__(self):
        """Initialise the agent name, ARC config, log and DB handles."""
        # Script file name with its last three characters (".py") removed
        script = os.path.basename(sys.argv[0])
        self.name = script[:-3]

        self.arcconf = aCTConfig.aCTConfigARC()

        agent_logger = aCTLogger.aCTLogger(self.name)
        self.logger = agent_logger
        self.log = agent_logger()

        # Client and ARC DB handles are given the same log
        self.clidb = clientdb.ClientDB(self.log)
        self.arcdb = aCTDBArc.aCTDBArc(self.log)

        started = 'Started {}'.format(self.name)
        self.log.info(started)
Example #11
0
    def __init__(self):
        """Set up the agent name, loggers, configuration, DBs and Rucio."""
        # Script file name from sys.argv[0] without its last three characters
        # (".py" for /path/to/aCTAgent.py)
        script = os.path.basename(sys.argv[0])
        self.name = script[:-3]

        # Agent log, named after the agent
        agent_logger = aCTLogger.aCTLogger(self.name)
        self.logger = agent_logger
        self.log = agent_logger()

        # Critical log, created with arclog switched off
        critical_logger = aCTLogger.aCTLogger('aCTCritical', arclog=False)
        self.criticallogger = critical_logger
        self.criticallog = critical_logger()

        # Configuration objects; the tmp dir setting is kept as a string
        self.conf = aCTConfig.aCTConfigAPP()
        self.arcconf = aCTConfig.aCTConfigARC()
        tmp_setting = self.arcconf.get(['tmp', 'dir'])
        self.tmpdir = str(tmp_setting)

        # DB handles
        self.dbarc = aCTDBArc.aCTDBArc(self.log)
        self.dbldmx = aCTDBLDMX.aCTDBLDMX(self.log)

        # Rucio client
        self.rucio = Client()

        # Remember the start time for periodic restart
        self.starttime = time.time()
        self.log.info("Started %s", self.name)
Example #12
0
def main():
    """Report jobs with an outdated heartbeat and send a status update.

    Expects exactly one command-line argument, the time limit in seconds;
    otherwise prints a usage line and exits with status 1. For every job
    selected from the Panda DB, the job status is sent through a per-proxy
    aCTPanda object, one tab-separated row is printed, and the job's
    ``theartbeat`` column is updated.
    """
    if len(sys.argv) != 2:
        print("Usage: python aCTHeartbeatWatchdog.py timelimit")
        sys.exit(1)

    timelimit = int(sys.argv[1])

    # Log and DB handles
    act_logger = aCTLogger('aCTHeartbeatWatchdog')
    log = act_logger()
    dbarc = aCTDBArc(log)
    dbpanda = aCTDBPanda(log)

    # Query for running jobs with theartbeat longer than timelimit seconds ago
    outdated = dbpanda.timeStampLessThan("theartbeat", timelimit)
    select = "sendhb=1 and " \
             "pandastatus in ('sent', 'starting', 'running', 'transferring') and " \
             "theartbeat != 0 and " + outdated
    columns = ['pandaid', 'pandastatus', 'proxyid', 'sitename', 'theartbeat']
    jobs = dbpanda.getJobs(select, columns)

    if not jobs:
        return

    summary = 'Found %d jobs with outdated heartbeat (older than %d seconds):\n'
    print(summary % (len(jobs), timelimit))
    header = ['pandaid', 'site', 'status', 'theartbeat', 'Panda response']
    print('\t'.join(header))

    # One aCTPanda object per proxy, created the first time the proxy is seen
    pandas = {}
    for job in jobs:
        proxyid = job['proxyid']
        if proxyid not in pandas:
            pandas[proxyid] = aCTPanda(log, dbarc.getProxyPath(proxyid))

        pandaid = job['pandaid']
        status = job['pandastatus']
        response = pandas[proxyid].updateStatus(pandaid, status)

        row = [
            str(pandaid), job['sitename'], status,
            str(job['theartbeat']),
            str(response)
        ]
        print('\t'.join(row))

        # update heartbeat time in the DB
        new_heartbeat = dbpanda.getTimeStamp(time.time() + 1)
        dbpanda.updateJob(pandaid, {'theartbeat': new_heartbeat})
Example #13
0
def bootstrap_db():
    '''Create the ARC, client, Condor and Panda DB tables'''
    # TODO: setup only what is needed based on config and app
    act_logger = aCTLogger('aCTBootstrap')
    log = act_logger()
    # All four DB objects are built before any table creation is attempted
    arc_db = aCTDBArc(log)
    client_db = ClientDB(log)
    condor_db = aCTDBCondor(log)
    panda_db = aCTDBPanda(log)
    steps = (
        (arc_db,
         'Error creating arc tables, see aCTBootstrap.log for details'),
        (client_db,
         'Error creating client tables, see aCTBootstrap.log for details'),
        (condor_db,
         'Error creating condor tables, see aCTBootstrap.log for details'),
        (panda_db,
         'Error creating panda tables, see aCTBootstrap.log for details'),
    )
    for database, failure_message in steps:
        if not database.createTables():
            print(failure_message)
Example #14
0
    def __init__(self, args):
        """Prepare directories and logging, then create the process manager.

        The process manager is only created when no ``act.stop`` file exists
        in the configured aCT location directory.

        :param args: argument sequence; when it has at least two items,
            ``args[1]`` is passed to ``self.daemon``.
        """
        # Check we have the right ARC version
        self.checkARC()

        # Configuration objects
        self.conf = aCTConfig.aCTConfigARC()
        self.appconf = aCTConfig.aCTConfigAPP()

        # Create required directories
        tmpdir = self.conf.get(["tmp", "dir"])
        self.makeDirs(tmpdir)
        for subdir in ('inputfiles', 'eventranges', 'failedlogs'):
            self.makeDirs(os.path.join(tmpdir, subdir))
        proxystore = self.conf.get(["voms", "proxystoredir"])
        self.makeDirs(proxystore, 0o700)
        logdir = self.conf.get(["logger", "logdir"])
        self.makeDirs(logdir)

        # Main log
        main_logger = aCTLogger.aCTLogger("aCTMain")
        self.logger = main_logger
        self.log = main_logger()

        # Check if we should run: an act.stop file disables child processes
        actdir = self.conf.get(["actlocation", "dir"])
        stopfile = os.path.join(actdir, "act.stop")
        self.shouldrun = not os.path.exists(stopfile)
        if not self.shouldrun:
            self.log.warning(
                "Detected act.stop file, won't start child processes")

        # daemon operations
        if len(args) >= 2:
            self.daemon(args[1])

        # process manager; failures are logged as critical and re-raised
        try:
            if self.shouldrun:
                manager = aCTProcessManager.aCTProcessManager(
                    self.log, self.conf, self.appconf)
                self.procmanager = manager
        except Exception as e:
            self.log.critical("*** Unexpected exception! ***")
            self.log.critical(traceback.format_exc())
            self.log.critical("*** Process exiting ***")
            raise e
Example #15
0
import subprocess
import sys
import time
from datetime import datetime
from act.arc.aCTDBArc import aCTDBArc
from act.atlas.aCTDBPanda import aCTDBPanda
from act.common.aCTLogger import aCTLogger
from act.common.aCTConfig import aCTConfigARC

# Exactly two values are unpacked from the first two command-line arguments;
# fewer than two arguments makes the unpacking raise ValueError.
try:
    service_id, webpage_url = sys.argv[1:3]
except ValueError:
    # Catch only the unpacking failure: a bare except would also intercept
    # SystemExit and KeyboardInterrupt.
    print('Usage: kibana.py service_id webpage_url')
    sys.exit(1)

# Module-level log, DB handles and config used by the functions below
logger = aCTLogger('kibana probe')
log = logger()
arcdb = aCTDBArc(log)
pandadb = aCTDBPanda(log)
config = aCTConfigARC()


def getARCJobs():
    """Return the result of ``arcdb.getNArcJobs('TRUE')`` as a string."""
    njobs = arcdb.getNArcJobs('TRUE')
    return str(njobs)


def getARCSlots():
    jobs = arcdb.getArcJobsInfo("state='Running'", ['RequestedSlots'])
    slots = 0
    for j in jobs:
        slots += j['RequestedSlots']
Example #16
0
def report(actconfs):
    """Build a text table of Panda job counts per site and state.

    For every entry of ``actconfs`` the ``pandajobs`` table is read and each
    row is counted under its site and state; rows in state "running" also
    add their core count (1 when the column is NULL) to the "slots" column.

    :param actconfs: iterable of config file paths. A non-empty entry is
        exported as the ``ACTCONFIGARC`` environment variable before the DB
        is opened; an empty entry leaves the environment untouched.
    :return: the formatted report as a string. When exactly one config was
        given, the output of ``HarvesterReport()`` is appended.
    """
    actlogger = aCTLogger('aCTReport')
    logger = actlogger()
    rep = {}    # site -> {state or "slots" -> count}
    rtot = {}   # state or "slots" -> count over all sites
    states = [
        "sent", "starting", "running", "slots", "tovalidate", "toresubmit",
        "toclean", "finished", "done", "failed", "donefailed", "tobekilled",
        "cancelled", "donecancelled"
    ]

    for conf in actconfs:
        if conf:
            os.environ['ACTCONFIGARC'] = conf

        db = aCTDBPanda(logger)
        c = db.db.conn.cursor()
        c.execute("select sitename, actpandastatus, corecount from pandajobs")
        for r in c.fetchall():
            site, state = str(r[0]), str(r[1])
            corecount = 1 if r[2] is None else int(r[2])

            # Explicit dict handling instead of bare ``except:`` blocks, so
            # that unrelated errors are no longer silently swallowed.
            for counts in (rep.setdefault(site, {}), rtot):
                counts[state] = counts.get(state, 0) + 1
                if state == "running":
                    counts["slots"] = counts.get("slots", 0) + corecount

    # "slots" is a core count, not a job count, so it is left out of the sum
    total_jobs = sum(v for k, v in rtot.items() if k != 'slots')
    header = ' '.join(f'{s:>9}' for s in states)
    lines = [
        f"All Panda jobs: {total_jobs}",
        f"{'':29} {header}",
    ]

    # One row per site; states without any job are shown as "-"
    for site in sorted(rep):
        site_counts = rep[site]
        cells = ''.join(f"{site_counts.get(s, '-'):>10}" for s in states)
        lines.append(f"{site:>28.28}:{cells}")

    totals = ''.join(f"{rtot.get(s, '-'):>10}" for s in states)
    lines.append(f'{"Totals":>28}:{totals}')

    log = '\n'.join(lines) + '\n\n'
    if len(actconfs) == 1:
        log += HarvesterReport()
    return log
Example #17
0
#!/usr/bin/python

import classad
import logging
from act.arc.aCTDBArc import aCTDBArc
from act.condor.aCTDBCondor import aCTDBCondor
from act.common.aCTProxy import aCTProxy
from act.common.aCTLogger import aCTLogger

# Log for this test script, created with cluster='test'
logger = aCTLogger('acttest', cluster='test')
log = logger()

# ARC and Condor DB handles, both given the same log
db = aCTDBArc(log)
dbcondor = aCTDBCondor(log)

xrsl = '''&(executable=/bin/sleep)
           (arguments=100)
           (stdout=stdout)
           (rerun=2)
           (gmlog=gmlog)
           (inputfiles = ("runpilot2-wrapper.sh" "/cvmfs/atlas.cern.ch/repo/sw/PandaPilotWrapper/latest/runpilot2-wrapper.sh"))
           (runtimeenvironment = "ENV/PROXY")
           '''

cad = classad.ClassAd('''\n
    [\n
            UserLog = "/tmp/log/test.$(ClusterId).log"; \n
            Out = "/tmp/output/test.$(ClusterId).out"; \n
            Err = "/tmp/error/test.$(ClusterId).err"; \n
            JobUniverse = 9; \n
            X509UserProxy = "/tmp/x509up_u100780"; \n
Example #18
0
        self.setInputs()
        self.setLog()
        self.setGMLog()
        self.setOutputs()
        self.setPriority()
        self.setEnvironment()

    def getXrsl(self):
        """Return the values of ``self.xrsl`` joined by newlines after "&"."""
        attributes = self.xrsl.values()
        body = '\n'.join(attributes)
        return "&" + body


if __name__ == '__main__':
    from act.common.aCTLogger import aCTLogger
    from act.common.aCTConfig import aCTConfigAPP
    from datetime import datetime
    logger = aCTLogger('test')
    log = logger()
    pandajob = "jobsetID=799&logGUID=5ba37307-e4d7-4224-82f9-ff0503622677&cmtConfig=x86_64-slc6-gcc48-opt&prodDBlocks=user.rwatari%3Auser.rwatari.1k_10mu.xm005_yp106.RDO.20161003_2_EXT0_RDO2RDOFTK_v01_all1E5ev_EXT2.99328897%2Cpanda.1110091801.467362.lib._9845189&dispatchDBlockTokenForOut=NULL%2CNULL%2CNULL&destinationDBlockToken=NULL%2CNULL%2CNULL&destinationSE=NULL&realDatasets=user.rwatari.1k_10mu.xm005_yp106.RDO.20161003_2_EXT0_PseduoTracking_v14_all1E5ev_EXT0%2F%2Cuser.rwatari.1k_10mu.xm005_yp106.RDO.20161003_2_EXT0_PseduoTracking_v14_all1E5ev_EXT1%2F%2Cuser.rwatari.1k_10mu.xm005_yp106.RDO.20161003_2_EXT0_PseduoTracking_v14_all1E5ev.log%2F&prodUserID=%2FDC%3Dch%2FDC%3Dcern%2FOU%3DOrganic+Units%2FOU%3DUsers%2FCN%3Drwatari%2FCN%3D764796%2FCN%3DRyutaro+Watari%2FCN%3Dproxy&GUID=51997D0A-850A-9044-A264-83A8986FE1C6%2C1de48e07-f37c-43e6-a343-3947342858b1&realDatasetsIn=user.rwatari.1k_10mu.xm005_yp106.RDO.20161003_2_EXT0_RDO2RDOFTK_v01_all1E5ev_EXT2%2Cpanda.1110091801.467362.lib._9845189&nSent=0&cloud=ND&StatusCode=0&homepackage=AnalysisTransforms-AtlasProduction_20.7.3.7&inFiles=user.rwatari.9557718.EXT2._000016.RDO_FTK.pool.root%2Cpanda.1110091801.467362.lib._9845189.7456421499.lib.tgz&processingType=panda-client-0.5.69-jedi-athena-trf&currentPriority=814&fsize=1140292964%2C727003478&fileDestinationSE=ANALY_SiGNET_DIRECT%2CANALY_SiGNET_DIRECT%2CANALY_SiGNET_DIRECT&scopeOut=user.rwatari%2Cuser.rwatari&minRamCount=4772&jobDefinitionID=836&scopeLog=user.rwatari&transformation=http%3A%2F%2Fpandaserver.cern.ch%3A25085%2Ftrf%2Fuser%2FrunAthena-00-00-12&maxDiskCount=3167&coreCount=1&prodDBlockToken=NULL%2CNULL&transferType=NULL&destinationDblock=user.rwatari.1k_10mu.xm005_yp106.RDO.20161003_2_EXT0_PseduoTracking_v14_all1E5ev_EXT0.104826316_sub0341667607%2Cuser.rwatari.1k_10mu.xm005_yp106.RDO.20161003_2_EXT0_PseduoTracking_v14_all1E5ev_EXT1.104826317_sub0341667608%2Cuser.rwatari.1k_10mu.xm005_yp106.RDO.20161003_2_EXT0_PseduoTracking_v14_all1E5ev.log.104826315_sub0341667610&
dispatchDBlockToken=NULL%2CNULL&jobPars=-l+panda.1110091801.467362.lib._9845189.7456421499.lib.tgz+--sourceURL+https%3A%2F%2Faipanda078.cern.ch%3A25443+-r+WorkArea%2Frun%2Ffast%2F+--trf+--useLocalIO++-i+%22%5B%27user.rwatari.9557718.EXT2._000016.RDO_FTK.pool.root%27%5D%22+-o+%22%7B%27IROOT%27%3A+%5B%28%27InDetDxAOD.pool.root%27%2C+%27user.rwatari.9845189.EXT0._002324.InDetDxAOD.pool.root%27%29%2C+%28%27esd.pool.root%27%2C+%27user.rwatari.9845189.EXT1._002324.esd.pool.root%27%29%5D%7D%22++-j+%22Reco_tf.py%2520--inputRDOFile%253Duser.rwatari.9557718.EXT2._000016.RDO_FTK.pool.root%2520--outputESDFile%253Desd.pool.root%2520%2520--doAllNoise%2520False%2520--autoConfiguration%253Deverything%2520--numberOfCavernBkg%253D0%2520--postInclude%253DFTKFastSim%2FInDetDxAOD.py%2520--preExec%2520%2527rec.UserAlgs%253D%255B%2522FTKFastSim%2FFTKFastSimulation_jobOptions.py%2522%255D%253Brec.doCalo.set_Value_and_Lock%2528False%2529%253Brec.doMuon.set_Value_and_Lock%2528False%2529%253Brec.doJetMissingETTag.set_Value_and_Lock%2528False%2529%253Brec.doEgamma.set_Value_and_Lock%2528False%2529%253Brec.doMuonCombined.set_Value_and_Lock%2528False%2529%253Brec.doTau.set_Value_and_Lock%2528False%2529%253Brec.doTrigger.set_Value_and_Lock%2528False%2529%253Brec.doFTK.set_Value_and_Lock%2528True%2529%253Bfrom%2520AthenaCommon.DetFlags%2520import%2520DetFlags%253BDetFlags.all_setOn%2528%2529%253BDetFlags.FTK_setOn%2528%2529%2527%2520--maxEvents%253D-1%2520--postExec%2520r2e%253A%2520%2527ServiceMgr%252B%253DService%2528%2522BeamCondSvc%2522%2529%253BbeamCondSvc%253DServiceMgr.BeamCondSvc%253BbeamCondSvc.useDB%253DFalse%253BbeamCondSvc.posX%253D-0.0497705%253BbeamCondSvc.posY%253D1.06299%253BbeamCondSvc.posZ%253D0.0%253BbeamCondSvc.sigmaX%253D0.0251281%253BbeamCondSvc.sigmaY%253D0.0231978%253BbeamCondSvc.sigmaZ%253D0.1%253BbeamCondSvc.sigmaXY%253D-2.7745e-06%253BbeamCondSvc.tiltX%253D-1.51489e-05%253BbeamCondSvc.tiltY%253D-4.83891e-05%253B%2527%22&attemptNr=2&swRelease=Atlas-20.7.3&nucleus=NULL&max
CpuCount=0&outFiles=user.rwatari.9845189.EXT0._002324.InDetDxAOD.pool.root%2Cuser.rwatari.9845189.EXT1._002324.esd.pool.root%2Cuser.rwatari.1k_10mu.xm005_yp106.RDO.20161003_2_EXT0_PseduoTracking_v14_all1E5ev.log.9845189.002324.log.tgz&ddmEndPointOut=NDGF-T1_SCRATCHDISK%2CNDGF-T1_SCRATCHDISK%2CNDGF-T1_SCRATCHDISK&scopeIn=user.rwatari%2Cpanda&PandaID=3072596651&sourceSite=NULL&dispatchDblock=NULL%2Cpanda.1110091801.467362.lib._9845189&prodSourceLabel=user&checksum=ad%3Afd1c3aac%2Cad%3A516b31b3&jobName=user.rwatari.1k_10mu.xm005_yp106.RDO.20161003_2_EXT0_PseduoTracking_v14_all1E5ev%2F.3071213044&ddmEndPointIn=NDGF-T1_SCRATCHDISK%2CNDGF-T1_SCRATCHDISK&taskID=9845189&logFile=user.rwatari.1k_10mu.xm005_yp106.RDO.20161003_2_EXT0_PseduoTracking_v14_all1E5ev.log.9845189.002324.log.tgz"
    siteinfo = {
        'schedconfig': 'ANALY_SiGNET_DIRECT',
        'corecount': 1,
        'truepilot': False,
        'maxwalltime': 10800,
        'direct_access_lan': True,
        'type': 'analysis'
    }
    conf = aCTConfigAPP()
    pandadbjob = {
        'pandajob': pandajob,
        'siteName': 'ANALY_SiGNET_DIRECT',
        'eventranges': None,
Example #19
0
from act.common.aCTConfig import aCTConfigATLAS
from act.common.aCTLogger import aCTLogger
from act.arc.aCTDBArc import aCTDBArc
from aCTDBPanda import aCTDBPanda
from aCTPanda import aCTPanda
import sys
import time

# Exactly one command-line argument (the time limit in seconds) is required
if len(sys.argv) != 2:
    # Parenthesised single-argument form: prints the same text under the
    # Python 2 print statement and the Python 3 print function
    print("Usage: python aCTHeartbeatWatchdog.py timelimit")
    sys.exit(1)

timelimit = int(sys.argv[1])

# logger
logger = aCTLogger('aCTHeartbeatWatchdog')
log = logger()
# config
conf = aCTConfigATLAS()
# database (both handles are given the ["db", "file"] config value)
dbarc = aCTDBArc(log, conf.get(["db", "file"]))
dbpanda = aCTDBPanda(log, conf.get(["db", "file"]))

# Query for running jobs with theartbeat longer than timelimit seconds ago
select = "sendhb=1 and " \
         "pandastatus in ('sent', 'starting', 'running', 'transferring') and " \
         "theartbeat != 0 and " + dbpanda.timeStampLessThan("theartbeat", timelimit)
columns = ['pandaid', 'pandastatus', 'proxyid', 'sitename', 'theartbeat']
jobs = dbpanda.getJobs(select, columns)

if jobs: