Example #1
0
    def __init__(self, ceflavour=None):
        """Create the loggers, configs, database handles and CRIC state.

        ceflavour: list of CE flavour names stored on ``self.flavour``.
            When omitted (or None) it defaults to ``['ARC-CE']``.
        """
        # Build the default per call: a list literal in the signature is a
        # single object that every instance would share through self.flavour.
        if ceflavour is None:
            ceflavour = ['ARC-CE']

        # Get agent name from /path/to/aCTAgent.py (drops the last 3 chars)
        self.name = os.path.basename(sys.argv[0])[:-3]

        # logger: the aCTLogger objects are called to obtain the log handles
        self.logger = aCTLogger.aCTLogger(self.name)
        self.log = self.logger()
        self.criticallogger = aCTLogger.aCTLogger('aCTCritical', arclog=False)
        self.criticallog = self.criticallogger()

        # config
        self.conf = aCTConfig.aCTConfigAPP()
        self.arcconf = aCTConfig.aCTConfigARC()
        self.tmpdir = str(self.arcconf.get(['tmp', 'dir']))

        # database
        self.dbarc = aCTDBArc.aCTDBArc(self.log)
        self.dbcondor = aCTDBCondor.aCTDBCondor(self.log)
        self.dbpanda = aCTDBPanda.aCTDBPanda(self.log)

        # APFMon
        self.apfmon = aCTAPFMon.aCTAPFMon(self.conf)

        # CRIC info; sites, osmap and sitesselect start out empty here
        self.flavour = ceflavour
        self.cricparser = aCTCRICParser.aCTCRICParser(self.log)
        self.sites = {}
        self.osmap = {}
        self.sitesselect = ''

        # start time for periodic restart
        self.starttime = time.time()
        self.log.info("Started %s", self.name)
Example #2
0
def test():
    """Drive aCTAPFMon by hand: register a factory, labels and jobs, then update one job."""
    from act.common.aCTConfig import aCTConfigAPP

    # The monitor is built straight from a freshly loaded APP configuration
    monitor = aCTAPFMon(aCTConfigAPP())
    monitor.registerFactory()

    labels = ['ARC-TEST', 'BOINC_MCORE']
    monitor.registerLabels(labels)

    job_ids = ['123456', '234567']
    monitor.registerJobs(job_ids, 'ARC-TEST')
    monitor.updateJob('123456', 'running')
Example #3
0
def bootstrap_conf():
    """Check that the ARC and APP configurations can both be loaded.

    Each configuration object is constructed in turn (ARC first). On the
    first failure the error is printed and the process exits with status 1.
    """
    # Lambdas defer the class lookup so that it happens inside the try block.
    checks = (
        ('Error processing ARC config file: %s', lambda: aCTConfigARC()),
        ('Error processing APP config file: %s', lambda: aCTConfigAPP()),
    )
    for message, load in checks:
        try:
            load()
        except Exception as err:
            print(message % str(err))
            sys.exit(1)
Example #4
0
    def AppReport(self):
        """Log the report of every app module listed under modules/app.

        For each app, ``<app>.aCTReport.report`` is called with
        ``self.actconfs`` and its result is passed to ``self.log``. A missing
        module or missing ``report`` attribute is noted at info level on
        ``self.actlog``; any other exception is logged there as an error.
        """
        for app in aCTConfigAPP().getList(["modules", "app"]):
            try:
                module = importlib.import_module(f'{app}.aCTReport')
                report = module.report
                self.log(report(self.actconfs))
            except ModuleNotFoundError:
                self.actlog.info(f'No report in module {app}')
            except AttributeError:
                self.actlog.info(f'aCTReport.report() not found in {app}')
            except Exception as e:
                self.actlog.error(f'Exception running {app}.aCTReport.report: {e}')
Example #5
0
def submit(args):
    """Validate a job configuration file and copy it to the aCT buffer.

    args: object whose ``conffile`` attribute is the path of a file made of
        ``key=value`` lines. Lines without '=' are ignored.

    Returns 0 on success. Returns 1, after logging an error, when the file
    cannot be opened, a mandatory parameter is missing, ``bufferdir`` is not
    configured, the job template does not exist, or the copy fails.
    """
    try:
        with open(args.conffile) as f:
            config = {}
            for line in f:
                # Split on the first '=' only, so a value that itself
                # contains '=' is kept whole instead of being truncated.
                key, sep, value = line.partition('=')
                if sep:
                    config[key] = value.strip()
    except OSError as e:
        logger.error(
            f"Error: Failed to open job configuration file {args.conffile}: {e}"
        )
        return 1

    # Check for mandatory parameters
    for param in ('JobTemplate', 'RandomSeed1SequenceStart',
                  'RandomSeed2SequenceStart', 'NumberofJobs'):
        if param not in config:
            logger.error(f"Error: {param} not defined in {args.conffile}")
            return 1

    actconf = aCTConfigAPP()
    bufferdir = actconf.get(['jobs', 'bufferdir'])
    if not bufferdir:
        logger.error("Error: bufferdir not found in aCT configuration")
        return 1

    template_file = os.path.join(bufferdir, 'templates', config['JobTemplate'])
    if not os.path.exists(template_file):
        logger.error(f"Error: template not found at {template_file}")
        return 1

    # Everything looks ok, so submit the job by copying the config file
    # into <bufferdir>/configs
    configdir = os.path.join(bufferdir, 'configs')
    try:
        shutil.copy(args.conffile, configdir)
    except Exception as e:
        logger.error(f"Failed to copy {args.conffile} to {configdir}: {e}")
        return 1

    logger.info(
        f"Submitted job configuration at {args.conffile} to create {config['NumberofJobs']} jobs"
    )
    return 0
Example #6
0
    def app_collect(self):
        """Yield the metrics collected by every configured app module.

        For each app listed under modules/app in the APP configuration,
        everything produced by ``<app>.aCTMonitor.collect(self.log)`` is
        yielded. A missing module or missing ``collect`` attribute is logged
        at info level; any other exception is logged as an error and the
        loop moves on to the next app.
        """
        appconf = aCTConfigAPP()
        apps = appconf.getList(["modules", "app"])
        for app in apps:
            try:
                yield from importlib.import_module(f'{app}.aCTMonitor').collect(self.log)
                self.log.info(f'Added metrics from {app}.aCTMonitor')
            except ModuleNotFoundError:
                self.log.info(f'No collect in module {app}')
            except AttributeError:
                self.log.info(f'aCTMonitor.collect() not found in {app}')
            except Exception as e:
                self.log.error(f'Exception running {app}.aCTMonitor.collect: {e}')
        # The generator ends by falling off the end. An explicit
        # `raise StopIteration` here is turned into RuntimeError by PEP 479
        # (the default behaviour from Python 3.7), breaking every consumer.
Example #7
0
def bootstrap_app():
    """Run the bootstrap hook of every app listed under modules/app.

    For each app, ``<app>.aCTBootstrap.bootstrap()`` is called. A missing
    module, a missing ``bootstrap`` attribute or any other exception is
    printed and the loop continues with the next app.
    """
    for app in aCTConfigAPP().getList(["modules", "app"]):
        print(f'Setting up app from {app}...')
        try:
            module = importlib.import_module(f'{app}.aCTBootstrap')
            setup = module.bootstrap
            setup()
        except ModuleNotFoundError:
            print(f'No bootstrap in module {app}')
        except AttributeError:
            print(f'aCTBootstrap.bootstrap() not found in {app}')
        except Exception as e:
            print(f'Exception running {app}.aCTBootstrap.bootstrap(): {e}')
Example #8
0
    def __init__(self, logger, proxyfile):
        """Read the panda server settings and prepare a client TLS context.

        logger: log object stored on ``self.log``.
        proxyfile: path of a file used as both certificate chain and private
            key for the client side of the TLS connection.
        """
        self.conf = aCTConfig.aCTConfigAPP()
        server = self.conf.get(['panda', 'server'])
        u = urllib.parse.urlparse(server)
        self.hostport = u.netloc
        self.topdir = u.path
        self.log = logger
        # timeout in seconds; setdefaulttimeout applies to every socket
        # created afterwards in this process, not only to this object
        self.timeout = int(self.conf.get(['panda', 'timeout']))
        socket.setdefaulttimeout(self.timeout)

        # Negotiate the highest TLS version both ends support instead of
        # pinning the deprecated TLS 1.0 (ssl.PROTOCOL_TLSv1).
        self.context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
        # NOTE(review): PROTOCOL_TLS_CLIENT turns hostname checking on by
        # default; it is switched off here only to keep the verification
        # behaviour this code had before. Consider enabling it once the
        # server certificate names are confirmed to match self.hostport.
        self.context.check_hostname = False
        self.context.load_cert_chain(proxyfile, keyfile=proxyfile)
        self.context.verify_mode = ssl.CERT_REQUIRED
        self.context.load_verify_locations('/etc/pki/tls/certs/CERN-bundle.pem')
Example #9
0
    def __init__(self, args):
        """Prepare directories, logging and (unless stopped) the process manager.

        args: argument sequence; when it has at least two items, ``args[1]``
            is handed to ``self.daemon``.

        Any exception raised while creating the process manager is logged at
        critical level together with its traceback and then re-raised.
        """
        # Check we have the right ARC version
        self.checkARC()

        # Load the ARC and APP configurations
        self.conf = aCTConfig.aCTConfigARC()
        self.appconf = aCTConfig.aCTConfigAPP()

        # Create required directories: the tmp dir and its fixed
        # subdirectories, then the proxy store (mode 0o700) and the log dir
        tmpdir = self.conf.get(["tmp", "dir"])
        self.makeDirs(tmpdir)
        for subdir in ('inputfiles', 'eventranges', 'failedlogs'):
            self.makeDirs(os.path.join(tmpdir, subdir))
        self.makeDirs(self.conf.get(["voms", "proxystoredir"]), 0o700)
        self.makeDirs(self.conf.get(["logger", "logdir"]))

        # Logging is set up only after the log directory exists
        self.logger = aCTLogger.aCTLogger("aCTMain")
        self.log = self.logger()

        # An act.stop file in the aCT location means we should not run
        stopfile = os.path.join(self.conf.get(["actlocation", "dir"]), "act.stop")
        self.shouldrun = not os.path.exists(stopfile)
        if not self.shouldrun:
            self.log.warning(
                "Detected act.stop file, won't start child processes")

        # daemon operations
        if len(args) >= 2:
            self.daemon(args[1])

        # Without permission to run there is no process manager to create
        if not self.shouldrun:
            return

        try:
            self.procmanager = aCTProcessManager.aCTProcessManager(
                self.log, self.conf, self.appconf)
        except Exception as e:
            for line in ("*** Unexpected exception! ***",
                         traceback.format_exc(),
                         "*** Process exiting ***"):
                self.log.critical(line)
            raise e
Example #10
0
    def __init__(self):
        """Create the loggers, configs, database handles and Rucio client."""
        # The agent name is the script file name (e.g. /path/to/aCTAgent.py)
        # with its last three characters removed
        script = os.path.basename(sys.argv[0])
        self.name = script[:-3]

        # Regular logger, named after the agent
        self.logger = aCTLogger.aCTLogger(self.name)
        self.log = self.logger()
        # Separate 'aCTCritical' logger, created with arclog disabled
        self.criticallogger = aCTLogger.aCTLogger('aCTCritical', arclog=False)
        self.criticallog = self.criticallogger()

        # APP and ARC configuration; the tmp dir comes from the ARC one
        self.conf = aCTConfig.aCTConfigAPP()
        self.arcconf = aCTConfig.aCTConfigARC()
        tmpdir = self.arcconf.get(['tmp', 'dir'])
        self.tmpdir = str(tmpdir)

        # Database handles share the regular log
        self.dbarc = aCTDBArc.aCTDBArc(self.log)
        self.dbldmx = aCTDBLDMX.aCTDBLDMX(self.log)

        # Rucio client
        self.rucio = Client()

        # Remember when we started, for periodic restart
        self.starttime = time.time()
        self.log.info("Started %s", self.name)
Example #11
0
    def getXrsl(self):
        """Return '&' followed by the values of self.xrsl, one per line."""
        body = '\n'.join(self.xrsl.values())
        return "&" + body


if __name__ == '__main__':
    # Manual check when the file is run as a script: feed one sample job
    # through aCTPanda2Xrsl and print the resulting xRSL.
    from act.common.aCTLogger import aCTLogger
    from act.common.aCTConfig import aCTConfigAPP
    from datetime import datetime
    # The aCTLogger object is called to obtain the log handle used below
    logger = aCTLogger('test')
    log = logger()
    # pandajob (next statement) is a URL-encoded string of key=value pairs
    # joined by '&'
    pandajob = "jobsetID=799&logGUID=5ba37307-e4d7-4224-82f9-ff0503622677&cmtConfig=x86_64-slc6-gcc48-opt&prodDBlocks=user.rwatari%3Auser.rwatari.1k_10mu.xm005_yp106.RDO.20161003_2_EXT0_RDO2RDOFTK_v01_all1E5ev_EXT2.99328897%2Cpanda.1110091801.467362.lib._9845189&dispatchDBlockTokenForOut=NULL%2CNULL%2CNULL&destinationDBlockToken=NULL%2CNULL%2CNULL&destinationSE=NULL&realDatasets=user.rwatari.1k_10mu.xm005_yp106.RDO.20161003_2_EXT0_PseduoTracking_v14_all1E5ev_EXT0%2F%2Cuser.rwatari.1k_10mu.xm005_yp106.RDO.20161003_2_EXT0_PseduoTracking_v14_all1E5ev_EXT1%2F%2Cuser.rwatari.1k_10mu.xm005_yp106.RDO.20161003_2_EXT0_PseduoTracking_v14_all1E5ev.log%2F&prodUserID=%2FDC%3Dch%2FDC%3Dcern%2FOU%3DOrganic+Units%2FOU%3DUsers%2FCN%3Drwatari%2FCN%3D764796%2FCN%3DRyutaro+Watari%2FCN%3Dproxy&GUID=51997D0A-850A-9044-A264-83A8986FE1C6%2C1de48e07-f37c-43e6-a343-3947342858b1&realDatasetsIn=user.rwatari.1k_10mu.xm005_yp106.RDO.20161003_2_EXT0_RDO2RDOFTK_v01_all1E5ev_EXT2%2Cpanda.1110091801.467362.lib._9845189&nSent=0&cloud=ND&StatusCode=0&homepackage=AnalysisTransforms-AtlasProduction_20.7.3.7&inFiles=user.rwatari.9557718.EXT2._000016.RDO_FTK.pool.root%2Cpanda.1110091801.467362.lib._9845189.7456421499.lib.tgz&processingType=panda-client-0.5.69-jedi-athena-trf&currentPriority=814&fsize=1140292964%2C727003478&fileDestinationSE=ANALY_SiGNET_DIRECT%2CANALY_SiGNET_DIRECT%2CANALY_SiGNET_DIRECT&scopeOut=user.rwatari%2Cuser.rwatari&minRamCount=4772&jobDefinitionID=836&scopeLog=user.rwatari&transformation=http%3A%2F%2Fpandaserver.cern.ch%3A25085%2Ftrf%2Fuser%2FrunAthena-00-00-12&maxDiskCount=3167&coreCount=1&prodDBlockToken=NULL%2CNULL&transferType=NULL&destinationDblock=user.rwatari.1k_10mu.xm005_yp106.RDO.20161003_2_EXT0_PseduoTracking_v14_all1E5ev_EXT0.104826316_sub0341667607%2Cuser.rwatari.1k_10mu.xm005_yp106.RDO.20161003_2_EXT0_PseduoTracking_v14_all1E5ev_EXT1.104826317_sub0341667608%2Cuser.rwatari.1k_10mu.xm005_yp106.RDO.20161003_2_EXT0_PseduoTracking_v14_all1E5ev.log.104826315_sub0341667610&
dispatchDBlockToken=NULL%2CNULL&jobPars=-l+panda.1110091801.467362.lib._9845189.7456421499.lib.tgz+--sourceURL+https%3A%2F%2Faipanda078.cern.ch%3A25443+-r+WorkArea%2Frun%2Ffast%2F+--trf+--useLocalIO++-i+%22%5B%27user.rwatari.9557718.EXT2._000016.RDO_FTK.pool.root%27%5D%22+-o+%22%7B%27IROOT%27%3A+%5B%28%27InDetDxAOD.pool.root%27%2C+%27user.rwatari.9845189.EXT0._002324.InDetDxAOD.pool.root%27%29%2C+%28%27esd.pool.root%27%2C+%27user.rwatari.9845189.EXT1._002324.esd.pool.root%27%29%5D%7D%22++-j+%22Reco_tf.py%2520--inputRDOFile%253Duser.rwatari.9557718.EXT2._000016.RDO_FTK.pool.root%2520--outputESDFile%253Desd.pool.root%2520%2520--doAllNoise%2520False%2520--autoConfiguration%253Deverything%2520--numberOfCavernBkg%253D0%2520--postInclude%253DFTKFastSim%2FInDetDxAOD.py%2520--preExec%2520%2527rec.UserAlgs%253D%255B%2522FTKFastSim%2FFTKFastSimulation_jobOptions.py%2522%255D%253Brec.doCalo.set_Value_and_Lock%2528False%2529%253Brec.doMuon.set_Value_and_Lock%2528False%2529%253Brec.doJetMissingETTag.set_Value_and_Lock%2528False%2529%253Brec.doEgamma.set_Value_and_Lock%2528False%2529%253Brec.doMuonCombined.set_Value_and_Lock%2528False%2529%253Brec.doTau.set_Value_and_Lock%2528False%2529%253Brec.doTrigger.set_Value_and_Lock%2528False%2529%253Brec.doFTK.set_Value_and_Lock%2528True%2529%253Bfrom%2520AthenaCommon.DetFlags%2520import%2520DetFlags%253BDetFlags.all_setOn%2528%2529%253BDetFlags.FTK_setOn%2528%2529%2527%2520--maxEvents%253D-1%2520--postExec%2520r2e%253A%2520%2527ServiceMgr%252B%253DService%2528%2522BeamCondSvc%2522%2529%253BbeamCondSvc%253DServiceMgr.BeamCondSvc%253BbeamCondSvc.useDB%253DFalse%253BbeamCondSvc.posX%253D-0.0497705%253BbeamCondSvc.posY%253D1.06299%253BbeamCondSvc.posZ%253D0.0%253BbeamCondSvc.sigmaX%253D0.0251281%253BbeamCondSvc.sigmaY%253D0.0231978%253BbeamCondSvc.sigmaZ%253D0.1%253BbeamCondSvc.sigmaXY%253D-2.7745e-06%253BbeamCondSvc.tiltX%253D-1.51489e-05%253BbeamCondSvc.tiltY%253D-4.83891e-05%253B%2527%22&attemptNr=2&swRelease=Atlas-20.7.3&nucleus=NULL&max
CpuCount=0&outFiles=user.rwatari.9845189.EXT0._002324.InDetDxAOD.pool.root%2Cuser.rwatari.9845189.EXT1._002324.esd.pool.root%2Cuser.rwatari.1k_10mu.xm005_yp106.RDO.20161003_2_EXT0_PseduoTracking_v14_all1E5ev.log.9845189.002324.log.tgz&ddmEndPointOut=NDGF-T1_SCRATCHDISK%2CNDGF-T1_SCRATCHDISK%2CNDGF-T1_SCRATCHDISK&scopeIn=user.rwatari%2Cpanda&PandaID=3072596651&sourceSite=NULL&dispatchDblock=NULL%2Cpanda.1110091801.467362.lib._9845189&prodSourceLabel=user&checksum=ad%3Afd1c3aac%2Cad%3A516b31b3&jobName=user.rwatari.1k_10mu.xm005_yp106.RDO.20161003_2_EXT0_PseduoTracking_v14_all1E5ev%2F.3071213044&ddmEndPointIn=NDGF-T1_SCRATCHDISK%2CNDGF-T1_SCRATCHDISK&taskID=9845189&logFile=user.rwatari.1k_10mu.xm005_yp106.RDO.20161003_2_EXT0_PseduoTracking_v14_all1E5ev.log.9845189.002324.log.tgz"
    # Site description passed as the second argument to aCTPanda2Xrsl
    siteinfo = {
        'schedconfig': 'ANALY_SiGNET_DIRECT',
        'corecount': 1,
        'truepilot': False,
        'maxwalltime': 10800,
        'direct_access_lan': True,
        'type': 'analysis'
    }
    conf = aCTConfigAPP()
    # Job record wrapping the pandajob string defined above; 'created' is a
    # naive UTC timestamp taken at run time
    pandadbjob = {
        'pandajob': pandajob,
        'siteName': 'ANALY_SiGNET_DIRECT',
        'eventranges': None,
        'metadata': {},
        'created': datetime.utcnow()
    }
    # NOTE(review): the meaning of the third ({}) and fourth ('/tmp')
    # arguments is defined by aCTPanda2Xrsl.__init__, which is not visible
    # here - confirm against that signature.
    a = aCTPanda2Xrsl(pandadbjob, siteinfo, {}, '/tmp', conf, log)
    a.parse()
    print(a.getXrsl())
Example #12
0
 def __init__(self, logger):
     """Store the logger, load the APP and ARC configs and call getSites().

     logger: log object kept on ``self.log``.
     """
     self.tparse = 0
     self.log = logger
     self.conf = aCTConfig.aCTConfigAPP()
     self.arcconf = aCTConfig.aCTConfigARC()
     # Runs last, once every attribute above has been set
     self.getSites()