def __init__(self):
    """Set up one Panda client and proxy id per configured VOMS role.

    Reads the DN from the configured proxy file, then for every role
    listed under [voms]/roles resolves the matching proxy from the
    proxies table and creates an aCTPanda instance for it.  The 'pilot'
    role is stored under the 'analysis' key.

    Raises:
        Exception: if a configured role has no proxy in the proxies table.
    """
    aCTATLASProcess.__init__(self)

    # Extract the DN from the proxy file named in the configuration.
    userconf = arc.UserConfig()
    userconf.ProxyPath(str(self.arcconf.get(['voms', 'proxypath'])))
    credential = arc.Credential(userconf)
    dn = credential.GetIdentityName()
    self.log.info("Running under DN %s" % dn)

    # One panda object per proxy; the site "type" maps to a specific
    # proxy role.
    self.pandas = {}
    # Site type -> proxy id in the proxies table.  In future for
    # analysis the id will change once the job is picked up.
    self.proxymap = {}

    proxystore = aCTProxy.aCTProxy(self.log)
    for role in self.arcconf.getList(['voms', 'roles', 'item']):
        attribute = '/atlas/Role=' + role
        proxyid = proxystore.getProxyId(dn, attribute)
        if not proxyid:
            raise Exception("Proxy with DN %s and attribute %s was not found in proxies table" % (dn, attribute))
        proxyfile = proxystore.path(dn, attribute=attribute)
        # The pilot role is mapped to the analysis type.
        if role == 'pilot':
            role = 'analysis'
        self.pandas[role] = aCTPanda.aCTPanda(self.log, proxyfile)
        self.proxymap[role] = proxyid

    # Timestamp of the last queue update (queue interval bookkeeping).
    self.queuestamp = 0
    # AGIS/queue info cache, filled in elsewhere.
    self.sites = {}
def main():
    """Resend heartbeats for panda jobs whose last heartbeat is too old.

    Usage: aCTHeartbeatWatchdog.py <timelimit-in-seconds>.  Finds active
    jobs whose stored heartbeat (column 'theartbeat') is older than the
    limit, pushes their current status to the Panda server and bumps the
    stored heartbeat time.
    """
    if len(sys.argv) != 2:
        print("Usage: python aCTHeartbeatWatchdog.py timelimit")
        sys.exit(1)
    timelimit = int(sys.argv[1])

    # Logging setup.
    logger = aCTLogger('aCTHeartbeatWatchdog')
    log = logger()

    # Database handles: ARC side (for proxy paths) and Panda side (jobs).
    dbarc = aCTDBArc(log)
    dbpanda = aCTDBPanda(log)

    # Query for running jobs with theartbeat longer than timelimit seconds ago.
    select = "sendhb=1 and " \
             "pandastatus in ('sent', 'starting', 'running', 'transferring') and " \
             "theartbeat != 0 and " + dbpanda.timeStampLessThan("theartbeat", timelimit)
    columns = ['pandaid', 'pandastatus', 'proxyid', 'sitename', 'theartbeat']
    jobs = dbpanda.getJobs(select, columns)
    if not jobs:
        return

    print('Found %d jobs with outdated heartbeat (older than %d seconds):\n' % (len(jobs), timelimit))
    print('\t'.join(['pandaid', 'site', 'status', 'theartbeat', 'Panda response']))

    # Cache one Panda client per proxy so each proxy is loaded only once.
    panda_clients = {}
    for job in jobs:
        proxyid = job['proxyid']
        try:
            client = panda_clients[proxyid]
        except KeyError:
            client = aCTPanda(log, dbarc.getProxyPath(proxyid))
            panda_clients[proxyid] = client

        response = client.updateStatus(job['pandaid'], job['pandastatus'])
        row = [str(job['pandaid']),
               job['sitename'],
               job['pandastatus'],
               str(job['theartbeat']),
               str(response)]
        print('\t'.join(row))

        # Refresh the heartbeat time in the DB so the job is not picked
        # up again immediately.
        dbpanda.updateJob(job['pandaid'],
                          {'theartbeat': dbpanda.getTimeStamp(time.time() + 1)})
def __init__(self):
    """Set up Panda clients, proxy mappings and per-run state.

    Reads the DN from the configured proxy file, then for every role
    listed under [voms]/roles resolves the matching proxy from the
    proxies table and creates an aCTPanda instance for it.  The 'pilot'
    role is mapped to the 'analysis' type ('user' prodSourceLabel);
    other roles use the 'managed' prodSourceLabel.  Also registers this
    aCT with APFMon and initialises queue/job bookkeeping.

    Raises:
        Exception: if a configured role has no proxy in the proxies table.
    """
    aCTATLASProcess.__init__(self)

    # Extract the DN from the proxy file named in the configuration.
    userconf = arc.UserConfig()
    userconf.ProxyPath(str(self.arcconf.get(['voms', 'proxypath'])))
    credential = arc.Credential(userconf)
    dn = credential.GetIdentityName()
    self.log.info("Running under DN %s" % dn)

    # One panda object per proxy; the site "type" maps to a specific
    # proxy role.
    self.pandas = {}
    # Site type -> proxy id in the proxies table.  In future for
    # analysis the id will change once the job is picked up.
    self.proxymap = {}

    proxystore = aCTProxy.aCTProxy(self.log)
    for role in self.arcconf.getList(['voms', 'roles', 'item']):
        attribute = '/atlas/Role=' + role
        proxyid = proxystore.getProxyId(dn, attribute)
        if not proxyid:
            raise Exception("Proxy with DN " + dn + " and attribute " + attribute + " was not found in proxies table")
        proxyfile = proxystore.path(dn, attribute=attribute)
        # The pilot role is mapped to the analysis type; choose the
        # prodSourceLabel key accordingly.
        prodsourcelabel = 'managed'
        if role == 'pilot':
            role = 'analysis'
            prodsourcelabel = 'user'
        self.proxymap['panda'] = proxyid
        self.pandas[role] = aCTPanda.aCTPanda(self.log, proxyfile)
        self.proxymap[prodsourcelabel] = proxyid

    # Timestamp of the last queue update (queue interval bookkeeping).
    self.queuestamp = 0

    # Register this aCT instance with APFMon.
    self.apfmon.registerFactory()

    # AGIS queue info cache.
    self.sites = {}

    # Panda info on activated jobs: {queue: {'rc_test': 2, 'rest': 40}}
    self.activated = {}

    # Flag forcing getJob calls regardless, to keep a constant stream.
    self.getjob = False
#!/usr/bin/python
#
# Get the list of jobs from panda for the configured scheduler ID and send a
# heartbeat for starting and running jobs.
#
# Usage: fakeHeartbeats.py <path to proxy>
#
# Fixes: converted Python 2 print statements to print() calls (the rest of
# the codebase is Python 3 and the old syntax is a SyntaxError there), and
# exit with a non-zero status on a usage error instead of 0.

from act.atlas.aCTPanda import aCTPanda
import logging
import sys

if len(sys.argv) != 2:
    print("Usage: fakeHeartbeats.py <path to proxy>")
    sys.exit(1)

logging.basicConfig(level=logging.DEBUG)
p = aCTPanda(logging.getLogger('test'), sys.argv[1])

# Query each cloud in turn and re-send the current status of every
# starting/running job as a heartbeat.
for cloud in ['ND', 'CA', 'CERN', 'DE', 'ES', 'FR', 'IT', 'NL', 'TW', 'UK', 'US']:
    print('Cloud', cloud)
    jobs = p.queryJobInfo(cloud=cloud)
    for j in jobs:
        if j['jobStatus'] == 'starting' or j['jobStatus'] == 'running':
            print(j)
            p.updateStatus(j['PandaID'], j['jobStatus'])