# NOTE(review): another ``def WatchAndKill`` appears later in this file and
# rebinds the name at import time -- confirm this copy is still needed.
def WatchAndKill(pid, user=None, listOnly=False):
    """Poll the process tree selected for ``pid`` and act on overrunning jobs.

    On every pass the tree for ``pid`` is re-selected and the command line
    (``proc.arg``) of each selected process is matched against a table of
    regular expressions.  When a pattern matches and the process ``stime``
    is greater than the limit paired with that pattern, ``killSelected`` is
    called for that process on a fresh ``ProcessTree``.  The loop then sleeps
    ten minutes and refreshes the process snapshot.

    The loop stops when nothing is selected for ``pid`` any more, or when the
    ``stime`` of ``pid`` is not greater than the value seen on the previous
    pass.

    Args:
        pid: Process id whose tree is watched.
        user: Forwarded to ``ProcessTree`` and ``allProcesses``.
        listOnly: When true, ``updateSelected`` is called instead of
            ``killSelected`` (presumably only displaying the offending tree,
            since that ``ProcessTree`` is built with ``show=True`` -- confirm).

    Returns:
        None.
    """
    print("WatchAndKill started with pid " + str(pid))
    # Command-line regex -> largest tolerated ``stime`` (presumably seconds --
    # TODO confirm the unit against ProcessTree).  Raw strings keep ``\.`` and
    # ``\s`` as regex escapes instead of invalid string escapes.
    procChk = {
        r'^cmsRun ': 60 * 60 * 3,
        r'/testing\.log;\s*fi\s*$': 60 * 30,
        r'python\s+whiteRabbit.py\s+': 60 * 60 * 2,
        r'/IB/runTests.py\s+': 60 * 60 * 8,
    }
    procs = ProcessTree(user, detail=True, xinfo=None, show=False)
    startTime = -1
    while True:
        procs.updateSelected(None, None, pid)
        # Stop once the watched tree is gone or its stime stopped growing.
        if (len(procs.selectedTree) == 0) or (procs.all[pid].stime <= startTime):
            break
        startTime = procs.all[pid].stime
        for p in procs.selectedTree:
            proc = procs.all[p]
            for cmd, limit in procChk.items():
                if re.search(cmd, proc.arg) and (proc.stime > limit):
                    # Single pre-formatted argument: prints the same text as
                    # the former multi-item print statement.
                    print("Job reached max allocated time:  %s  ===>  %s / %s"
                          % (proc.arg, proc.stime, limit))
                    proc2kill = ProcessTree(user, detail=True, xinfo=None, show=True)
                    if listOnly:
                        proc2kill.updateSelected(None, None, p)
                    else:
                        proc2kill.killSelected(None, None, p)
        time.sleep(60 * 10)
        # Refresh the snapshot that the next updateSelected() call works on.
        procs.allProcesses(user)
    return
def startIBCancellationWatcher(pid, request_id):
    """Kill the tree of ``pid`` once request ``request_id`` is cancelled or failed.

    Once a minute the process tree for ``pid`` is re-selected.  If nothing is
    selected any more the watcher exits.  Otherwise the request data is
    fetched through ``tagCollectorAPI.getRequestData``; when its ``'state'``
    is ``"Cancelled"`` or ``"Failed"``, ``killSelected`` is called for ``pid``
    on a fresh ``ProcessTree`` and the watcher exits.

    Any ordinary exception raised during a pass is reported with a generic
    message and the loop carries on with the next pass.

    Args:
        pid: Process id whose tree is watched.
        request_id: Identifier passed to ``tagCollectorAPI.getRequestData``.

    Returns:
        None.
    """
    print("IBCancellationWatcher started with pid %s and request_id %s"
          % (str(pid), str(request_id)))
    procs = ProcessTree(user=None, detail=True, xinfo=None, show=False)
    while True:
        time.sleep(60)
        try:
            procs.updateSelected(None, None, pid)
            if len(procs.selectedTree) == 0:
                print("PID %s doesnt's exist anymore, exiting..." % str(pid))
                break
            # A failing lookup goes straight to the handler below.  It used to
            # be swallowed first, leaving ``data`` as None so that the state
            # check raised an unrelated TypeError before reaching that handler.
            data = tagCollectorAPI.getRequestData(request_id)
            if data['state'] in ("Cancelled", "Failed"):
                print("Found status Cancelled or Failed for request %s" % str(request_id))
                proc2kill = ProcessTree(user=None, detail=True, xinfo=None, show=True)
                proc2kill.killSelected(None, None, pid)
                break
            # NOTE(review): this refresh is skipped whenever the pass raises,
            # so the next pass reuses the previous snapshot -- confirm intended.
            procs.allProcesses(None)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
            # and SystemExit can still stop this otherwise endless loop.
            print("Error: An exception occured while checking cancelled status")
    return
# NOTE(review): another ``def WatchAndKill`` appears later in this file and
# rebinds the name at import time -- confirm this copy is still needed.
def WatchAndKill(pid, user=None, listOnly=False):
    """Poll the process tree selected for ``pid`` and act on overrunning jobs.

    On every pass the tree for ``pid`` is re-selected and the command line
    (``proc.arg``) of each selected process is matched against a table of
    regular expressions.  Each pattern carries a limit and a "kill child
    only" flag.  When a pattern matches and the process ``stime`` is greater
    than the limit, ``killSelected`` is called on a fresh ``ProcessTree``:
    for every entry of ``proc.child`` when the flag is set, otherwise for the
    process itself.  The loop then sleeps ten seconds and refreshes the
    process snapshot.

    The loop stops when nothing is selected for ``pid`` any more, or when the
    ``stime`` of ``pid`` is not greater than the value seen on the previous
    pass.

    Args:
        pid: Process id whose tree is watched.
        user: Forwarded to ``ProcessTree`` and ``allProcesses``.
        listOnly: When true, ``updateSelected`` is called instead of
            ``killSelected`` (presumably only displaying the offending tree,
            since that ``ProcessTree`` is built with ``show=True`` -- confirm).

    Returns:
        None.
    """
    print("WatchAndKill started with pid " + str(pid))
    # Command-line regex -> (largest tolerated ``stime``, kill-child-only).
    # The limits are presumably seconds -- TODO confirm against ProcessTree.
    # Raw strings keep ``\.`` and ``\s`` as regex escapes instead of invalid
    # string escapes.
    procChk = {
        r"^cmsRun ": (60 * 60 * 4, False),
        r"/testing\.log;\s*fi\s*$": (60 * 30, False),
        r"python\s+whiteRabbit.py\s+": (60 * 60 * 2, False),
        r"/runTheMatrix.py\s+": (60 * 60 * 8, True),  # Kill child only
        r"/IB/runTests.py\s+": (60 * 60 * 12, True),  # Kill child only
    }
    procs = ProcessTree(user, detail=True, xinfo=None, show=False)
    startTime = -1
    while True:
        procs.updateSelected(None, None, pid)
        # Stop once the watched tree is gone or its stime stopped growing.
        if (len(procs.selectedTree) == 0) or (procs.all[pid].stime <= startTime):
            break
        startTime = procs.all[pid].stime
        for p in procs.selectedTree:
            proc = procs.all[p]
            for cmd, (limit, childOnly) in procChk.items():
                if re.search(cmd, proc.arg) and (proc.stime > limit):
                    # Single pre-formatted argument: prints the same text as
                    # the former multi-item print statement.
                    print("Job reached max allocated time:  %s  ===>  %s / %s"
                          % (proc.arg, proc.stime, limit))
                    proc2kill = ProcessTree(user, detail=True, xinfo=None, show=True)
                    if listOnly:
                        proc2kill.updateSelected(None, None, p)
                    elif childOnly:
                        # Leave the matching process alone; act on its children.
                        for cp in proc.child:
                            proc2kill.killSelected(None, None, cp)
                    else:
                        proc2kill.killSelected(None, None, p)
        time.sleep(10)
        # Refresh the snapshot that the next updateSelected() call works on.
        procs.allProcesses(user)
    return
def WatchAndKill(pid, user=None, listOnly=False):
    """Watch the process tree of ``pid`` and kill jobs that exceed their limit.

    The function loops over these steps:

    1. Re-select the tree for ``pid``; leave the loop when the selection is
       empty or when the ``stime`` of ``pid`` did not grow since the last pass.
    2. For each selected process, test its command line (``proc.arg``)
       against every pattern in the limit table.  On a match whose limit is
       exceeded by ``proc.stime``, build a fresh ``ProcessTree`` and call
       ``killSelected`` -- on each entry of ``proc.child`` for "kill child
       only" patterns, on the process itself otherwise.
    3. Sleep ten seconds and refresh the process snapshot.

    Args:
        pid: Process id whose tree is watched.
        user: Forwarded to ``ProcessTree`` and ``allProcesses``.
        listOnly: When true, ``updateSelected`` is called instead of
            ``killSelected`` (presumably only displaying the offending tree,
            since that ``ProcessTree`` is built with ``show=True`` -- confirm).

    Returns:
        None.
    """
    print("WatchAndKill started with pid " + str(pid))
    # Command-line regex -> (largest tolerated ``stime``, kill-child-only).
    # The limits are presumably seconds -- TODO confirm against ProcessTree.
    # Raw strings keep ``\.`` and ``\s`` as regex escapes instead of invalid
    # string escapes.
    procChk = {
        r'^cmsRun ': (60 * 60 * 4, False),
        r'/testing\.log;\s*fi\s*$': (60 * 30, False),
        r'python\s+whiteRabbit.py\s+': (60 * 60 * 2, False),
        r'/runTheMatrix.py\s+': (60 * 60 * 8, True),  # Kill child only
        r'/IB/runTests.py\s+': (60 * 60 * 12, True),  # Kill child only
    }
    procs = ProcessTree(user, detail=True, xinfo=None, show=False)
    startTime = -1
    while True:
        procs.updateSelected(None, None, pid)
        # Stop once the watched tree is gone or its stime stopped growing.
        if (len(procs.selectedTree) == 0) or (procs.all[pid].stime <= startTime):
            break
        startTime = procs.all[pid].stime
        for p in procs.selectedTree:
            proc = procs.all[p]
            for cmd, (limit, childOnly) in procChk.items():
                if not (re.search(cmd, proc.arg) and (proc.stime > limit)):
                    continue
                # Single pre-formatted argument: prints the same text as the
                # former multi-item print statement.
                print("Job reached max allocated time:  %s  ===>  %s / %s"
                      % (proc.arg, proc.stime, limit))
                proc2kill = ProcessTree(user, detail=True, xinfo=None, show=True)
                if listOnly:
                    proc2kill.updateSelected(None, None, p)
                elif childOnly:
                    # Leave the matching process alone; act on its children.
                    for cp in proc.child:
                        proc2kill.killSelected(None, None, cp)
                else:
                    proc2kill.killSelected(None, None, p)
        time.sleep(10)
        # Refresh the snapshot that the next updateSelected() call works on.
        procs.allProcesses(user)
    return