def test_FullChain(self): """This test will - call all the WMSClient methods that will end up calling all the JobManager service methods - use the JobMonitoring to verify few properties - call the JobCleaningAgent to eliminate job entries from the DBs """ wmsClient = WMSClient() jobMonitor = JobMonitoringClient() jobStateUpdate = JobStateUpdateClient() # create the job job = helloWorldJob() jobDescription = createFile(job) # submit the job res = wmsClient.submitJob(job._toJDL(xmlFile=jobDescription)) self.assertTrue(res["OK"], res.get("Message")) self.assertTrue(isinstance(res["Value"], int), msg="Got %s" % type(res["Value"])) self.assertEqual(res["Value"], res["JobID"], msg="Got %s, expected %s" % (str(res["Value"]), res["JobID"])) jobID = res["JobID"] jobID = res["Value"] # updating the status res = jobStateUpdate.setJobStatus(jobID, JobStatus.RUNNING, "Executing Minchiapp", "source") self.assertTrue(res["OK"], res.get("Message")) # reset the job res = wmsClient.resetJob(jobID) self.assertTrue(res["OK"], res.get("Message")) # reschedule the job res = wmsClient.rescheduleJob(jobID) self.assertTrue(res["OK"], res.get("Message")) res = jobMonitor.getJobsStatus(jobID) self.assertTrue(res["OK"], res.get("Message")) self.assertEqual(res["Value"][jobID]["Status"], JobStatus.RECEIVED, msg="Got %s" % str(res["Value"])) res = jobMonitor.getJobsMinorStatus([jobID]) self.assertTrue(res["OK"], res.get("Message")) self.assertEqual(res["Value"], {jobID: { "MinorStatus": "Job Rescheduled" }}, msg="Got %s" % str(res["Value"])) res = jobMonitor.getJobsApplicationStatus([jobID]) self.assertTrue(res["OK"], res.get("Message")) self.assertEqual(res["Value"], {jobID: { "ApplicationStatus": "Unknown" }}, msg="Got %s" % str(res["Value"])) res = jobMonitor.getJobsStates([jobID]) self.assertTrue(res["OK"], res.get("Message")) self.assertEqual( res["Value"], { jobID: { "Status": JobStatus.RECEIVED, "MinorStatus": "Job Rescheduled", "ApplicationStatus": "Unknown" } }, msg="Got %s" % str(res["Value"]), ) # updating the status again res = jobStateUpdate.setJobStatus(jobID, JobStatus.CHECKING, "checking", "source") self.assertTrue(res["OK"], res.get("Message")) res = jobStateUpdate.setJobStatus(jobID, JobStatus.WAITING, "waiting", "source") self.assertTrue(res["OK"], res.get("Message")) res = jobStateUpdate.setJobStatus(jobID, JobStatus.MATCHED, "matched", "source") self.assertTrue(res["OK"], res.get("Message")) # kill the job res = wmsClient.killJob(jobID) self.assertTrue(res["OK"], res.get("Message")) res = jobMonitor.getJobsStatus(jobID) self.assertTrue(res["OK"], res.get("Message")) self.assertEqual(res["Value"][jobID]["Status"], JobStatus.KILLED, msg="Got %s" % str(res["Value"])) # delete the job - this will just set its status to "deleted" res = wmsClient.deleteJob(jobID) self.assertTrue(res["OK"], res.get("Message")) res = jobMonitor.getJobsStatus(jobID) self.assertTrue(res["OK"], res.get("Message")) self.assertEqual(res["Value"][jobID]["Status"], JobStatus.DELETED, msg="Got %s" % str(res["Value"]))
def main(): site = "BOINC.World.org" status = ["Running"] minorStatus = None workerNodes = None since = None date = "today" full = False until = None batchIDs = None Script.registerSwitch("", "Site=", " Select site (default: %s)" % site) Script.registerSwitch("", "Status=", " Select status (default: %s)" % status) Script.registerSwitch("", "MinorStatus=", " Select minor status") Script.registerSwitch("", "WorkerNode=", " Select WN") Script.registerSwitch("", "BatchID=", " Select batch jobID") Script.registerSwitch( "", "Since=", " Date since when to select jobs, or number of days (default: today)" ) Script.registerSwitch("", "Date=", " Specify the date (check for a full day)") Script.registerSwitch( "", "Full", " Printout full list of job (default: False except if --WorkerNode)") Script.parseCommandLine() from DIRAC import gLogger from DIRAC.Interfaces.API.Dirac import Dirac from DIRAC.WorkloadManagementSystem.Client.JobMonitoringClient import JobMonitoringClient switches = Script.getUnprocessedSwitches() for switch in switches: if switch[0] == "Site": site = switch[1] elif switch[0] == "MinorStatus": minorStatus = switch[1] elif switch[0] == "Status": if switch[1].lower() == "all": status = [None] else: status = switch[1].split(",") elif switch[0] == "WorkerNode": workerNodes = switch[1].split(",") elif switch[0] == "BatchID": try: batchIDs = [int(id) for id in switch[1].split(",")] except Exception: gLogger.error("Invalid jobID", switch[1]) DIRAC.exit(1) elif switch[0] == "Full": full = True elif switch[0] == "Date": since = switch[1].split()[0] until = str( datetime.datetime.strptime(since, "%Y-%m-%d") + datetime.timedelta(days=1)).split()[0] elif switch[0] == "Since": date = switch[1].lower() if date == "today": since = None elif date == "yesterday": since = 1 elif date == "ever": since = 2 * 365 elif date.isdigit(): since = int(date) date += " days" else: since = date if isinstance(since, int): since = str(datetime.datetime.now() - datetime.timedelta(days=since)).split()[0] if workerNodes or batchIDs: # status = [None] full = True monitoring = JobMonitoringClient() dirac = Dirac() # Get jobs according to selection jobs = set() for stat in status: res = dirac.selectJobs(site=site, date=since, status=stat, minorStatus=minorStatus) if not res["OK"]: gLogger.error("Error selecting jobs", res["Message"]) DIRAC.exit(1) allJobs = set(int(job) for job in res["Value"]) if until: res = dirac.selectJobs(site=site, date=until, status=stat) if not res["OK"]: gLogger.error("Error selecting jobs", res["Message"]) DIRAC.exit(1) allJobs -= set(int(job) for job in res["Value"]) jobs.update(allJobs) if not jobs: gLogger.always("No jobs found...") DIRAC.exit(0) # res = monitoring.getJobsSummary( jobs ) # print eval( res['Value'] )[jobs[0]] allJobs = set() result = {} wnJobs = {} gLogger.always("%d jobs found" % len(jobs)) # Get host name for job in jobs: res = monitoring.getJobParameter(job, "HostName") node = res.get("Value", {}).get("HostName", "Unknown") res = monitoring.getJobParameter(job, "LocalJobID") batchID = res.get("Value", {}).get("LocalJobID", "Unknown") if workerNodes: if not [wn for wn in workerNodes if node.startswith(wn)]: continue allJobs.add(job) if batchIDs: if batchID not in batchIDs: continue allJobs.add(job) if full or status == [None]: allJobs.add(job) result.setdefault(job, {})["Status"] = status result[job]["Node"] = node result[job]["LocalJobID"] = batchID wnJobs[node] = wnJobs.setdefault(node, 0) + 1 # If necessary get jobs' status statusCounters = {} if allJobs: allJobs = sorted(allJobs, reverse=True) res = monitoring.getJobsStates(allJobs) if not res["OK"]: gLogger.error("Error getting job parameter", res["Message"]) else: jobStates = res["Value"] for job in allJobs: stat = ( jobStates.get(job, {}).get("Status", "Unknown") + "; " + jobStates.get(job, {}).get("MinorStatus", "Unknown") + "; " + jobStates.get(job, {}).get("ApplicationStatus", "Unknown")) result[job]["Status"] = stat statusCounters[stat] = statusCounters.setdefault(stat, 0) + 1 elif not workerNodes and not batchIDs: allJobs = sorted(jobs, reverse=True) # Print out result if workerNodes or batchIDs: gLogger.always("Found %d jobs at %s, WN %s (since %s):" % (len(allJobs), site, workerNodes, date)) if allJobs: gLogger.always("List of jobs:", ",".join([str(job) for job in allJobs])) else: if status == [None]: gLogger.always("Found %d jobs at %s (since %s):" % (len(allJobs), site, date)) for stat in sorted(statusCounters): gLogger.always("%d jobs %s" % (statusCounters[stat], stat)) else: gLogger.always("Found %d jobs %s at %s (since %s):" % (len(allJobs), status, site, date)) gLogger.always( "List of WNs:", ",".join([ "%s (%d)" % (node, wnJobs[node]) for node in sorted( wnJobs, key=cmp_to_key(lambda n1, n2: (wnJobs[n2] - wnJobs[n1]))) ]), ) if full: if workerNodes or batchIDs: nodeJobs = {} for job in allJobs: status = result[job]["Status"] node = result[job]["Node"].split(".")[0] jobID = result[job].get("LocalJobID") nodeJobs.setdefault(node, []).append((jobID, job, status)) if not workerNodes: workerNodes = sorted(nodeJobs) for node in workerNodes: for job in nodeJobs.get(node.split(".")[0], []): gLogger.always("%s " % node + "(%s): %s - %s" % job) else: for job in allJobs: status = result[job]["Status"] node = result[job]["Node"] jobID = result[job].get("LocalJobID") gLogger.always("%s (%s): %s - %s" % (node, jobID, job, status))