def test_ParametricChain(self):
    """Submit a parametric job and follow the three jobs it is expected to generate.

    The generated job names are checked, every job is set to 'Done', all
    jobs are deleted in one call and each one must then report 'Deleted'.
    """
    submitter = WMSClient()
    stateUpdater = JobStateUpdateClient()
    monitor = JobMonitoringClient()

    # build the parametric job and the file describing it
    parametric = parametricJob()
    descriptionFile = createFile(parametric)

    # the submission is expected to hand back three job IDs
    outcome = submitter.submitJob(parametric._toJDL(xmlFile=descriptionFile))
    self.assertTrue(outcome['OK'])
    jobIDList = outcome['Value']
    self.assertEqual(len(jobIDList), 3)

    # compare the reported job names with the expected parametric names
    outcome = monitor.getJobsParameters(jobIDList, ['JobName'])
    self.assertTrue(outcome['OK'])
    perJob = outcome['Value']
    foundNames = set(perJob[jobID]['JobName'] for jobID in perJob)
    wantedNames = set('parametric_helloWorld_%s' % index for index in range(3))
    self.assertEqual(foundNames, wantedNames)

    for jobID in jobIDList:
        outcome = stateUpdater.setJobStatus(jobID, 'Done', 'matching', 'source')
        self.assertTrue(outcome['OK'])

    outcome = submitter.deleteJob(jobIDList)
    self.assertTrue(outcome['OK'])

    for jobID in jobIDList:
        outcome = monitor.getJobStatus(jobID)
        self.assertTrue(outcome['OK'])
        self.assertEqual(outcome['Value'], 'Deleted')
def __report(self, jobID, status, minorStatus):
    """Forward a job status to setJobStatus of the state update client.

    'JobAgent@<siteName>' is passed as the fourth argument. A failed call
    is only logged as a warning; the client's result is returned as is.
    """
    reporter = 'JobAgent@%s' % self.siteName
    outcome = JobStateUpdateClient().setJobStatus(int(jobID), status, minorStatus, reporter)

    callTrace = 'setJobStatus(%s,%s,%s,%s)' % (jobID, status, minorStatus, reporter)
    self.log.verbose('Setting job status', callTrace)

    if outcome['OK']:
        return outcome
    self.log.warn('Issue setting the job status', outcome['Message'])
    return outcome
def __setJobParam(self, jobID, name, value):
    """Forward one job parameter to setJobParameter of the state update client.

    The job ID is sent as an int, name and value as strings. A failed call
    is only logged as a warning; the client's result is returned as is.
    """
    outcome = JobStateUpdateClient().setJobParameter(int(jobID), str(name), str(value))

    callTrace = 'setJobParameter(%s,%s,%s)' % (jobID, name, value)
    self.log.verbose('Setting job parameter', callTrace)

    if outcome['OK']:
        return outcome
    self.log.warn('Issue setting the job parameter', outcome['Message'])
    return outcome
def __setJobParamList(self, value):
    """Forward `value` to setJobParameters of the state update client.

    The job ID is read from the JOBID environment variable; when it is not
    set nothing is reported and S_OK() is returned.
    """
    # job wrapper template sets the jobID variable
    try:
        jobID = os.environ["JOBID"]
    except KeyError:
        self.log.info("Running without JOBID so parameters will not be reported")
        return S_OK()

    outcome = JobStateUpdateClient().setJobParameters(int(jobID), value)
    self.log.verbose("setJobParameters(%s,%s)" % (jobID, value))

    if outcome["OK"]:
        return outcome
    self.log.warn(outcome["Message"])
    return outcome
def __sendSignOfLife(self, jobID, heartBeatDict, staticParamDict):
    """Send the heartbeat through sendHeartBeat of the state update client.

    A non-empty value in a successful reply is handed to
    __interpretControlSignal; a failure is logged as a warning. The
    client's result is returned in both cases.
    """
    outcome = JobStateUpdateClient().sendHeartBeat(jobID, heartBeatDict, staticParamDict)

    if outcome["OK"]:
        if outcome["Value"]:
            self.__interpretControlSignal(outcome["Value"])
    else:
        self.log.warn("Problem sending sign of life")
        self.log.warn(outcome)

    return outcome
def test_ParametricChain(self):
    """Submit a parametric job and follow the three jobs it is expected to generate.

    The generated job names are checked, every job is set to
    JobStatus.CHECKING, all jobs are deleted in one call and each one must
    then report JobStatus.DELETED.
    """

    def expectOK(reply):
        # fail with the service message when the call did not succeed
        self.assertTrue(reply["OK"], reply.get("Message"))
        return reply

    submitter = WMSClient()
    stateUpdater = JobStateUpdateClient()
    monitor = JobMonitoringClient()

    # build the parametric job and the file describing it
    parametric = parametricJob()
    descriptionFile = createFile(parametric)

    # the submission is expected to hand back three job IDs
    reply = expectOK(submitter.submitJob(parametric._toJDL(xmlFile=descriptionFile)))
    jobIDList = reply["Value"]
    self.assertEqual(len(jobIDList), 3, msg="Got %s" % str(jobIDList))

    reply = expectOK(monitor.getJobsParameters(jobIDList, ["JobName"]))
    perJob = reply["Value"]
    foundNames = set(perJob[jobID]["JobName"] for jobID in perJob)
    wantedNames = set("parametric_helloWorld_%s" % index for index in range(3))
    self.assertEqual(foundNames, wantedNames)

    for jobID in jobIDList:
        expectOK(stateUpdater.setJobStatus(jobID, JobStatus.CHECKING, "checking", "source"))

    reply = expectOK(submitter.deleteJob(jobIDList))
    print(reply)

    for jobID in jobIDList:
        reply = expectOK(monitor.getJobsStatus(jobID))
        self.assertEqual(
            reply["Value"][jobID]["Status"],
            JobStatus.DELETED,
            msg="Got %s" % str(reply["Value"]),
        )
def test_ParametricChain(self):
    """Check the chain submit -> set 'Done' -> delete for a parametric job.

    The submission must return three job IDs whose names are
    parametric_helloWorld_0..2; after deletion every job must report
    'Deleted'.
    """

    def expectOK(reply):
        # every service call in this test has to succeed
        self.assertTrue(reply['OK'])
        return reply

    wms = WMSClient()
    updater = JobStateUpdateClient()
    monitoring = JobMonitoringClient()

    # create the job
    paramJob = parametricJob()
    xmlPath = createFile(paramJob)

    # submit the job
    jobIDList = expectOK(wms.submitJob(paramJob._toJDL(xmlFile=xmlPath)))['Value']
    self.assertEqual(len(jobIDList), 3)

    namesReply = expectOK(monitoring.getJobsParameters(jobIDList, ['JobName']))
    reportedNames = set()
    for jobID in namesReply['Value']:
        reportedNames.add(namesReply['Value'][jobID]['JobName'])
    expectedNames = set('parametric_helloWorld_%s' % nJob for nJob in range(3))
    self.assertEqual(reportedNames, expectedNames)

    for jobID in jobIDList:
        expectOK(updater.setJobStatus(jobID, 'Done', 'matching', 'source'))

    expectOK(wms.deleteJob(jobIDList))

    for jobID in jobIDList:
        statusReply = expectOK(monitoring.getJobStatus(jobID))
        self.assertEqual(statusReply['Value'], 'Deleted')
def __setJobParamList(self, value):
    """Send `value` through setJobParameters of the state update client.

    Without a JOBID environment variable nothing is sent and S_OK() is
    returned; otherwise the client's result is returned, with a warning
    logged when it is not OK.
    """
    # job wrapper template sets the jobID variable
    jobID = os.environ.get('JOBID')
    if jobID is None:
        self.log.info('Running without JOBID so parameters will not be reported')
        return S_OK()

    outcome = JobStateUpdateClient().setJobParameters(int(jobID), value)
    self.log.verbose('setJobParameters(%s,%s)' % (jobID, value))

    succeeded = outcome['OK']
    if not succeeded:
        self.log.warn(outcome['Message'])
    return outcome
def __setJobParam(self, name, value):
    """Send one parameter of self.jobID through setJobParameter of the state update client.

    Returns S_ERROR when self.jobID is not set. Otherwise the client's
    result is returned, with a warning logged when it is not OK.
    """
    jobID = self.jobID
    if not jobID:
        return S_ERROR("JobID not defined")

    # the call is traced before it is issued
    callTrace = "setJobParameter(%s,%s,%s)" % (jobID, name, value)
    self.log.verbose("setting job parameters", callTrace)

    outcome = JobStateUpdateClient().setJobParameter(int(jobID), str(name), str(value))
    if outcome["OK"]:
        return outcome
    self.log.warn("Failed to set job parameters", outcome["Message"])
    return outcome
def __setJobParam(self, name, value):
    """Send one parameter of self.jobID through setJobParameter of the state update client.

    Returns S_ERROR when self.jobID is not set. Otherwise the client's
    result is returned, with its message logged as a warning when it is
    not OK.
    """
    jobID = self.jobID
    if not jobID:
        return S_ERROR('JobID not defined')

    outcome = JobStateUpdateClient().setJobParameter(int(jobID), str(name), str(value))
    self.log.verbose('setJobParameter(%s,%s,%s)' % (jobID, name, value))

    succeeded = outcome['OK']
    if not succeeded:
        self.log.warn(outcome['Message'])
    return outcome
def __init__(self, transformationID, transInfoDict, enabled, tClient, fcClient, jobMon):
    """Keep the transformation details and the clients passed in.

    `transInfoDict` must provide the keys 'TransformationName', 'Type',
    'AuthorDN' and 'AuthorGroup'. A JobStateUpdateClient is created here.
    """
    # sub-logger named after this module and the transformation ID
    loggerName = __name__ + "[%s]" % transformationID
    self.log = gLogger.getSubLogger(loggerName)

    # values read from the transformation description
    transName = transInfoDict['TransformationName']
    transType = transInfoDict['Type']
    authorDN = transInfoDict['AuthorDN']
    authorGroup = transInfoDict['AuthorGroup']

    self.tID = transformationID
    self.enabled = enabled
    self.transName = transName
    self.transType = transType
    self.authorDN = authorDN
    self.authorGroup = authorGroup

    # clients handed in by the caller
    self.tClient = tClient
    self.fcClient = fcClient
    self.jobMon = jobMon

    self.jobStateClient = JobStateUpdateClient()
def __call__(self):
    """Call updateJobFromStager with 'Done' for the job named in the operation arguments.

    On success the operation status is set to "Done" and S_OK() is
    returned; on failure the error is logged and the failed result is
    returned.
    """
    # the operation arguments hold the job ID
    jobID = self.operation.Arguments
    self.log.info("Performing callback to job %s" % jobID)

    outcome = JobStateUpdateClient().updateJobFromStager(jobID, 'Done')
    if outcome['OK']:
        self.operation.Status = "Done"
        self.log.info("Callback from staging done")
        return S_OK()

    self.log.error("Error performing the callback to the job", outcome)
    return outcome
def sendStoredJobParameters(self):
    """Send the cached job parameters through setJobParameters.

    Only the first element of each cached value is sent. The cache is
    emptied after a successful call. Returns S_OK('Empty') when there is
    nothing to send, otherwise the client's result.
    """
    payload = []
    for pname, stored in self.jobParameters.items():
        payload.append([pname, stored[0]])

    if not payload:
        return S_OK('Empty')

    outcome = JobStateUpdateClient().setJobParameters(self.jobID, payload)
    if outcome['OK']:
        # Empty the internal parameter container
        self.jobParameters = {}
    return outcome
def test_matcher(self):
    """Submit a job, put it in a task queue and request it through the Matcher."""
    # insert a proper DN to run the test
    # NOTE(review): the '[email protected]' fragment in both DN strings looks like an
    # e-mail obfuscation artifact rather than a real DN component - confirm the value.
    pilotDescription = {
        'OwnerGroup': 'prod',
        'OwnerDN': '/C=ch/O=DIRAC/OU=DIRAC CI/CN=ciuser/[email protected]',
        'DIRACVersion': 'pippo',
        'ReleaseVersion': 'blabla',
        'VirtualOrganization': 'LHCb',
        'PilotInfoReportedFlag': 'True',
        'PilotBenchmark': 'anotherPilot',
        'Site': 'DIRAC.Jenkins.ch',
        'CPUTime': 86400,
    }

    submitter = WMSClient()

    # hello-world job bound to the test site
    job = helloWorldJob()
    job.setDestination('DIRAC.Jenkins.ch')
    job.setInputData('/a/bbb')
    job.setType('User')
    descriptionFile = createFile(job)

    outcome = submitter.submitJob(job._toJDL(xmlFile=descriptionFile))
    self.assertTrue(outcome['OK'])
    jobID = outcome['Value']

    outcome = JobStateUpdateClient().setJobStatus(jobID, 'Waiting', 'matching', 'source')
    self.assertTrue(outcome['OK'])

    # insert the job into a task queue whose definition fits the description above
    taskQueues = TaskQueueDB()
    queueDefinition = {
        'OwnerDN': '/C=ch/O=DIRAC/OU=DIRAC CI/CN=ciuser/[email protected]',
        'OwnerGroup': 'prod',
        'Setup': 'dirac-JenkinsSetup',
        'CPUTime': 86400,
    }
    outcome = taskQueues.insertJob(jobID, queueDefinition, 10)
    self.assertTrue(outcome['OK'])

    outcome = MatcherClient().requestJob(pilotDescription)
    print(outcome)
    self.assertTrue(outcome['OK'])

    submitter.deleteJob(jobID)
def sendStoredJobParameters(self):
    """Send the cached job parameters through setJobParameters.

    Each cached value is unpacked as a (value, timestamp) pair and only the
    value is sent. The cache is emptied after a successful call. Returns
    S_OK('Empty') when there is nothing to send, otherwise the client's
    result.
    """
    payload = [[pname, pvalue] for pname, (pvalue, _timeStamp) in self.jobParameters.items()]

    if not payload:
        return S_OK('Empty')

    outcome = JobStateUpdateClient().setJobParameters(self.jobID, payload)
    if outcome['OK']:
        # Empty the internal parameter container
        self.jobParameters = {}
    return outcome
def test_matcher(self):
    """Submit a job, force it to Waiting, queue it and request it through the Matcher."""

    def expectOK(reply):
        # fail with the service message when the call did not succeed
        self.assertTrue(reply["OK"], reply.get("Message"))
        return reply

    # insert a proper DN to run the test
    pilotDescription = {
        "OwnerGroup": "prod",
        "OwnerDN": "/C=ch/O=DIRAC/OU=DIRAC CI/CN=ciuser",
        "DIRACVersion": "pippo",
        "GridCE": "some.grid.ce.org",
        "ReleaseVersion": "blabla",
        "VirtualOrganization": "LHCb",
        "PilotInfoReportedFlag": "True",
        "PilotBenchmark": "anotherPilot",
        "Site": "DIRAC.Jenkins.ch",
        "CPUTime": 86400,
    }

    submitter = WMSClient()

    # hello-world job bound to the test site
    job = helloWorldJob()
    job.setDestination("DIRAC.Jenkins.ch")
    job.setInputData("/a/bbb")
    job.setType("User")
    descriptionFile = createFile(job)

    jobID = expectOK(submitter.submitJob(job._toJDL(xmlFile=descriptionFile)))["Value"]

    # forcing the update
    expectOK(JobStateUpdateClient().setJobStatus(jobID, JobStatus.WAITING, "matching", "source", None, True))

    # insert the job into a task queue whose definition fits the description above
    taskQueues = TaskQueueDB()
    queueDefinition = {
        "OwnerDN": "/C=ch/O=DIRAC/OU=DIRAC CI/CN=ciuser",
        "OwnerGroup": "prod",
        "Setup": "dirac-JenkinsSetup",
        "CPUTime": 86400,
    }
    expectOK(taskQueues.insertJob(jobID, queueDefinition, 10))

    matched = MatcherClient().requestJob(pilotDescription)
    print(matched)
    expectOK(matched)

    submitter.deleteJob(jobID)
def test_FullChain(self):
    """Drive one job through submit, reset, reschedule, kill and delete.

    After each WMSClient action the status reported by the JobMonitoring
    client is compared with the expected one; in between, the status is
    changed through the JobStateUpdate client.
    """
    submitter = WMSClient()
    monitor = JobMonitoringClient()
    stateUpdater = JobStateUpdateClient()

    def expectOK(reply):
        self.assertTrue(reply['OK'])
        return reply

    def expectStatus(expected):
        # the monitoring service must report exactly this status for the job
        reply = expectOK(monitor.getJobStatus(jobID))
        self.assertEqual(reply['Value'], expected)

    # create the job
    job = helloWorldJob()
    descriptionFile = createFile(job)

    # submit the job: 'Value' and 'JobID' must carry the same integer ID
    submission = expectOK(submitter.submitJob(job._toJDL(xmlFile=descriptionFile)))
    self.assertTrue(isinstance(submission['Value'], int))
    self.assertEqual(submission['Value'], submission['JobID'])
    jobID = submission['Value']

    # updating the status
    stateUpdater.setJobStatus(jobID, 'Running', 'Executing Minchiapp', 'source')

    # reset the job
    expectOK(submitter.resetJob(jobID))

    # reschedule the job
    expectOK(submitter.rescheduleJob(jobID))
    expectStatus('Received')

    # updating the status again
    stateUpdater.setJobStatus(jobID, 'Matched', 'matching', 'source')

    # kill the job
    expectOK(submitter.killJob(jobID))
    expectStatus('Killed')

    # updating the status once more
    stateUpdater.setJobStatus(jobID, 'Done', 'matching', 'source')

    # kill the job
    expectOK(submitter.killJob(jobID))
    expectStatus('Done')  # this time it won't kill... it's done!

    # delete the job - this will just set its status to "deleted"
    expectOK(submitter.deleteJob(jobID))
    expectStatus('Deleted')
def test_JobStateUpdateAndJobMonitoringMultuple(self):
    """Submit several jobs with different types and inputs and query the monitoring service.

    One job is submitted for every combination of input file list and job
    type. The JobMonitoring queries are then checked, the status and a
    parameter of the last submitted job are updated, and all jobs are
    deleted.
    """

    def expectOK(reply):
        # every service call checked in this test has to succeed
        self.assertTrue(reply['OK'])
        return reply

    wmsClient = WMSClient()
    jobMonitor = JobMonitoringClient()
    jobStateUpdate = JobStateUpdateClient()

    jobIDs = []
    lfnss = [['/a/1.txt', '/a/2.txt'], ['/a/1.txt', '/a/3.txt', '/a/4.txt'], []]
    types = ['User', 'Test']
    for lfns in lfnss:
        for jobType in types:
            job = helloWorldJob()
            job.setDestination('DIRAC.Jenkins.ch')
            job.setInputData(lfns)
            job.setType(jobType)
            jobDescription = createFile(job)
            res = expectOK(wmsClient.submitJob(job._toJDL(xmlFile=jobDescription)))
            jobID = res['Value']
            jobIDs.append(jobID)

    res = expectOK(jobMonitor.getSites())
    self.assertTrue(set(res['Value']) <= {'ANY', 'DIRAC.Jenkins.ch'})

    res = expectOK(jobMonitor.getJobTypes())
    self.assertEqual(sorted(res['Value']), sorted(types))

    res = expectOK(jobMonitor.getApplicationStates())
    self.assertEqual(sorted(res['Value']), sorted(['Unknown']))

    expectOK(jobMonitor.getOwners())
    expectOK(jobMonitor.getOwnerGroup())
    expectOK(jobMonitor.getProductionIds())
    expectOK(jobMonitor.getJobGroups())

    res = expectOK(jobMonitor.getStates())
    self.assertTrue(sorted(res['Value']) in [['Received'], sorted(['Received', 'Waiting'])])

    res = expectOK(jobMonitor.getMinorStates())
    self.assertTrue(sorted(res['Value']) in [['Job accepted'], sorted(['Job accepted', 'Job Rescheduled'])])

    res = expectOK(jobMonitor.getJobs())
    self.assertTrue(set(str(x) for x in jobIDs) <= set(res['Value']))

    # res = jobMonitor.getCounters(attrList)
    # self.assertTrue(res['OK'])

    res = expectOK(jobMonitor.getCurrentJobCounters())
    try:
        # A plain int is used for the expected count: the Python-2-only
        # long() builtin raises NameError on Python 3.
        self.assertTrue(res['Value'].get('Received') + res['Value'].get('Waiting') >= len(lfnss) * len(types))
    except TypeError:
        # a missing counter yields None and the addition raises TypeError;
        # the check is deliberately skipped in that case
        pass

    expectOK(jobMonitor.getJobsSummary(jobIDs))
    expectOK(jobMonitor.getJobPageSummaryWeb({}, [], 0, 100))

    # jobID still refers to the last job submitted in the loops above
    expectOK(jobStateUpdate.setJobStatusBulk(
        jobID,
        {str(datetime.datetime.utcnow()): {'Status': 'Running',
                                           'MinorStatus': 'MinorStatus',
                                           'ApplicationStatus': 'ApplicationStatus',
                                           'Source': 'Unknown'}}))
    expectOK(jobStateUpdate.setJobsParameter({jobID: ['Status', 'Running']}))

    # delete the jobs - this will just set its status to "deleted"
    wmsClient.deleteJob(jobIDs)
def test_JobStateUpdateAndJobMonitoring(self):
    """Exercise the JobStateUpdate setters and the JobMonitoring getters on one job.

    A hello-world job is submitted, its status, parameters, application
    status and site are set, and the values reported by the monitoring
    client are compared with the expected ones. The job is deleted at the
    end.
    """

    def expectOK(reply):
        # every service call checked in this test has to succeed
        self.assertTrue(reply['OK'])
        return reply

    submitter = WMSClient()
    monitor = JobMonitoringClient()
    stateUpdater = JobStateUpdateClient()

    # create a job and check stuff
    job = helloWorldJob()
    descriptionFile = createFile(job)

    # submitting the job. Checking few stuff
    jobID = int(expectOK(submitter.submitJob(job._toJDL(xmlFile=descriptionFile)))['Value'])
    # jobID = res['JobID']
    expectOK(monitor.getJobJDL(jobID, True))
    expectOK(monitor.getJobJDL(jobID, False))
    reply = expectOK(monitor.getJobsParameters([jobID], []))
    self.assertEqual(reply['Value'], {})
    expectOK(monitor.getJobsParameters([jobID], ['Owner']))

    # Adding stuff
    expectOK(stateUpdater.setJobStatus(jobID, 'Matched', 'matching', 'source'))
    expectOK(stateUpdater.setJobParameters(jobID, [('par1', 'par1Value'), ('par2', 'par2Value')]))
    expectOK(stateUpdater.setJobApplicationStatus(jobID, 'app status', 'source'))
    # res = jobStateUpdate.setJobFlag()
    # self.assertTrue(res['OK'])
    # res = jobStateUpdate.unsetJobFlag()
    # self.assertTrue(res['OK'])
    expectOK(stateUpdater.setJobSite(jobID, 'Site'))
    # res = jobMonitor.traceJobParameter( 'Site', 1, 'Status' )
    # self.assertTrue(res['OK'])

    # now checking few things
    reply = expectOK(monitor.getJobStatus(jobID))
    self.assertEqual(reply['Value'], 'Running')

    reply = expectOK(monitor.getJobParameter(jobID, 'par1'))
    self.assertEqual(reply['Value'], {'par1': 'par1Value'})

    reply = expectOK(monitor.getJobParameters(jobID))
    self.assertEqual(reply['Value'], {'par1': 'par1Value', 'par2': 'par2Value'})

    reply = expectOK(monitor.getJobAttribute(jobID, 'Site'))
    self.assertEqual(reply['Value'], 'Site')

    attributes = expectOK(monitor.getJobAttributes(jobID))['Value']
    self.assertEqual(attributes['ApplicationStatus'], 'app status')
    self.assertEqual(attributes['JobName'], 'helloWorld')

    summary = expectOK(monitor.getJobSummary(jobID))['Value']
    self.assertEqual(summary['ApplicationStatus'], 'app status')
    self.assertEqual(summary['Status'], 'Running')

    reply = expectOK(monitor.getJobHeartBeatData(jobID))
    self.assertEqual(reply['Value'], [])

    reply = expectOK(monitor.getInputData(jobID))
    self.assertEqual(reply['Value'], [])

    expectOK(monitor.getJobPrimarySummary(jobID))
    expectOK(monitor.getAtticJobParameters(jobID))

    # move the job to 'Done' and check the summary follows
    expectOK(stateUpdater.setJobsStatus([jobID], 'Done', 'MinorStatus', 'Unknown'))
    summary = expectOK(monitor.getJobSummary(jobID))['Value']
    self.assertEqual(summary['Status'], 'Done')
    self.assertEqual(summary['MinorStatus'], 'MinorStatus')
    self.assertEqual(summary['ApplicationStatus'], 'app status')

    expectOK(stateUpdater.sendHeartBeat(jobID, {'bih': 'bih'}, {'boh': 'boh'}))

    # delete the job - this will just set its status to "deleted"
    submitter.deleteJob(jobID)
def test_JobStateUpdateAndJobMonitoring(self):
    """Exercise the JobStateUpdate setters and the JobMonitoring getters on one job.

    A hello-world job is submitted and forced to JobStatus.RUNNING; its
    parameters, application status and site are set, and the values
    reported by the monitoring client are compared with the expected ones.
    The job is then set to JobStatus.DONE, a heartbeat is sent and the job
    is deleted.
    """

    def expectOK(reply):
        # fail with the service message when the call did not succeed
        self.assertTrue(reply["OK"], reply.get("Message"))
        return reply

    def got(value):
        # failure message showing the value actually received
        return "Got %s" % str(value)

    submitter = WMSClient()
    monitor = JobMonitoringClient()
    stateUpdater = JobStateUpdateClient()

    # create a job and check stuff
    job = helloWorldJob()
    descriptionFile = createFile(job)

    # submitting the job. Checking few stuff
    jobID = int(expectOK(submitter.submitJob(job._toJDL(xmlFile=descriptionFile)))["Value"])
    # jobID = res['JobID']
    expectOK(monitor.getJobJDL(jobID, True))
    expectOK(monitor.getJobJDL(jobID, False))
    expectOK(monitor.getJobsParameters([jobID], []))
    expectOK(monitor.getJobOwner(jobID))

    # Adding stuff

    # forcing the update
    expectOK(stateUpdater.setJobStatus(jobID, JobStatus.RUNNING, "running", "source", None, True))
    reply = stateUpdater.setJobParameters(jobID, [("par1", "par1Value"), ("par2", "par2Value")])
    time.sleep(5)
    expectOK(reply)
    expectOK(stateUpdater.setJobApplicationStatus(jobID, "app status", "source"))
    # res = jobStateUpdate.setJobFlag()
    # self.assertTrue(res['OK'], res.get('Message'))
    # res = jobStateUpdate.unsetJobFlag()
    # self.assertTrue(res['OK'], res.get('Message'))
    expectOK(stateUpdater.setJobSite(jobID, "Site"))

    # now checking few things
    reply = expectOK(monitor.getJobsStatus(jobID))
    self.assertEqual(reply["Value"][jobID]["Status"], JobStatus.RUNNING, msg=got(reply["Value"]))

    reply = expectOK(monitor.getJobParameter(jobID, "par1"))
    self.assertEqual(reply["Value"], {"par1": "par1Value"}, msg=got(reply["Value"]))

    reply = expectOK(monitor.getJobParameters(jobID))
    self.assertEqual(
        reply["Value"],
        {jobID: {"par1": "par1Value", "par2": "par2Value"}},
        msg=got(reply["Value"]),
    )

    reply = expectOK(monitor.getJobParameters(jobID, "par1"))
    self.assertEqual(reply["Value"], {jobID: {"par1": "par1Value"}}, msg=got(reply["Value"]))

    reply = expectOK(monitor.getJobAttribute(jobID, "Site"))
    self.assertEqual(reply["Value"], "Site", msg=got(reply["Value"]))

    attributes = expectOK(monitor.getJobAttributes(jobID))["Value"]
    self.assertEqual(attributes["ApplicationStatus"], "app status", msg=got(attributes["ApplicationStatus"]))
    self.assertEqual(attributes["JobName"], "helloWorld", msg=got(attributes["JobName"]))

    summary = expectOK(monitor.getJobSummary(jobID))["Value"]
    self.assertEqual(summary["ApplicationStatus"], "app status", msg=got(summary["ApplicationStatus"]))
    self.assertEqual(summary["Status"], JobStatus.RUNNING, msg=got(summary["Status"]))

    reply = expectOK(monitor.getJobHeartBeatData(jobID))
    self.assertEqual(reply["Value"], [], msg=got(reply["Value"]))

    reply = expectOK(monitor.getInputData(jobID))
    self.assertEqual(reply["Value"], [], msg=got(reply["Value"]))

    expectOK(monitor.getJobSummary(jobID))
    expectOK(monitor.getAtticJobParameters(jobID))

    # move the job to DONE and check the summary follows
    expectOK(stateUpdater.setJobStatus(jobID, JobStatus.DONE, "MinorStatus", "Unknown"))
    summary = expectOK(monitor.getJobSummary(jobID))["Value"]
    self.assertEqual(summary["Status"], JobStatus.DONE, msg=got(summary["Status"]))
    self.assertEqual(summary["MinorStatus"], "MinorStatus", msg=got(summary["MinorStatus"]))
    self.assertEqual(summary["ApplicationStatus"], "app status", msg=got(summary["ApplicationStatus"]))

    expectOK(stateUpdater.sendHeartBeat(jobID, {"bih": "bih"}, {"boh": "boh"}))

    # delete the job - this will just set its status to "deleted"
    submitter.deleteJob(jobID)
def test_JobStateUpdateAndJobMonitoringMultuple(self):
    """Submit several jobs with different types and inputs and query the monitoring service.

    One job is submitted for every combination of input file list and job
    type. The JobMonitoring queries are then checked, the status of the
    last submitted job is changed through setJobStatusBulk,
    setJobsParameter and setJobAttribute with the job summary verified
    after each step, and all jobs are deleted.
    """

    def expectOK(reply):
        # fail with the service message when the call did not succeed
        self.assertTrue(reply["OK"], reply.get("Message"))
        return reply

    def expectSummary(status, minorStatus=None):
        # compare the summary of the last submitted job with the expected values
        summary = expectOK(monitor.getJobSummary(int(jobID)))["Value"]
        self.assertEqual(summary["Status"], status)
        if minorStatus is not None:
            self.assertEqual(summary["MinorStatus"], minorStatus)

    submitter = WMSClient()
    monitor = JobMonitoringClient()
    stateUpdater = JobStateUpdateClient()

    jobIDs = []
    lfnss = [["/a/1.txt", "/a/2.txt"], ["/a/1.txt", "/a/3.txt", "/a/4.txt"], []]
    types = ["User", "Test"]
    for lfns in lfnss:
        for jobType in types:
            job = helloWorldJob()
            job.setDestination("DIRAC.Jenkins.ch")
            job.setInputData(lfns)
            job.setType(jobType)
            descriptionFile = createFile(job)
            reply = expectOK(submitter.submitJob(job._toJDL(xmlFile=descriptionFile)))
            jobID = reply["Value"]
            jobIDs.append(jobID)

    reply = monitor.getSites()
    print(reply)
    expectOK(reply)
    self.assertTrue(
        set(reply["Value"]) <= {"ANY", "DIRAC.Jenkins.ch", "Site"},
        msg="Got %s" % reply["Value"],
    )

    reply = expectOK(monitor.getJobTypes())
    self.assertEqual(sorted(reply["Value"]), sorted(types), msg="Got %s" % str(sorted(reply["Value"])))

    reply = expectOK(monitor.getApplicationStates())
    self.assertEqual(reply["Value"], ["app status", "Unknown"], msg="Got %s" % str(reply["Value"]))

    expectOK(monitor.getOwners())
    expectOK(monitor.getOwnerGroup())
    expectOK(monitor.getProductionIds())

    # job groups: no cut-off, cut-off now, cut-off one year ago
    groupsNoCut = expectOK(monitor.getJobGroups())["Value"]
    groupsCutNow = expectOK(monitor.getJobGroups(None, datetime.datetime.utcnow()))["Value"]
    self.assertEqual(groupsNoCut, groupsCutNow)
    oneYearAgo = datetime.datetime.utcnow() - datetime.timedelta(days=365)
    groupsCutYear = expectOK(monitor.getJobGroups(None, oneYearAgo))["Value"]
    self.assertTrue(set(groupsCutYear).issubset(set(groupsCutNow)), groupsCutYear)

    reply = expectOK(monitor.getStates())
    allowedStates = [[JobStatus.RECEIVED], sorted([JobStatus.RECEIVED, JobStatus.KILLED])]
    self.assertTrue(sorted(reply["Value"]) in allowedStates, reply["Value"])

    reply = expectOK(monitor.getMinorStates())
    allowedMinorStates = [
        ["Job accepted"],
        sorted(["Job accepted", "Job Rescheduled"]),
        sorted(["Job accepted", "Marked for termination"]),
    ]
    self.assertTrue(sorted(reply["Value"]) in allowedMinorStates, reply["Value"])
    expectOK(reply)

    reply = expectOK(monitor.getJobs())
    self.assertTrue(set(str(x) for x in jobIDs) <= set(reply["Value"]), reply["Value"])

    # res = jobMonitor.getCounters(attrList)
    # self.assertTrue(res['OK'], res.get('Message'))

    expectOK(monitor.getJobsSummary(jobIDs))
    expectOK(monitor.getJobPageSummaryWeb({}, [], 0, 100))

    # jobID still refers to the last job submitted in the loops above
    stampNow = str(datetime.datetime.utcnow())
    firstUpdate = {
        stampNow: {
            "Status": JobStatus.CHECKING,
            "MinorStatus": "MinorStatus",
            "Source": "Unknown",
        }
    }
    expectOK(stateUpdater.setJobStatusBulk(jobID, firstUpdate, False))
    expectSummary(JobStatus.CHECKING, "MinorStatus")

    # two updates stamped one and two hours ahead
    stampPlusOne = str(datetime.datetime.utcnow() + datetime.timedelta(hours=1))
    stampPlusTwo = str(datetime.datetime.utcnow() + datetime.timedelta(hours=2))
    laterUpdates = {
        stampPlusOne: {
            "Status": JobStatus.WAITING,
            "MinorStatus": "MinorStatus",
            "Source": "Unknown",
        },
        stampPlusTwo: {
            "Status": JobStatus.MATCHED,
            "MinorStatus": "MinorStatus-matched",
            "Source": "Unknown",
        },
    }
    expectOK(stateUpdater.setJobStatusBulk(jobID, laterUpdates, False))
    expectSummary(JobStatus.MATCHED, "MinorStatus-matched")

    # setting a parameter must leave the status untouched
    expectOK(stateUpdater.setJobsParameter({jobID: ["Whatever", "booh"]}))
    expectSummary(JobStatus.MATCHED, "MinorStatus-matched")

    expectOK(stateUpdater.setJobAttribute(jobID, "Status", JobStatus.RUNNING))
    expectSummary(JobStatus.RUNNING)

    # delete the jobs - this will just set its status to "deleted"
    submitter.deleteJob(jobIDs)
def test_JobStateUpdateAndJobMonitoring(self):
    """Exercise the JobStateUpdate setters and the JobMonitoring getters on one job.

    A hello-world job is submitted, its status, parameters, application
    status and site are set, and the values reported by the monitoring
    client are compared with the expected ones. getJobParameters is
    expected to return its result keyed by job ID. The job is deleted at
    the end.
    """

    def expectOK(reply):
        # every service call checked in this test has to succeed
        self.assertTrue(reply['OK'])
        return reply

    wms = WMSClient()
    monitoring = JobMonitoringClient()
    updater = JobStateUpdateClient()

    # create a job and check stuff
    helloJob = helloWorldJob()
    xmlPath = createFile(helloJob)

    # submitting the job. Checking few stuff
    submission = expectOK(wms.submitJob(helloJob._toJDL(xmlFile=xmlPath)))
    jobID = int(submission['Value'])
    # jobID = res['JobID']
    expectOK(monitoring.getJobJDL(jobID, True))
    expectOK(monitoring.getJobJDL(jobID, False))
    answer = expectOK(monitoring.getJobsParameters([jobID], []))
    self.assertEqual(answer['Value'], {})
    expectOK(monitoring.getJobsParameters([jobID], ['Owner']))

    # Adding stuff
    expectOK(updater.setJobStatus(jobID, 'Matched', 'matching', 'source'))
    expectOK(updater.setJobParameters(jobID, [('par1', 'par1Value'), ('par2', 'par2Value')]))
    expectOK(updater.setJobApplicationStatus(jobID, 'app status', 'source'))
    # res = jobStateUpdate.setJobFlag()
    # self.assertTrue(res['OK'])
    # res = jobStateUpdate.unsetJobFlag()
    # self.assertTrue(res['OK'])
    expectOK(updater.setJobSite(jobID, 'Site'))
    # res = jobMonitor.traceJobParameter( 'Site', 1, 'Status' )
    # self.assertTrue(res['OK'])

    # now checking few things
    answer = expectOK(monitoring.getJobStatus(jobID))
    self.assertEqual(answer['Value'], 'Running')

    answer = expectOK(monitoring.getJobParameter(jobID, 'par1'))
    self.assertEqual(answer['Value'], {'par1': 'par1Value'})

    answer = expectOK(monitoring.getJobParameters(jobID))
    self.assertEqual(answer['Value'], {jobID: {'par1': 'par1Value', 'par2': 'par2Value'}})

    answer = expectOK(monitoring.getJobParameters(jobID, 'par1'))
    self.assertEqual(answer['Value'], {jobID: {'par1': 'par1Value'}})

    answer = expectOK(monitoring.getJobAttribute(jobID, 'Site'))
    self.assertEqual(answer['Value'], 'Site')

    jobAttributes = expectOK(monitoring.getJobAttributes(jobID))['Value']
    self.assertEqual(jobAttributes['ApplicationStatus'], 'app status')
    self.assertEqual(jobAttributes['JobName'], 'helloWorld')

    jobSummary = expectOK(monitoring.getJobSummary(jobID))['Value']
    self.assertEqual(jobSummary['ApplicationStatus'], 'app status')
    self.assertEqual(jobSummary['Status'], 'Running')

    answer = expectOK(monitoring.getJobHeartBeatData(jobID))
    self.assertEqual(answer['Value'], [])

    answer = expectOK(monitoring.getInputData(jobID))
    self.assertEqual(answer['Value'], [])

    expectOK(monitoring.getJobPrimarySummary(jobID))
    expectOK(monitoring.getAtticJobParameters(jobID))

    # move the job to 'Done' and check the summary follows
    expectOK(updater.setJobsStatus([jobID], 'Done', 'MinorStatus', 'Unknown'))
    jobSummary = expectOK(monitoring.getJobSummary(jobID))['Value']
    self.assertEqual(jobSummary['Status'], 'Done')
    self.assertEqual(jobSummary['MinorStatus'], 'MinorStatus')
    self.assertEqual(jobSummary['ApplicationStatus'], 'app status')

    expectOK(updater.sendHeartBeat(jobID, {'bih': 'bih'}, {'boh': 'boh'}))

    # delete the job - this will just set its status to "deleted"
    wms.deleteJob(jobID)
class TransformationInfo(object):
    """Hold the clients and metadata needed to inspect and repair one transformation.

    Public methods that change state (task, job or input-file status, file
    removal) only do so when ``enabled`` is true.
    """

    def __init__(self, transformationID, transInfoDict, enabled, tClient, fcClient, jobMon):
        """Store the clients and the transformation metadata.

        :param transformationID: ID of the transformation
        :param dict transInfoDict: must provide 'TransformationName', 'Type',
                                   'AuthorDN' and 'AuthorGroup'
        :param bool enabled: if false, state-changing methods become no-ops
        :param tClient: client used for transformation tasks and files
        :param fcClient: client used to look up file descendants
        :param jobMon: client used to look up jobs
        """
        self.log = gLogger.getSubLogger(__name__ + "[%s]" % transformationID)
        self.enabled = enabled
        self.tID = transformationID
        self.transName = transInfoDict['TransformationName']
        self.tClient = tClient
        self.jobMon = jobMon
        self.fcClient = fcClient
        self.transType = transInfoDict['Type']
        self.authorDN = transInfoDict['AuthorDN']
        self.authorGroup = transInfoDict['AuthorGroup']
        self.jobStateClient = JobStateUpdateClient()

    def checkTasksStatus(self):
        """Return the file information of this transformation keyed by task ID.

        When several files share a task ID, the last one returned wins.

        :returns: dict of taskID -> dict with FileID, LFN, Status and ErrorCount
        :raises: RuntimeError when the transformation files cannot be retrieved
        """
        reply = self.tClient.getTransformationFiles(condDict={'TransformationID': self.tID})
        if not reply['OK']:
            raise RuntimeError("Failed to get transformation tasks: %s" % reply['Message'])

        infoByTask = {}
        for entry in reply['Value']:
            taskID = entry['TaskID']
            lfn, fileStatus = entry['LFN'], entry['Status']
            fileID, errors = entry['FileID'], entry['ErrorCount']
            infoByTask[taskID] = {'FileID': fileID, 'LFN': lfn, 'Status': fileStatus, 'ErrorCount': errors}
        return infoByTask

    def setJobDone(self, job):
        """Set the task to Done and force the job to Done if it is not already."""
        if self.enabled:
            self.__setTaskStatus(job, 'Done')
            if job.status != 'Done':
                self.__updateJobStatus(job.jobID, 'Done', "Job forced to Done")

    def setJobFailed(self, job):
        """Set the task to Failed and force the job to Failed if it is not already."""
        if self.enabled:
            self.__setTaskStatus(job, 'Failed')
            if job.status != 'Failed':
                self.__updateJobStatus(job.jobID, "Failed", "Job forced to Failed")

    def setInputUnused(self, job):
        """Set the input file of the job to Unused."""
        self.__setInputStatus(job, "Unused")

    def setInputMaxReset(self, job):
        """Set the input file of the job to MaxReset."""
        self.__setInputStatus(job, "MaxReset")

    def setInputProcessed(self, job):
        """Set the input file of the job to Processed."""
        self.__setInputStatus(job, "Processed")

    def setInputDeleted(self, job):
        """Set the input file of the job to Deleted."""
        self.__setInputStatus(job, "Deleted")

    def __setInputStatus(self, job, status):
        """Force the status of the job's input file in the transformation.

        :raises: RuntimeError when the update fails
        """
        if not self.enabled:
            return
        outcome = self.tClient.setFileStatusForTransformation(self.tID, status, [job.inputFile], force=True)
        if not outcome['OK']:
            gLogger.error("Failed updating status", outcome['Message'])
            raise RuntimeError("Failed updating file status")

    def __setTaskStatus(self, job, status):
        """Update the status of the job's task; callers check ``enabled`` first.

        :raises: RuntimeError when the update fails
        """
        outcome = self.tClient.setTaskStatus(self.transName, job.taskID, status)
        if not outcome['OK']:
            raise RuntimeError("Failed updating task status: %s" % outcome['Message'])

    def __updateJobStatus(self, jobID, status, minorstatus=''):
        """Update the job status, reporting 'DataRecoveryAgent' as the source.

        :returns: S_OK('DisabledMode') when not enabled, otherwise the client result
        :raises: RuntimeError when the update fails
        """
        if not self.enabled:
            return S_OK('DisabledMode')
        outcome = self.jobStateClient.setJobStatus(jobID, status, minorstatus, 'DataRecoveryAgent')
        if not outcome['OK']:
            self.log.error('Failed to update job status', outcome['Message'])
            raise RuntimeError('Failed to update job status')
        return outcome

    def __findAllDescendants(self, lfnList):
        """Return a flat list with the descendants of all given LFNs.

        An empty list is returned when the lookup fails.
        """
        lookup = self.fcClient.getFileDescendents(lfnList, range(1, 8))
        if not lookup['OK']:
            return []
        return [
            descendant
            for _lfn, descendants in lookup['Value']['Successful'].items()
            for descendant in descendants
        ]

    def cleanOutputs(self, jobInfo):
        """Remove the existing output files of the job and all their descendants.

        When not enabled, the files that would be removed are only logged.

        :raises: RuntimeError when no proxy can be obtained or the removal call fails
        """
        if not len(jobInfo.outputFiles):
            return
        descendants = self.__findAllDescendants(jobInfo.outputFiles)
        # izip_longest pads with None: an output file without a matching status entry is skipped
        existing = [
            lfn
            for lfn, fileStatus in izip_longest(jobInfo.outputFiles, jobInfo.outputFileStatus)
            if fileStatus == "Exists"
        ]
        toRemove = existing + descendants
        if not toRemove:
            return

        listing = "\n +++ ".join(toRemove)
        if not self.enabled:
            self.log.notice("Would have removed these files: \n +++ %s " % listing)
            return
        self.log.notice("Remove these files: \n +++ %s " % listing)

        failuresByReason = defaultdict(list)
        removedCount = 0
        for chunk in breakListIntoChunks(toRemove, 200):
            # The removal is performed with a proxy of the transformation author
            with UserProxy(proxyUserDN=self.authorDN, proxyUserGroup=self.authorGroup) as proxyResult:
                if not proxyResult['OK']:
                    raise RuntimeError('Failed to get a proxy: %s' % proxyResult['Message'])
                removal = DataManager().removeFile(chunk)
                if not removal['OK']:
                    self.log.error("Failed to remove LFNs", removal['Message'])
                    raise RuntimeError("Failed to remove LFNs: %s" % removal['Message'])
                outcome = removal['Value']
                for lfn, err in outcome['Failed'].items():
                    failuresByReason[str(err)].append(lfn)
                removedCount += len(outcome['Successful'].keys())

        for reason, lfns in failuresByReason.items():
            self.log.error("Failed to remove %d files with error: %s" % (len(lfns), reason))
        self.log.notice("Successfully removed %d files" % removedCount)

    def getJobs(self, statusList=None):
        """Get the Done and/or Failed jobs of the transformation.

        :param list statusList: statuses to look for, defaults to ['Done', 'Failed']
        :returns: 3-tuple of OrderedDict of JobInfo objects keyed and sorted by job ID,
                  number of Done jobs, number of Failed jobs
        """
        done = S_OK([])
        failed = S_OK([])
        wanted = ['Done', 'Failed'] if statusList is None else statusList
        if 'Done' in wanted:
            self.log.notice("Getting 'Done' Jobs...")
            done = self.__getJobs(["Done"])
        if 'Failed' in wanted:
            self.log.notice("Getting 'Failed' Jobs...")
            failed = self.__getJobs(["Failed"])
        doneIDs = done['Value']
        failedIDs = failed['Value']

        # Failed jobs are inserted last, so they win if an ID shows up in both lists
        byID = {}
        for jobStatus, jobIDs in (("Done", doneIDs), ("Failed", failedIDs)):
            for jobID in jobIDs:
                byID[int(jobID)] = JobInfo(jobID, jobStatus, self.tID, self.transType)
        jobs = OrderedDict(sorted(byID.items(), key=lambda item: item[0]))

        self.log.notice("Found %d Done Jobs " % len(doneIDs))
        self.log.notice("Found %d Failed Jobs " % len(failedIDs))
        return jobs, len(doneIDs), len(failedIDs)

    def __getJobs(self, status):
        """Return the jobs of this transformation that have one of the given statuses.

        Jobs are selected by the job group, which is the zero-padded transformation ID.
        A filter on ApplicationStatus (everything except "Job Finished Successfully")
        existed here as commented-out code and is not applied.

        :param list status: list of statuses to look for
        :returns: the successful result of the job monitoring client
        :raises: RuntimeError when the jobs cannot be retrieved
        """
        selection = dict(Status=status, JobGroup="%08d" % int(self.tID))
        found = self.jobMon.getJobs(selection)
        if not found['OK']:
            self.log.error("Error finding jobs: ", found['Message'])
            raise RuntimeError("Failed to get jobs")
        self.log.debug("Found Prod jobs: %s" % found['Value'])
        return found
def test_JobStateUpdateAndJobMonitoringMultuple(self):
    """Submit several jobs (different types and inputs) and check the monitoring summaries.

    One job is submitted per combination of input-data list and job type, all
    with the same destination site. The distinct-value queries of the
    JobMonitoring client (sites, types, states, ...) are then checked, and a
    bulk status update is applied to the last submitted job. The jobs are
    deleted at the end; the outcome of that deletion is not checked.
    """
    wmsClient = WMSClient()
    jobMonitor = JobMonitoringClient()
    jobStateUpdate = JobStateUpdateClient()

    jobIDs = []
    lfnss = [['/a/1.txt', '/a/2.txt'], ['/a/1.txt', '/a/3.txt', '/a/4.txt'], []]
    types = ['User', 'Test']
    for lfns in lfnss:
        for jobType in types:
            job = helloWorldJob()
            job.setDestination('DIRAC.Jenkins.ch')
            job.setInputData(lfns)
            job.setType(jobType)
            jobDescription = createFile(job)
            res = wmsClient.submitJob(job._toJDL(xmlFile=jobDescription))
            self.assertTrue(res['OK'])
            jobIDs.append(res['Value'])
    # The single-job updates at the end of the test act on the last submitted job
    jobID = jobIDs[-1]

    res = jobMonitor.getSites()
    # Call form of print: same output on Python 2, valid syntax on Python 3
    print(res)
    self.assertTrue(res['OK'])
    self.assertTrue(set(res['Value']) <= {'ANY', 'DIRAC.Jenkins.ch'})

    res = jobMonitor.getJobTypes()
    self.assertTrue(res['OK'])
    self.assertEqual(sorted(res['Value']), sorted(types))

    res = jobMonitor.getApplicationStates()
    self.assertTrue(res['OK'])
    self.assertEqual(sorted(res['Value']), sorted(['Unknown']))

    res = jobMonitor.getOwners()
    self.assertTrue(res['OK'])

    res = jobMonitor.getOwnerGroup()
    self.assertTrue(res['OK'])

    res = jobMonitor.getProductionIds()
    self.assertTrue(res['OK'])

    res = jobMonitor.getJobGroups()
    self.assertTrue(res['OK'])

    res = jobMonitor.getStates()
    self.assertTrue(res['OK'])
    self.assertTrue(sorted(res['Value']) in [['Received'], sorted(['Received', 'Waiting'])])

    res = jobMonitor.getMinorStates()
    self.assertTrue(res['OK'])
    self.assertTrue(sorted(res['Value']) in [['Job accepted'], sorted(['Job accepted', 'Job Rescheduled'])])

    res = jobMonitor.getJobs()
    self.assertTrue(res['OK'])
    self.assertTrue({str(x) for x in jobIDs} <= set(res['Value']))

    # NOTE: getCounters is not exercised by this test.

    res = jobMonitor.getCurrentJobCounters()
    self.assertTrue(res['OK'])
    try:
        # A plain int is enough for the comparison; `long` does not exist on Python 3
        self.assertTrue(
            res['Value'].get('Received') + res['Value'].get('Waiting') >= len(lfnss) * len(types))
    except TypeError:
        # .get() yields None for a missing counter and the sum then raises:
        # in that case the check is skipped, as before
        pass

    res = jobMonitor.getJobsSummary(jobIDs)
    self.assertTrue(res['OK'])

    res = jobMonitor.getJobPageSummaryWeb({}, [], 0, 100)
    self.assertTrue(res['OK'])

    res = jobStateUpdate.setJobStatusBulk(
        jobID,
        {str(datetime.datetime.utcnow()): {'Status': 'Running',
                                           'MinorStatus': 'MinorStatus',
                                           'ApplicationStatus': 'ApplicationStatus',
                                           'Source': 'Unknown'}})
    self.assertTrue(res['OK'])

    res = jobStateUpdate.setJobsParameter({jobID: ['Status', 'Running']})
    self.assertTrue(res['OK'])

    # delete the jobs - this will just set its status to "deleted"
    wmsClient.deleteJob(jobIDs)
def finalizeRequest(self, requestID, jobID, useCertificates=True):
    """Check the request status and, if it is 'Done', finalize the corresponding job.

    The job's "PendingRequest" parameter is refreshed with the request digest
    and the job (minor) status is updated:

    * a 'Completed' job gets a new major status derived from its minor status;
    * a 'Stalled' job whose last logging record is 'Stalled' is set back to the
      status of the record before it (including its application status);
    * any other job only gets its minor status updated.

    :param self: self reference
    :param str requestID: request id
    :param int jobID: job id; any value accepted by ``int()`` (e.g. a numeric string)
    :param bool useCertificates: forwarded to the JobStateUpdate and JobMonitoring clients
    :return: S_OK(newJobStatus) on success, where newJobStatus is an empty string
             when only the minor status was updated; S_OK() if the job does not
             exist (anymore); S_ERROR if the request is not 'Done' or the minor
             status of a 'Completed' job is unexpected; otherwise the failed
             result of the underlying call
    """
    stateServer = JobStateUpdateClient(useCertificates=useCertificates)

    # Checking if to update the job status - we should fail here, so it will be re-tried later
    # Checking the state, first
    res = self.getRequestStatus(requestID)
    if not res["OK"]:
        self.log.error(
            "finalizeRequest: failed to get request",
            "request: %s status: %s" % (requestID, res["Message"]),
        )
        return res
    if res["Value"] != "Done":
        return S_ERROR(
            "The request %s isn't 'Done' but '%s', this should never happen, why are we here?"
            % (requestID, res["Value"])
        )

    # The request is 'Done', let's update the job status. If we fail, we should re-try later
    monitorServer = JobMonitoringClient(useCertificates=useCertificates)
    # Normalise once: the "%d" log formats below raise TypeError for a job ID given as a string
    jobID = int(jobID)
    res = monitorServer.getJobSummary(jobID)
    if not res["OK"]:
        self.log.error("finalizeRequest: Failed to get job status", "JobID: %d" % jobID)
        return res
    if not res["Value"]:
        self.log.info("finalizeRequest: job %d does not exist (anymore): finalizing" % jobID)
        return S_OK()

    jobStatus = res["Value"]["Status"]
    jobMinorStatus = res["Value"]["MinorStatus"]
    jobAppStatus = ""
    newJobStatus = ""
    if jobStatus == JobStatus.STALLED:
        # If job is stalled, find the previous status from the logging info
        res = monitorServer.getJobLoggingInfo(jobID)
        if not res["OK"]:
            self.log.error("finalizeRequest: Failed to get job logging info", "JobID: %d" % jobID)
            return res
        # Check the last status was Stalled and get the one before
        if len(res["Value"]) >= 2 and res["Value"][-1][0] == JobStatus.STALLED:
            jobStatus, jobMinorStatus, jobAppStatus = res["Value"][-2][:3]
            newJobStatus = jobStatus

    # update the job pending request digest in any case since it is modified
    self.log.info("finalizeRequest: Updating request digest for job %d" % jobID)
    digest = self.getDigest(requestID)
    if digest["OK"]:
        digest = digest["Value"]
        self.log.verbose(digest)
        res = stateServer.setJobParameter(jobID, "PendingRequest", digest)
        if not res["OK"]:
            self.log.info("finalizeRequest: Failed to set job %d parameter: %s" % (jobID, res["Message"]))
            return res
    else:
        # A missing digest is only logged: the status update below still takes place
        self.log.error(
            "finalizeRequest: Failed to get request digest for %s: %s" % (requestID, digest["Message"])
        )

    if jobStatus == JobStatus.COMPLETED:
        # What to do? Depends on what we have in the minorStatus
        if jobMinorStatus == JobMinorStatus.PENDING_REQUESTS:
            newJobStatus = JobStatus.DONE
        elif jobMinorStatus == JobMinorStatus.APP_ERRORS:
            newJobStatus = JobStatus.FAILED
        elif jobMinorStatus == JobMinorStatus.MARKED_FOR_TERMINATION:
            # If the job has been Killed, set it Killed
            newJobStatus = JobStatus.KILLED
        else:
            self.log.error(
                "finalizeRequest: Unexpected jobMinorStatus",
                "for %d (got %s)" % (jobID, jobMinorStatus),
            )
            return S_ERROR("Unexpected jobMinorStatus")

    if newJobStatus:
        self.log.info(
            "finalizeRequest: Updating job status",
            "for %d to '%s/%s'" % (jobID, newJobStatus, JobMinorStatus.REQUESTS_DONE),
        )
    else:
        self.log.info(
            "finalizeRequest: Updating job minor status",
            "for %d to '%s' (current status is %s)" % (jobID, JobMinorStatus.REQUESTS_DONE, jobStatus),
        )
    # With an empty newJobStatus this call only carries the new minor status
    stateUpdate = stateServer.setJobStatus(jobID, newJobStatus, JobMinorStatus.REQUESTS_DONE, "RMS")
    if jobAppStatus and stateUpdate["OK"]:
        # Restore the application status recorded before the job was declared Stalled
        stateUpdate = stateServer.setJobApplicationStatus(jobID, jobAppStatus, "RMS")
    if not stateUpdate["OK"]:
        self.log.error(
            "finalizeRequest: Failed to set job status",
            "JobID: %d, error: %s" % (jobID, stateUpdate["Message"]),
        )
        return stateUpdate

    return S_OK(newJobStatus)
def test_FullChain(self):
    """Drive one job through submit, reset, reschedule, kill and delete.

    The WMSClient is used for the job operations, the JobStateUpdate client to
    move the job between statuses, and the JobMonitoring client to verify the
    resulting status after each step.
    """
    wmsClient = WMSClient()
    jobMonitor = JobMonitoringClient()
    jobStateUpdate = JobStateUpdateClient()

    def succeeded(reply):
        """Assert that the reply reports success (showing its message otherwise) and return it."""
        self.assertTrue(reply["OK"], reply.get("Message"))
        return reply

    def expectStatus(expected):
        """Assert that the monitored status of the job is the expected one."""
        reply = succeeded(jobMonitor.getJobsStatus(jobID))
        self.assertEqual(reply["Value"][jobID]["Status"], expected, msg="Got %s" % str(reply["Value"]))

    # create the job
    job = helloWorldJob()
    jobDescription = createFile(job)

    # submit the job
    submission = succeeded(wmsClient.submitJob(job._toJDL(xmlFile=jobDescription)))
    self.assertTrue(isinstance(submission["Value"], int), msg="Got %s" % type(submission["Value"]))
    self.assertEqual(
        submission["Value"],
        submission["JobID"],
        msg="Got %s, expected %s" % (str(submission["Value"]), submission["JobID"]),
    )
    jobID = submission["Value"]

    # updating the status
    succeeded(jobStateUpdate.setJobStatus(jobID, JobStatus.RUNNING, "Executing Minchiapp", "source"))

    # reset the job
    succeeded(wmsClient.resetJob(jobID))

    # reschedule the job
    succeeded(wmsClient.rescheduleJob(jobID))
    expectStatus(JobStatus.RECEIVED)

    reply = succeeded(jobMonitor.getJobsMinorStatus([jobID]))
    self.assertEqual(
        reply["Value"],
        {jobID: {"MinorStatus": "Job Rescheduled"}},
        msg="Got %s" % str(reply["Value"]),
    )

    reply = succeeded(jobMonitor.getJobsApplicationStatus([jobID]))
    self.assertEqual(
        reply["Value"],
        {jobID: {"ApplicationStatus": "Unknown"}},
        msg="Got %s" % str(reply["Value"]),
    )

    reply = succeeded(jobMonitor.getJobsStates([jobID]))
    expectedStates = {
        jobID: {
            "Status": JobStatus.RECEIVED,
            "MinorStatus": "Job Rescheduled",
            "ApplicationStatus": "Unknown",
        }
    }
    self.assertEqual(reply["Value"], expectedStates, msg="Got %s" % str(reply["Value"]))

    # updating the status again
    for nextStatus, minorStatus in (
        (JobStatus.CHECKING, "checking"),
        (JobStatus.WAITING, "waiting"),
        (JobStatus.MATCHED, "matched"),
    ):
        succeeded(jobStateUpdate.setJobStatus(jobID, nextStatus, minorStatus, "source"))

    # kill the job
    succeeded(wmsClient.killJob(jobID))
    expectStatus(JobStatus.KILLED)

    # delete the job - this will just set its status to "deleted"
    succeeded(wmsClient.deleteJob(jobID))
    expectStatus(JobStatus.DELETED)
class TransformationInfo(object):
    """Hold the clients and metadata needed to inspect and repair one transformation.

    Public methods that change state (task, job or input-file status, file
    removal) only do so when ``enabled`` is true.
    """

    def __init__(self, transformationID, transInfoDict, enabled, tClient, fcClient, jobMon):
        """Store the clients and the transformation metadata.

        :param transformationID: ID of the transformation
        :param dict transInfoDict: must provide 'TransformationName', 'Type',
                                   'AuthorDN' and 'AuthorGroup'
        :param bool enabled: if false, state-changing methods become no-ops
        :param tClient: client used for transformation tasks and files
        :param fcClient: client used to look up file descendants
        :param jobMon: client used to look up jobs
        """
        self.log = gLogger.getSubLogger(__name__ + "[%s]" % transformationID)
        self.enabled = enabled
        self.tID = transformationID
        self.transName = transInfoDict['TransformationName']
        self.tClient = tClient
        self.jobMon = jobMon
        self.fcClient = fcClient
        self.transType = transInfoDict['Type']
        self.authorDN = transInfoDict['AuthorDN']
        self.authorGroup = transInfoDict['AuthorGroup']
        self.jobStateClient = JobStateUpdateClient()

    def checkTasksStatus(self):
        """Return the file information of this transformation grouped by task ID.

        :returns: defaultdict(list) of taskID -> list of dicts with FileID, LFN,
                  Status and ErrorCount, one per file of the task
        :raises: RuntimeError when the transformation files cannot be retrieved
        """
        reply = self.tClient.getTransformationFiles(condDict={'TransformationID': self.tID})
        if not reply['OK']:
            raise RuntimeError("Failed to get transformation tasks: %s" % reply['Message'])

        filesByTask = defaultdict(list)
        for entry in reply['Value']:
            taskID = entry['TaskID']
            lfn, fileStatus = entry['LFN'], entry['Status']
            fileID, errors = entry['FileID'], entry['ErrorCount']
            filesByTask[taskID].append(
                {'FileID': fileID, 'LFN': lfn, 'Status': fileStatus, 'ErrorCount': errors})
        return filesByTask

    def setJobDone(self, job):
        """Set the task to Done and force the job to Done if it is not already."""
        if self.enabled:
            self.__setTaskStatus(job, 'Done')
            if job.status != 'Done':
                self.__updateJobStatus(job.jobID, 'Done', "Job forced to Done")

    def setJobFailed(self, job):
        """Set the task to Failed and force the job to Failed if it is not already."""
        if self.enabled:
            self.__setTaskStatus(job, 'Failed')
            if job.status != 'Failed':
                self.__updateJobStatus(job.jobID, "Failed", "Job forced to Failed")

    def setInputUnused(self, job):
        """Set the input files of the job to Unused."""
        self.__setInputStatus(job, 'Unused')

    def setInputMaxReset(self, job):
        """Set the input files of the job to MaxReset."""
        self.__setInputStatus(job, "MaxReset")

    def setInputProcessed(self, job):
        """Set the input files of the job to Processed."""
        self.__setInputStatus(job, "Processed")

    def setInputDeleted(self, job):
        """Set the input files of the job to Deleted."""
        self.__setInputStatus(job, "Deleted")

    def __setInputStatus(self, job, status):
        """Force the status of the job's input files in the transformation.

        :raises: RuntimeError when the update fails
        """
        if not self.enabled:
            return
        outcome = self.tClient.setFileStatusForTransformation(
            self.tID, status, job.inputFiles, force=True)
        if not outcome['OK']:
            gLogger.error("Failed updating status", outcome['Message'])
            raise RuntimeError("Failed updating file status")

    def __setTaskStatus(self, job, status):
        """Update the status of the job's task; callers check ``enabled`` first.

        :raises: RuntimeError when the update fails
        """
        outcome = self.tClient.setTaskStatus(self.transName, job.taskID, status)
        if not outcome['OK']:
            raise RuntimeError("Failed updating task status: %s" % outcome['Message'])

    def __updateJobStatus(self, jobID, status, minorstatus=''):
        """Update the job status, reporting 'DataRecoveryAgent' as the source.

        :returns: S_OK('DisabledMode') when not enabled, otherwise the client result
        :raises: RuntimeError when the update fails
        """
        if not self.enabled:
            return S_OK('DisabledMode')
        outcome = self.jobStateClient.setJobStatus(jobID, status, minorstatus, 'DataRecoveryAgent')
        if not outcome['OK']:
            self.log.error('Failed to update job status', outcome['Message'])
            raise RuntimeError('Failed to update job status')
        return outcome

    def __findAllDescendants(self, lfnList):
        """Return a flat list with the descendants of all given LFNs.

        An empty list is returned when the lookup fails.
        """
        lookup = self.fcClient.getFileDescendents(lfnList, range(1, 8))
        if not lookup['OK']:
            return []
        return [
            descendant
            for _lfn, descendants in lookup['Value']['Successful'].items()
            for descendant in descendants
        ]

    def cleanOutputs(self, jobInfo):
        """Remove the existing output files of the job and all their descendants.

        When not enabled, the files that would be removed are only logged.

        :param jobInfo: object providing ``outputFiles`` and ``outputFileStatus``
        :raises: RuntimeError when no proxy can be obtained or the removal call fails
        """
        if not len(jobInfo.outputFiles):
            return
        descendants = self.__findAllDescendants(jobInfo.outputFiles)
        # izip_longest pads with None: an output file without a matching status entry is skipped
        existing = [
            lfn
            for lfn, fileStatus in izip_longest(jobInfo.outputFiles, jobInfo.outputFileStatus)
            if fileStatus == "Exists"
        ]
        toRemove = existing + descendants
        if not toRemove:
            return

        listing = "\n +++ ".join(toRemove)
        if not self.enabled:
            self.log.notice("Would have removed these files: \n +++ %s " % listing)
            return
        self.log.notice("Remove these files: \n +++ %s " % listing)

        failuresByReason = defaultdict(list)
        removedCount = 0
        for chunk in breakListIntoChunks(toRemove, 200):
            # The removal is performed with a proxy of the transformation author
            with UserProxy(proxyUserDN=self.authorDN, proxyUserGroup=self.authorGroup) as proxyResult:
                if not proxyResult['OK']:
                    raise RuntimeError('Failed to get a proxy: %s' % proxyResult['Message'])
                removal = DataManager().removeFile(chunk)
                if not removal['OK']:
                    self.log.error("Failed to remove LFNs", removal['Message'])
                    raise RuntimeError("Failed to remove LFNs: %s" % removal['Message'])
                outcome = removal['Value']
                for lfn, err in outcome['Failed'].items():
                    failuresByReason[str(err)].append(lfn)
                removedCount += len(outcome['Successful'].keys())

        for reason, lfns in failuresByReason.items():
            self.log.error("Failed to remove %d files with error: %s" % (len(lfns), reason))
        self.log.notice("Successfully removed %d files" % removedCount)

    def getJobs(self, statusList=None):
        """Get the Done and/or Failed jobs of the transformation.

        :param list statusList: statuses to look for, defaults to ['Done', 'Failed']
        :returns: 3-tuple of OrderedDict of JobInfo objects keyed and sorted by job ID,
                  number of Done jobs, number of Failed jobs
        """
        done = S_OK([])
        failed = S_OK([])
        wanted = ['Done', 'Failed'] if statusList is None else statusList
        if 'Done' in wanted:
            self.log.notice("Getting 'Done' Jobs...")
            done = self.__getJobs(["Done"])
        if 'Failed' in wanted:
            self.log.notice("Getting 'Failed' Jobs...")
            failed = self.__getJobs(["Failed"])
        doneIDs = done['Value']
        failedIDs = failed['Value']

        # Failed jobs are inserted last, so they win if an ID shows up in both lists
        byID = {}
        for jobStatus, jobIDs in (("Done", doneIDs), ("Failed", failedIDs)):
            for jobID in jobIDs:
                byID[int(jobID)] = JobInfo(jobID, jobStatus, self.tID, self.transType)
        jobs = OrderedDict(sorted(byID.items(), key=lambda item: item[0]))

        self.log.notice("Found %d Done Jobs " % len(doneIDs))
        self.log.notice("Found %d Failed Jobs " % len(failedIDs))
        return jobs, len(doneIDs), len(failedIDs)

    def __getJobs(self, status):
        """Return the jobs of this transformation that have one of the given statuses.

        Jobs are selected by the job group, which is the zero-padded transformation ID.

        :param list status: list of statuses to look for
        :returns: the successful result of the job monitoring client
        :raises: RuntimeError when the jobs cannot be retrieved
        """
        selection = dict(Status=status, JobGroup='%08d' % int(self.tID))
        found = self.jobMon.getJobs(selection)
        if not found['OK']:
            self.log.error('Error finding jobs: ', found['Message'])
            raise RuntimeError('Failed to get jobs')
        self.log.debug('Found Trans jobs: %s' % found['Value'])
        return found
__RCSID__ = "$Id$" from DIRAC.Core.Base import Script Script.parseCommandLine() from DIRAC.WorkloadManagementSystem.Client.WMSClient import WMSClient # sut from DIRAC.WorkloadManagementSystem.Client.JobMonitoringClient import JobMonitoringClient from DIRAC.WorkloadManagementSystem.Client.JobStateUpdateClient import JobStateUpdateClient from DIRAC.tests.Integration.WorkloadManagementSystem.Test_Client_WMS import helloWorldJob, createFile jobMonitoringClient = JobMonitoringClient() jobStateUpdateClient = JobStateUpdateClient() def createJob(): job = helloWorldJob() jobDescription = createFile(job) wmsClient = WMSClient() res = wmsClient.submitJob(job._toJDL(xmlFile=jobDescription)) assert res['OK'], res['Message'] jobID = int(res['Value']) return jobID def updateFlag():