def loadJobsFromList(self, idList):
    """
    _loadJobsFromList_

    Load jobs in bulk using the Jobs.LoadForErrorHandler DAO.

    :param idList: iterable of job IDs; each one becomes a {"jobid": id} bind
    :return: list of Job objects, one per row handed back by the DAO, each
             updated with the contents of its row
    """
    loadAction = self.daoFactory(classname="Jobs.LoadForErrorHandler")

    binds = [{"jobid": jobID} for jobID in idList]
    results = loadAction.execute(jobID=binds)

    # You have to have a list: a lone dict result is wrapped so that the
    # loop below always walks over rows, never over the keys of one row.
    if isinstance(results, dict):
        results = [results]

    listOfJobs = []
    for entry in results:
        # One job per entry
        tmpJob = Job(id=entry['id'])
        tmpJob.update(entry)
        listOfJobs.append(tmpJob)

    return listOfJobs
def loadJobsFromListFull(self, idList):
    """
    _loadJobsFromListFull_

    Load jobs in bulk. Include the full metadata.

    The IDs are turned into {"jobid": id} binds and passed to
    self.loadAction; every row that comes back is wrapped in a Job.

    :param idList: iterable of job IDs
    :return: list of Job objects, each updated with its row
    """
    bindList = [{"jobid": jobID} for jobID in idList]
    rows = self.loadAction.execute(jobID=bindList)

    # A single dict result is treated as a one-row result set
    if isinstance(rows, dict):
        rows = [rows]

    loadedJobs = []
    for row in rows:
        job = Job(id=row['id'])
        job.update(row)
        loadedJobs.append(job)
    return loadedJobs
def loadJobsFromList(self, idList):
    """
    _loadJobsFromList_

    Load jobs in bulk through the self.idLoad DAO.

    :param idList: iterable of job IDs; each one becomes a {"jobid": id} bind
    :return: list of Job objects, one per row handed back by the DAO, each
             updated with the contents of its row
    """
    binds = [{"jobid": jobID} for jobID in idList]
    results = self.idLoad.execute(jobID=binds)

    # You have to have a list: wrap a lone dict so the loop below iterates
    # over rows rather than over the keys of a single row.
    if isinstance(results, dict):
        results = [results]

    listOfJobs = []
    for entry in results:
        # One job per entry
        tmpJob = Job(id=entry['id'])
        tmpJob.update(entry)
        listOfJobs.append(tmpJob)

    return listOfJobs
def loadJobsFromListFull(self, idList):
    """
    _loadJobsFromListFull_

    Load jobs in bulk. Include the full metadata.

    :param idList: iterable of job IDs; each one becomes a {"jobid": id} bind
                   for self.loadAction
    :return: list of Job objects, one per row handed back by the DAO, each
             updated with the contents of its row
    """
    binds = [{"jobid": jobID} for jobID in idList]
    results = self.loadAction.execute(jobID=binds)

    # You have to have a list: wrap a lone dict so the loop below iterates
    # over rows rather than over the keys of a single row.
    if isinstance(results, dict):
        results = [results]

    listOfJobs = []
    for entry in results:
        # One job per entry
        tmpJob = Job(id=entry['id'])
        tmpJob.update(entry)
        listOfJobs.append(tmpJob)

    return listOfJobs
def loadJobsFromList(self, idList):
    """
    _loadJobsFromList_

    Load jobs in bulk and tag each one with its job type.

    Jobs.LoadFromID supplies the job rows and Jobs.GetType supplies the
    id -> type pairs; the type is stored on each Job under 'jobType'.

    :param idList: job IDs to load (also passed as-is to Jobs.GetType)
    :return: list of Job objects, each updated with its row and 'jobType'
    """
    loadAction = self.daoFactory(classname="Jobs.LoadFromID")
    getTypeAction = self.daoFactory(classname="Jobs.GetType")

    bindList = [{"jobid": jobID} for jobID in idList]
    rows = loadAction.execute(jobID=bindList)
    typeRows = getTypeAction.execute(jobID=idList)

    # Lookup table from job id to its type
    typeByID = {typeRow['id']: typeRow['type'] for typeRow in typeRows}

    # A single dict result is treated as a one-row result set
    if isinstance(rows, dict):
        rows = [rows]

    loadedJobs = []
    for row in rows:
        job = Job(id=row['id'])
        job.update(row)
        job['jobType'] = typeByID[row['id']]
        loadedJobs.append(job)
    return loadedJobs
def killWorkflow(workflowName, jobCouchConfig, bossAirConfig=None):
    """
    _killWorkflow_

    Kill a workflow that is already executing inside the agent.  This will
    mark all incomplete jobs as failed and files that belong to all
    non-cleanup and non-logcollect subscriptions as failed.  The name of the
    JSM couch database and the URL to the database must be passed in as well
    so the state transitions are logged.

    :param workflowName: name of the workflow, passed to both KillWorkflow DAOs
    :param jobCouchConfig: configuration handed to ChangeState
    :param bossAirConfig: optional BossAirAPI configuration; when it is falsy
                          no batch-system kill is attempted
    """
    myThread = threading.current_thread()
    daoFactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)
    killFilesAction = daoFactory(classname="Subscriptions.KillWorkflow")
    killJobsAction = daoFactory(classname="Jobs.KillWorkflow")

    # Reuse the thread's transaction when one is already open, otherwise
    # start one here.
    # NOTE(review): existingTransaction is not read again within this span;
    # presumably later code uses it to decide whether to commit - confirm.
    existingTransaction = False
    if myThread.transaction.conn:
        existingTransaction = True
    else:
        myThread.transaction.begin()

    killFilesAction.execute(workflowName=workflowName,
                            conn=myThread.transaction.conn,
                            transaction=True)

    liveJobs = killJobsAction.execute(workflowName=workflowName,
                                      conn=myThread.transaction.conn,
                                      transaction=True)

    changeState = ChangeState(jobCouchConfig)

    # Deal with any jobs that are running in the batch system
    # only works if we can start the API
    if bossAirConfig:
        bossAir = BossAirAPI(config=bossAirConfig, noSetup=True)
        killableJobs = []
        for liveJob in liveJobs:
            if liveJob["state"].lower() == 'executing':
                # Then we need to kill this on the batch system
                liveWMBSJob = Job(id=liveJob["id"])
                liveWMBSJob.update(liveJob)
                changeState.propagate(liveWMBSJob, "killed", liveJob["state"])
                killableJobs.append(liveJob)
        # Now kill them
        try:
            bossAir.kill(jobs=killableJobs)
        except BossAirException as ex:
            # Something's gone wrong
            # Jobs not killed!
            logging.error("Error while trying to kill running jobs in workflow!\n")
            logging.error(str(ex))
            trace = getattr(ex, 'traceback', '')
            logging.error(trace)
            # But continue; we need to kill the jobs in the master
            # the batch system will have to take care of itself.
def killWorkflow(workflowName, jobCouchConfig, bossAirConfig=None):
    """
    _killWorkflow_

    Kill a workflow that is already executing inside the agent.  This will
    mark all incomplete jobs as failed and files that belong to all
    non-cleanup and non-logcollect subscriptions as failed.  The name of the
    JSM couch database and the URL to the database must be passed in as well
    so the state transitions are logged.

    :param workflowName: name of the workflow, passed to both KillWorkflow DAOs
    :param jobCouchConfig: configuration handed to ChangeState
    :param bossAirConfig: optional BossAirAPI configuration; when it is falsy
                          no batch-system kill is attempted
    """
    myThread = threading.current_thread()
    daoFactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)
    killFilesAction = daoFactory(classname="Subscriptions.KillWorkflow")
    killJobsAction = daoFactory(classname="Jobs.KillWorkflow")

    # Join the transaction already open on this thread, or begin a new one.
    # NOTE(review): existingTransaction is not read again within this span;
    # presumably later code uses it to decide whether to commit - confirm.
    existingTransaction = False
    if myThread.transaction.conn:
        existingTransaction = True
    else:
        myThread.transaction.begin()

    killFilesAction.execute(workflowName=workflowName,
                            conn=myThread.transaction.conn,
                            transaction=True)

    liveJobs = killJobsAction.execute(workflowName=workflowName,
                                      conn=myThread.transaction.conn,
                                      transaction=True)

    changeState = ChangeState(jobCouchConfig)

    # Deal with any jobs that are running in the batch system
    # only works if we can start the API
    if bossAirConfig:
        bossAir = BossAirAPI(config=bossAirConfig, noSetup=True)
        killableJobs = []
        for liveJob in liveJobs:
            if liveJob["state"].lower() == 'executing':
                # Then we need to kill this on the batch system
                liveWMBSJob = Job(id=liveJob["id"])
                liveWMBSJob.update(liveJob)
                changeState.propagate(liveWMBSJob, "killed", liveJob["state"])
                killableJobs.append(liveJob)
        # Now kill them
        try:
            bossAir.kill(jobs=killableJobs)
        except BossAirException as ex:
            # Something's gone wrong
            # Jobs not killed!
            logging.error("Error while trying to kill running jobs in workflow!\n")
            logging.error(str(ex))
            trace = getattr(ex, 'traceback', '')
            logging.error(trace)
            # But continue; we need to kill the jobs in the master
            # the batch system will have to take care of itself.
def findFinishedJobs(self):
    """
    _findFinishedJobs_

    Find finished jobs, i.e. jobs in the "success", "exhausted" or
    "killed" state, and return them as loaded Job objects.

    Each state is queried through Jobs.GetAllJobs with limitRows set to
    self.numberOfJobsToArchive; the combined IDs are then loaded in one
    call to self.loadAction.

    :return: list of Job objects (empty when no job is in a final state)
    """
    jobListAction = self.daoFactory(classname="Jobs.GetAllJobs")

    # Run all three queries first, then merge them in the same order
    perState = [jobListAction.execute(state=finalState,
                                      limitRows=self.numberOfJobsToArchive)
                for finalState in ("success", "exhausted", "killed")]
    finishedIDs = []
    for chunk in perState:
        finishedIDs.extend(chunk)

    if not finishedIDs:
        # Then nothing is ready
        return []

    bindList = [{"jobid": jobID} for jobID in finishedIDs]
    rows = self.loadAction.execute(jobID=bindList)

    # Anything that is not a list is treated as a single row
    if not isinstance(rows, list):
        rows = [rows]

    finishedJobs = []
    for row in rows:
        job = Job(id=row['id'])
        job.update(row)
        finishedJobs.append(job)
    return finishedJobs
def findFinishedJobs(self):
    """
    _findFinishedJobs_

    Will actually, surprisingly, find finished jobs
    (i.e., jobs that are successful, exhausted or killed)

    The IDs for each of those states come from Jobs.GetAllJobs and are then
    loaded in a single call to self.loadAction.

    :return: list of Job objects (empty when no job is in a final state)
    """
    jobListAction = self.daoFactory(classname="Jobs.GetAllJobs")
    jobList1 = jobListAction.execute(state="success")
    jobList2 = jobListAction.execute(state="exhausted")
    jobList3 = jobListAction.execute(state="killed")

    jobList = []
    jobList.extend(jobList1)
    jobList.extend(jobList2)
    jobList.extend(jobList3)

    if not jobList:
        # Then nothing is ready
        return []

    # Put together a list of job IDs
    binds = [{"jobid": jobID} for jobID in jobList]

    results = self.loadAction.execute(jobID=binds)

    # Anything that is not a list is treated as a single row
    if not isinstance(results, list):
        results = [results]

    doneList = []
    for entry in results:
        # One job per entry
        tmpJob = Job(id=entry['id'])
        tmpJob.update(entry)
        doneList.append(tmpJob)

    return doneList
def loadJobsFromList(self, idList):
    """
    _loadJobsFromList_

    Load jobs in bulk.

    The IDs are turned into {"jobid": id} binds and passed to self.idLoad;
    every row that comes back is wrapped in a Job.

    :param idList: iterable of job IDs
    :return: list of Job objects, each updated with its row
    """
    bindList = [{"jobid": jobID} for jobID in idList]
    rows = self.idLoad.execute(jobID=bindList)

    # A single dict result is treated as a one-row result set
    if isinstance(rows, dict):
        rows = [rows]

    loadedJobs = []
    for row in rows:
        job = Job(id=row['id'])
        job.update(row)
        loadedJobs.append(job)
    return loadedJobs
# Something's gone wrong # Jobs not killed! logging.error("Error while trying to kill running jobs in workflow!\n") logging.error(str(ex)) trace = getattr(ex, 'traceback', '') logging.error(trace) # But continue; we need to kill the jobs in the master # the batch system will have to take care of itself. pass for liveJob in liveJobs: if liveJob["state"] == "killed": # Then we've killed it already continue liveWMBSJob = Job(id = liveJob["id"]) liveWMBSJob.update(liveJob) changeState.propagate(liveWMBSJob, "killed", liveJob["state"]) if not existingTransaction: myThread.transaction.commit() return def freeSlots(multiplier = 1.0, minusRunning = False, allowedStates = ['Normal'], knownCmsSites = None): """ Get free resources from wmbs. Specify multiplier to apply a ratio to the actual numbers. minusRunning control if running jobs should be counted """ from WMCore.ResourceControl.ResourceControl import ResourceControl rc_sites = ResourceControl().listThresholdsForCreate()
def killWorkflow(workflowName, jobCouchConfig, bossAirConfig=None):
    """
    _killWorkflow_

    Kill a workflow that is already executing inside the agent.  This will
    mark all incomplete jobs as failed and files that belong to all
    non-cleanup and non-logcollect subscriptions as failed.  The name of the
    JSM couch database and the URL to the database must be passed in as well
    so the state transitions are logged.

    :param workflowName: name of the workflow, passed to both KillWorkflow
                         DAOs and to bossAir.kill
    :param jobCouchConfig: configuration handed to ChangeState
    :param bossAirConfig: optional BossAirAPI configuration; when it is falsy
                          no batch-system kill is attempted
    :return: None
    """
    myThread = threading.current_thread()
    daoFactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)
    killFilesAction = daoFactory(classname="Subscriptions.KillWorkflow")
    killJobsAction = daoFactory(classname="Jobs.KillWorkflow")

    killFilesAction.execute(workflowName=workflowName,
                            conn=myThread.transaction.conn)

    liveJobs = killJobsAction.execute(workflowName=workflowName,
                                      conn=myThread.transaction.conn)

    changeState = ChangeState(jobCouchConfig)

    # Deal with any jobs that are running in the batch system
    # only works if we can start the API
    if bossAirConfig:
        bossAir = BossAirAPI(config=bossAirConfig, noSetup=True)
        # Only jobs currently executing need to be killed on the batch system
        killableJobs = [liveJob for liveJob in liveJobs
                        if liveJob["state"].lower() == 'executing']
        # Now kill them
        try:
            logging.info("Killing %d jobs for workflow: %s", len(killableJobs), workflowName)
            bossAir.kill(jobs=killableJobs, workflowName=workflowName)
        except BossAirException as ex:
            # Something's gone wrong. Jobs not killed!
            logging.error("Error while trying to kill running jobs in workflow!\n")
            logging.error(str(ex))
            trace = getattr(ex, 'traceback', '')
            logging.error(trace)
            # But continue; we need to kill the jobs in the master
            # the batch system will have to take care of itself.

    # Group the remaining jobs by their current state so that each group is
    # moved to "killed" with one ChangeState call.
    liveWMBSJobs = defaultdict(list)
    for liveJob in liveJobs:
        # Note: this comparison is case-sensitive, unlike the lower-cased
        # 'executing' check above.
        if liveJob["state"] == "killed":
            # Then we've killed it already
            continue
        liveWMBSJob = Job(id=liveJob["id"])
        liveWMBSJob.update(liveJob)
        liveWMBSJobs[liveJob["state"]].append(liveWMBSJob)

    for state, jobsByState in liveWMBSJobs.items():
        if len(jobsByState) > 100 and state != "executing":
            # if there are too many jobs skip the couch and dashboard update
            # TODO: couch and dashboard need to be updated or parallel.
            changeState.check("killed", state)
            changeState.persist(jobsByState, "killed", state)
        else:
            changeState.propagate(jobsByState, "killed", state)
    return
def killWorkflow(workflowName, jobCouchConfig, bossAirConfig=None):
    """
    _killWorkflow_

    Kill a workflow that is already executing inside the agent.  This will
    mark all incomplete jobs as failed and files that belong to all
    non-cleanup and non-logcollect subscriptions as failed.  The name of the
    JSM couch database and the URL to the database must be passed in as well
    so the state transitions are logged.

    :param workflowName: name of the workflow, passed to both KillWorkflow
                         DAOs and to bossAir.kill
    :param jobCouchConfig: configuration handed to ChangeState
    :param bossAirConfig: optional BossAirAPI configuration; when it is falsy
                          no batch-system kill is attempted
    :return: None
    """
    myThread = threading.current_thread()
    daoFactory = DAOFactory(package="WMCore.WMBS",
                            logger=myThread.logger,
                            dbinterface=myThread.dbi)
    killFilesAction = daoFactory(classname="Subscriptions.KillWorkflow")
    killJobsAction = daoFactory(classname="Jobs.KillWorkflow")

    killFilesAction.execute(workflowName=workflowName,
                            conn=myThread.transaction.conn)

    liveJobs = killJobsAction.execute(workflowName=workflowName,
                                      conn=myThread.transaction.conn)

    changeState = ChangeState(jobCouchConfig)

    # Deal with any jobs that are running in the batch system
    # only works if we can start the API
    if bossAirConfig:
        bossAir = BossAirAPI(config=bossAirConfig, noSetup=True)
        # Only jobs currently executing need to be killed on the batch system
        killableJobs = [liveJob for liveJob in liveJobs
                        if liveJob["state"].lower() == 'executing']
        # Now kill them
        try:
            logging.info("Killing %d jobs for workflow: %s", len(killableJobs), workflowName)
            bossAir.kill(jobs=killableJobs, workflowName=workflowName)
        except BossAirException as ex:
            # Something's gone wrong. Jobs not killed!
            logging.error("Error while trying to kill running jobs in workflow!\n")
            logging.error(str(ex))
            trace = getattr(ex, 'traceback', '')
            logging.error(trace)
            # But continue; we need to kill the jobs in the master
            # the batch system will have to take care of itself.

    # Bucket the jobs that are not yet "killed" by their current state so
    # each bucket can be transitioned with one ChangeState call.
    liveWMBSJobs = defaultdict(list)
    for liveJob in liveJobs:
        # Note: this comparison is case-sensitive, unlike the lower-cased
        # 'executing' check above.
        if liveJob["state"] == "killed":
            # Then we've killed it already
            continue
        liveWMBSJob = Job(id=liveJob["id"])
        liveWMBSJob.update(liveJob)
        liveWMBSJobs[liveJob["state"]].append(liveWMBSJob)

    for state, jobsByState in liveWMBSJobs.items():
        if len(jobsByState) > 100 and state != "executing":
            # if there are too many jobs skip the couch and dashboard update
            # TODO: couch and dashboard need to be updated or parallel.
            changeState.check("killed", state)
            changeState.persist(jobsByState, "killed", state)
        else:
            changeState.propagate(jobsByState, "killed", state)
    return
# Jobs not killed! logging.error( "Error while trying to kill running jobs in workflow!\n") logging.error(str(ex)) trace = getattr(ex, 'traceback', '') logging.error(trace) # But continue; we need to kill the jobs in the master # the batch system will have to take care of itself. pass for liveJob in liveJobs: if liveJob["state"] == "killed": # Then we've killed it already continue liveWMBSJob = Job(id=liveJob["id"]) liveWMBSJob.update(liveJob) changeState.propagate(liveWMBSJob, "killed", liveJob["state"]) if not existingTransaction: myThread.transaction.commit() return def freeSlots(multiplier=1.0, minusRunning=False, allowedStates=['Normal'], knownCmsSites=None): """ Get free resources from wmbs. Specify multiplier to apply a ratio to the actual numbers.