class BossAirAPI(WMConnectionBase):
    """
    _BossAirAPI_

    The API layer for the BossAir prototype.

    Loads the batch-system plugins named in ``config.BossAir.pluginNames``
    and routes submit/track/kill/update requests to them, keeping the
    BossAir database tables in step through the WMCore.BossAir DAOs.
    """

    def __init__(self, config, insertStates=False):
        """
        __init__

        BossAir should work with the standard config
        structure of WMAgent.

        :param config: WMAgent configuration; must carry a ``BossAir``
            section with at least ``pluginDir`` and ``pluginNames``.
        :param insertStates: when true, the states of the loaded plugins
            are written to the database while the plugins are loaded.
        """
        WMConnectionBase.__init__(self, daoPackage="WMCore.BossAir")

        # current_thread() is the non-deprecated spelling of currentThread()
        myThread = threading.current_thread()

        self.config = config
        self.plugins = {}
        self.jobs = []

        self.pluginDir = config.BossAir.pluginDir
        # This is the default state jobs are created in
        self.newState = getattr(config.BossAir, 'newState', 'New')

        # Get any proxy info
        self.checkProxy = getattr(config.BossAir, 'checkProxy', False)
        self.cert = getattr(config.BossAir, 'cert', None)

        self.stateMachine = ChangeState(self.config)

        # Create a factory to load plugins
        self.pluginFactory = WMFactory("plugins", self.pluginDir)

        self.daoFactory = DAOFactory(package="WMCore.BossAir",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)

        self.deleteDAO = self.daoFactory(classname="DeleteJobs")
        self.stateDAO = self.daoFactory(classname="NewState")
        self.loadByWMBSDAO = self.daoFactory(classname="LoadByWMBSID")
        self.updateDAO = self.daoFactory(classname="UpdateJobs")
        self.newJobDAO = self.daoFactory(classname="NewJobs")
        self.runningJobDAO = self.daoFactory(classname="LoadRunning")
        self.completeJobDAO = self.daoFactory(classname="LoadComplete")
        self.loadJobsDAO = self.daoFactory(classname="LoadByStatus")
        self.completeDAO = self.daoFactory(classname="CompleteJob")
        self.monitorDAO = self.daoFactory(classname="JobStatusForMonitoring")

        # Filled in by loadPlugin() with the set of known state names
        self.states = None
        self.loadPlugin(insertStates)

    @staticmethod
    def _runJobsFromDicts(jobDicts):
        """
        _runJobsFromDicts_

        Wrap each dictionary returned by a DAO in a RunJob object.
        """
        runJobs = []
        for jDict in jobDicts:
            rj = RunJob()
            rj.update(jDict)
            runJobs.append(rj)
        return runJobs

    def loadPlugin(self, insertStates):
        """
        _loadPlugin_

        Actually load the plugin and init the database.

        Every plugin in ``config.BossAir.pluginNames`` is instantiated and
        its ``states`` are merged, together with ``self.newState``, into
        ``self.states``.

        :param insertStates: when true, also insert the merged states
            through addStates().
        """
        states = set()

        for name in self.config.BossAir.pluginNames:
            self.plugins[name] = self.pluginFactory.loadObject(classname=name,
                                                               args=self.config)
            states.update(self.plugins[name].states)

        states.add(self.newState)

        if insertStates:
            # Add states only if we're not
            # doing a secondary instantiation
            self.addStates(states=states)

        self.states = states

    def addStates(self, states):
        """
        _addStates_

        Add States to bl_status table. Meant to be done only once in an
        agent lifetime.
        """
        existingTransaction = self.beginTransaction()

        self.stateDAO.execute(states=states, conn=self.getDBConn(),
                              transaction=self.existingTransaction())

        self.commitTransaction(existingTransaction)

    def createNewJobs(self, wmbsJobs):
        """
        _createNewJobs_

        Create new jobs in the BossAir database.
        Accepts WMBS Jobs.

        Jobs whose status is not one of the loaded states are created in
        ``self.newState``.
        """
        existingTransaction = self.beginTransaction()

        jobsToCreate = []

        # First turn wmbsJobs into runJobs
        for wmbsJob in wmbsJobs:
            runJob = RunJob()
            runJob.buildFromJob(job=wmbsJob)
            if runJob.get('status') not in self.states:
                runJob['status'] = self.newState
            jobsToCreate.append(runJob)

        # Next insert them into the database
        self.newJobDAO.execute(jobs=jobsToCreate, conn=self.getDBConn(),
                               transaction=self.existingTransaction())

        self.commitTransaction(existingTransaction)

    def _listRunJobs(self, active=True):
        """
        _listRunJobs_

        List runjobs, either active or complete.

        :param active: true selects the LoadRunning DAO, false the
            LoadComplete DAO.
        :return: list of RunJob objects.
        """
        listDAO = self.runningJobDAO if active else self.completeJobDAO
        runJobDicts = listDAO.execute(conn=self.getDBConn(),
                                      transaction=self.existingTransaction())
        return self._runJobsFromDicts(runJobDicts)

    def _loadByStatus(self, status, complete='1'):
        """
        _loadByStatus_

        Load jobs by status.

        :raises BossAirException: if ``status`` is not among the states of
            the loaded plugins.
        """
        if status not in self.states:
            msg = "Asked to load by status %s which is not loaded\n" % (status)
            msg += "This indicates that the wrong plugins are loaded\n"
            logging.error(msg)
            raise BossAirException(msg)

        loadJobs = self.loadJobsDAO.execute(status=status,
                                            complete=complete,
                                            conn=self.getDBConn(),
                                            transaction=self.existingTransaction())
        return self._runJobsFromDicts(loadJobs)

    def _loadByID(self, jobs):
        """
        _loadByID_

        Load by running Job ID.
        """
        loadJobsDAO = self.daoFactory(classname="LoadByID")
        loadJobs = loadJobsDAO.execute(jobs=jobs, conn=self.getDBConn(),
                                       transaction=self.existingTransaction())
        return self._runJobsFromDicts(loadJobs)

    def _updateJobs(self, jobs):
        """
        _updateJobs_

        Update the job entries in the BossAir database.

        Jobs that carry a non-None 'location' are additionally passed to
        the state machine's recordLocationChange().
        """
        if not jobs:
            # Nothing to do
            return

        existingTransaction = self.beginTransaction()

        self.updateDAO.execute(jobs=jobs, conn=self.getDBConn(),
                               transaction=self.existingTransaction())

        jobsWithLocation = [job for job in jobs if job.get('location') is not None]
        if jobsWithLocation:
            self.stateMachine.recordLocationChange(jobsWithLocation)

        self.commitTransaction(existingTransaction)

    def _deleteJobs(self, jobs):
        """
        _deleteJobs_

        Delete the job entries in the BossAir database.
        NOTE: only used by unit tests
        """
        if not jobs:
            # Nothing to do
            return

        idList = [x['id'] for x in jobs]

        existingTransaction = self.beginTransaction()

        self.deleteDAO.execute(jobs=idList, conn=self.getDBConn(),
                               transaction=self.existingTransaction())

        self.commitTransaction(existingTransaction)

    def loadByWMBS(self, wmbsJobs):
        """
        _loadByWMBS_

        Load BossAir info based on wmbs Jobs.

        :return: list of RunJob objects; an error is logged for every
            input job that could not be loaded.
        """
        if not wmbsJobs:
            return []

        jobList = self.loadByWMBSDAO.execute(jobs=wmbsJobs, conn=self.getDBConn(),
                                             transaction=self.existingTransaction())

        loadedJobs = []
        for job in jobList:
            rj = RunJob()
            rj.buildFromJob(job)
            loadedJobs.append(rj)

        if len(loadedJobs) != len(wmbsJobs):
            logging.error("Could not load all jobs in BossAir for WMBS input!")
            # A set keeps the membership test below O(1) per job
            loadedIDs = set(x['jobid'] for x in loadedJobs)
            for job in wmbsJobs:
                if job['id'] not in loadedIDs:
                    logging.error("Failed to retrieve wmbs_id %i and WMBS job info: %s", job['id'], job)

        return loadedJobs

    def check(self):
        """
        _check_

        Perform checks of critical components, i.e. proxy validation, etc.

        Does nothing unless ``self.checkProxy`` is set. Otherwise runs
        ``voms-proxy-info`` (with ``--file <cert>`` when a certificate is
        configured) and inspects the reported time left.

        :raises BossAirException: "Missing Proxy" when no time-left field is
            found in the output, "Proxy Expired" when it reads 0:00:00.
        """
        if not self.checkProxy:
            return

        import re  # local import: only the proxy check needs it

        # Build an argument list: a space-joined string passed to Popen
        # without a shell is taken as the program name on POSIX.
        command = ['voms-proxy-info']
        if self.cert is not None and self.cert != '':
            command.extend(['--file', self.cert])

        pipe = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        output, dummyerr = pipe.communicate()
        if isinstance(output, bytes):
            # communicate() yields bytes on Python 3; decode before matching
            output = output.decode('utf-8', 'replace')

        # NOTE(review): the previous code split on the literal "timeleft :".
        # Whitespace around the colon is matched loosely here so that column
        # padding in the tool's output cannot break the check - confirm
        # against the deployed voms-proxy-info.
        match = re.search(r"timeleft\s*:\s*(\S+)", output)
        if match is None:
            raise BossAirException("Missing Proxy", output.strip())

        timeLeft = match.group(1)
        if timeLeft == "0:00:00":
            raise BossAirException("Proxy Expired", timeLeft)

    def submit(self, jobs, info=None):
        """
        _submit_

        Submit jobs using the plugin.

        Requires both plugin name and workflow user from submitter.

        Deals internally in RunJob objects, but interfaces
        to the outside with WMBS Job analogs.

        Jobs naming a plugin that is not loaded are logged and ignored;
        they appear in neither returned list.

        :return: (successes, failures) as lists of WMBS jobs.
        :raises BossAirException: on an unhandled plugin or database error.
        """
        self.check()

        successJobs = []
        failureJobs = []

        # TODO: Add plugin and user to input via JobSubmitter
        # IMPORTANT IMPORTANT IMPORTANT

        # Put job into RunJob format
        pluginDict = {}
        for job in jobs:
            rj = RunJob()
            rj.buildFromJob(job=job)
            if not job.get('location', False):
                rj['location'] = job.get('custom', {}).get('location', None)
            pluginDict.setdefault(rj['plugin'], []).append(rj)
            # Can't add to the cache in submit()
            # It's NOT the same bossAir instance
            # self.jobs.append(rj)

        # Iterate over a snapshot of the keys and pop one batch at a time.
        # Clearing the whole dict inside the loop would drop the jobs of
        # every plugin after the first (and raises on Python 3).
        for plugin in list(pluginDict):
            jobsToSubmit = pluginDict.pop(plugin)
            if plugin not in self.plugins:
                # Then we have a non-existant plugin
                msg = "CRITICAL ERROR: Non-existant plugin!\n"
                msg += "Given a plugin %s that we don't have access to.\n" % (plugin)
                msg += "Ignoring the jobs for this plugin for now"
                logging.error(msg)
                continue

            try:
                pluginInst = self.plugins[plugin]
                logging.debug("About to submit %i jobs to plugin %s", len(jobsToSubmit), plugin)
                localSuccess, localFailure = pluginInst.submit(jobs=jobsToSubmit, info=info)
                for job in localSuccess:
                    successJobs.append(job.buildWMBSJob())
                for job in localFailure:
                    failureJobs.append(job.buildWMBSJob())
            except WMException:
                raise
            except Exception as ex:
                msg = "Unhandled exception while submitting jobs to plugin: %s\n" % plugin
                msg += str(ex)
                logging.error(msg)
                logging.debug("Jobs being submitted: %s\n", jobsToSubmit)
                logging.debug("Job info: %s\n", info)
                raise BossAirException(msg)
            finally:
                # make sure we release the memory of this batch
                del jobsToSubmit[:]

        # Create successful jobs in BossAir
        try:
            logging.debug("About to create %i new jobs in BossAir", len(successJobs))
            self.createNewJobs(wmbsJobs=successJobs)
        except WMException:
            raise
        except Exception as ex:
            msg = "Unhandled error in creation of %i new jobs.\n" % len(successJobs)
            msg += str(ex)
            logging.error(msg)
            logging.debug("Job: %s", successJobs)
            raise BossAirException(msg)

        return successJobs, failureJobs

    def track(self, runJobIDs=None, wmbsIDs=None):
        """
        _track_

        Track all running jobs.
        Load job info from the cache (it should be there since we submitted the job)

        OPTIONAL: You can submit a list of jobs to check, based either on wmbsIDs
        or on runjobIDs. This takes a list of integer IDs.

        :return: list of WMBS jobs for the jobs the plugins report as
            running, each carrying its 'globalState'.
        :raises BossAirException: if a job names a plugin that is not loaded
            or a plugin raises an unhandled exception.
        """
        jobsToChange = []
        jobsToComplete = []
        jobsToReturn = []
        returnList = []
        jobsToTrack = {}

        runningJobs = self._listRunJobs(active=True)

        # Build filtered lists rather than removing from the list being
        # iterated, which silently skips the element after each removal.
        if runJobIDs:
            runningJobs = [job for job in runningJobs if job['id'] in runJobIDs]
        if wmbsIDs:
            runningJobs = [job for job in runningJobs if job['jobid'] in wmbsIDs]

        if not runningJobs:
            # Then we have no running jobs
            return returnList

        logging.info("About to start building running jobs")
        loadedJobs = self._buildRunningJobsFromRunJobs(runJobs=runningJobs)
        logging.info("About to look for %i loadedJobs.", len(loadedJobs))

        for runningJob in loadedJobs:
            jobsToTrack.setdefault(runningJob['plugin'], []).append(runningJob)

        for plugin in jobsToTrack:
            if plugin not in self.plugins:
                msg = "Jobs tracking with non-existant plugin %s\n" % (plugin)
                msg += "They were submitted but can't be tracked?\n"
                msg += "That's too strange to continue\n"
                logging.error(msg)
                raise BossAirException(msg)
            try:
                # Then we send them to the plugins
                # Should give you a list of jobs to change and jobs to complete
                pluginInst = self.plugins[plugin]
                localRunning, localChanges, localCompletes = pluginInst.track(jobs=jobsToTrack[plugin])
                jobsToReturn.extend(localRunning)
                jobsToChange.extend(localChanges)
                jobsToComplete.extend(localCompletes)
                logging.info("Executing/changing/completing %i/%i/%i jobs in plugin %s.",
                             len(localRunning), len(localChanges), len(localCompletes), plugin)
            except WMException:
                raise
            except Exception as ex:
                msg = "Unhandled exception while tracking jobs for plugin %s!\n" % plugin
                msg += str(ex)
                logging.error(msg)
                logging.debug("JobsToTrack: %s", jobsToTrack[plugin])
                raise BossAirException(msg)

        logging.info("About to change %i jobs", len(jobsToChange))
        logging.debug("JobsToChange: %s", jobsToChange)
        logging.info("About to complete %i jobs", len(jobsToComplete))
        logging.debug("JobsToComplete: %s", jobsToComplete)

        self._updateJobs(jobs=jobsToChange)
        self._complete(jobs=jobsToComplete)

        # We should have a globalState variable for changed jobs
        # from the plugin
        # Return that to the calling function
        for rj in jobsToReturn:
            job = rj.buildWMBSJob()
            job['globalState'] = rj['globalState']
            returnList.append(job)

        return returnList

    def _complete(self, jobs):
        """
        _complete_

        Complete jobs using plugin functions.
        Requires jobs in RunJob format.

        The jobs are marked complete in the database even when a plugin's
        complete() call fails.

        :raises BossAirException: on an unhandled plugin error.
        """
        if not jobs:
            return

        # We should be insulated from bad plugins by track()
        jobsToComplete = {}
        for job in jobs:
            jobsToComplete.setdefault(job['plugin'], []).append(job)

        try:
            for plugin in jobsToComplete:
                self.plugins[plugin].complete(jobsToComplete[plugin])
        except WMException:
            raise
        except Exception as ex:
            msg = "Exception while completing jobs!\n"
            msg += str(ex)
            logging.error(msg)
            logging.debug("JobsToComplete: %s", jobsToComplete)
            raise BossAirException(msg)
        finally:
            # If the complete code fails, label the jobs as finished anyway
            # We want to avoid cyclic repetition of failed jobs
            # If they don't have a FWJR, the Accountant will catch it.
            self._completeKill(jobs)

    def _completeKill(self, jobs):
        """
        __completeKill_

        Mark jobs killed in BossAir as completed.
        Requires jobs in RunJob format.
        """
        if not jobs:
            return

        idsToComplete = [job['id'] for job in jobs]

        existingTransaction = self.beginTransaction()

        self.completeDAO.execute(jobs=idsToComplete, conn=self.getDBConn(),
                                 transaction=self.existingTransaction())

        self.commitTransaction(existingTransaction)

    def getComplete(self):
        """
        _getComplete_

        The tracker should call this: It's only
        interested in the jobs that are completed.

        :return: list of WMBS jobs built from the completed RunJobs.
        """
        return [rj.buildWMBSJob() for rj in self._listRunJobs(active=False)]

    def _writeKillReport(self, job, killMsg, errorCode):
        """
        _writeKillReport_

        Record a JobKilled error in the job's framework job report.

        Nothing is written when the job lacks 'cache_dir' or 'retry_count',
        or when the cache directory does not exist. An existing non-empty
        report is loaded first so the error is added to it. A failure to
        save the report is logged as a warning and not raised.
        """
        if job.get('cache_dir') is None or job.get('retry_count') is None:
            return

        # Try to save an error report as the jobFWJR
        if not os.path.isdir(job['cache_dir']):
            # Then we have a bad cache directory
            logging.error("Could not write a kill FWJR due to non-existant cache_dir for job %i\n", job['id'])
            logging.debug("cache_dir: %s\n", job['cache_dir'])
            return

        reportName = os.path.join(job['cache_dir'], 'Report.%i.pkl' % job['retry_count'])
        errorReport = Report()
        if os.path.exists(reportName) and os.path.getsize(reportName) > 0:
            # Then there's already a report there. Add messages
            errorReport.load(reportName)

        # Build a better job message
        if killMsg:
            reportedMsg = killMsg
        else:
            reportedMsg = WM_JOB_ERROR_CODES[errorCode]
        reportedMsg += '\n Job last known status was: %s' % job.get('globalState', 'Unknown')
        errorReport.addError("JobKilled", errorCode, "JobKilled", reportedMsg)
        try:
            errorReport.save(filename=reportName)
        except IOError as ioe:
            logging.warning('Cannot write report %s because of %s', reportName, ioe)

    def kill(self, jobs, workflowName=None, killMsg=None, errorCode=71300):
        """
        _kill_

        Kill jobs using plugin functions:

        Only active jobs (status = 1) will be killed.
        If workflowName is given, then kill all its jobs in one shot.
        An optional killMsg can be sent; this will be written
        into the job FWJR. The errorCode will be the one specified
        and if no killMsg is provided then a standard message associated with
        the exit code will be used.
        If a previous FWJR exists, this error will be appended to it.

        :raises BossAirException: if a job names a plugin that is not loaded
            or a plugin raises an unhandled exception. A RuntimeError from
            the plugin is only logged, so the jobs are retried later.
        """
        if not jobs:
            return

        jobsToKill = {}

        # Now get a list of which jobs are in the batch system
        # only kill jobs present there
        loadedJobs = self._buildRunningJobs(wmbsJobs=jobs)

        for runningJob in loadedJobs:
            jobsToKill.setdefault(runningJob['plugin'], []).append(runningJob)

        for plugin in jobsToKill:
            if plugin not in self.plugins:
                msg = "Jobs tracking with non-existant plugin %s\n" % (plugin)
                msg += "They were submitted but can't be tracked?\n"
                msg += "That's too strange to continue\n"
                logging.error(msg)
                raise BossAirException(msg)

            # Then we send them to the plugins
            try:
                pluginInst = self.plugins[plugin]
                if workflowName:
                    # jobs are completed regardless whether the kill succeeded or not
                    self._completeKill(jobs=jobsToKill[plugin])
                    pluginInst.killWorkflowJobs(workflow=workflowName)
                else:
                    # raise an exception if it fails to kill jobs, such that the same
                    # jobs are retried again in the next cycle
                    pluginInst.kill(jobs=jobsToKill[plugin], raiseEx=True)
                    self._completeKill(jobs=jobsToKill[plugin])

                # Register the killed jobs
                for job in jobsToKill[plugin]:
                    self._writeKillReport(job, killMsg, errorCode)
            except RuntimeError:
                logging.warning("Plugin failed to remove jobs. It will be retried in the next cycle.")
            except WMException:
                raise
            except Exception as ex:
                msg = "Unhandled exception while calling kill method for plugin %s\n" % plugin
                msg += str(ex)
                logging.error(msg)
                logging.debug("Interrupted while killing following jobs: %s\n", jobsToKill[plugin])
                raise BossAirException(msg)

    def update(self, jobs):
        """
        _update_

        Overwrite the database with whatever you put into
        this function.
        """
        runJobs = self._buildRunningJobs(wmbsJobs=jobs)

        self._updateJobs(jobs=runJobs)

    def monitor(self, commonState=True):
        """
        _monitor_

        Initiate the call to the monitoring DAO.
        This should not be called by the standard Submitter/Status/Tracker
        system. It is meant for outside calling.

        :return: whatever the JobStatusForMonitoring DAO returns.
        """
        results = self.monitorDAO.execute(commonState, conn=self.getDBConn(),
                                          transaction=self.existingTransaction())

        return results

    def updateJobInformation(self, workflow, task, **kwargs):
        """
        _updateJobInformation_

        Update the information of jobs in a particular workflow and task;
        the data will be updated according to the keyword arguments,
        which will be interpreted by the individual plugins accordingly.

        :raises BossAirException: on an unhandled plugin error.
        """
        for plugin in self.plugins:
            try:
                pluginInst = self.plugins[plugin]
                pluginInst.updateJobInformation(workflow, task, **kwargs)
            except WMException:
                raise
            except Exception as ex:
                msg = "Unhandled exception while calling update method for plugin %s\n" % plugin
                msg += str(ex)
                logging.error(msg)
                raise BossAirException(msg)

    def updateSiteInformation(self, jobs, siteName, excludeSite):
        """
        _updateSiteInformation_

        Modify condor classAd for all Idle jobs for a site if it has gone Down, Draining or Aborted.
        Kill all jobs if the site is the only site for the job.

        :return: concatenation of the job lists returned by each plugin
            (plugins returning None contribute nothing).
        :raises BossAirException: on an unhandled plugin error.
        """
        jobkill = []
        for plugin in self.plugins:
            try:
                pluginInst = self.plugins[plugin]
                tempjoblist = pluginInst.updateSiteInformation(jobs, siteName, excludeSite)
                if tempjoblist is not None:
                    jobkill.extend(tempjoblist)
            except WMException:
                raise
            except Exception as ex:
                msg = "Unhandled exception while calling update method for plugin %s\n" % plugin
                msg += str(ex)
                logging.error(msg)
                raise BossAirException(msg)

        return jobkill

    def _buildRunningJobsFromRunJobs(self, runJobs):
        """
        _buildRunningJobsFromRunJobs_

        Same as _buildRunningJobs_, but taking runJobs as input.

        Each job loaded by ID is matched to the input RunJob with the same
        'id'; fields that are None on the input job are filled from the
        loaded one. Loaded jobs without a matching input job are logged
        and skipped.

        :return: list of the (updated) input RunJob objects, in the order
            the jobs were loaded.
        """
        finalJobs = []

        loadedJobs = self._loadByID(jobs=runJobs)

        # Index the input once; setdefault keeps the first job per id,
        # which is the one a linear scan would have found.
        runJobsByID = {}
        for rj in runJobs:
            runJobsByID.setdefault(rj['id'], rj)

        for loadJob in loadedJobs:
            runJob = runJobsByID.get(loadJob['id'])
            if runJob is None:
                logging.warning("Loaded BossAir job %s has no matching input RunJob; skipping it",
                                loadJob['id'])
                continue

            # We should have two instances of the job
            for key in runJob.keys():
                # Fill one from the other
                # runJob, being most recent, should be on top
                if runJob[key] is None:
                    runJob[key] = loadJob.get(key, None)

            finalJobs.append(runJob)

        return finalJobs

    def _buildRunningJobs(self, wmbsJobs):
        """
        _buildRunningJobs_

        Build running jobs by loading information from the database and
        compiling it into a runJob object. This overwrites any information
        from the database with the info from the WMBS Job.
        """
        runJobsLoaded = self.loadByWMBS(wmbsJobs=wmbsJobs)

        return runJobsLoaded
def testUpdateLocation(self):
    """
    _testUpdateLocation_

    Verify that a job's location can be changed through the state
    machine: two jobs are propagated to "executing" at site1 and site2,
    then the first one is moved to site2 with recordLocationChange().
    """
    stateChanger = ChangeState(self.config, "changestate_t")

    def latestLocation(job):
        # Location stored in the most recent state transition of the
        # job's couch document.
        jobDoc = stateChanger.jobsdatabase.document(job["couch_record"])
        newestKey = max(jobDoc["states"].keys())
        newestTransition = jobDoc["states"][newestKey]
        return newestTransition["location"]

    # Register the two sites used below
    newLocation = self.daoFactory(classname="Locations.New")
    newLocation.execute("site1", seName="somese.cern.ch")
    newLocation.execute("site2", seName="somese2.cern.ch")

    # Workflow, fileset and subscription
    workflow = Workflow(spec="spec.xml", owner="Steve",
                        name="wf001", task=self.taskName)
    workflow.create()

    fileset = Fileset(name="TestFileset")
    fileset.create()

    subscription = Subscription(fileset=fileset,
                                workflow=workflow,
                                split_algo="FileBased")
    subscription.create()

    # Two input files, both available at both storage elements
    fileA = File(lfn="SomeLFNA", events=1024, size=2048,
                 locations=set(["somese.cern.ch", "somese2.cern.ch"]))
    fileB = File(lfn="SomeLFNB", events=1025, size=2049,
                 locations=set(["somese.cern.ch", "somese2.cern.ch"]))
    inputFiles = (fileA, fileB)
    for inputFile in inputFiles:
        inputFile.create()
    for inputFile in inputFiles:
        fileset.addFile(inputFile)
    fileset.commit()

    # One job per file
    jobFactory = SplitterFactory()(package="WMCore.WMBS",
                                   subscription=subscription)
    jobGroup = jobFactory(files_per_job=1)[0]

    assert len(jobGroup.jobs) == 2, \
        "Error: Splitting should have created two jobs."

    jobA = jobGroup.jobs[0]
    jobA["user"] = "******"
    jobA["group"] = "DMWM"
    jobA["taskType"] = "Merge"
    jobA["site_cms_name"] = "site1"

    jobB = jobGroup.jobs[1]
    jobB["user"] = "******"
    jobB["group"] = "DMWM"
    jobB["taskType"] = "Processing"
    jobB["site_cms_name"] = "site2"

    # Walk both jobs through none -> new -> created -> executing
    for newState, oldState in (("new", "none"),
                               ("created", "new"),
                               ("executing", "created")):
        stateChanger.propagate([jobA, jobB], newState, oldState)

    self.assertEqual(latestLocation(jobA), "site1")
    self.assertEqual(latestLocation(jobB), "site2")

    # Move the job with id 1 to site2 and check the couch document
    stateChanger.recordLocationChange([{'jobid' : 1, 'location' : 'site2'}])
    self.assertEqual(latestLocation(jobA), "site2")

    # Both jobs should now be reported at site2
    getLocation = self.daoFactory(classname="Jobs.GetLocation")
    for located in getLocation.execute([{'jobid' : 1}, {'jobid' : 2}]):
        self.assertEqual(located['site_name'], 'site2')

    return
class BossAirAPI(WMConnectionBase): """ _BossAirAPI_ The API layer for the BossAir prototype """ def __init__(self, config, insertStates=False): """ __init__ BossAir should work with the standard config structure of WMAgent """ WMConnectionBase.__init__(self, daoPackage="WMCore.BossAir") myThread = threading.currentThread() self.config = config self.plugins = {} self.states = [] self.jobs = [] self.pluginDir = config.BossAir.pluginDir # This is the default state jobs are created in self.newState = getattr(config.BossAir, 'newState', 'New') # Get any proxy info self.checkProxy = getattr(config.BossAir, 'checkProxy', False) self.cert = getattr(config.BossAir, 'cert', None) self.stateMachine = ChangeState(self.config) # Create a factory to load plugins self.pluginFactory = WMFactory("plugins", self.pluginDir) self.daoFactory = DAOFactory(package="WMCore.BossAir", logger=myThread.logger, dbinterface=myThread.dbi) self.deleteDAO = self.daoFactory(classname="DeleteJobs") self.stateDAO = self.daoFactory(classname="NewState") self.loadByWMBSDAO = self.daoFactory(classname="LoadByWMBSID") self.updateDAO = self.daoFactory(classname="UpdateJobs") self.newJobDAO = self.daoFactory(classname="NewJobs") self.runningJobDAO = self.daoFactory(classname="LoadRunning") self.completeJobDAO = self.daoFactory(classname="LoadComplete") self.loadJobsDAO = self.daoFactory(classname="LoadByStatus") self.completeDAO = self.daoFactory(classname="CompleteJob") self.monitorDAO = self.daoFactory(classname="JobStatusForMonitoring") self.states = None self.loadPlugin(insertStates) return def loadPlugin(self, insertStates): """ _loadPlugin_ Actually load the plugin and init the database """ states = set() for name in self.config.BossAir.pluginNames: self.plugins[name] = self.pluginFactory.loadObject( classname=name, args=self.config) for state in self.plugins[name].states: states.add(state) if self.newState not in states: states.add(self.newState) if insertStates: # Add states only if we're not # 
doing a secondary instantiation self.addStates(states=states) self.states = states return def addStates(self, states): """ _addStates_ Add States to bl_status table. Meant to be done only once in an agent lifetime. """ existingTransaction = self.beginTransaction() self.stateDAO.execute(states=states, conn=self.getDBConn(), transaction=self.existingTransaction()) self.commitTransaction(existingTransaction) return def createNewJobs(self, wmbsJobs): """ _createNewJobs_ Create new jobs in the BossAir database Accepts WMBS Jobs """ existingTransaction = self.beginTransaction() jobsToCreate = [] # First turn wmbsJobs into runJobs for wmbsJob in wmbsJobs: runJob = RunJob() runJob.buildFromJob(job=wmbsJob) if runJob.get('status') not in self.states: runJob['status'] = self.newState jobsToCreate.append(runJob) # Next insert them into the database self.newJobDAO.execute(jobs=jobsToCreate, conn=self.getDBConn(), transaction=self.existingTransaction()) self.commitTransaction(existingTransaction) return def _listRunJobs(self, active=True): """ _listRunJobs_ List runjobs, either active or complete """ if active: runJobDicts = self.runningJobDAO.execute( conn=self.getDBConn(), transaction=self.existingTransaction()) else: runJobDicts = self.completeJobDAO.execute( conn=self.getDBConn(), transaction=self.existingTransaction()) runJobs = [] for jDict in runJobDicts: rj = RunJob() rj.update(jDict) runJobs.append(rj) return runJobs def _loadByStatus(self, status, complete='1'): """ _loadByStatus_ Load jobs by status """ if status not in self.states: msg = "Asked to load by status %s which is not loaded\n" % (status) msg += "This indicates that the wrong plugins are loaded\n" logging.error(msg) raise BossAirException(msg) loadJobs = self.loadJobsDAO.execute( status=status, complete=complete, conn=self.getDBConn(), transaction=self.existingTransaction()) statusJobs = [] for jDict in loadJobs: rj = RunJob() rj.update(jDict) statusJobs.append(rj) return statusJobs def _loadByID(self, 
jobs): """ _loadByID_ Load by running Job ID """ loadJobsDAO = self.daoFactory(classname="LoadByID") loadJobs = loadJobsDAO.execute(jobs=jobs, conn=self.getDBConn(), transaction=self.existingTransaction()) loadedJobs = [] for jDict in loadJobs: rj = RunJob() rj.update(jDict) loadedJobs.append(rj) return loadedJobs def _updateJobs(self, jobs): """ _updateJobs_ Update the job entries in the BossAir database """ if len(jobs) < 1: # Nothing to do return existingTransaction = self.beginTransaction() self.updateDAO.execute(jobs=jobs, conn=self.getDBConn(), transaction=self.existingTransaction()) jobsWithLocation = [ job for job in jobs if job.get('location') is not None ] if jobsWithLocation: self.stateMachine.recordLocationChange(jobsWithLocation) self.commitTransaction(existingTransaction) return def _deleteJobs(self, jobs): """ _deleteJobs_ Delete the job entries in the BossAir database NOTE: only used by unit tests """ if len(jobs) < 1: # Nothing to do return idList = [x['id'] for x in jobs] existingTransaction = self.beginTransaction() self.deleteDAO.execute(jobs=idList, conn=self.getDBConn(), transaction=self.existingTransaction()) self.commitTransaction(existingTransaction) return def loadByWMBS(self, wmbsJobs): """ _loadByWMBS_ Load BossAir info based on wmbs Jobs. """ if len(wmbsJobs) < 1: return [] jobList = self.loadByWMBSDAO.execute( jobs=wmbsJobs, conn=self.getDBConn(), transaction=self.existingTransaction()) loadedJobs = [] for job in jobList: rj = RunJob() rj.buildFromJob(job) loadedJobs.append(rj) if len(loadedJobs) != len(wmbsJobs): logging.error("Could not load all jobs in BossAir for WMBS input!") idList = [x['jobid'] for x in loadedJobs] for job in wmbsJobs: if job['id'] not in idList: logging.error( "Failed to retrieve wmbs_id %i and WMBS job info: %s", job['id'], job) return loadedJobs def check(self): """ _check_ Perform checks of critical components, i.e. proxy validation, etc. 
""" if self.checkProxy: command = 'voms-proxy-info' if self.cert is not None and self.cert != '': command += ' --file ' + self.cert pipe = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) output, dummyerr = pipe.communicate() try: output = output.split("timeleft :")[1].strip() except IndexError: raise BossAirException("Missing Proxy", output.strip()) if output == "0:00:00": raise BossAirException("Proxy Expired", output.strip()) return def submit(self, jobs, info=None): """ _submit_ Submit jobs using the plugin Requires both plugin name and workflow user from submitter Deals internally in RunJob objects, but interfaces to the outside with WMBS Job analogs Returns (successes, failures) """ self.check() successJobs = [] failureJobs = [] # TODO: Add plugin and user to input via JobSubmitter # IMPORTANT IMPORTANT IMPORTANT # Put job into RunJob format pluginDict = {} for job in jobs: rj = RunJob() rj.buildFromJob(job=job) if not job.get('location', False): rj['location'] = job.get('custom', {}).get('location', None) plugin = rj['plugin'] pluginDict.setdefault(plugin, []) pluginDict[plugin].append(rj) # Can't add to the cache in submit() # It's NOT the same bossAir instance # self.jobs.append(rj) for plugin in pluginDict.keys(): if plugin not in self.plugins.keys(): # Then we have a non-existant plugin msg = "CRITICAL ERROR: Non-existant plugin!\n" msg += "Given a plugin %s that we don't have access to.\n" % ( plugin) msg += "Ignoring the jobs for this plugin for now" logging.error(msg) continue try: pluginInst = self.plugins[plugin] jobsToSubmit = pluginDict.get(plugin, []) logging.debug("About to submit %i jobs to plugin %s", len(jobsToSubmit), plugin) localSuccess, localFailure = pluginInst.submit( jobs=jobsToSubmit, info=info) for job in localSuccess: successJobs.append(job.buildWMBSJob()) for job in localFailure: failureJobs.append(job.buildWMBSJob()) except WMException: raise except Exception as ex: msg = "Unhandled exception while 
submitting jobs to plugin: %s\n" % plugin msg += str(ex) logging.error(msg) logging.debug("Jobs being submitted: %s\n", jobsToSubmit) logging.debug("Job info: %s\n", info) raise BossAirException(msg) finally: # make sure we release this memory pluginDict.clear() del jobsToSubmit[:] # Create successful jobs in BossAir try: logging.debug("About to create %i new jobs in BossAir", len(successJobs)) self.createNewJobs(wmbsJobs=successJobs) except WMException: raise except Exception as ex: msg = "Unhandled error in creation of %i new jobs.\n" % len( successJobs) msg += str(ex) logging.error(msg) logging.debug("Job: %s", successJobs) raise BossAirException(msg) return successJobs, failureJobs def track(self, runJobIDs=None, wmbsIDs=None): """ _track_ Track all running jobs Load job info from the cache (it should be there since we submitted the job) OPTIONAL: You can submit a list of jobs to check, based either on wmbsIDs or on runjobIDs. This takes a list of integer IDs. """ jobsToChange = [] jobsToComplete = [] jobsToReturn = [] returnList = [] jobsToTrack = {} runningJobs = self._listRunJobs(active=True) if runJobIDs: for job in runningJobs: if job['id'] not in runJobIDs: runningJobs.remove(job) if wmbsIDs: for job in runningJobs: if job['jobid'] not in wmbsIDs: runningJobs.remove(job) if len(runningJobs) < 1: # Then we have no running jobs return returnList logging.info("About to start building running jobs") loadedJobs = self._buildRunningJobsFromRunJobs(runJobs=runningJobs) logging.info("About to look for %i loadedJobs.", len(loadedJobs)) for runningJob in loadedJobs: plugin = runningJob['plugin'] if plugin not in jobsToTrack.keys(): jobsToTrack[plugin] = [] jobsToTrack[plugin].append(runningJob) for plugin in jobsToTrack.keys(): if plugin not in self.plugins.keys(): msg = "Jobs tracking with non-existant plugin %s\n" % (plugin) msg += "They were submitted but can't be tracked?\n" msg += "That's too strange to continue\n" logging.error(msg) raise 
BossAirException(msg) try: # Then we send them to the plugins # Should give you a lit of jobs to change and jobs to complete pluginInst = self.plugins[plugin] localRunning, localChanges, localCompletes = pluginInst.track( jobs=jobsToTrack[plugin]) jobsToReturn.extend(localRunning) jobsToChange.extend(localChanges) jobsToComplete.extend(localCompletes) logging.info( "Executing/changing/completing %i/%i/%i jobs in plugin %s.", len(localRunning), len(localChanges), len(localCompletes), plugin) except WMException: raise except Exception as ex: msg = "Unhandled exception while tracking jobs for plugin %s!\n" % plugin msg += str(ex) logging.error(msg) logging.debug("JobsToTrack: %s", jobsToTrack[plugin]) raise BossAirException(msg) logging.info("About to change %i jobs", len(jobsToChange)) logging.debug("JobsToChange: %s", jobsToChange) logging.info("About to complete %i jobs", len(jobsToComplete)) logging.debug("JobsToComplete: %s", jobsToComplete) self._updateJobs(jobs=jobsToChange) self._complete(jobs=jobsToComplete) # We should have a globalState variable for changed jobs # from the plugin # Return that to the calling function for rj in jobsToReturn: job = rj.buildWMBSJob() job['globalState'] = rj['globalState'] returnList.append(job) return returnList def _complete(self, jobs): """ _complete_ Complete jobs using plugin functions Requires jobs in RunJob format """ if len(jobs) < 1: return # We should be insulated from bad plugins by track() jobsToComplete = {} for job in jobs: if job['plugin'] not in jobsToComplete.keys(): jobsToComplete[job['plugin']] = [] jobsToComplete[job['plugin']].append(job) try: for plugin in jobsToComplete.keys(): self.plugins[plugin].complete(jobsToComplete[plugin]) except WMException: raise except Exception as ex: msg = "Exception while completing jobs!\n" msg += str(ex) logging.error(msg) logging.debug("JobsToComplete: %s", jobsToComplete) raise BossAirException(msg) finally: # If the complete code fails, label the jobs as finished anyway 
# We want to avoid cyclic repetition of failed jobs # If they don't have a FWJR, the Accountant will catch it. self._completeKill(jobs) return def _completeKill(self, jobs): """ __completeKill_ Mark jobs killed in BossAir as completed Requires jobs in RunJob format """ if len(jobs) < 1: return idsToComplete = [job['id'] for job in jobs] existingTransaction = self.beginTransaction() self.completeDAO.execute(jobs=idsToComplete, conn=self.getDBConn(), transaction=self.existingTransaction()) self.commitTransaction(existingTransaction) return def getComplete(self): """ _getComplete_ The tracker should call this: It's only interested in the jobs that are completed. """ completeJobs = [] completeRunJobs = self._listRunJobs(active=False) for rj in completeRunJobs: job = rj.buildWMBSJob() completeJobs.append(job) return completeJobs def kill(self, jobs, workflowName=None, killMsg=None, errorCode=71300): """ _kill_ Kill jobs using plugin functions: Only active jobs (status = 1) will be killed. If workflowName is given, then kill all its jobs in one shot. An optional killMsg can be sent; this will be written into the job FWJR. The errorCode will be the one specified and if no killMsg is provided then a standard message associated with the exit code will be used. If a previous FWJR exists, this error will be appended to it. 
""" if not jobs: return jobsToKill = {} # Now get a list of which jobs are in the batch system # only kill jobs present there loadedJobs = self._buildRunningJobs(wmbsJobs=jobs) for runningJob in loadedJobs: plugin = runningJob['plugin'] jobsToKill.setdefault(plugin, []) jobsToKill[plugin].append(runningJob) for plugin in jobsToKill.keys(): if plugin not in self.plugins.keys(): msg = "Jobs tracking with non-existant plugin %s\n" % (plugin) msg += "They were submitted but can't be tracked?\n" msg += "That's too strange to continue\n" logging.error(msg) raise BossAirException(msg) else: # Then we send them to the plugins try: pluginInst = self.plugins[plugin] if workflowName: pluginInst.killWorkflowJobs(workflow=workflowName) else: pluginInst.kill(jobs=jobsToKill[plugin]) # Register the killed jobs for job in jobsToKill[plugin]: if job.get('cache_dir') is None or job.get( 'retry_count') is None: continue # Try to save an error report as the jobFWJR if not os.path.isdir(job['cache_dir']): # Then we have a bad cache directory logging.error( "Could not write a kill FWJR due to non-existant cache_dir for job %i\n", job['id']) logging.debug("cache_dir: %s\n", job['cache_dir']) continue reportName = os.path.join( job['cache_dir'], 'Report.%i.pkl' % job['retry_count']) errorReport = Report() if os.path.exists(reportName) and os.path.getsize( reportName) > 0: # Then there's already a report there. 
Add messages errorReport.load(reportName) # Build a better job message if killMsg: reportedMsg = killMsg else: reportedMsg = WM_JOB_ERROR_CODES[errorCode] reportedMsg += '\n Job last known status was: %s' % job.get( 'globalState', 'Unknown') errorReport.addError("JobKilled", errorCode, "JobKilled", reportedMsg) try: errorReport.save(filename=reportName) except IOError as ioe: logging.warning( 'Cannot write report %s because of %s', reportName, ioe) except WMException: raise except Exception as ex: msg = "Unhandled exception while calling kill method for plugin %s\n" % plugin msg += str(ex) logging.error(msg) logging.debug( "Interrupted while killing following jobs: %s\n", jobsToKill[plugin]) raise BossAirException(msg) finally: # Even if kill fails, complete the jobs self._completeKill(jobs=jobsToKill[plugin]) return def update(self, jobs): """ _update_ Overwrite the database with whatever you put into this function. """ runJobs = self._buildRunningJobs(wmbsJobs=jobs) self._updateJobs(jobs=runJobs) return def monitor(self, commonState=True): """ _monitor_ Initiate the call to the monitoring DAO This should not be called by the standard Submitter/Status/Tracker system. It is meant for outside calling. """ results = self.monitorDAO.execute( commonState, conn=self.getDBConn(), transaction=self.existingTransaction()) return results def updateJobInformation(self, workflow, task, **kwargs): """ _updateJobInformation_ Update the information of jobs in a particular workflow and task, the data will be updated according the keyword arguments which will be interpreted by the individual plugins accordingly. 
""" for plugin in self.plugins.keys(): try: pluginInst = self.plugins[plugin] pluginInst.updateJobInformation(workflow, task, **kwargs) except WMException: raise except Exception as ex: msg = "Unhandled exception while calling update method for plugin %s\n" % plugin msg += str(ex) logging.error(msg) raise BossAirException(msg) return def updateSiteInformation(self, jobs, siteName, excludeSite): """ _updateSiteInformation_ Modify condor classAd for all Idle jobs for a site if it has gone Down, Draining or Aborted. Kill all jobs if the site is the only site for the job. """ jobkill = [] for plugin in self.plugins.keys(): try: pluginInst = self.plugins[plugin] tempjoblist = pluginInst.updateSiteInformation( jobs, siteName, excludeSite) if tempjoblist is not None: jobkill.extend(tempjoblist) except WMException: raise except Exception as ex: msg = "Unhandled exception while calling update method for plugin %s\n" % plugin msg += str(ex) logging.error(msg) raise BossAirException(msg) return jobkill def _buildRunningJobsFromRunJobs(self, runJobs): """ _buildRunningJobsFromRunJobs_ Same as _buildRunningJobs_, but taking runJobs as input """ finalJobs = [] loadedJobs = self._loadByID(jobs=runJobs) for loadJob in loadedJobs: runJob = None for rj in runJobs: if rj['id'] == loadJob['id']: runJob = rj break # We should have two instances of the job for key in runJob.keys(): # Fill one from the other # runJob, being most recent, should be on top if runJob[key] is None: runJob[key] = loadJob.get(key, None) finalJobs.append(runJob) return finalJobs def _buildRunningJobs(self, wmbsJobs): """ _buildRunningJobs_ Build running jobs by loading information from the database and compiling it into a runJob object. This overwrites any information from the database with the info from the WMBS Job """ runJobsLoaded = self.loadByWMBS(wmbsJobs=wmbsJobs) return runJobsLoaded
class BossAirAPI(WMConnectionBase):
    """
    _BossAirAPI_

    The API layer for the BossAir prototype
    """

    def __init__(self, config, noSetup=False):
        """
        __init__

        BossAir should work with the standard config structure of WMAgent

        Reads pluginDir, pluginNames and the optional newState, checkProxy
        and cert settings from config.BossAir, builds the DAOs and loads the
        plugins.  When noSetup is true the plugin states are not inserted
        into the database.
        """
        WMConnectionBase.__init__(self, daoPackage="WMCore.BossAir")

        myThread = threading.current_thread()

        self.config = config
        self.plugins = {}
        self.states = []

        self.jobs = []

        self.pluginDir = config.BossAir.pluginDir
        # This is the default state jobs are created in
        self.newState = getattr(config.BossAir, 'newState', 'New')

        # Get any proxy info
        self.checkProxy = getattr(config.BossAir, 'checkProxy', False)
        self.cert = getattr(config.BossAir, 'cert', None)

        self.stateMachine = ChangeState(self.config)

        # Create a factory to load plugins
        self.pluginFactory = WMFactory("plugins", self.pluginDir)

        self.daoFactory = DAOFactory(package="WMCore.BossAir",
                                     logger=myThread.logger,
                                     dbinterface=myThread.dbi)

        self.deleteDAO = self.daoFactory(classname="DeleteJobs")
        self.stateDAO = self.daoFactory(classname="NewState")
        self.loadByWMBSDAO = self.daoFactory(classname="LoadByWMBSID")
        self.updateDAO = self.daoFactory(classname="UpdateJobs")
        self.newJobDAO = self.daoFactory(classname="NewJobs")
        self.runningJobDAO = self.daoFactory(classname="LoadRunning")
        self.completeJobDAO = self.daoFactory(classname="LoadComplete")
        self.loadJobsDAO = self.daoFactory(classname="LoadByStatus")
        self.completeDAO = self.daoFactory(classname="CompleteJob")
        self.monitorDAO = self.daoFactory(classname="JobStatusForMonitoring")

        self.loadPlugin(noSetup)

        return

    def loadPlugin(self, noSetup=False):
        """
        _loadPlugin_

        Actually load the plugin and init the database

        Loads every plugin named in config.BossAir.pluginNames, collects the
        union of their states plus the default new state, stores it in
        self.states and, unless noSetup is true, inserts it in the database.
        """
        states = set()

        for name in self.config.BossAir.pluginNames:
            self.plugins[name] = self.pluginFactory.loadObject(classname=name,
                                                               args=self.config)
            for state in self.plugins[name].states:
                states.add(state)

        states = list(states)

        if self.newState not in states:
            states.append(self.newState)

        if not noSetup:
            # Add states only if we're not
            # doing a secondary instantiation
            self.addStates(states=states)

        self.states = states

        return

    def addStates(self, states):
        """
        _addStates_

        Add States to bl_status table
        """
        existingTransaction = self.beginTransaction()

        self.stateDAO.execute(states=states, conn=self.getDBConn(),
                              transaction=self.existingTransaction())

        self.commitTransaction(existingTransaction)
        return

    def createNewJobs(self, wmbsJobs):
        """
        _createNewJobs_

        Create new jobs in the BossAir database
        Accepts WMBS Jobs

        Jobs without a (truthy) 'status' get the default new state.
        """
        existingTransaction = self.beginTransaction()

        jobsToCreate = []

        # First turn wmbsJobs into runJobs
        for wmbsJob in wmbsJobs:
            runJob = RunJob()
            runJob.buildFromJob(job=wmbsJob)
            if not runJob.get('status', None):
                runJob['status'] = self.newState
            jobsToCreate.append(runJob)

        # Next insert them into the database
        self.newJobDAO.execute(jobs=jobsToCreate, conn=self.getDBConn(),
                               transaction=self.existingTransaction())

        self.commitTransaction(existingTransaction)

        return

    def _listRunJobs(self, active=True):
        """
        _listRunJobs_

        List runjobs, either active or complete

        Returns a list of RunJob objects built from the rows returned by the
        LoadRunning (active) or LoadComplete (not active) DAO.
        """
        existingTransaction = self.beginTransaction()

        if active:
            runJobDicts = self.runningJobDAO.execute(conn=self.getDBConn(),
                                                     transaction=self.existingTransaction())
        else:
            runJobDicts = self.completeJobDAO.execute(conn=self.getDBConn(),
                                                      transaction=self.existingTransaction())

        runJobs = []
        for jDict in runJobDicts:
            rj = RunJob()
            rj.update(jDict)
            runJobs.append(rj)

        self.commitTransaction(existingTransaction)

        return runJobs

    def _loadByStatus(self, status, complete='1'):
        """
        _loadByStatus_

        Load jobs by status

        Raises BossAirException if status is not one of the loaded states.
        Returns a list of RunJob objects.
        """
        if status not in self.states:
            msg = "Asked to load by status %s which is not loaded\n" % (status)
            msg += "This indicates that the wrong plugins are loaded\n"
            logging.error(msg)
            raise BossAirException(msg)

        existingTransaction = self.beginTransaction()

        loadJobs = self.loadJobsDAO.execute(status=status,
                                            complete=complete,
                                            conn=self.getDBConn(),
                                            transaction=self.existingTransaction())

        statusJobs = []
        for jDict in loadJobs:
            rj = RunJob()
            rj.update(jDict)
            statusJobs.append(rj)

        self.commitTransaction(existingTransaction)

        return statusJobs

    def _loadByID(self, jobs):
        """
        _loadByID_

        Load by running Job ID

        Returns a list of RunJob objects built from the LoadByID DAO rows.
        """
        existingTransaction = self.beginTransaction()

        loadJobsDAO = self.daoFactory(classname="LoadByID")
        loadJobs = loadJobsDAO.execute(jobs=jobs, conn=self.getDBConn(),
                                       transaction=self.existingTransaction())

        loadedJobs = []
        for jDict in loadJobs:
            rj = RunJob()
            rj.update(jDict)
            loadedJobs.append(rj)

        self.commitTransaction(existingTransaction)

        return loadedJobs

    # FIXME : internal function that is unused => remove it ?
    def _completeJobs(self, jobs):
        """
        _completeJobs_

        Complete jobs in the database
        Expects runJob input
        """
        if len(jobs) < 1:
            # Nothing to do
            return

        idList = [x['id'] for x in jobs]

        existingTransaction = self.beginTransaction()

        completeDAO = self.daoFactory(classname="CompleteJob")
        completeDAO.execute(jobs=idList, conn=self.getDBConn(),
                            transaction=self.existingTransaction())

        self.commitTransaction(existingTransaction)

        return

    def _updateJobs(self, jobs):
        """
        _updateJobs_

        Update the job entries in the BossAir database

        Jobs carrying a non-None 'location' are additionally reported to the
        state machine through recordLocationChange().
        """
        if len(jobs) < 1:
            # Nothing to do
            return

        existingTransaction = self.beginTransaction()

        self.updateDAO.execute(jobs=jobs, conn=self.getDBConn(),
                               transaction=self.existingTransaction())

        # Build a real list: on Python 3 filter() returns an iterator that is
        # always truthy, which would defeat the emptiness check below.
        jobsWithLocation = [x for x in jobs if x.get('location') is not None]
        if jobsWithLocation:
            self.stateMachine.recordLocationChange(jobsWithLocation)

        self.commitTransaction(existingTransaction)

        return

    def _deleteJobs(self, jobs):
        """
        _deleteJobs_

        Delete the job entries in the BossAir database
        """
        if len(jobs) < 1:
            # Nothing to do
            return

        idList = [x['id'] for x in jobs]

        existingTransaction = self.beginTransaction()

        self.deleteDAO.execute(jobs=idList, conn=self.getDBConn(),
                               transaction=self.existingTransaction())

        self.commitTransaction(existingTransaction)

        return

    def loadByWMBS(self, wmbsJobs):
        """
        _loadByWMBS_

        Load BossAir info based on wmbs Jobs.

        Returns a list of RunJob objects.  If fewer jobs are loaded than were
        requested, every missing WMBS id is logged as an error; no exception
        is raised.
        """
        if len(wmbsJobs) < 1:
            return []

        existingTransaction = self.beginTransaction()

        jobList = self.loadByWMBSDAO.execute(jobs=wmbsJobs, conn=self.getDBConn(),
                                             transaction=self.existingTransaction())

        loadedJobs = []
        for job in jobList:
            rj = RunJob()
            rj.update(job)
            loadedJobs.append(rj)

        self.commitTransaction(existingTransaction)

        if len(loadedJobs) != len(wmbsJobs):
            logging.error("Mismatch in WMBS load: Some requested jobs not found!")
            idList = [x['jobid'] for x in loadedJobs]
            for job in wmbsJobs:
                if job['id'] not in idList:
                    logging.error("Could not retrieve job with WMBS ID %i from BossAir database",
                                  job['id'])

        return loadedJobs

    def check(self):
        """
        _check_

        Perform checks of critical components, i.e. proxy validation, etc.

        Only does something when checkProxy is enabled: runs voms-proxy-info
        (with --file <cert> when a cert is configured) and inspects the
        "timeleft" field of its output.

        Raises BossAirException("Missing Proxy", ...) when no timeleft field
        is found and BossAirException("Proxy Expired", ...) when it reads
        "0:00:00".
        """
        if self.checkProxy:
            # Pass an argument list: without shell=True a single string
            # containing arguments is taken as the program name on POSIX.
            command = ['voms-proxy-info']
            if self.cert is not None and self.cert != '':
                command.extend(['--file', self.cert])
            # universal_newlines gives text output on Python 2 and 3
            pipe = subprocess.Popen(command, stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE,
                                    universal_newlines=True)
            output, _ = pipe.communicate()

            # Tolerate any amount of whitespace between the label and the colon
            _, found, remainder = output.partition("timeleft")
            remainder = remainder.lstrip()
            if not found or not remainder.startswith(":"):
                raise BossAirException("Missing Proxy", output.strip())

            timeleft = remainder[1:].strip()
            if timeleft == "0:00:00":
                raise BossAirException("Proxy Expired", timeleft)

        return

    def submit(self, jobs, info=None):
        """
        _submit_

        Submit jobs using the plugin

        Requires both plugin name and workflow user from submitter

        Deals internally in RunJob objects, but interfaces
        to the outside with WMBS Job analogs

        Jobs whose plugin is not loaded are logged and ignored.  Jobs the
        plugins report as successfully submitted are created in the BossAir
        database.

        Returns (successes, failures), both lists of WMBS jobs.
        Raises BossAirException for any non-WMException error raised during
        submission or job creation.
        """
        self.check()

        successJobs = []
        failureJobs = []

        # TODO: Add plugin and user to input via JobSubmitter
        # IMPORTANT IMPORTANT IMPORTANT

        # Put job into RunJob format
        runJobs = []
        for job in jobs:
            rj = RunJob()
            rj.buildFromJob(job=job)
            if not job.get('location', False):
                rj['location'] = job.get('custom', {}).get('location', None)
            runJobs.append(rj)
            # Can't add to the cache in submit()
            # It's NOT the same bossAir instance
            # self.jobs.append(rj)

        # Now figure out which plugin we need
        pluginDict = {}
        for job in runJobs:
            pluginDict.setdefault(job['plugin'], []).append(job)

        for plugin, jobsToSubmit in pluginDict.items():
            if plugin not in self.plugins:
                # Then we have a non-existant plugin
                msg = "CRITICAL ERROR: Non-existant plugin!\n"
                msg += "Given a plugin %s that we don't have access to.\n" % (plugin)
                msg += "Ignoring the jobs for this plugin for now"
                logging.error(msg)
                continue

            try:
                pluginInst = self.plugins[plugin]
                logging.debug("About to submit %i jobs to plugin %s",
                              len(jobsToSubmit), plugin)
                localSuccess, localFailure = pluginInst.submit(jobs=jobsToSubmit,
                                                               info=info)
                for job in localSuccess:
                    successJobs.append(job.buildWMBSJob())
                for job in localFailure:
                    failureJobs.append(job.buildWMBSJob())
            except WMException:
                raise
            except Exception as ex:
                msg = "Unhandled exception while submitting jobs to plugin: %s\n" % plugin
                msg += str(ex)
                logging.error(msg)
                logging.debug("Jobs being submitted: %s\n", jobsToSubmit)
                logging.debug("Job info: %s\n", info)
                raise BossAirException(msg)

        # Create successful jobs in BossAir
        try:
            logging.debug("About to create %i new jobs in BossAir", len(successJobs))
            self.createNewJobs(wmbsJobs=successJobs)
        except WMException:
            raise
        except Exception as ex:
            msg = "Unhandled error in creation of %i new jobs.\n" % len(successJobs)
            msg += str(ex)
            logging.error(msg)
            logging.debug("Job: %s", successJobs)
            raise BossAirException(msg)

        return successJobs, failureJobs
def testUpdateLocation(self):
    """
    _testUpdateLocation_

    Check that we can update the location of a job through the state machine.

    Two jobs are created at site1 and site2 and propagated up to the
    'executing' state; the location stored in the latest couch transition
    of each job is checked.  The first job is then moved to site2 with
    recordLocationChange() and both the couch document and the
    Jobs.GetLocation DAO are checked for the new site.
    """
    change = ChangeState(self.config, "changestate_t")

    def latestLocation(job):
        # Location recorded in the most recent state transition of the job
        jobDoc = change.jobsdatabase.document(job["couch_record"])
        lastKey = max(jobDoc["states"].keys())
        return jobDoc["states"][lastKey]["location"]

    locationAction = self.daoFactory(classname="Locations.New")
    locationAction.execute("site1", pnn="T2_CH_CERN")
    locationAction.execute("site2", pnn="T1_US_FNAL_Disk")

    testWorkflow = Workflow(spec=self.specUrl, owner="Steve",
                            name="wf001", task=self.taskName)
    testWorkflow.create()
    testFileset = Fileset(name="TestFileset")
    testFileset.create()
    testSubscription = Subscription(fileset=testFileset,
                                    workflow=testWorkflow,
                                    split_algo="FileBased")
    testSubscription.create()

    testFileA = File(lfn="SomeLFNA", events=1024, size=2048,
                     locations={"T2_CH_CERN", "T1_US_FNAL_Disk"})
    testFileB = File(lfn="SomeLFNB", events=1025, size=2049,
                     locations={"T2_CH_CERN", "T1_US_FNAL_Disk"})
    testFileA.create()
    testFileB.create()

    testFileset.addFile(testFileA)
    testFileset.addFile(testFileB)
    testFileset.commit()

    splitter = SplitterFactory()
    jobFactory = splitter(package="WMCore.WMBS",
                          subscription=testSubscription)
    jobGroup = jobFactory(files_per_job=1)[0]

    # Use the TestCase assertion: a bare assert is stripped under -O
    self.assertEqual(len(jobGroup.jobs), 2,
                     "Error: Splitting should have created two jobs.")

    testJobA = jobGroup.jobs[0]
    testJobA["user"] = "******"
    testJobA["group"] = "DMWM"
    testJobA["taskType"] = "Merge"
    testJobA["site_cms_name"] = "site1"

    testJobB = jobGroup.jobs[1]
    testJobB["user"] = "******"
    testJobB["group"] = "DMWM"
    testJobB["taskType"] = "Processing"
    testJobB["site_cms_name"] = "site2"

    change.propagate([testJobA, testJobB], "new", "none")
    change.propagate([testJobA, testJobB], "created", "new")
    change.propagate([testJobA, testJobB], "executing", "created")

    self.assertEqual(latestLocation(testJobA), "site1")
    self.assertEqual(latestLocation(testJobB), "site2")

    # Move the first job to site2 and make sure couch picked it up
    jobs = [{'jobid': 1, 'location': 'site2'}]
    change.recordLocationChange(jobs)

    self.assertEqual(latestLocation(testJobA), "site2")

    # Both jobs should now be reported at site2 by the database as well
    listJobsDAO = self.daoFactory(classname="Jobs.GetLocation")
    jobid = [{'jobid': 1}, {'jobid': 2}]
    jobsLocation = listJobsDAO.execute(jobid)
    for job in jobsLocation:
        self.assertEqual(job['site_name'], 'site2')

    return