def delete(job):
    """Delete a job.

    This method is for deleting a job, it will only delete a single job at a
    time. It is the generic calling point for the scheduler specific delete
    method (provided by a plugin), which contains the actual code for
    deleting a job on a given scheduler.

    Required arguments are:

    job (dictionary) - A single job dictionary, this is often simply passed in
                       as a subset of the main jobs dictionary. The keys
                       "scheduler" and "jobname" are read here.

    Raises exceptions.PluginattributeError if an AttributeError is raised
    while looking up or running the delete method of the scheduler plugin.
    A JobdeleteError raised by the plugin is logged and not propagated.

    """
    scheduler = job["scheduler"]

    try:

        LOG.info("Deleting the job '%s'", job["jobname"])

        # Plugins are looked up by the lower-cased scheduler name.
        getattr(schedulers, scheduler.lower()).delete(job)

    except AttributeError:

        raise exceptions.PluginattributeError(
            "delete method cannot be found in plugin '{0}'"
            .format(scheduler))

    except exceptions.JobdeleteError:

        LOG.info("Unable to delete job '%s'", job["jobname"])

    else:

        # Only report success when the plugin call completed without error,
        # otherwise a failed deletion would also be logged as successful.
        LOG.info("Deletion successful")
def _checkwaitingjobs(jobs, save):
    """Try to submit jobs that are marked as "Waiting Submission".

    A waiting job is only submitted when the slot counter stored for its
    resource in jobs["lbowconf"] is below the stored maximum for that
    resource.

    Required arguments are:

    jobs (dictionary) - The jobs data structure, entries whose key contains
                        "lbowconf" are not treated as jobs.

    save (boolean) - The incoming "needs saving" flag.

    Returns the save flag, set to True if at least one job was submitted,
    otherwise the value that was passed in.

    """
    # Take a snapshot of the job names before doing any work.
    waiting_candidates = [name for name in jobs if "lbowconf" not in name]

    for name in waiting_candidates:

        entry = jobs[name]
        resource = entry["resource"]

        # Only jobs that have been held back are of interest here.
        if entry["laststatus"] != "Waiting Submission":

            continue

        slots_key = resource + "-" + "queue-slots"
        max_key = resource + "-" + "queue-max"

        # No free slot on this resource, leave the job waiting.
        if int(jobs["lbowconf"][slots_key]) >= int(jobs["lbowconf"][max_key]):

            continue

        try:

            plugin = getattr(schedulers, entry["scheduler"].lower())
            plugin.submit(entry)

            entry["laststatus"] = "Queued"

            LOG.info("Job '%s' submitted with id '%s'", name, entry["jobid"])

            # One more slot is now in use on this resource.
            used_slots = int(jobs["lbowconf"][slots_key]) + 1
            jobs["lbowconf"][slots_key] = str(used_slots)

            save = True

        except AttributeError:

            # The plugin does not provide a usable submit method.
            raise exceptions.PluginattributeError(
                "Submit method cannot be found in plugin '{0}'"
                .format(entry["scheduler"]))

        except exceptions.JobsubmitError as err:

            # The submission itself failed.
            LOG.error(err)

            entry["laststatus"] = "Submit Error"

        except exceptions.QueuemaxError:

            # A queue error at this stage might be due to other constraints
            # such as resource limits on the queue.
            LOG.error("Job is still failing to submit, which could "
                      "indicate problems with resource limits for this "
                      "particular queue - marking this as in error state")

            entry["laststatus"] = "Submit Error"

    return save
def prepare(jobs):
    """Create the job submission scripts.

    Every job in the "jobs" data structure is visited in turn. When a job has
    no user supplied submit script (its "subfile" is empty), the scheduler
    specific plugin is asked to create one. Otherwise the supplied script is
    appended to the job's "upload-include" list and creation is skipped.

    Required arguments are:

    jobs (dictionary) - The Longbow jobs data structure, see configuration.py
                        for more information about the format of this
                        structure.

    Raises exceptions.PluginattributeError if an AttributeError is raised
    while a job is being processed.

    """
    LOG.info("Creating submit files for job/s.")

    # Entries whose key contains "lbowconf" are not jobs.
    jobnames = [name for name in jobs if "lbowconf" not in name]

    for name in jobnames:

        params = jobs[name]
        plugin_name = params["scheduler"]

        try:

            if params["subfile"] != "":

                LOG.info("For job '%s' user has supplied their own job "
                         "submit script - skipping creation.", name)

                # Make sure the user supplied script is included in uploads.
                params["upload-include"] = (
                    params["upload-include"] + ", " + params["subfile"])

            else:

                LOG.info("Creating submit file for job '%s'", name)

                plugin = getattr(schedulers, plugin_name.lower())
                plugin.prepare(params)

                LOG.info("Submit file created successfully")

        except AttributeError:

            raise exceptions.PluginattributeError(
                "prepare method cannot be found in plugin '{0}'"
                .format(plugin_name))

    LOG.info("Submit file/s created.")
def _polljobs(jobs, save):
    """Poll the status of all jobs.

    Poll the status of all jobs whose last known status is not one of
    "Finished", "Complete", "Submit Error" or "Waiting Submission". When the
    status reported by the scheduler plugin differs from the stored one, the
    stored status is updated and logged, and when a job has become
    "Finished" the slot counter of its resource is decremented.

    Required arguments are:

    jobs (dictionary) - The jobs data structure, entries whose key contains
                        "lbowconf" are not treated as jobs.

    save (boolean) - The incoming "needs saving" flag.

    Returns the save flag, set to True if the status of at least one job
    changed, otherwise the value that was passed in.

    Raises exceptions.PluginattributeError if an AttributeError is raised
    while looking up or running the status method of the scheduler plugin.

    """
    # Jobs in any of these states are not polled.
    skipstates = ("Finished", "Complete", "Submit Error",
                  "Waiting Submission")

    for job in [a for a in jobs if "lbowconf" not in a]:

        if jobs[job]["laststatus"] in skipstates:

            continue

        # Get the job status.
        try:

            status = getattr(
                schedulers, jobs[job]["scheduler"].lower()).status(
                    jobs[job])

        except AttributeError:

            # The two literals are joined by the parser, so the separating
            # space has to be part of the first one.
            raise exceptions.PluginattributeError(
                "Status method cannot be "
                "found in plugin '{0}'".format(jobs[job]["scheduler"]))

        # If the last status is different then change the flag (stops
        # logfile getting flooded!)
        if jobs[job]["laststatus"] != status:

            jobs[job]["laststatus"] = status

            save = True

            if status == "Finished":

                # A finished job frees up a slot on its resource.
                qslots = jobs[job]["resource"] + "-" + "queue-slots"
                jobs["lbowconf"][qslots] = str(int(
                    jobs["lbowconf"][qslots]) - 1)

            LOG.info("Status of job '%s' with id '%s' is '%s'", job,
                     jobs[job]["jobid"], status)

    return save
def submit(jobs):
    """Submit all jobs.

    This method holds the generic Longbow code for submitting jobs, the
    scheduler specific work is delegated to the submit method of the plugin
    named by each job. Per resource slot counters are kept in
    jobs["lbowconf"], and once a plugin reports that the queue limit has been
    reached, all jobs without a status are marked as "Waiting Submission" and
    submission stops. Afterwards a recovery file is written if the
    ~/.longbow directory exists and a recovery file name is configured.

    Required arguments are:

    jobs (dictionary) - The Longbow jobs data structure, see configuration.py
                        for more information about the format of this
                        structure.

    Raises exceptions.PluginattributeError if an AttributeError is raised
    while a job is being submitted.

    """
    # Running totals for the summary that is logged at the end.
    n_submitted = 0
    n_held = 0
    n_failed = 0

    LOG.info("Submitting job/s.")

    # Start every resource off with zeroed slot counters.
    for name in [key for key in jobs if "lbowconf" not in key]:

        resource = jobs[name]["resource"]

        jobs["lbowconf"][resource + "-" + "queue-slots"] = str(0)
        jobs["lbowconf"][resource + "-" + "queue-max"] = str(0)

    for name in [key for key in jobs if "lbowconf" not in key]:

        entry = jobs[name]
        plugin_name = entry["scheduler"]

        try:

            getattr(schedulers, plugin_name.lower()).submit(entry)

            LOG.info("Job '%s' submitted with id '%s'", name, entry["jobid"])

            entry["laststatus"] = "Queued"

            # One more slot is now in use on this resource.
            slots_key = entry["resource"] + "-" + "queue-slots"
            jobs["lbowconf"][slots_key] = str(
                int(jobs["lbowconf"][slots_key]) + 1)

            n_submitted += 1

        except AttributeError:

            # The plugin does not provide a usable submit method.
            raise exceptions.PluginattributeError(
                "submit method cannot be found in plugin '{0}'"
                .format(plugin_name))

        except exceptions.JobsubmitError as err:

            # The submission itself failed.
            LOG.error(err)

            entry["laststatus"] = "Submit Error"

            n_failed += 1

        except exceptions.QueuemaxError:

            # The resource is full, so every job that has no status yet is
            # held back and handled later by Longbow itself.
            for pending in [key for key in jobs if "lbowconf" not in key]:

                if "laststatus" in jobs[pending]:

                    continue

                LOG.info("The job '%s' has been held back by Longbow due "
                         "to reaching queue slot limit, it will be "
                         "submitted when a slot opens up.", pending)

                # Flag the job so that the user can be informed that it is
                # being handled.
                jobs[pending]["laststatus"] = "Waiting Submission"

                n_held += 1

            break

        # Keep track of the largest number of slots seen in use so far.
        slots_key = entry["resource"] + "-" + "queue-slots"
        max_key = entry["resource"] + "-" + "queue-max"

        if int(jobs["lbowconf"][slots_key]) > int(jobs["lbowconf"][max_key]):

            jobs["lbowconf"][max_key] = jobs["lbowconf"][slots_key]

    # Write out the recovery file.
    longbow_dir = os.path.expanduser('~/.longbow')

    if (os.path.isdir(longbow_dir) and
            jobs["lbowconf"]["recoveryfile"] != ""):

        recoveryfile = os.path.join(
            longbow_dir, jobs["lbowconf"]["recoveryfile"])

        try:

            LOG.info("Recovery file will be placed at path '%s'",
                     recoveryfile)

            configuration.saveini(recoveryfile, jobs)

        except (OSError, IOError):

            LOG.warning(
                "Could not write recovery file, possibly due to permissions "
                "on the ~/.longbow directory.")

    LOG.info("%s Submitted, %s Held due to queue limits and %s Failed.",
             n_submitted, n_held, n_failed)