def read(self, qsubstr):  #pylint: disable=too-many-branches, too-many-statements
    """Populate this Job from the text of a submit script.

    The work is delegated to the ``read`` callable of the object returned by
    ``config.software()``, which receives this Job and the script text. The
    script is expected to be in the format appropriate for that software.

    Args:
        qsubstr (str): A submit script as a string
    """
    backend = config.software()
    backend.read(self, qsubstr)
def submit(self, add=True, dbpath=None):
    """Submit this Job using the appropriate command for prisms_jobs.config.software().

    The ID returned by the submission is stored in ``self.jobID``. When
    ``add`` is true, a status record with jobstatus "?" and the current
    working directory as ``rundir`` is added to the JobDB database.

    Args:
        add (bool): Should this job be added to the JobDB database?
        dbpath (str): Specify a non-default JobDB database

    Raises:
        prisms_jobs.JobsError: If error submitting the job.
    """
    # Render the script once so the submitted text and the recorded text
    # are the same string.
    substr = self.sub_string()
    self.jobID = config.software().submit(substr=substr)

    if not add:
        return

    database = jobdb.JobDB(dbpath=dbpath)
    try:
        status = jobdb.job_status_dict(
            jobid=self.jobID,
            jobname=self.name,
            rundir=os.getcwd(),
            jobstatus="?",
            auto=self.auto,
            qsubstr=substr,
            walltime=misc.seconds(self.walltime),
            nodes=self.nodes,
            procs=self.nodes * self.ppn)
        database.add(status)
    finally:
        # Close the database even when building or adding the record fails.
        database.close()
def update(self):
    """Update records using qstat.

    The records to refresh are chosen by the callable returned from
    ``config.update_selection_method()``. Each selected job that is absent
    from ``config.software().job_status()`` is written back with jobstatus
    'C'; each one that is present gets its reported fields written back.

    Any jobs found using qstat that are not in the jobs database are saved
    in 'self.untracked'.
    """
    # choose which records to refresh
    #   * configurable/customizable via set_update_selection_method
    select_records = config.update_selection_method()
    select_records(self.curs)

    # start every selected job at 'C'; the entry is overwritten below only
    # if the job shows up in the active status report
    newstatus = dict((row["jobid"], "C") for row in sql_iter(self.curs))

    # status dicts for all jobs currently reported
    active_status = config.software().job_status()

    # reset untracked
    self.untracked = []

    for jobid in active_status:
        if jobid not in newstatus:
            # not among the selected records: it is untracked only if the
            # database has no row for it at all
            self.curs.execute("SELECT jobid FROM jobs WHERE jobid=?", (jobid, ))
            if self.curs.fetchone() is None:
                self.untracked.append(active_status[jobid])
            continue
        newstatus[jobid] = active_status[jobid]

    # write the latest status of every selected job back to the database
    for jobid, latest in iteritems(newstatus):
        if latest == "C":
            statement = "UPDATE jobs SET jobstatus=?, elapsedtime=?, modifytime=? WHERE jobid=?"
            values = ("C", None, int(time.time()), jobid)
        else:
            statement = "UPDATE jobs SET jobstatus=?, elapsedtime=?, starttime=?,\
                completiontime=?, qstatstr=?, modifytime=? WHERE jobid=?"
            values = (latest["jobstatus"], latest["elapsedtime"],
                      latest["starttime"], latest["completiontime"],
                      latest["qstatstr"], int(time.time()), jobid)
        self.curs.execute(statement, values)

    self.conn.commit()

    # update taskstatus for non-auto jobs
    check_statement = "UPDATE jobs SET taskstatus='Check', modifytime=? \
        WHERE jobstatus='C' AND taskstatus='Incomplete' AND auto=0"
    self.curs.execute(check_statement, (int(time.time()), ))
    self.conn.commit()
def error_job(message, jobid=None, dbpath=None):
    """Mark the job as 'Error: message' if possible

    Args:
        message (str): Error message to save in JobDB.
        jobid (str, optional): ID of job to mark 'Error: message'. If not
            given, uses current job ID determined from the environment.
        dbpath (str, optional): Path to JobDB database. If not given, use
            default database.

    Raises:
        JobsError: If job ID could not be determined
    """
    database = JobDB(dbpath)
    try:
        if jobid is None:
            jobid = config.software().job_id()
            if jobid is None:
                raise prisms_jobs.JobsError(0, "Could not determine jobid")

        job = database.select_job(jobid)
        database.error_job(message, job=job)
    finally:
        # Close the database on every path, including the JobsError above
        # and any failure while selecting or updating the job.
        database.close()
def complete_job(
        jobid=None,
        dbpath=None,
):
    """Mark the job as 'Complete' if possible

    Args:
        jobid (str): ID of job to mark 'Complete'. If not given, uses current
            job ID determined from the environment.
        dbpath (str): Path to JobDB database. If not given, use default
            database.

    Raises:
        JobsError: If job ID could not be determined
    """
    database = JobDB(dbpath)
    try:
        if jobid is None:
            jobid = config.software().job_id()
            if jobid is None:
                raise prisms_jobs.JobsError(0, "Could not determine jobid")

        # The result is not needed here; the lookup is kept because the
        # original performed it before marking the job.
        # NOTE(review): presumably select_job raises for an unknown job ID --
        # confirm against JobDB.select_job.
        database.select_job(jobid)
        database.complete_job(jobid)
    finally:
        # Close the database on every path, including the JobsError above.
        database.close()
def delete_job(self, jobid=None, job=None, series=False):
    """Delete a job through config.software() and remove it from the database.

    For each job ID concerned, ``config.software().delete`` is called and the
    matching row is deleted from the ``jobs`` table; the deletions are
    committed once, after all IDs have been processed.

    Args:
        jobid (str): jobid of the job to delete
        job (sqlite3.Row): If this is given, jobid is not necessary and is
            ignored if given
        series (bool): If 'series'=True, deletes entire job series
    """
    if job is None:
        job = self.select_job(jobid)

    # either the whole series this job belongs to, or just the job itself
    targets = self.select_series_id(job["jobid"]) if series else [job["jobid"]]

    for target in targets:
        config.software().delete(target)
        self.curs.execute("DELETE from jobs WHERE jobid=?", (target, ))
    self.conn.commit()
def abort_job(self, jobid=None, job=None):
    """Delete a job and mark job taskstatus as Aborted

    Args:
        jobid: jobid of the job to abort
        job: (sqlite3.Row) If this is given, jobid is not necessary and is
            ignored if given

    Raises:
        EligibilityError if job not eligible to be aborted
    """
    if job is None:
        job = self.select_job(jobid)

    eligible, ident, msg = self.eligible_to_abort(job)
    if not eligible:
        raise EligibilityError(ident, msg)

    target = job["jobid"]
    config.software().delete(target)

    statement = "UPDATE jobs SET taskstatus='Aborted', modifytime=?\
        WHERE jobid=?"
    self.curs.execute(statement, (int(time.time()), target))
    self.conn.commit()
def continue_job(self, jobid=None, job=None):
    """Resubmit one job with given jobid.

    The stored submit script is resubmitted from the job's ``rundir``. The
    existing record is marked 'Continued' and linked to the new job ID, and
    a new record is added for the resubmitted job. The caller's working
    directory is restored afterwards, even if resubmission fails.

    Args:
        jobid: jobid of the job to continue
        job: (sqlite3.Row) If this is given, jobid is not necessary and is
            ignored if given

    Raises:
        EligibilityError if job not eligible to be continued
    """
    if job is None:
        job = self.select_job(jobid)

    eligible, ident, msg = self.eligible_to_continue(job)
    if not eligible:
        raise EligibilityError(ident, msg)

    start_dir = os.getcwd()
    os.chdir(job["rundir"])
    try:
        new_jobid = config.software().submit(substr=job["qsubstr"])

        self.curs.execute(
            "UPDATE jobs SET taskstatus='Continued', modifytime=?,\
            continuation_jobid=? WHERE jobid=?",
            (int(time.time()), new_jobid, job["jobid"]))

        # rundir is taken from os.getcwd() after the chdir above, not
        # copied from the old record.
        status = job_status_dict(
            jobid=new_jobid,
            jobname=job["jobname"],
            rundir=os.getcwd(),
            jobstatus="?",
            auto=job["auto"],
            qsubstr=job["qsubstr"],
            nodes=job["nodes"],
            procs=job["procs"],
            walltime=job["walltime"])
        self.add(status)
    finally:
        # Return to the caller's directory even if submitting or recording
        # the new job raised.
        os.chdir(start_dir)
"""Automatically resubmit jobs"""
from __future__ import (absolute_import, division, print_function, unicode_literals)
from builtins import *

import argparse
import sys
import subprocess
from six import iteritems

import prisms_jobs
from prisms_jobs import config

# Resolved once at import time and shared by the functions in this script.
software = config.software()

def check_for_other():
    """Exit if another job named "taskmaster" is still active.

    Looks up the job IDs that ``software.job_id(name="taskmaster")`` reports
    and returns immediately when there are none. Otherwise, for each ID that
    differs from ``software.job_id()`` and whose "jobstatus" is not "C", a
    notice is printed and ``sys.exit()`` is called.

    NOTE(review): ``software.job_id()`` with no arguments is presumably the ID
    of the job this script is running in -- confirm against the software
    module in use.
    """
    jobid = software.job_id(name="taskmaster")
    # nothing named "taskmaster" was found, so there is nothing to conflict with
    if not len(jobid):
        return
    tmaster_status = software.job_status(jobid)
    for j in jobid:
        # skip our own job ID and any job whose status is already "C"
        if j != software.job_id() and tmaster_status[j]["jobstatus"] != "C":
            print("A taskmaster is already running. JobID:", j, " Status:", tmaster_status[j]["jobstatus"])
            sys.exit()

DESC = \
"""
Automatically resubmit jobs.

'taskmaster' submits itself with instructions to be run after an amount of time
def sub_string(self):  #pylint: disable=too-many-branches
    """Render this Job as text for prisms_jobs.config.software().

    Returns:
        The result of ``config.software().sub_string(self)``: the Job as a
        string suitable for the configured software.
    """
    backend = config.software()
    return backend.sub_string(self)