def kill(self, wait_time=None):
    """Kill (cancel) this job through Balsam and record it as user-killed.

    ``wait_time`` is accepted but is not used by this implementation:
    the method does not wait for, or confirm, the kill.
    """
    dag.kill(self.process)

    # A wait-and-verify step (asking Balsam whether the job is really
    # dead) could go here; it has not been implemented.

    message = "Killing job {}".format(self.name)
    logger.info(message)

    # Mark the job as terminated by the user, then finalise its timing.
    self.state = 'USER_KILLED'
    self.finished = True
    self.calc_job_timing()
def kill_job(self) -> None:
    '''
    Kill the attached job if its state is one of ``UNFINISHED_STATES``.

    Does nothing when no job is attached (``self.job is None``) or when
    the job's state is not in ``UNFINISHED_STATES``. For reference, the
    original documentation listed that constant as:

    >>> UNFINISHED_STATES = [
            'CREATED',
            'AWAITING_PARENTS',
            'READY',
            'STAGED_IN',
            'PREPROCESSED',
            'RUNNING',
            'RUN_DONE',
            'POSTPROCESSED']
    '''
    job = self.job
    if job is not None and job.state in UNFINISHED_STATES:
        kill(job)
def kill(args):
    """Kill the single Balsam job whose ID starts with ``args.id``.

    The user is asked for confirmation (via ``cmd_confirmation``) before
    the job is killed.

    Args:
        args: Parsed command-line arguments. ``args.id`` is the job-ID
            prefix to look up; ``args.recursive`` is forwarded to
            ``dag.kill`` as its ``recursive`` keyword.

    Raises:
        RuntimeError: If more than one job matches the given ID prefix.
    """
    # setup() is called before the model imports, as in the original;
    # presumably it configures Balsam/Django -- confirm against balsam docs.
    from balsam import setup
    setup()
    from balsam.core import models
    from balsam.launcher import dag

    job_id = args.id
    matches = models.BalsamJob.objects.filter(job_id__startswith=job_id)

    # Count once and reuse the result instead of issuing the query twice.
    num_matches = matches.count()
    if num_matches > 1:
        raise RuntimeError(f"More than one job matches {job_id}")
    if num_matches == 0:
        print(f"No jobs match the given ID {job_id}")
        # Stop here: with no match, ``first()`` yields None and the
        # confirmation prompt below would fail with AttributeError.
        return

    job = matches.first()
    if cmd_confirmation(f'Really kill job {job.name} {job.cute_id} ??'):
        dag.kill(job, recursive=args.recursive)
        print("Job killed")
def cancel(self):
    """Kill the underlying job via ``dag.kill`` and mark this object cancelled."""
    job = self._job
    dag.kill(job)
    # Only reached if the kill call returned without raising.
    self._state = 'cancelled'
BalsamJob = dag.BalsamJob

# Two ways of selecting the jobs to kill:
#
#   1. Excluding finished jobs -- if the job already finished it will
#      stage out results:
#        pending_sim1_jobs = BalsamJob.objects.filter(
#            name__contains='t3_for_sim_id_1').exclude(state='JOB_FINISHED')
#
#   2. No exclusion (used here) -- if the job already finished it will NOT
#      stage out results, once it is classed as USER_KILLED.
pending_sim1_jobs = BalsamJob.objects.filter(
    name__contains='t3_for_sim_id_1')

# Jobs are only killed if they are already in the database.
num_pending = pending_sim1_jobs.count()

# Kill every selected job in turn.
for sim in pending_sim1_jobs:
    dag.kill(sim)

print("Number of jobs should be killed: ", num_pending)

# Polling stops when the job reaches JOB_FINISHED -- or when it was killed.
success = poll_until_state(current_job, 'JOB_FINISHED')
if not success:
    print(
        "Job not completed: %s rank=%d time=%f Status" %
        (jobname, myrank, time.time() - start),
        current_job.state)
else:
    print("Completed job: %s rank=%d time=%f" %
          (jobname, myrank, time.time() - start))

end = time.time()
print("Done: rank=%d time=%f" % (myrank, end - start))
print("Host job rank is %d Output dir is %s" % (myrank, sim_input_dir))

start = time.time()

# Submit one job per step; the job for sim_id 1 is killed right after it
# has been added, every job is then polled until it finishes.
for sim_id in range(steps):
    jobname = ''.join((
        'outfile_t2_',
        'for_sim_id_',
        str(sim_id),
        '_ranks_',
        str(myrank),
        '.txt',
    ))

    current_job = dag.add_job(
        name=jobname,
        workflow="libe_workflow",
        application="helloworld",
        application_args=str(sleep_time),
        num_nodes=1,
        ranks_per_node=8,
        stage_out_url="local:" + sim_path,
        stage_out_files=jobname + ".out",
    )

    if sim_id == 1:
        dag.kill(current_job)

    # Polling stops when the job reaches JOB_FINISHED -- or when it was killed.
    success = poll_until_state(current_job, 'JOB_FINISHED')
    if not success:
        print(
            "Task not completed: %s rank=%d time=%f Status" %
            (jobname, myrank, time.time() - start),
            current_job.state)
    else:
        print("Completed job: %s rank=%d time=%f" %
              (jobname, myrank, time.time() - start))

end = time.time()
print("Done: rank=%d time=%f" % (myrank, end - start))
def mock_kill():
    """Kill the job exposed as ``dag.current_job``, passing ``recursive=True``."""
    dag.kill(dag.current_job, recursive=True)
import sys
import time

import balsam.launcher.dag as dag

start = time.time()

# Wait for a job named "slow_job" to appear in the database, checking
# every 2 seconds and giving up once more than 40 seconds have elapsed.
while True:
    if dag.BalsamJob.objects.filter(name="slow_job").count() != 0:
        break
    time.sleep(2)
    if time.time() - start > 40:
        raise RuntimeError("the slow job never started")

slow_job = dag.BalsamJob.objects.get(name='slow_job')

# When "when-running" appears anywhere on the command line, hold off the
# kill until the job reports the RUNNING state.
# NOTE(review): unlike the wait above, this wait has no timeout.
if 'when-running' in ' '.join(sys.argv):
    while True:
        if slow_job.state == 'RUNNING':
            break
        time.sleep(2)
        slow_job.refresh_from_db()

dag.kill(slow_job)