Esempio n. 1
0
    def kill(self, wait_time=None):
        """Kill (or cancel) the job tracked by this object.

        ``self.process`` is handed to ``dag.kill``; afterwards the job is
        marked locally as ``USER_KILLED`` and finished, and
        ``calc_job_timing`` is invoked.

        ``wait_time`` is accepted but not used: waiting and checking with
        Balsam that the job was really killed is not implemented yet.
        """
        dag.kill(self.process)

        # No confirmation round-trip with Balsam happens here (not
        # implemented yet); the local state is updated right away.
        message = "Killing job {}".format(self.name)
        logger.info(message)

        self.state, self.finished = 'USER_KILLED', True
        self.calc_job_timing()
Esempio n. 2
0
    def kill_job(self) -> None:
        """Kill the attached job if it is still in an unfinished state.

        Nothing happens when no job is attached (``self.job is None``) or
        when the job's state is not a member of ``UNFINISHED_STATES``.  That
        constant is defined outside this method; it is documented as::

            UNFINISHED_STATES = [
                'CREATED',
                'AWAITING_PARENTS',
                'READY',
                'STAGED_IN',
                'PREPROCESSED',
                'RUNNING',
                'RUN_DONE',
                'POSTPROCESSED']
        """
        # Short-circuit keeps ``self.job.state`` from being read on None.
        if self.job is not None and self.job.state in UNFINISHED_STATES:
            kill(self.job)
Esempio n. 3
0
def kill(args):
    """Kill the single Balsam job whose ID starts with ``args.id``.

    Args:
        args: Object exposing ``id`` (a job-ID prefix) and ``recursive``
            (forwarded to ``dag.kill``).

    Raises:
        RuntimeError: If more than one job matches the given ID prefix.

    When no job matches, a message is printed and the function returns
    without killing anything.  Otherwise the user is asked to confirm
    (via ``cmd_confirmation``) before the job is killed.
    """
    # setup() is called before the model imports, as in the original
    # ordering -- presumably it configures Balsam/Django; TODO confirm.
    from balsam import setup
    setup()
    from balsam.core import models
    from balsam.launcher import dag
    Job = models.BalsamJob

    job_id = args.id

    matches = Job.objects.filter(job_id__startswith=job_id)
    # Count once instead of issuing the same query twice.
    num_matches = matches.count()
    if num_matches > 1:
        raise RuntimeError(f"More than one job matches {job_id}")
    if num_matches == 0:
        print(f"No jobs match the given ID {job_id}")
        # Stop here: with no match, first() yields None and the
        # confirmation prompt below would fail on ``job.name``.
        return

    job = matches.first()

    if cmd_confirmation(f'Really kill job {job.name} {job.cute_id} ??'):
        dag.kill(job, recursive=args.recursive)
        print("Job killed")
Esempio n. 4
0
 def cancel(self):
     """Kill the wrapped job via ``dag.kill`` and record the state 'cancelled'."""
     job = self._job
     dag.kill(job)
     # Only reached when the kill call did not raise.
     self._state = 'cancelled'
            BalsamJob = dag.BalsamJob

            #If job already finished will stage out results

            #pending_sim1_jobs = BalsamJob.objects.filter(name__contains='t3_for_sim_id_1').exclude(state='JOB_FINISHED')

            #If job already finished will NOT stage out results - once classed as USER_KILLED
            pending_sim1_jobs = BalsamJob.objects.filter(
                name__contains='t3_for_sim_id_1')

            num_pending = pending_sim1_jobs.count(
            )  #will only kill if already in database

            #Iterate over the jobs and kill:
            for sim in pending_sim1_jobs:
                dag.kill(sim)

            print("Number of jobs should be killed: ", num_pending)

    success = poll_until_state(current_job, 'JOB_FINISHED')  #OR job killed
    if success:
        print("Completed job: %s rank=%d  time=%f" %
              (jobname, myrank, time.time() - start))
    else:
        print(
            "Job not completed: %s rank=%d  time=%f Status" %
            (jobname, myrank, time.time() - start), current_job.state)

end = time.time()
print("Done: rank=%d  time=%f" % (myrank, end - start))
Esempio n. 6
0
print("Host job rank is %d Output dir is %s" % (myrank, sim_input_dir))

# Submit ``steps`` jobs one after another, waiting on each before the next.
# The job created for sim_id 1 is killed immediately after being added.
start = time.time()
for sim_id in range(steps):
    jobname = ''.join(
        ['outfile_t2_', 'for_sim_id_', str(sim_id), '_ranks_', str(myrank), '.txt'])

    job_options = dict(
        name=jobname,
        workflow="libe_workflow",
        application="helloworld",
        application_args=str(sleep_time),
        num_nodes=1,
        ranks_per_node=8,
        stage_out_url="local:" + sim_path,
        stage_out_files=jobname + ".out",
    )
    current_job = dag.add_job(**job_options)

    if sim_id == 1:
        dag.kill(current_job)

    # poll_until_state is defined elsewhere; a truthy result is treated as
    # the job having reached 'JOB_FINISHED' (a killed job takes the other
    # branch and its current state is printed).
    success = poll_until_state(current_job, 'JOB_FINISHED')
    if not success:
        print(
            "Task not completed: %s rank=%d time=%f Status" %
            (jobname, myrank, time.time() - start), current_job.state)
    else:
        print("Completed job: %s rank=%d time=%f" %
              (jobname, myrank, time.time() - start))

# Total wall-clock time for the whole loop.
end = time.time()
print("Done: rank=%d  time=%f" % (myrank, end - start))
Esempio n. 7
0
def mock_kill():
    """Kill the job exposed as ``dag.current_job``, passing ``recursive=True``."""
    dag.kill(dag.current_job, recursive=True)
Esempio n. 8
0
import sys
import balsam.launcher.dag as dag
import time

# Reference point for the bounded wait below.
start = time.time()

# Poll every 2 s until a job named "slow_job" exists; give up once more
# than 40 s have elapsed since ``start``.
while not dag.BalsamJob.objects.filter(name="slow_job").count():
    time.sleep(2)
    if time.time() - start > 40:
        raise RuntimeError("the slow job never started")

slow_job = dag.BalsamJob.objects.get(name='slow_job')

# When "when-running" appears in any command-line argument, hold off the
# kill until the job reports state RUNNING (re-reading it from the database
# every 2 s); otherwise the job is killed right away.
kill_when_running = any('when-running' in arg for arg in sys.argv)
if kill_when_running:
    while slow_job.state != 'RUNNING':
        time.sleep(2)
        slow_job.refresh_from_db()
dag.kill(slow_job)