def job_queue_events_for_sleep_job(test_dir, default_condor):
    sub_description = """
        executable = /bin/sleep
        arguments = 10

        queue
    """
    submit_file = write_file(test_dir / "job.sub", sub_description)

    submit_cmd = default_condor.run_command(["condor_submit", submit_file])
    clusterid, num_procs = parse_submit_result(submit_cmd)
    jobid = JobID(clusterid, 0)

    default_condor.job_queue.wait_for_events(
        {
            jobid: [
                (
                    # when the job starts running, hold it
                    SetJobStatus(JobStatus.RUNNING),
                    lambda jobid, event: default_condor.run_command(
                        ["condor_hold", jobid]
                    ),
                ),
                (
                    # once the job is held, release it
                    SetJobStatus(JobStatus.HELD),
                    lambda jobid, event: default_condor.run_command(
                        ["condor_release", jobid]
                    ),
                ),
                SetJobStatus(JobStatus.COMPLETED),
            ]
        },
        timeout=60,
    )

    return default_condor.job_queue.by_jobid[jobid]

def job_queue_events_for_sleep_job(default_condor, path_to_sleep):
    handle = default_condor.submit(
        description={
            "executable": path_to_sleep,
            "arguments": "10",
        },
        count=1,
    )
    jobid = handle.job_ids[0]

    default_condor.job_queue.wait_for_events(
        {
            jobid: [
                (
                    # when the job starts running, hold it
                    SetJobStatus(JobStatus.RUNNING),
                    lambda jobid, event: default_condor.run_command(
                        ["condor_hold", jobid]
                    ),
                ),
                (
                    # once the job is held, release it
                    SetJobStatus(JobStatus.HELD),
                    lambda jobid, event: default_condor.run_command(
                        ["condor_release", jobid]
                    ),
                ),
                SetJobStatus(JobStatus.COMPLETED),
            ]
        },
        timeout=120,
    )

    return default_condor.job_queue.by_jobid[jobid]

def handle(condor, concurrency_limit):
    handle = condor.submit(
        description={
            "executable": "/bin/sleep",
            "arguments": "5",
            "request_memory": "100MB",
            "request_disk": "10MB",
            "concurrency_limits": concurrency_limit["submit-value"],
        },
        count=(concurrency_limit["max-running"] + 1) * 2,
    )

    condor.job_queue.wait_for_events(
        {
            jobid: [
                (
                    SetJobStatus(JobStatus.RUNNING),
                    lambda j, e: condor.run_command(["condor_q"], echo=True),
                ),
                SetJobStatus(JobStatus.COMPLETED),
            ]
            for jobid in handle.job_ids
        },
        timeout=60,
    )

    yield handle

    handle.remove()

def all_jobs_ran(condor, concurrency_limit, path_to_sleep):
    handle = condor.submit(
        description={
            "executable": path_to_sleep,
            "arguments": "5",
            "request_memory": "100MB",
            "request_disk": "10MB",
            "concurrency_limits": concurrency_limit["submit-value"],
        },
        count=(concurrency_limit["max-running"] + 1) * 2,
    )

    condor.job_queue.wait_for_events(
        {
            jobid: [
                (
                    SetJobStatus(JobStatus.RUNNING),
                    lambda j, e: condor.run_command(["condor_q"], echo=True),
                ),
                SetJobStatus(JobStatus.COMPLETED),
            ]
            for jobid in handle.job_ids
        },
        # On my unloaded machine, it takes ~48 seconds for the longest test.
        timeout=180,
    )

    yield handle

    handle.remove()

def num_jobs_running_history(condor, handle, num_resources):
    return track_quantity(
        condor.job_queue.filter(lambda j, e: j in handle.job_ids),
        increment_condition=lambda id_event: id_event[-1]
        == SetJobStatus(JobStatus.RUNNING),
        decrement_condition=lambda id_event: id_event[-1]
        == SetJobStatus(JobStatus.COMPLETED),
        max_quantity=num_resources,
        expected_quantity=num_resources,
    )

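# Hedged sketch, not the ornithology implementation: track_quantity is used above
# as if it walks a stream of (jobid, event) pairs and records the running count of
# jobs between an "increment" event and a "decrement" event. A minimal stand-in
# with that assumed behavior (the max_quantity/expected_quantity keywords are kept
# only as bounds for the caller to assert against later) could look like this:
def track_quantity_sketch(
    events, increment_condition, decrement_condition, max_quantity, expected_quantity
):
    quantity = 0
    history = []
    for id_event in events:
        if increment_condition(id_event):
            quantity += 1
        elif decrement_condition(id_event):
            quantity -= 1
        history.append(quantity)
    # A caller would typically check, e.g., max(history) <= max_quantity and that
    # expected_quantity was actually reached at some point.
    return history
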
def test_all_jobs_ran(self, condor, all_jobs_ran):
    for jobid in all_jobs_ran.job_ids:
        assert in_order(
            condor.job_queue.by_jobid[jobid],
            [
                SetJobStatus(JobStatus.IDLE),
                SetJobStatus(JobStatus.RUNNING),
                SetJobStatus(JobStatus.COMPLETED),
            ],
        )

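# Hedged sketch, not the ornithology implementation: in_order is used above as if
# it verifies that the expected events appear in the given relative order within a
# job's event stream, ignoring any unrelated events in between. A minimal
# subsequence check with that assumed meaning:
def in_order_sketch(events, expected_order):
    remaining = list(expected_order)
    for event in events:
        if remaining and event == remaining[0]:
            remaining.pop(0)
    return not remaining
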
def num_jobs_running_history(condor, all_jobs_ran, concurrency_limit):
    return track_quantity(
        condor.job_queue.filter(lambda j, e: j in all_jobs_ran.job_ids),
        increment_condition=lambda id_event: id_event[-1]
        == SetJobStatus(JobStatus.RUNNING),
        decrement_condition=lambda id_event: id_event[-1]
        == SetJobStatus(JobStatus.COMPLETED),
        max_quantity=concurrency_limit["max-running"],
        expected_quantity=concurrency_limit["max-running"],
    )

def test_job_queue_events_in_correct_order(self, job_queue_events_for_sleep_job):
    assert in_order(
        job_queue_events_for_sleep_job,
        [
            SetJobStatus(JobStatus.IDLE),
            SetJobStatus(JobStatus.RUNNING),
            SetJobStatus(JobStatus.COMPLETED),
        ],
    )

def finished_sleep_jobid(default_condor, submit_sleep_job_cmd):
    clusterid, num_procs = parse_submit_result(submit_sleep_job_cmd)
    jobid = JobID(clusterid, 0)

    default_condor.job_queue.wait_for_events(
        expected_events={jobid: [SetJobStatus(JobStatus.COMPLETED)]},
        unexpected_events={jobid: {SetJobStatus(JobStatus.HELD)}},
    )

    return jobid

def num_materialized_jobs_history(condor, jobids_for_sleep_jobs):
    num_materialized = 0
    history = []
    for jobid, event in condor.job_queue.filter(
        lambda j, e: j in jobids_for_sleep_jobs
    ):
        if event == SetJobStatus(JobStatus.IDLE):
            num_materialized += 1
        if event == SetJobStatus(JobStatus.COMPLETED):
            num_materialized -= 1
        history.append(num_materialized)

    return history

def num_idle_jobs_history(condor, jobids_for_sleep_jobs):
    num_idle = 0
    history = []
    for jobid, event in condor.job_queue.filter(
        lambda j, e: j in jobids_for_sleep_jobs
    ):
        if event == SetJobStatus(JobStatus.IDLE):
            num_idle += 1
        if event == SetJobStatus(JobStatus.RUNNING):
            num_idle -= 1
        history.append(num_idle)

    return history

def jobids_for_sleep_jobs(test_dir, condor, max_idle, max_materialize):
    sub_description = """
        executable = /bin/sleep
        arguments = 10

        request_memory = 1MB
        request_disk = 1MB

        max_materialize = {max_materialize}
        max_idle = {max_idle}

        queue {q}
    """.format(
        max_materialize=max_materialize,
        max_idle=max_idle,
        q=max_materialize + max_idle + 1,
    )
    submit_file = write_file(test_dir / "queue.sub", sub_description)

    submit_cmd = condor.run_command(["condor_submit", submit_file])
    clusterid, num_procs = parse_submit_result(submit_cmd)
    jobids = [JobID(clusterid, n) for n in range(num_procs)]

    condor.job_queue.wait_for_events(
        {jobid: [SetJobStatus(JobStatus.COMPLETED)] for jobid in jobids}, timeout=60
    )

    return jobids

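# Hedged usage sketch, assumed rather than taken from the original tests: the
# histories built by num_materialized_jobs_history and num_idle_jobs_history lend
# themselves to assertions against the caps baked into jobids_for_sleep_jobs,
# along these lines (test names here are illustrative only):
def test_never_more_materialized_than_max(num_materialized_jobs_history, max_materialize):
    assert max(num_materialized_jobs_history) <= max_materialize


def test_never_more_idle_than_max(num_idle_jobs_history, max_idle):
    assert max(num_idle_jobs_history) <= max_idle
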
def test_job_handle(condor, path_to_sleep, test_dir, test_universe):
    handle = condor.submit(
        description={
            "executable": path_to_sleep,
            "arguments": 2 * (ALLOWED_JOB_DURATION + PERIODIC_EXPR_INTERVAL + 1),
            "transfer_executable": False,
            "should_transfer_files": True,
            "universe": test_universe,
            "log": test_dir / "test_job.log",
            "on_exit_remove": False,
            "allowed_job_duration": ALLOWED_JOB_DURATION,
        },
        count=1,
    )
    job_id = handle.job_ids[0]

    timeout = 4 * (ALLOWED_JOB_DURATION + PERIODIC_EXPR_INTERVAL + 1)

    handle.wait(
        condition=ClusterState.any_held,
        fail_condition=ClusterState.any_complete,
        verbose=True,
        timeout=timeout,
    )
    condor.job_queue.wait_for_events(
        expected_events={job_id: [SetJobStatus(JobStatus.HELD)]},
        unexpected_events={job_id: [SetJobStatus(JobStatus.COMPLETED)]},
        timeout=timeout,
    )

    yield handle

    handle.remove()

def test_allowed_job_duration_sequence(self, test_job_queue_events):
    assert in_order(
        test_job_queue_events,
        [
            SetJobStatus(JobStatus.IDLE),
            SetJobStatus(JobStatus.RUNNING),
            SetJobStatus(JobStatus.HELD),
        ],
    )