def test__placement_exclusive_job(exclusive_host):
    """
    A job constrained to the "peloton/exclusive" host label must only report
    hosts whose name contains "exclusive".
    """
    exclusive_label = peloton_pb2_v1alpha.Label(
        key="peloton/exclusive",
        value="exclusive-test-label",
    )
    host_label_constraint = pod_pb2.LabelConstraint(
        kind=2,  # Host
        condition=2,  # Equal
        requirement=1,
        label=exclusive_label,
    )
    excl_constraint = pod_pb2.Constraint(
        type=1,  # Label constraint
        label_constraint=host_label_constraint,
    )

    # The cluster has 1 exclusive host and 2 non-exclusive hosts. Request a
    # few more instances than a single exclusive host can run
    # simultaneously, so only part of the job is expected to be running.
    exclusive_job = StatelessJob(
        job_file="test_stateless_job_cpus_large_spec.yaml"
    )
    exclusive_job.job_spec.default_spec.constraint.CopyFrom(excl_constraint)
    exclusive_job.job_spec.instance_count = 6

    exclusive_job.create()
    exclusive_job.wait_for_state(goal_state="RUNNING")
    exclusive_job.wait_for_all_pods_running(num_pods=4)

    exclusive_job.stop()
    exclusive_job.wait_for_terminated()

    # Every pod that reports a host must have run on the exclusive host;
    # pods with no host recorded are skipped.
    for summary in exclusive_job.list_pods():
        host = summary.status.host
        if not host:
            continue
        assert "exclusive" in host
def stop_jobs(client):
    """
    Stop every job returned by list_jobs() and wait for it to terminate.

    Each listed job is wrapped in a StatelessJob bound to ``client`` (looked
    up by its job id), has ``config.max_retry_attempts`` set to 100, is
    stopped, and is then waited on before the next job is handled.

    NOTE(review): the original docstring states that this covers all batch
    and stateless jobs across all resource pools; that depends on what
    list_jobs() returns - confirm against its definition.
    """
    for listed in list_jobs():
        stateless_job = StatelessJob(
            client=client,
            job_id=listed.job_id.value,
        )
        stateless_job.config.max_retry_attempts = 100
        stateless_job.stop()
        stateless_job.wait_for_terminated()
def test__placement_non_exclusive_job(exclusive_host):
    """
    A job without the exclusive-host constraint must never report a host
    whose name contains "exclusive".
    """
    # The cluster has 1 exclusive host and 2 non-exclusive hosts. Request a
    # few more instances than the 2 non-exclusive hosts can run
    # simultaneously, so only part of the job is expected to be running.
    regular_job = StatelessJob(
        job_file="test_stateless_job_cpus_large_spec.yaml"
    )
    regular_job.job_spec.instance_count = 10

    regular_job.create()
    regular_job.wait_for_state(goal_state="RUNNING")
    regular_job.wait_for_all_pods_running(num_pods=5)

    regular_job.stop()
    regular_job.wait_for_terminated()

    # No pod that reports a host may have run on the exclusive host;
    # pods with no host recorded are skipped.
    for summary in regular_job.list_pods():
        host = summary.status.host
        if not host:
            continue
        assert "exclusive" not in host
def test__revocable_job_slack_limit():
    """
    A revocable job created from the slack-limit spec should settle with
    exactly 2 of its pods in the RUNNING state.
    """
    slack_limited_job = StatelessJob(
        job_file="test_stateless_job_revocable_slack_limit_spec.yaml"
    )
    slack_limited_job.create()
    slack_limited_job.wait_for_state(goal_state="RUNNING")

    def partial_tasks_running():
        # Only 2 of the job's tasks (3 per the original note) should run.
        # Every pod is queried on each evaluation.
        running = sum(
            1
            for instance_id in range(
                slack_limited_job.job_spec.instance_count
            )
            if slack_limited_job.get_pod(instance_id).get_pod_status().state
            == pod.POD_STATE_RUNNING
        )
        return running == 2

    slack_limited_job.wait_for_condition(partial_tasks_running)

    # Remove the job from jobmgr.
    slack_limited_job.stop()
    slack_limited_job.wait_for_terminated()
def test__create_revocable_job():
    """
    Two revocable jobs and one non-revocable job, created one after another,
    should each get all of their pods running.
    """
    # Creation order matters: each job is brought fully up before the next
    # one is created.
    spec_files = (
        "test_stateless_job_revocable_spec.yaml",   # revocable job 1
        "test_stateless_job_revocable_spec.yaml",   # revocable job 2
        "test_stateless_job_cpus_large_spec.yaml",  # non-revocable job
    )

    launched = []
    for spec_file in spec_files:
        current = StatelessJob(job_file=spec_file)
        current.create()
        current.wait_for_state(goal_state="RUNNING")
        current.wait_for_all_pods_running()
        launched.append(current)

    # Remove the jobs from jobmgr: issue every stop first, then wait for
    # each termination.
    for current in launched:
        current.stop()
    for current in launched:
        current.wait_for_terminated()
def test__revocable_tasks_move_to_revocable_queue():
    """
    With one revocable job fully running, a second slack-limited revocable
    job should reach exactly 1 running pod, and a non-revocable job created
    afterwards should still get all of its pods running.
    """
    first_revocable = StatelessJob(
        job_file='test_stateless_job_revocable_spec.yaml'
    )
    first_revocable.create()
    first_revocable.wait_for_state(goal_state='RUNNING')
    first_revocable.wait_for_all_pods_running()

    second_revocable = StatelessJob(
        job_file='test_stateless_job_revocable_slack_limit_spec.yaml'
    )

    def partial_tasks_running():
        # Only 1 of the second job's tasks (3 per the original note) should
        # run. Every pod is queried on each evaluation.
        running = sum(
            1
            for instance_id in range(second_revocable.job_spec.instance_count)
            if second_revocable.get_pod(instance_id).get_pod_status().state
            == pod.POD_STATE_RUNNING
        )
        return running == 1

    second_revocable.create()

    # Give the job 5 seconds before polling the condition.
    time.sleep(5)
    second_revocable.wait_for_condition(partial_tasks_running)

    regular_job = StatelessJob(job_file='test_stateless_job_spec.yaml')
    regular_job.create()
    regular_job.wait_for_state('RUNNING')
    regular_job.wait_for_all_pods_running()

    # Remove the jobs from jobmgr: issue every stop first, then wait for
    # each termination.
    to_clean_up = (first_revocable, second_revocable, regular_job)
    for current in to_clean_up:
        current.stop()
    for current in to_clean_up:
        current.wait_for_terminated()
def test__stop_nonrevocable_job_to_free_resources_for_revocable_job():
    """
    While two memory-large jobs are running, a revocable job should have no
    running pods; once the preemptible memory-large job is stopped, all
    pods of the revocable job should run.
    """
    large_job = StatelessJob(
        job_file="test_stateless_job_memory_large_spec.yaml"
    )
    large_job.create()
    large_job.wait_for_state("RUNNING")

    preemptible_large_job = StatelessJob(
        job_file="test_stateless_preemptible_job_memory_large_spec.yaml"
    )
    preemptible_large_job.create()
    preemptible_large_job.wait_for_state("RUNNING")

    large_job.wait_for_all_pods_running()
    preemptible_large_job.wait_for_all_pods_running()

    revocable_job = StatelessJob(
        job_file="test_stateless_job_revocable_spec.yaml"
    )
    revocable_job.create()

    def no_task_running():
        # None of the revocable job's tasks should be running. Every pod is
        # queried on each evaluation.
        states = [
            revocable_job.get_pod(instance_id).get_pod_status().state
            for instance_id in range(revocable_job.job_spec.instance_count)
        ]
        return states.count(pod.POD_STATE_RUNNING) == 0

    # Give the job 5 seconds to run; even after that no task should be
    # running.
    time.sleep(5)
    revocable_job.wait_for_condition(no_task_running)

    # Stop the preemptible job to free up resources for the revocable job.
    preemptible_large_job.stop()
    preemptible_large_job.wait_for_terminated()

    # With that job gone, every revocable task should be running.
    revocable_job.wait_for_all_pods_running()

    # Remove the remaining jobs from jobmgr: stop both, then wait for both.
    remaining = (large_job, revocable_job)
    for current in remaining:
        current.stop()
    for current in remaining:
        current.wait_for_terminated()
def test__preempt_revocable_job_to_run_non_revocable_job():
    """
    A fully running revocable job should end up with zero running pods
    after a second memory-large non-revocable job is launched and gets all
    of its pods running.
    """
    preemptible_large_job = StatelessJob(
        job_file="test_stateless_preemptible_job_memory_large_spec.yaml"
    )
    preemptible_large_job.create()
    preemptible_large_job.wait_for_state(goal_state="RUNNING")
    preemptible_large_job.wait_for_all_pods_running()

    revocable_job = StatelessJob(
        job_file="test_stateless_job_revocable_spec.yaml"
    )
    revocable_job.create()
    revocable_job.wait_for_state(goal_state="RUNNING")
    revocable_job.wait_for_all_pods_running()

    # Launch the second non-revocable job, which will pre-empt the
    # revocable job.
    large_job = StatelessJob(
        job_file="test_stateless_job_memory_large_spec.yaml"
    )
    large_job.create()
    large_job.wait_for_state(goal_state="RUNNING")
    large_job.wait_for_all_pods_running()

    def zero_tasks_running():
        # None of the revocable job's tasks should be running. Every pod is
        # queried on each evaluation.
        running = sum(
            1
            for instance_id in range(revocable_job.job_spec.instance_count)
            if revocable_job.get_pod(instance_id).get_pod_status().state
            == pod.POD_STATE_RUNNING
        )
        return running == 0

    revocable_job.wait_for_condition(zero_tasks_running)

    # Clean up: issue every stop first (revocable job first), then wait for
    # each termination in the same order.
    to_clean_up = (revocable_job, preemptible_large_job, large_job)
    for current in to_clean_up:
        current.stop()
    for current in to_clean_up:
        current.wait_for_terminated()