예제 #1
0
def test__placement_exclusive_job(exclusive_host):
    """Check that pods of a job carrying the exclusive-label constraint are
    placed only on hosts whose name contains "exclusive".

    NOTE(review): ``exclusive_host`` is never read in the body; presumably it
    is a pytest fixture that prepares the exclusive host -- confirm.
    """
    exclusive_label = peloton_pb2_v1alpha.Label(
        key="peloton/exclusive",
        value="exclusive-test-label",
    )
    host_label_constraint = pod_pb2.LabelConstraint(
        kind=2,  # Host
        condition=2,  # Equal
        requirement=1,
        label=exclusive_label,
    )
    placement_constraint = pod_pb2.Constraint(
        type=1,  # Label constraint
        label_constraint=host_label_constraint,
    )

    # The setup has 1 exclusive host and 2 non-exclusive hosts. Request a
    # few more instances than can run simultaneously on the single
    # exclusive host: 6 are requested, and the test waits for 4 to run.
    stateless_job = StatelessJob(
        job_file="test_stateless_job_cpus_large_spec.yaml")
    stateless_job.job_spec.default_spec.constraint.CopyFrom(
        placement_constraint)
    stateless_job.job_spec.instance_count = 6
    stateless_job.create()
    stateless_job.wait_for_state(goal_state="RUNNING")
    stateless_job.wait_for_all_pods_running(num_pods=4)

    stateless_job.stop()
    stateless_job.wait_for_terminated()

    # Every pod that reports a host must have run on the exclusive host;
    # pods without a recorded host are skipped.
    for pod_summary in stateless_job.list_pods():
        host = pod_summary.status.host
        if not host:
            continue
        assert "exclusive" in host
예제 #2
0
def stop_jobs(client):
    """
    Stop every job returned by list_jobs() and wait for it to terminate.

    Jobs are handled one at a time: each job is stopped and awaited before
    the next one is touched.

    :param client: client object handed to each StatelessJob wrapper
    """
    for job_summary in list_jobs():
        # Wrap the listed job id so the job can be driven through the
        # StatelessJob helper.
        stateless_job = StatelessJob(
            client=client,
            job_id=job_summary.job_id.value,
        )
        # NOTE(review): presumably raises the retry budget used while
        # polling for termination -- confirm against StatelessJob.
        stateless_job.config.max_retry_attempts = 100
        stateless_job.stop()
        stateless_job.wait_for_terminated()
예제 #3
0
def test__placement_non_exclusive_job(exclusive_host):
    """Check that pods of a job without the exclusive constraint never land
    on a host whose name contains "exclusive".

    NOTE(review): ``exclusive_host`` is never read in the body; presumably it
    is a pytest fixture that prepares the exclusive host -- confirm.
    """
    # The setup has 1 exclusive host and 2 non-exclusive hosts. Request a
    # few more instances than can run simultaneously on the 2 non-exclusive
    # hosts: 10 are requested, and the test waits for 5 to run.
    stateless_job = StatelessJob(
        job_file="test_stateless_job_cpus_large_spec.yaml")
    stateless_job.job_spec.instance_count = 10
    stateless_job.create()
    stateless_job.wait_for_state(goal_state="RUNNING")
    stateless_job.wait_for_all_pods_running(num_pods=5)

    stateless_job.stop()
    stateless_job.wait_for_terminated()

    # No pod that reports a host may have run on the exclusive host;
    # pods without a recorded host are skipped.
    for pod_summary in stateless_job.list_pods():
        host = pod_summary.status.host
        if not host:
            continue
        assert "exclusive" not in host
예제 #4
0
def test__revocable_job_slack_limit():
    """Create the slack-limited revocable job and wait until exactly two of
    its pods are in the RUNNING state, then stop the job.
    """
    slack_limited_job = StatelessJob(
        job_file="test_stateless_job_revocable_slack_limit_spec.yaml")
    slack_limited_job.create()
    slack_limited_job.wait_for_state(goal_state="RUNNING")

    def partial_tasks_running():
        # Condition: 2 tasks are running out of 3.
        instance_count = slack_limited_job.job_spec.instance_count
        states = [
            slack_limited_job.get_pod(instance_id).get_pod_status().state
            for instance_id in range(instance_count)
        ]
        return states.count(pod.POD_STATE_RUNNING) == 2

    slack_limited_job.wait_for_condition(partial_tasks_running)

    # Remove the job from jobmgr.
    slack_limited_job.stop()
    slack_limited_job.wait_for_terminated()
예제 #5
0
def test__create_revocable_job():
    """Launch two jobs from the revocable spec and one from the large-cpu
    spec, wait for all pods of each to run, then stop all three jobs.
    """

    def launch_and_wait(spec_file):
        # Create one job and block until all of its pods are running.
        launched = StatelessJob(job_file=spec_file)
        launched.create()
        launched.wait_for_state(goal_state="RUNNING")
        launched.wait_for_all_pods_running()
        return launched

    # Jobs are launched strictly one after another, in this order.
    launched_jobs = (
        launch_and_wait("test_stateless_job_revocable_spec.yaml"),
        launch_and_wait("test_stateless_job_revocable_spec.yaml"),
        launch_and_wait("test_stateless_job_cpus_large_spec.yaml"),
    )

    # Remove the jobs from jobmgr: request every stop first, then wait for
    # each job to terminate.
    for launched_job in launched_jobs:
        launched_job.stop()
    for launched_job in launched_jobs:
        launched_job.wait_for_terminated()
예제 #6
0
def test__revocable_tasks_move_to_revocable_queue():
    """Run a revocable job to completion of startup, then check that a
    second, slack-limited revocable job gets exactly one pod RUNNING, and
    that a job from the plain stateless spec can still start all its pods.
    """
    first_revocable = StatelessJob(
        job_file='test_stateless_job_revocable_spec.yaml')
    first_revocable.create()
    first_revocable.wait_for_state(goal_state='RUNNING')
    first_revocable.wait_for_all_pods_running()

    slack_limited = StatelessJob(
        job_file='test_stateless_job_revocable_slack_limit_spec.yaml')

    def partial_tasks_running():
        # Condition: 1 task is running out of 3.
        running = sum(
            1
            for instance_id in range(slack_limited.job_spec.instance_count)
            if slack_limited.get_pod(instance_id).get_pod_status().state
            == pod.POD_STATE_RUNNING
        )
        return running == 1

    slack_limited.create()

    # Give the job 5 seconds before polling so it has enough time to
    # schedule its pods.
    time.sleep(5)
    slack_limited.wait_for_condition(partial_tasks_running)

    plain_job = StatelessJob(job_file='test_stateless_job_spec.yaml')
    plain_job.create()
    plain_job.wait_for_state('RUNNING')
    plain_job.wait_for_all_pods_running()

    # Remove the jobs from jobmgr: request every stop first, then wait for
    # each job to terminate.
    all_jobs = (first_revocable, slack_limited, plain_job)
    for started_job in all_jobs:
        started_job.stop()
    for started_job in all_jobs:
        started_job.wait_for_terminated()
예제 #7
0
def test__stop_nonrevocable_job_to_free_resources_for_revocable_job():
    """Check that a revocable job has no RUNNING pods while two large-memory
    jobs are running, and that all its pods run once one of them is stopped.
    """
    large_memory_job = StatelessJob(
        job_file="test_stateless_job_memory_large_spec.yaml")
    large_memory_job.create()
    large_memory_job.wait_for_state("RUNNING")

    preemptible_memory_job = StatelessJob(
        job_file="test_stateless_preemptible_job_memory_large_spec.yaml")
    preemptible_memory_job.create()
    preemptible_memory_job.wait_for_state("RUNNING")

    # Both jobs are created before either is awaited pod-by-pod.
    large_memory_job.wait_for_all_pods_running()
    preemptible_memory_job.wait_for_all_pods_running()

    revocable = StatelessJob(
        job_file="test_stateless_job_revocable_spec.yaml")
    revocable.create()

    def no_task_running():
        # Condition: none of the revocable job's pods is RUNNING.
        states = [
            revocable.get_pod(instance_id).get_pod_status().state
            for instance_id in range(revocable.job_spec.instance_count)
        ]
        return states.count(pod.POD_STATE_RUNNING) == 0

    # Let the job try to run for 5 seconds; even after that, no task
    # should be running.
    time.sleep(5)
    revocable.wait_for_condition(no_task_running)

    # Stop one of the large-memory jobs to free up resources for the
    # revocable job.
    preemptible_memory_job.stop()
    preemptible_memory_job.wait_for_terminated()

    # With that job gone, all revocable tasks should be running.
    revocable.wait_for_all_pods_running()

    # Remove the remaining jobs from jobmgr: request both stops first,
    # then wait for each job to terminate.
    remaining_jobs = (large_memory_job, revocable)
    for remaining in remaining_jobs:
        remaining.stop()
    for remaining in remaining_jobs:
        remaining.wait_for_terminated()
예제 #8
0
def test__preempt_revocable_job_to_run_non_revocable_job():
    """Check that a running revocable job ends up with no RUNNING pods after
    a second large-memory job is launched and brought fully up.
    """
    preemptible_memory_job = StatelessJob(
        job_file="test_stateless_preemptible_job_memory_large_spec.yaml")
    preemptible_memory_job.create()
    preemptible_memory_job.wait_for_state(goal_state="RUNNING")
    preemptible_memory_job.wait_for_all_pods_running()

    revocable = StatelessJob(
        job_file="test_stateless_job_revocable_spec.yaml")
    revocable.create()
    revocable.wait_for_state(goal_state="RUNNING")
    revocable.wait_for_all_pods_running()

    # Launch the second large-memory job, which will pre-empt the
    # revocable job.
    large_memory_job = StatelessJob(
        job_file="test_stateless_job_memory_large_spec.yaml")
    large_memory_job.create()
    large_memory_job.wait_for_state(goal_state="RUNNING")
    large_memory_job.wait_for_all_pods_running()

    def zero_tasks_running():
        # Condition: none of the revocable job's pods is RUNNING.
        running = sum(
            1
            for instance_id in range(revocable.job_spec.instance_count)
            if revocable.get_pod(instance_id).get_pod_status().state
            == pod.POD_STATE_RUNNING
        )
        return running == 0

    revocable.wait_for_condition(zero_tasks_running)

    # Request every stop first, then wait for each job to terminate.
    all_jobs = (revocable, preemptible_memory_job, large_memory_job)
    for started_job in all_jobs:
        started_job.stop()
    for started_job in all_jobs:
        started_job.wait_for_terminated()