Example #1
0
def test_controller_task_limit():
    """Check that a resource pool's controller limit gates task admission.

    Scenario:
      1. A first controller job is started and takes up the whole
         controller limit.
      2. A second controller job is started and has to stay PENDING.
      3. The first job is killed, after which the second one must reach
         RUNNING.
    """
    def make_controller_job():
        # Each job gets its own config object built from the same pool file.
        pool_config = IntegrationTestConfig(
            pool_file='test_respool_controller_limit.yaml')
        return Job(job_file='test_controller_job.yaml', config=pool_config)

    # The first job consumes the entire controller limit.
    limit_holder = make_controller_job()
    limit_holder.create()
    limit_holder.wait_for_state(goal_state='RUNNING')

    # With the limit used up, the second job must not be admitted.
    blocked_job = make_controller_job()
    blocked_job.create()

    # Give the second job 5 seconds in which it could (wrongly) be scheduled
    # before checking that it is still PENDING.
    time.sleep(5)
    blocked_job.wait_for_state(goal_state='PENDING')

    # Killing the first job frees the controller limit ...
    limit_holder.stop()
    limit_holder.wait_for_state(goal_state='KILLED')

    # ... so the second job is expected to start running now.
    blocked_job.wait_for_state(goal_state='RUNNING')

    kill_jobs([blocked_job])
Example #2
0
def test__host_limit(peloton_client):
    """Check task placement of the stateless job from the host-limit spec.

    After the job reaches RUNNING, the test polls until every RUNNING task
    is on a distinct host and exactly one task is PENDING, then stops the
    job and waits for it to be KILLED.

    :param peloton_client: client fixture handed to the Job under test
    """
    job = Job(
        client=peloton_client,
        job_file="test_stateless_job_host_limit_1.yaml",
        config=IntegrationTestConfig(max_retry_attempts=100, sleep_time_sec=2),
    )
    job.create()
    job.wait_for_state(goal_state="RUNNING")

    # Resolve the enum values once; the condition below may be polled
    # many times.
    running_state = task_pb2.TaskState.Value("RUNNING")
    pending_state = task_pb2.TaskState.Value("PENDING")

    # All running tasks should have different hosts
    def different_hosts_for_running_tasks():
        hosts = set()
        num_running, num_pending = 0, 0
        # Only the task infos are needed, not the keys they are stored under.
        for task_info in job.list_tasks().value.values():
            state = task_info.runtime.state
            if state == running_state:
                num_running += 1
                hosts.add(task_info.runtime.host)
            elif state == pending_state:
                num_pending += 1

        # number of running tasks should be equal to the size of the hosts set
        # there should be 1 task in PENDING
        return len(hosts) == num_running and num_pending == 1

    job.wait_for_condition(different_hosts_for_running_tasks)

    job.stop()
    job.wait_for_state(goal_state="KILLED")
Example #3
0
def test__start_stop_task_without_job_id():
    """Start and stop on a Job built with no arguments must report notFound.

    Per the test name, such a Job has no job id.
    """
    job_without_id = Job()

    # Exercise start first, then stop; both responses must carry an error
    # whose notFound field is set.
    for operation in (job_without_id.start, job_without_id.stop):
        resp = operation()
        assert resp.HasField('error')
        assert resp.error.HasField('notFound')
Example #4
0
def test__start_stop_task_without_job_id(peloton_client):
    """Start and stop on a Job that was never given an id must report notFound.

    :param peloton_client: client fixture handed to the Job under test
    """
    def expect_not_found(response):
        # The response must carry an error, and that error must be notFound.
        assert response.HasField("error")
        assert response.error.HasField("notFound")

    orphan_job = Job(client=peloton_client)

    expect_not_found(orphan_job.start())
    expect_not_found(orphan_job.stop())
Example #5
0
def test__start_stop_task_with_nonexistent_job_id():
    """Start and stop on a Job with a made-up job id must report notFound."""
    def expect_not_found(response):
        # The response must carry an error, and that error must be notFound.
        assert response.HasField('error')
        assert response.error.HasField('notFound')

    ghost_job = Job()
    # Point the job at an id that the test name declares nonexistent.
    ghost_job.job_id = "nonexistent-job-id"

    expect_not_found(ghost_job.start())
    expect_not_found(ghost_job.stop())
Example #6
0
def test__start_stop_task_with_nonexistent_job_id(peloton_client):
    """Start and stop on a Job with a made-up job id must report notFound.

    :param peloton_client: client fixture handed to the Job under test
    """
    ghost_job = Job(client=peloton_client)
    # Point the job at an id that the test name declares nonexistent.
    ghost_job.job_id = "nonexistent-job-id"

    # Exercise start first, then stop; both responses must carry an error
    # whose notFound field is set.
    for operation in (ghost_job.start, ghost_job.stop):
        resp = operation()
        assert resp.HasField("error")
        assert resp.error.HasField("notFound")
Example #7
0
def test_non_preemptible_job(respool_a):
    """Check admission of non-preemptible jobs against the CPU reservation.

    A first non-preemptible job takes the pool's whole CPU reservation, so
    a second one has to stay PENDING while a preemptible job can still run.
    Once the first job is killed, the second one must reach RUNNING.

    :param respool_a: resource pool fixture all three jobs are submitted to
    """
    def submit(job_file, **config_overrides):
        # Build a job in respool_a with its own test config and create it.
        job = Job(
            job_file=job_file,
            pool=respool_a,
            config=IntegrationTestConfig(**config_overrides),
        )
        job.create()
        return job

    # First non-preemptible job: uses all of the CPU reservation.
    first_np_job = submit(
        "test_non_preemptible_job.yaml", max_retry_attempts=100)
    first_np_job.wait_for_state(goal_state="RUNNING")

    # The pool's CPU allocation must now equal its CPU reservation.
    cpu_reserved = first_np_job.pool.get_reservation("cpu")
    cpu_allocated = first_np_job.pool.get_allocation("cpu")
    assert cpu_reserved == cpu_allocated

    # Second non-preemptible job: must not be admitted, because the CPU
    # reservation of the resource pool is already used up.
    second_np_job = submit(
        "test_non_preemptible_job.yaml",
        max_retry_attempts=100,
        sleep_time_sec=5,
    )
    second_np_job.wait_for_state(goal_state="PENDING")

    # A preemptible job, in contrast, should start running.
    preemptible_job = submit("test_job.yaml", max_retry_attempts=100)
    preemptible_job.wait_for_state(goal_state="RUNNING")

    # Kill the first non-preemptible job ...
    first_np_job.stop()
    first_np_job.wait_for_state(goal_state="KILLED")

    # ... after which the second one is expected to reach RUNNING.
    second_np_job.wait_for_state(goal_state="RUNNING")

    kill_jobs([second_np_job, preemptible_job])
Example #8
0
def _count_pending_and_check_pool(job, host_to_pool, expected_pool):
    """Count the PENDING tasks of ``job`` and check placement of the others.

    Every task that is not PENDING must sit on a host which ``host_to_pool``
    maps to ``expected_pool``; otherwise an AssertionError is raised.

    :param job: job whose tasks are inspected
    :param host_to_pool: dict mapping a hostname to the name of its host pool
    :param expected_pool: host-pool name the placed tasks must belong to
    :return: number of tasks found in PENDING state
    """
    pending = 0
    for instance in job.get_tasks():
        # Look the task up once and reuse it for both the state and the host.
        task = job.get_task(instance)
        if task.state_str == "PENDING":
            pending += 1
        else:
            hostname = task.get_runtime().host
            assert host_to_pool[hostname] == expected_pool
    return pending


def test__dynamic_partition_pool_restrictions(peloton_client):
    """Check that each kind of job only lands on hosts of its own host pool.

    Three host pools with one host each are set up. A non-preemptible
    batch job, a stateless job and a preemptible batch job are then
    started one after another. For each of them the test checks the pool
    of every placed task and the number of tasks left PENDING.

    :param peloton_client: client fixture handed to every Job under test
    """
    # we start with shared=1, batch_reserved=2
    # delete batch_reserved so that its hosts go to "default"
    delete_host_pool(util.HOSTPOOL_BATCH_RESERVED)

    # setup 3 host-pools with 1 host each
    ensure_host_pool(util.HOSTPOOL_BATCH_RESERVED, 1)
    ensure_host_pool(util.HOSTPOOL_SHARED, 1)
    ensure_host_pool(util.HOSTPOOL_STATELESS, 1)

    # Reverse lookup: hostname -> name of the pool the host belongs to.
    host_to_pool = {}
    resp = list_host_pools()
    for pool in resp.pools:
        for host in pool.hosts:
            host_to_pool[host] = pool.name

    # Jobs are recorded as soon as they are created so that the cleanup
    # below also runs for them when a later wait or assertion fails.
    started_jobs = []
    try:
        # Job has two instances with 3 cpus each.
        # Only one instance will run.
        npjob = Job(
            client=peloton_client,
            job_file="test_non_preemptible_job.yaml",
            config=IntegrationTestConfig(max_retry_attempts=100),
        )
        npjob.create()
        started_jobs.append(npjob)
        npjob.wait_for_state(goal_state="RUNNING")

        pending = _count_pending_and_check_pool(
            npjob, host_to_pool, util.HOSTPOOL_BATCH_RESERVED)
        assert pending == 1

        # Stateless job has 4 instances with host limit 1
        # so only one instance will run
        sjob = Job(
            client=peloton_client,
            job_file="test_stateless_job_host_limit_1.yaml",
            config=IntegrationTestConfig(
                max_retry_attempts=100, sleep_time_sec=2),
        )
        sjob.create()
        started_jobs.append(sjob)
        sjob.wait_for_state(goal_state="RUNNING")

        pending = _count_pending_and_check_pool(
            sjob, host_to_pool, util.HOSTPOOL_STATELESS)
        assert pending == 3

        # Preemptible batch job has 12 instances with 1 CPU each,
        # so 4 instances will run.
        pjob = Job(
            client=peloton_client,
            job_file="test_preemptible_job.yaml",
            config=IntegrationTestConfig(
                max_retry_attempts=100, sleep_time_sec=2),
        )
        pjob.create()
        started_jobs.append(pjob)
        pjob.wait_for_state(goal_state="RUNNING")

        pending = _count_pending_and_check_pool(
            pjob, host_to_pool, util.HOSTPOOL_SHARED)
        assert pending == 8
    finally:
        # Stop all jobs that were created, in creation order, so a failed
        # check does not leave them running.
        for job in started_jobs:
            job.stop()