Esempio n. 1
0
def test_controller_task_limit():
    """Check that the controller limit of a resource pool gates admission.

    Scenario:
      1. Start a controller job that uses up the whole controller limit.
      2. Start a second controller job and verify it stays PENDING.
      3. Stop the first job and verify the second one starts RUNNING.
    """
    controller_job_file = 'test_controller_job.yaml'
    limited_pool_file = 'test_respool_controller_limit.yaml'

    # The first job takes the entire controller limit of the pool.
    first = Job(
        job_file=controller_job_file,
        config=IntegrationTestConfig(pool_file=limited_pool_file),
    )
    first.create()
    first.wait_for_state(goal_state='RUNNING')

    # The second job targets the same pool, whose limit is already used up.
    second = Job(
        job_file=controller_job_file,
        config=IntegrationTestConfig(pool_file=limited_pool_file),
    )
    second.create()

    # Give the second job 5 seconds in which it could (wrongly) be admitted.
    time.sleep(5)

    # It must still be waiting for admission.
    second.wait_for_state(goal_state='PENDING')

    # Free the controller limit by stopping the first job.
    first.stop()
    first.wait_for_state(goal_state='KILLED')

    # Now the second job is expected to be admitted and run.
    second.wait_for_state(goal_state='RUNNING')

    kill_jobs([second])
Esempio n. 2
0
def test_non_preemptible_job(respool_a):
    """Check admission of non-preemptible jobs against a pool's reservation.

    A first non-preemptible job uses the whole CPU reservation, so a second
    one must wait in PENDING while a preemptible job can still run. Once the
    first job is stopped, the second non-preemptible job starts RUNNING.

    :param respool_a: resource pool fixture in which all jobs are created.
    """
    non_preemptible_file = 'test_non_preemptible_job.yaml'

    # First non-preemptible job: consumes the full CPU reservation.
    first_np = Job(
        job_file=non_preemptible_file,
        pool=respool_a,
        config=IntegrationTestConfig(max_retry_attempts=100),
    )
    first_np.create()
    first_np.wait_for_state(goal_state='RUNNING')

    # CPU allocation of the pool must now match its reservation.
    pool_info = first_np.pool.pool_info()
    reserved_cpu = get_reservation('cpu', pool_info)
    allocated_cpu = get_allocation('cpu', pool_info)
    assert reserved_cpu == allocated_cpu

    # Second non-preemptible job: cannot be admitted because the CPU
    # reservation of the pool is exhausted.
    second_np = Job(
        job_file=non_preemptible_file,
        pool=respool_a,
        config=IntegrationTestConfig(max_retry_attempts=100,
                                     sleep_time_sec=5),
    )
    second_np.create()
    second_np.wait_for_state(goal_state='PENDING')

    # A preemptible job in the same pool should still start running.
    preemptible = Job(
        job_file='test_job.yaml',
        pool=respool_a,
        config=IntegrationTestConfig(max_retry_attempts=100),
    )
    preemptible.create()
    preemptible.wait_for_state(goal_state='RUNNING')

    # Release the reservation held by the first non-preemptible job.
    first_np.stop()
    first_np.wait_for_state(goal_state='KILLED')

    # The second non-preemptible job should now be admitted and run.
    second_np.wait_for_state(goal_state='RUNNING')

    kill_jobs([second_np, preemptible])
Esempio n. 3
0
def test_update_job_increase_instances():
    """Check that updating a running job raises its instance count.

    The job is started, the test waits until 3 instances are counted, the
    job is updated from 'long_running_job_update_instances.yaml', and the
    test then waits until 4 instances are counted.
    """
    job = Job(job_file='long_running_job.yaml',
              config=IntegrationTestConfig(max_retry_attempts=100))
    job.create()
    job.wait_for_state(goal_state='RUNNING')

    def make_tasks_count(target):
        """Build a predicate that is true once `target` tasks are counted."""
        def tasks_count():
            matching = 0
            for t in job.get_tasks().values():
                # NOTE(review): 8 and 9 are presumably the RUNNING and
                # SUCCEEDED task-state values (see the message printed
                # below) -- confirm against the task state enum.
                if t.state == 8 or t.state == 9:
                    matching += 1

            print("total instances running/completed: %d" % matching)
            return matching == target

        return tasks_count

    # Before the update, 3 instances are expected.
    job.wait_for_condition(make_tasks_count(3))

    # Apply the new job configuration.
    job.update(new_job_file='long_running_job_update_instances.yaml')

    # After the update, the count should grow to 4.
    job.wait_for_condition(make_tasks_count(4))
    job.wait_for_state(goal_state='RUNNING')

    kill_jobs([job])
Esempio n. 4
0
def test__create_a_stateless_job_with_3_tasks_on_3_different_hosts():
    """Check that 3 label-constrained stateless tasks land on 3 hosts.

    The job is labelled with job.name=peloton_stateless_job and carries a
    constraint built from that same label; once it is RUNNING, the host of
    every task is collected and the number of distinct hosts must be 3.
    """
    label_key = "job.name"
    label_value = "peloton_stateless_job"

    job_options = [
        with_labels({
            label_key: label_value,
        }),
        with_constraint(_label_constraint(label_key, label_value)),
        with_instance_count(3),
    ]
    job = Job(
        job_file='test_stateless_job.yaml',
        config=IntegrationTestConfig(
            max_retry_attempts=100,
        ),
        options=job_options,
    )

    job.create()
    job.wait_for_state(goal_state='RUNNING')

    # Collect the distinct hosts the tasks were placed on.
    distinct_hosts = set()
    for _, job_task in job.get_tasks().iteritems():
        info = job_task.get_info()
        distinct_hosts.add(info.runtime.host)

    # Clean up before asserting so the job is killed even on failure.
    kill_jobs([job])

    # Every one of the 3 tasks must be on its own host.
    assert len(distinct_hosts) == 3
Esempio n. 5
0
def test_job_succeeds_if_controller_task_succeeds():
    """Check that the controller task alone decides the job's final state.

    In this job only the controller task succeeds while the other tasks
    fail; the job as a whole is still expected to reach SUCCEEDED.
    """
    controller_job = Job(job_file='test_job_succecced_controller_task.yaml')
    controller_job.create()

    # The failing non-controller tasks must not affect the terminal state.
    controller_job.wait_for_state(goal_state='SUCCEEDED')

    kill_jobs([controller_job])
Esempio n. 6
0
def test__preemption_task_level(respool_a, respool_b):
    """Check preemption when preemptibility is overridden per task.

    A 12-task job fills respool_a. Starting a second job in respool_b
    changes the entitlement, after which the odd instance ids of the first
    job are expected to be KILLED and the even ones to keep RUNNING.

    :param respool_a: resource pool fixture hosting the job to be preempted.
    :param respool_b: resource pool fixture hosting the competing job.
    """
    p_job_a = Job(
        job_file='test_preemptible_job_preemption_override.yaml',
        pool=respool_a,
        config=IntegrationTestConfig(max_retry_attempts=100,
                                     sleep_time_sec=10),
    )
    p_job_a.create()
    p_job_a.wait_for_state(goal_state='RUNNING')

    # All 12 tasks should reach the running state (state value 8).
    def all_running():
        for t in p_job_a.get_tasks().values():
            if not t.state == 8:
                return False
        return True

    p_job_a.wait_for_condition(all_running)

    # Odd instance ids are expected to be preempted, even ones to survive.
    odd_ids = set(range(1, 12, 2))
    even_ids = set(range(0, 12, 2))

    # Filled in by task_preempted() on every poll.
    killed_ids, alive_ids = set(), set()

    # 6 tasks (6 CPUs worth) of p_job_a should end up preempted.
    def task_preempted():
        killed_ids.clear()
        alive_ids.clear()
        killed_total = 0
        alive_total = 0
        for t in p_job_a.get_tasks().values():
            # Preempted tasks show up as KILLED because killOnPreempt is
            # set to true.
            if t.state == task.KILLED:
                killed_total += 1
                killed_ids.add(t.instance_id)
            if t.state == task.RUNNING:
                alive_total += 1
                alive_ids.add(t.instance_id)

        return alive_total == 6 and killed_total == 6

    p_job_b = Job(
        job_file='test_preemptible_job.yaml',
        pool=respool_b,
        config=IntegrationTestConfig(),
    )
    # Creating the second job changes the entitlement calculation, which
    # starts the preemption of tasks from p_job_a.
    p_job_b.create()

    # Wait until 6 tasks of p_job_a were preempted to make room for p_job_b.
    p_job_a.wait_for_condition(task_preempted)

    # Exactly the odd instance ids must have been preempted.
    assert killed_ids == odd_ids
    assert alive_ids == even_ids

    # The competing job should now get to run.
    p_job_b.wait_for_state('RUNNING')

    kill_jobs([p_job_a, p_job_b])
Esempio n. 7
0
def test__create_a_batch_job_and_restart_jobmgr_completes_jobs(jobmgr):
    """Check that a batch job still finishes if jobmgr restarts right away.

    :param jobmgr: fixture exposing restart() for the job manager.
    """
    batch_job = Job(
        job_file='test_job_no_container.yaml',
        config=IntegrationTestConfig(max_retry_attempts=100),
    )
    batch_job.create()

    # Restart immediately. That will leave some fraction of the tasks
    # unallocated and another fraction initialized.
    jobmgr.restart()

    # Wait for the default goal state of wait_for_state().
    batch_job.wait_for_state()
    kill_jobs([batch_job])
Esempio n. 8
0
def test__preemption_spark_goalstate(respool_a, respool_b):
    """Check state and goal state of tasks preempted under a kill policy.

    A 12-task job fills respool_a. After a second job is created in
    respool_b, 6 tasks of the first job should be preempted; each of them
    must be KILLED with goal state PREEMPTING.

    :param respool_a: resource pool fixture hosting the job to be preempted.
    :param respool_b: resource pool fixture hosting the competing job.
    """
    p_job_a = Job(
        job_file='test_preemptible_job_preemption_policy.yaml',
        pool=respool_a,
        config=IntegrationTestConfig(max_retry_attempts=100,
                                     sleep_time_sec=10),
    )
    p_job_a.create()
    p_job_a.wait_for_state(goal_state='RUNNING')

    # All 12 tasks should reach the running state (state value 8).
    def all_running():
        for t in p_job_a.get_tasks().values():
            if not t.state == 8:
                return False
        return True

    p_job_a.wait_for_condition(all_running)

    # Every task seen as KILLED, accumulated across all polls (used as a
    # set: only the keys matter).
    seen_killed = {}

    # 6 tasks (6 CPUs worth) of p_job_a should end up preempted.
    def task_preempted():
        killed_total = 0
        for t in p_job_a.get_tasks().values():
            # Preempted tasks show up as KILLED because killOnPreempt is
            # set to true.
            if t.state == task.KILLED:
                killed_total += 1
                seen_killed[t] = True
        return killed_total == 6

    p_job_b = Job(
        job_file='test_preemptible_job.yaml',
        pool=respool_b,
        config=IntegrationTestConfig(),
    )
    # Creating the second job changes the entitlement calculation.
    p_job_b.create()

    # Wait until 6 tasks of p_job_a were preempted to make room for p_job_b.
    p_job_a.wait_for_condition(task_preempted)

    # Verify the runtime info of every preempted task.
    for killed_task in seen_killed:
        assert killed_task.state == task.KILLED
        assert killed_task.goal_state == task.PREEMPTING

    kill_jobs([p_job_a, p_job_b])
Esempio n. 9
0
def test__preemption_tasks_reschedules_task(respool_a, respool_b):
    """Check that preempted tasks are enqueued again and become PENDING.

    A 12-task job fills respool_a. After a second job is created in
    respool_b, 6 tasks of the first job should go back to PENDING and the
    second job should reach SUCCEEDED.

    :param respool_a: resource pool fixture hosting the job to be preempted.
    :param respool_b: resource pool fixture hosting the competing job.
    """
    preemptible_file = 'test_preemptible_job.yaml'

    p_job_a = Job(
        job_file=preemptible_file,
        pool=respool_a,
        config=IntegrationTestConfig(max_retry_attempts=100,
                                     sleep_time_sec=5),
    )
    p_job_a.create()
    p_job_a.wait_for_state(goal_state='RUNNING')

    # All 12 tasks should reach the running state (state value 8).
    def all_running():
        for t in p_job_a.get_tasks().values():
            if not t.state == 8:
                return False
        return True

    p_job_a.wait_for_condition(all_running)

    # 6 tasks (6 CPUs worth) of p_job_a should end up preempted.
    def task_preempted():
        pending_total = 0
        for t in p_job_a.get_tasks().values():
            # The job manager should enqueue preempted tasks again; once
            # enqueued they should transition to PENDING.
            if t.state == task.PENDING:
                pending_total += 1
        return pending_total == 6

    p_job_b = Job(
        job_file=preemptible_file,
        pool=respool_b,
        config=IntegrationTestConfig(max_retry_attempts=100,
                                     sleep_time_sec=10),
    )
    # Creating the second job changes the entitlement calculation.
    p_job_b.create()

    # Wait until 6 tasks of p_job_a were preempted to make room for p_job_b.
    p_job_a.wait_for_condition(task_preempted)

    # The competing job should run to completion.
    p_job_b.wait_for_state(goal_state='SUCCEEDED')

    kill_jobs([p_job_a, p_job_b])
Esempio n. 10
0
def test_controller_task_limit_executor_can_run():
    """Check that the controller limit does not block non-controller jobs.

    Scenario:
      1. Start a controller job that uses up the whole controller limit.
      2. Start a second controller job and verify it stays PENDING.
      3. Start a non-controller job in the same pool and verify it
         reaches SUCCEEDED.
    """
    controller_job_file = 'test_controller_job.yaml'
    limited_pool_file = 'test_respool_controller_limit.yaml'

    # The first controller job takes the entire controller limit.
    cjob1 = Job(
        job_file=controller_job_file,
        config=IntegrationTestConfig(pool_file=limited_pool_file),
    )
    cjob1.create()
    cjob1.wait_for_state(goal_state='RUNNING')

    # The second controller job finds the limit already used up.
    cjob2 = Job(
        job_file=controller_job_file,
        config=IntegrationTestConfig(pool_file=limited_pool_file),
    )
    cjob2.create()

    # Give the second job 5 seconds in which it could (wrongly) be admitted.
    time.sleep(5)

    # It must still be waiting for admission.
    cjob2.wait_for_state(goal_state='PENDING')

    # A regular executor job in the same pool.
    executor_job = Job(
        job_file='test_job.yaml',
        config=IntegrationTestConfig(pool_file=limited_pool_file),
    )
    executor_job.create()

    # It should be admitted and run to completion.
    executor_job.wait_for_state(goal_state='SUCCEEDED')

    kill_jobs([cjob1, cjob2])
Esempio n. 11
0
def test__preemption_non_preemptible_task(respool_a, respool_b):
    """Check that only preemptible tasks are preempted from a pool.

    respool_a runs a preemptible and a non-preemptible job with 6 instances
    each, so its allocation exceeds its reservation. A third 6-instance job
    in respool_b then forces preemption: the preemptible job in respool_a
    must go back to PENDING, the non-preemptible one must keep RUNNING, and
    both pools must end up with allocation equal to reservation.

    :param respool_a: resource pool fixture hosting the first two jobs.
    :param respool_b: resource pool fixture hosting the competing job.
    """
    # Create 2 jobs in respool A: 1 preemptible and 1 non-preemptible.
    p_job_a = Job(job_file='test_preemptible_job.yaml', pool=respool_a,
                  config=IntegrationTestConfig(max_retry_attempts=100,
                                               sleep_time_sec=10))
    p_job_a.update_instance_count(6)

    np_job_a = Job(job_file='test_preemptible_job.yaml', pool=respool_a,
                   config=IntegrationTestConfig())
    # Same job file, but flagged as non-preemptible through its SLA.
    np_job_a.job_config.sla.preemptible = False
    np_job_a.update_instance_count(6)

    # preemptible job takes 6 CPUs
    p_job_a.create()

    # non preemptible job takes 6 reserved CPUs
    np_job_a.create()

    p_job_a.wait_for_state('RUNNING')
    np_job_a.wait_for_state('RUNNING')

    # pool allocation is more than reservation
    pool_info = np_job_a.pool.pool_info()
    assert get_reservation('cpu', pool_info) < get_allocation('cpu', pool_info)

    # Create another job in respool B
    p_job_b = Job(job_file='test_preemptible_job.yaml', pool=respool_b,
                  config=IntegrationTestConfig(max_retry_attempts=100,
                                               sleep_time_sec=10))
    p_job_b.update_instance_count(6)

    p_job_b.create()

    # p_job_b should remain PENDING since all resources are used by
    # p_job_a
    p_job_b.wait_for_state('PENDING')

    # p_job_a should be preempted and go back to PENDING
    p_job_a.wait_for_state(goal_state='PENDING')

    # np_job_a should keep RUNNING
    np_job_a.wait_for_state('RUNNING')

    def running_task_count(job):
        """Return how many tasks of `job` are currently RUNNING."""
        return sum(1 for t in job.get_tasks().values()
                   if t.state == task.RUNNING)

    def all_tasks_running():
        # The predicate used to look only at np_job_a even though it is
        # polled to wait for p_job_b. Require both: the 6 non-preemptible
        # tasks are still running AND the 6 tasks of p_job_b are running,
        # which the pool B allocation assertion below depends on.
        return (running_task_count(np_job_a) == 6 and
                running_task_count(p_job_b) == 6)

    # p_job_b should start running
    p_job_b.wait_for_condition(all_tasks_running)

    # pool A allocation is equal to reservation
    pool_info = np_job_a.pool.pool_info()
    assert get_reservation('cpu', pool_info) == \
        get_allocation('cpu', pool_info)

    # pool B allocation is equal to reservation
    pool_info = p_job_b.pool.pool_info()
    assert get_reservation('cpu', pool_info) == \
        get_allocation('cpu', pool_info)

    # wait for p_job_b to finish
    p_job_b.wait_for_state('SUCCEEDED')

    # make sure p_job_a starts running
    p_job_a.wait_for_state('RUNNING')

    kill_jobs([p_job_a, np_job_a, p_job_b])
Esempio n. 12
0
def test__create_2_stateless_jobs_with_task_to_task_anti_affinity_between_jobs(): # noqa
    """Check that two anti-affine single-task stateless jobs use 2 hosts.

    Each job is labelled job.name=peloton_stateless_job<i> and receives an
    AND constraint made of two label constraints: one on its own label and
    one on the label of the other job. After both jobs are RUNNING, their
    tasks must be spread over 2 distinct hosts.
    """
    label_key = "job.name"

    def label_constraint_for(job_label):
        """Build the label constraint used for one job.name value."""
        # NOTE(review): kind=1 / condition=2 / requirement=0 presumably mean
        # "task label, count equal to 0", i.e. no co-located task with this
        # label -- confirm against the task_pb2 enum definitions.
        return task_pb2.Constraint(
            type=1,
            labelConstraint=task_pb2.LabelConstraint(
                kind=1,
                condition=2,
                requirement=0,
                label=peloton_pb2.Label(
                    key='job.name',
                    value=job_label,
                ),
            ),
        )

    jobs = []
    for index in range(2):
        own_label = 'peloton_stateless_job%s' % index
        other_label = 'peloton_stateless_job%s' % ((index + 1) % 2)

        job = Job(
            job_file='test_stateless_job.yaml',
            config=IntegrationTestConfig(
                max_retry_attempts=100,
            ),
            options=[
                with_labels({
                    label_key: own_label
                }),
                with_job_name('TestPelotonDockerJob_Stateless' + repr(index)),
                with_instance_count(1),
            ]
        )

        # type=2 wraps both label constraints in a single AND constraint.
        anti_affinity = task_pb2.Constraint(
            type=2,
            andConstraint=task_pb2.AndConstraint(
                constraints=[
                    # Tasks of my own job
                    label_constraint_for(own_label),
                    # Avoid tasks of the other job
                    label_constraint_for(other_label),
                ]
            ),
        )
        job.job_config.defaultConfig.constraint.CopyFrom(anti_affinity)
        jobs.append(job)

    # Create the jobs one after the other, one second apart.
    for job in jobs:
        job.create()
        time.sleep(1)

    # Collect the distinct hosts on which the tasks of both jobs run.
    distinct_hosts = set()
    for job in jobs:
        job.wait_for_state(goal_state='RUNNING')
        for _, job_task in job.get_tasks().iteritems():
            info = job_task.get_info()
            distinct_hosts.add(info.runtime.host)

    # Clean up before asserting so the jobs are killed even on failure.
    kill_jobs(jobs)

    # The two tasks must have been placed on 2 different hosts.
    assert len(distinct_hosts) == 2