예제 #1
0
def test_task_killed_in_ready_succeeds_when_re_enqueued(placement_engines):
    """Verify that a task killed while READY runs once it is re-enqueued.

    The placement engines are stopped first so that tasks stay in the READY
    state; instance 0 is then stopped, started again, and expected to reach
    RUNNING after the placement engines are restarted.
    """
    # With the placement engines stopped, tasks are kept in READY state.
    placement_engines.stop()

    # Decorate the client to add peloton private API stubs.
    private_client = with_private_stubs(Client())

    # Long running job with 2 instances.
    job_options = [with_instance_count(2)]
    long_running_job = Job(
        job_file='long_running_job.yaml',
        options=job_options,
        client=private_client,
    )

    long_running_job.create()
    long_running_job.wait_for_state(goal_state='PENDING')

    # Wait for instance 0 to reach READY before killing it.
    first_task = long_running_job.get_task(0)
    first_task.wait_for_pending_state(goal_state='READY')

    # Kill the task, then re-enqueue it.
    first_task.stop()
    first_task.start()

    # Restart placement so the re-enqueued task can be placed.
    placement_engines.start()

    def wait_for_instance_to_run():
        # Look the task up again on every poll, as a fresh get_task(0) call.
        current_state = long_running_job.get_task(0).state_str
        return current_state == 'RUNNING'

    long_running_job.wait_for_condition(wait_for_instance_to_run)
예제 #2
0
def _count_pending_and_check_pool(job, host_to_pool, expected_pool):
    """Count the PENDING tasks of a job and verify placement of the others.

    Every task whose state string is not "PENDING" must have a runtime host
    that `host_to_pool` maps to `expected_pool`; otherwise an assertion fails.

    Args:
        job: job whose tasks are inspected via get_tasks() / get_task().
        host_to_pool: dict mapping a hostname to the name of its host pool.
        expected_pool: pool name every non-pending task's host must belong to.

    Returns:
        The number of tasks found in the PENDING state.
    """
    pending = 0
    for instance_id in job.get_tasks():
        # Look the task up once and reuse it for both the state and the host.
        task_handle = job.get_task(instance_id)
        if task_handle.state_str == "PENDING":
            pending += 1
            continue
        hostname = task_handle.get_runtime().host
        assert host_to_pool[hostname] == expected_pool
    return pending


def test__dynamic_partition_pool_restrictions(peloton_client):
    """Verify that each job type only runs on hosts of its own host pool.

    Three single-host pools are set up, then a non-preemptible batch job, a
    stateless job and a preemptible batch job are created one after another.
    For each job the number of PENDING tasks and the pool of every placed
    task are checked. All jobs that were created are stopped at the end,
    even when a check fails, so they do not keep occupying the pools.
    """
    # we start with shared=1, batch_reserved=2
    # delete batch_reserved so that its hosts go to "default"
    delete_host_pool(util.HOSTPOOL_BATCH_RESERVED)

    # setup 3 host-pools with 1 host each
    ensure_host_pool(util.HOSTPOOL_BATCH_RESERVED, 1)
    ensure_host_pool(util.HOSTPOOL_SHARED, 1)
    ensure_host_pool(util.HOSTPOOL_STATELESS, 1)

    host_to_pool = {
        host: pool.name
        for pool in list_host_pools().pools
        for host in pool.hosts
    }

    # Jobs are recorded only after create() so cleanup never touches a job
    # that was not actually submitted.
    created_jobs = []
    try:
        # Job has two instances with 3 cpus each.
        # Only one instance will run.
        npjob = Job(
            client=peloton_client,
            job_file="test_non_preemptible_job.yaml",
            config=IntegrationTestConfig(max_retry_attempts=100),
        )
        npjob.create()
        created_jobs.append(npjob)
        npjob.wait_for_state(goal_state='RUNNING')

        pending = _count_pending_and_check_pool(
            npjob, host_to_pool, util.HOSTPOOL_BATCH_RESERVED)
        assert pending == 1

        # Stateless job has 4 instances with host limit 1
        # so only one instance will run
        sjob = Job(
            client=peloton_client,
            job_file="test_stateless_job_host_limit_1.yaml",
            config=IntegrationTestConfig(
                max_retry_attempts=100, sleep_time_sec=2),
        )
        sjob.create()
        created_jobs.append(sjob)
        sjob.wait_for_state(goal_state="RUNNING")

        pending = _count_pending_and_check_pool(
            sjob, host_to_pool, util.HOSTPOOL_STATELESS)
        assert pending == 3

        # Preemptible batch job has 12 instances with 1 CPU each,
        # so 4 instances will run.
        pjob = Job(
            client=peloton_client,
            job_file="test_preemptible_job.yaml",
            config=IntegrationTestConfig(
                max_retry_attempts=100, sleep_time_sec=2),
        )
        pjob.create()
        created_jobs.append(pjob)
        pjob.wait_for_state(goal_state="RUNNING")

        pending = _count_pending_and_check_pool(
            pjob, host_to_pool, util.HOSTPOOL_SHARED)
        assert pending == 8
    finally:
        # Stop all jobs (in creation order), whether or not the checks passed.
        for job in created_jobs:
            job.stop()
예제 #3
0
def test__tasks_reserve_execution(hostreservepool, peloton_client):
    """Check that a large task reserves a host and then runs on that host.

    A median-sized job is started first and all of its tasks must be RUNNING.
    A single-instance large job is then created; its task is expected to pass
    through RESERVED, a host must show up with status 'reserved', and the task
    must end up running on exactly that host. Once the large job finishes, no
    host may still be reported as reserved.
    """
    median_config = IntegrationTestConfig(
        max_retry_attempts=100,
        sleep_time_sec=1,
    )
    p_job_median = Job(
        client=peloton_client,
        job_file='test_hostreservation_job_median.yaml',
        pool=hostreservepool,
        config=median_config,
    )
    p_job_median.create()
    p_job_median.wait_for_state(goal_state='RUNNING')

    # we should have all 3 tasks in running state
    def all_running():
        for median_task in p_job_median.get_tasks().values():
            if not (median_task.state == task.RUNNING):
                return False
        return True

    p_job_median.wait_for_condition(all_running)

    # decorate the client to add peloton private API stubs
    private_client = with_private_stubs(peloton_client)

    large_config = IntegrationTestConfig(
        sleep_time_sec=1,
        max_retry_attempts=300,
    )
    p_job_large = Job(
        job_file='test_hostreservation_job_large.yaml',
        pool=hostreservepool,
        config=large_config,
        options=[with_instance_count(1)],
        client=private_client,
    )
    p_job_large.create()
    p_job_large.wait_for_state(goal_state='PENDING')

    hosts_request = hostmgr.GetHostsByQueryRequest()

    def query_hosts():
        # One host-manager query; metadata and timeout come from the large job.
        return private_client.hostmgr_svc.GetHostsByQuery(
            hosts_request,
            metadata=p_job_large.client.hostmgr_metadata,
            timeout=p_job_large.config.rpc_timeout_sec,
        )

    # task should get into reserved state and RUNNING state
    large_task = p_job_large.get_task(0)
    large_task.wait_for_pending_state(goal_state="RESERVED")

    # the task is running on reserved host
    def get_reserved_host():
        # Hostname of the first host reported as 'reserved', or '' if none.
        reserved_names = (
            h.hostname for h in query_hosts().hosts if h.status == 'reserved'
        )
        return next(reserved_names, '')

    def is_reserved():
        reserved_name = get_reserved_host()
        return reserved_name != ''

    p_job_large.wait_for_condition(is_reserved)
    reserved_host = get_reserved_host()

    large_task.wait_for_pending_state(goal_state="RUNNING")
    assert reserved_host == large_task.get_info().runtime.host

    # p_job_large should succeed
    p_job_large.wait_for_state()

    # no host is in reserved state
    final_response = query_hosts()
    for reported_host in final_response.hosts:
        assert reported_host.status != 'reserved'

    kill_jobs([p_job_median, p_job_large])