Example #1
0
def test__acquire_release_host_offers():
    """Acquire host offers from hostmgr and release them again.

    The filter asks for hosts with at least 3.0 CPUs and caps the result
    at two hosts; the test expects exactly two offers, each from a host
    listed in MESOS_AGENTS, and an error-free release response.
    """
    acquire_request = v0hostmgr.AcquireHostOffersRequest(
        filter=v0hostmgr.HostFilter(
            resourceConstraint=v0hostmgr.ResourceConstraint(
                minimum=task.ResourceConfig(cpuLimit=3.0)),
            quantity=v0hostmgr.QuantityControl(maxHosts=2),
        ))
    client = with_private_stubs(Client())

    acquired = client.hostmgr_svc.AcquireHostOffers(
        acquire_request, metadata=client.hostmgr_metadata, timeout=20)

    # maxHosts is 2, so exactly 2 host offers are expected
    offers = acquired.hostOffers
    assert len(offers) == 2
    for host_offer in offers:
        assert host_offer.hostname in MESOS_AGENTS

    # hand the offers back to hostmgr
    release_request = v0hostmgr.ReleaseHostOffersRequest(hostOffers=offers)
    released = client.hostmgr_svc.ReleaseHostOffers(
        request=release_request,
        metadata=client.hostmgr_metadata,
        timeout=20)

    assert not released.HasField("error")
Example #2
0
def test__cluster_capacity():
    """Query hostmgr for cluster capacity and check the physical resources.

    Every reported resource must be of a known kind; cpu and memory must
    match the totals expected for the test cluster.
    """
    client = with_private_stubs(Client())
    capacity = client.hostmgr_svc.ClusterCapacity(
        request=v0hostmgr.ClusterCapacityRequest(),
        metadata=client.hostmgr_metadata,
        timeout=20)
    assert not capacity.HasField("error")

    known_kinds = ['cpu', 'gpu', 'memory', 'disk', 'fd']
    expected_capacity = {
        'cpu': 12.0,       # 4cpu * 3 agents
        'memory': 6144.0,  # 2048Mb * 3 agents
    }
    for entry in capacity.physicalResources:
        assert entry.kind in known_kinds
        # only cpu and memory have a pinned expected value
        if entry.kind in expected_capacity:
            assert entry.capacity == expected_capacity[entry.kind]
Example #3
0
def test_task_killed_in_ready_succeeds_when_re_enqueued(placement_engines):
    """Check that a task killed while READY runs once it is re-enqueued.

    The task is stopped while it sits in READY state in the resource
    manager, started again, and is then expected to reach RUNNING after
    the placement engines are brought back.
    """
    # with the placement engines stopped, tasks stay in READY state
    placement_engines.stop()

    # the job needs a client decorated with the peloton private API stubs
    stub_client = with_private_stubs(Client())

    # long running job with 2 instances
    job = Job(
        job_file='long_running_job.yaml',
        options=[with_instance_count(2)],
        client=stub_client,
    )
    job.create()
    job.wait_for_state(goal_state='PENDING')

    first_task = job.get_task(0)
    first_task.wait_for_pending_state(goal_state='READY')

    # kill the task, then re-enqueue it
    first_task.stop()
    first_task.start()

    # bring the placement engines back so the task can be placed
    placement_engines.start()

    def wait_for_instance_to_run():
        current = job.get_task(0)
        return current.state_str == 'RUNNING'

    job.wait_for_condition(wait_for_instance_to_run)
Example #4
0
def test__acquire_return_offers_errors():
    """Exercise the hostmgr offer APIs with requests that should not succeed.

    First an acquire with a 14-CPU minimum, which is expected to match no
    host; then a release of an offer whose ID is "invalid_id".
    """
    # ask is 14 cpus, so no hosts should match this
    oversized_ask = task.ResourceConfig(cpuLimit=14.0)
    acquire_request = v0hostmgr.AcquireHostOffersRequest(
        filter=v0hostmgr.HostFilter(
            resourceConstraint=v0hostmgr.ResourceConstraint(
                minimum=oversized_ask)))

    # decorate the client to add peloton private API stubs
    client = with_private_stubs(Client())

    acquired = client.hostmgr_svc.AcquireHostOffers(
        acquire_request, metadata=client.hostmgr_metadata, timeout=20)
    assert len(acquired.hostOffers) == 0

    # release an offer that hostmgr never handed out
    bogus_offer = v0hostmgr.HostOffer(
        id=peloton.HostOfferID(value="invalid_id"))
    released = client.hostmgr_svc.ReleaseHostOffers(
        request=v0hostmgr.ReleaseHostOffersRequest(hostOffers=[bogus_offer]),
        metadata=client.hostmgr_metadata,
        timeout=20)

    # NOTE(review): if `error` is a protobuf message field it is presumably
    # never None, which would make this check always pass; confirm whether
    # HasField("error") was intended.
    assert released.error is not None
Example #5
0
def test__tasks_reserve_execution(hostreservepool, peloton_client):
    """Check that a large task runs on the host that was reserved for it.

    A "median" job is started first and all its tasks are awaited in
    RUNNING. A single-instance "large" job is then created; its task is
    expected to go through RESERVED, a host is expected to show up with
    status 'reserved', and the task must end up running on that host.
    After the large job finishes no host may remain reserved.

    Args:
        hostreservepool: pool passed to both jobs.
        peloton_client: client used for the median job and, decorated
            with private stubs, for the large job and hostmgr queries.
    """
    p_job_median = Job(
        client=peloton_client,
        job_file='test_hostreservation_job_median.yaml',
        pool=hostreservepool,
        config=IntegrationTestConfig(
            max_retry_attempts=100,
            sleep_time_sec=1),
    )

    p_job_median.create()
    p_job_median.wait_for_state(goal_state='RUNNING')

    # we should have all 3 tasks in running state
    def all_running():
        return all(t.state == task.RUNNING for t in p_job_median.get_tasks().values())

    p_job_median.wait_for_condition(all_running)

    # decorate the client to add peloton private API stubs
    client = with_private_stubs(peloton_client)

    p_job_large = Job(
        job_file='test_hostreservation_job_large.yaml',
        pool=hostreservepool,
        config=IntegrationTestConfig(
            sleep_time_sec=1,
            max_retry_attempts=300),
        options=[with_instance_count(1)],
        client=client,
    )
    p_job_large.create()
    p_job_large.wait_for_state(goal_state='PENDING')

    request = hostmgr.GetHostsByQueryRequest()

    # task should get into reserved state and RUNNING state
    t1 = p_job_large.get_task(0)
    t1.wait_for_pending_state(goal_state="RESERVED")

    # the task is running on reserved host
    def get_reserved_host():
        resp = client.hostmgr_svc.GetHostsByQuery(
            request,
            metadata=p_job_large.client.hostmgr_metadata,
            timeout=p_job_large.config.rpc_timeout_sec,)

        for h in resp.hosts:
            if h.status == 'reserved':
                return h.hostname
        return ''

    # Remember the hostname seen by the poll that succeeded. Querying a
    # second time after the wait could return '' if the reservation has
    # already been consumed, failing the host comparison below spuriously.
    reserved = {'host': ''}

    def is_reserved():
        reserved['host'] = get_reserved_host()
        return reserved['host'] != ''

    p_job_large.wait_for_condition(is_reserved)
    reserved_host = reserved['host']

    t1.wait_for_pending_state(goal_state="RUNNING")
    assert reserved_host == t1.get_info().runtime.host

    # p_job_large should succeed
    p_job_large.wait_for_state()

    # no host is in reserved state
    response = client.hostmgr_svc.GetHostsByQuery(
        request,
        metadata=p_job_large.client.hostmgr_metadata,
        timeout=p_job_large.config.rpc_timeout_sec,)
    for host in response.hosts:
        assert host.status != 'reserved'

    kill_jobs([p_job_median, p_job_large])
 def client(self):
     """Return a new ``Client`` passed through ``with_private_stubs``."""
     base_client = Client()
     return with_private_stubs(base_client)
Example #7
0
def test__launch_kill():
    """Launch and kill a task through the hostmgr private API.

    Steps:
      1. LaunchTasks with a random (invalid) offer ID must be rejected.
      2. LaunchTasks with the acquired offer must succeed.
      3. KillTasks with an empty task ID list must report an error.
      4. KillTasks with the launched task ID must succeed.
    """
    client = with_private_stubs(Client())

    # acquire 1 host offer
    resource_constraint = v0hostmgr.ResourceConstraint(
        minimum=task.ResourceConfig(cpuLimit=3.0))
    host_filter = v0hostmgr.HostFilter(
        resourceConstraint=resource_constraint,
        quantity=v0hostmgr.QuantityControl(maxHosts=1),
    )
    request = v0hostmgr.AcquireHostOffersRequest(filter=host_filter)

    acquire_resp = client.hostmgr_svc.AcquireHostOffers(
        request, metadata=client.hostmgr_metadata, timeout=20)

    assert len(acquire_resp.hostOffers) == 1
    # Keep the offer in its own variable so later responses cannot
    # clobber it.
    offer = acquire_resp.hostOffers[0]

    # launch a test task using this offer
    cmd = "echo 'succeeded instance task' & sleep 100"
    tc = task.TaskConfig(
        command=mesos.CommandInfo(shell=True, value=cmd),
        name="task_name",
        resource=task.ResourceConfig(cpuLimit=1.0),
    )
    tid = mesos.TaskID(value=str(uuid.uuid4()) + '-1-1')
    t = v0hostmgr.LaunchableTask(
        taskId=tid,
        config=tc,
    )

    # Test 1
    # launch task using invalid offer
    req = v0hostmgr.LaunchTasksRequest(
        hostname=offer.hostname,
        agentId=offer.agentId,
        tasks=[t],
        id=peloton.HostOfferID(value=str(uuid.uuid4())))
    # The check lives outside the try block: asserting inside it would
    # raise AssertionError straight into the handler and the test could
    # never fail.
    launch_rejected = False
    try:
        client.hostmgr_svc.LaunchTasks(req,
                                       metadata=client.hostmgr_metadata,
                                       timeout=20)
    except Exception:
        # the call is expected to raise for an unknown offer ID
        launch_rejected = True
    assert launch_rejected, 'LaunchTasks should have failed'

    # Test 2
    # launch task using valid offer
    req = v0hostmgr.LaunchTasksRequest(hostname=offer.hostname,
                                       agentId=offer.agentId,
                                       tasks=[t],
                                       id=offer.id)
    resp = client.hostmgr_svc.LaunchTasks(req,
                                          metadata=client.hostmgr_metadata,
                                          timeout=20)
    assert resp.HasField("error") is False

    # Test 3
    # kill with empty TaskIDs list
    resp = client.hostmgr_svc.KillTasks(v0hostmgr.KillTasksRequest(taskIds=[]),
                                        metadata=client.hostmgr_metadata,
                                        timeout=20)
    assert resp.HasField("error") is True

    # Test 4
    # kill valid TaskID
    resp = client.hostmgr_svc.KillTasks(
        v0hostmgr.KillTasksRequest(taskIds=[tid]),
        metadata=client.hostmgr_metadata,
        timeout=20)
    assert resp.HasField("error") is False
Example #8
0
def _assert_capacities(resources, expected):
    """Assert each resource has a known kind and, where pinned, its capacity.

    Args:
        resources: iterable of objects exposing ``kind`` and ``capacity``.
        expected: dict mapping a resource kind to its required capacity;
            kinds absent from the dict are only checked for validity.
    """
    for resource in resources:
        assert resource.kind in ['cpu', 'gpu', 'memory', 'disk']
        if resource.kind in expected:
            assert resource.capacity == expected[resource.kind]


def _assert_pool_capacity(pool, cpus, memory_mb):
    """Assert a host pool reports the expected physical and slack capacity.

    Physical capacity is checked for cpu and memory; slack capacity is
    checked for cpu only. Both lists must hold exactly four entries.
    """
    assert len(pool.physicalCapacity) == 4
    assert len(pool.slackCapacity) == 4
    _assert_capacities(pool.physicalCapacity,
                       {'cpu': cpus, 'memory': memory_mb})
    _assert_capacities(pool.slackCapacity, {'cpu': cpus})


def test__hostpool_capacity(peloton_client):
    """Check per-pool capacity before and after moving a host to a new pool.

    Initially only the "default" pool is expected, holding all 3 agents.
    After one host is moved into "capacity-test", the default pool should
    report 2 agents' worth of capacity and the new pool 1 agent's worth.

    Args:
        peloton_client: client that is decorated with private stubs for
            the hostmgr calls and passed to the host-pool helpers.
    """
    client = with_private_stubs(peloton_client)

    # Check capacity of default pool.
    resp = client.hostmgr_svc.GetHostPoolCapacity(
        request=v0hostmgr.GetHostPoolCapacityRequest(),
        metadata=client.hostmgr_metadata,
        timeout=20)
    assert len(resp.pools) == 1
    assert resp.pools[0].poolName == "default"
    # 4cpu * 3 agents, 2048Mb * 3 agents
    _assert_pool_capacity(resp.pools[0], cpus=12.0, memory_mb=6144.0)

    # Create a host-pool and move 1 host to it.
    ensure_host_pool("capacity-test", 1, client=peloton_client)
    try:
        resp = client.hostmgr_svc.GetHostPoolCapacity(
            request=v0hostmgr.GetHostPoolCapacityRequest(),
            metadata=client.hostmgr_metadata,
            timeout=20)
        assert len(resp.pools) == 2

        # pool name -> (cpus, memory in Mb)
        expected_by_pool = {
            "default": (8.0, 4096.0),        # 2 agents
            "capacity-test": (4.0, 2048.0),  # 1 agent
        }
        for pool in resp.pools:
            assert pool.poolName in expected_by_pool, \
                "Unexpected pool %s" % pool.poolName
            cpus, memory_mb = expected_by_pool[pool.poolName]
            _assert_pool_capacity(pool, cpus=cpus, memory_mb=memory_mb)
    finally:
        # Run the cleanup even when an assertion above fails, so the extra
        # pool is not left behind for later tests.
        cleanup_other_host_pools([], client=peloton_client)