Example #1
def get_metrics(package_name, service_name, task_name):
    """Return a list of metrics datapoints.

    Keyword arguments:
    package_name -- the name of the package the service is using
    service_name -- the name of the service to get metrics for
    task_name -- the name of the task whose agent to run metrics commands from
    """
    tasks = shakedown.get_service_tasks(service_name)
    task_to_check = None
    for task in tasks:
        if task['name'] == task_name:
            task_to_check = task

    if task_to_check is None:
        raise Exception("Could not find task: {}".format(task_name))

    agent_id = task_to_check['slave_id']
    executor_id = task_to_check['executor_id']

    # TODO: Uncomment the block below once the /containers endpoint reports the
    # correct container IDs, and remove the 'pod info' workaround that follows.
    ## Fetch the list of containers for the agent
    #containers_url = "{}/system/v1/agent/{}/metrics/v0/containers".format(shakedown.dcos_url(), agent_id)
    #containers_response = sdk_cmd.request("GET", containers_url, retry=False)
    #if not containers_response.ok:
    #    log.info("Unable to fetch containers list")
    #    raise Exception("Unable to fetch containers list: {}".format(containers_url))

    # Rather than adding the pod name to this function's parameters, derive it
    # from the task name so the signature stays stable once the block above is
    # re-enabled.
    pod_name = '-'.join(task_name.split("-")[:2])
    pod_info = sdk_cmd.svc_cli(package_name, service_name, "pod info {}".format(pod_name), json=True)
    task_info = None
    for task in pod_info:
        if task["info"]["name"] == task_name:
            task_info = task
            break

    if not task_info:
        return []

    container_id = task_info["status"]["containerStatus"]["containerId"]["value"]

    #for container_id in json.loads(containers_response.text):
    app_url = "{}/system/v1/agent/{}/metrics/v0/containers/{}/app".format(
        shakedown.dcos_url(), agent_id, container_id)
    app_response = sdk_cmd.request("GET", app_url, retry=False)
    if not app_response.ok:
        raise Exception("Failed to get metrics from container")
        #continue

    app_json = json.loads(app_response.text)
    if app_json['dimensions']['executor_id'] == executor_id:
        return app_json['datapoints']

    raise Exception("No metrics found")
def get_scheduler_host(service_name):
    # Marathon mangles foldered paths as follows: "/path/to/svc" => "svc.to.path"
    task_name_elems = service_name.lstrip('/').split('/')
    task_name_elems.reverse()
    app_name = '.'.join(task_name_elems)
    ips = shakedown.get_service_ips('marathon', app_name)
    if len(ips) == 0:
        raise Exception('No IPs found for marathon task "{}". Available tasks are: {}'.format(
            app_name, [task['name'] for task in shakedown.get_service_tasks('marathon')]))
    return ips.pop()
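
The Marathon name mangling can be checked in isolation; a small sketch reproducing the transformation above:

def marathon_app_name(service_name):
    # "/path/to/svc" => "svc.to.path"
    elems = service_name.lstrip('/').split('/')
    elems.reverse()
    return '.'.join(elems)

assert marathon_app_name("/path/to/svc") == "svc.to.path"
assert marathon_app_name("svc") == "svc"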
 def fn():
     nonlocal consecutive_task_running
     svc_tasks = shakedown.get_service_tasks(PACKAGE_NAME)
     states = [t['state'] for t in svc_tasks]
     print('Task states: {}'.format(states))
     if 'TASK_RUNNING' in states:
         consecutive_task_running += 1
         assert consecutive_task_running <= 3
     else:
         consecutive_task_running = 0
     return False
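
fn above mutates consecutive_task_running in an enclosing scope via 'nonlocal'; a self-contained sketch of that scaffolding, reusing shakedown and PACKAGE_NAME from the example (the wrapper name and polling cadence are assumptions, not the original harness):

import time

def assert_no_sustained_task_running(timeout_seconds=30):
    consecutive_task_running = 0

    def fn():
        # 'nonlocal' lets the counter accumulate across repeated polls.
        nonlocal consecutive_task_running
        states = [t['state'] for t in shakedown.get_service_tasks(PACKAGE_NAME)]
        if 'TASK_RUNNING' in states:
            consecutive_task_running += 1
            assert consecutive_task_running <= 3
        else:
            consecutive_task_running = 0
        return False

    deadline = time.time() + timeout_seconds
    while time.time() < deadline:
        fn()
        time.sleep(1)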
def get_metrics(package_name, service_name, task_name):
    """Return a list of DC/OS metrics datapoints.

    Keyword arguments:
    package_name -- the name of the package the service is using
    service_name -- the name of the service to get metrics for
    task_name -- the name of the task whose agent to run metrics commands from
    """
    tasks = shakedown.get_service_tasks(service_name)
    task_to_check = None
    for task in tasks:
        if task['name'] == task_name:
            task_to_check = task

    if task_to_check is None:
        raise Exception("Could not find task: {}".format(task_name))

    agent_id = task_to_check['slave_id']
    executor_id = task_to_check['executor_id']

    pod_name = '-'.join(task_name.split("-")[:2])
    pod_info = sdk_cmd.svc_cli(package_name, service_name, "pod info {}".format(pod_name), json=True)
    task_info = None
    for task in pod_info:
        if task["info"]["name"] == task_name:
            task_info = task
            break

    if not task_info:
        return []

    task_container_id = task_info["status"]["containerStatus"]["containerId"]["value"]

    # Not needed to fetch the metrics themselves, but consume the /containers
    # endpoint to verify that it reports this task's container ID.
    containers_response = sdk_cmd.cluster_request(
        "GET", "/system/v1/agent/{}/metrics/v0/containers".format(agent_id), retry=False)
    reported_container_ids = json.loads(containers_response.text)

    container_id_reported = task_container_id in reported_container_ids

    if not container_id_reported:
        raise ValueError("The metrics /container endpoint returned {}, expecting {} to be returned as well".format(
            reported_container_ids, task_container_id))

    app_response = sdk_cmd.cluster_request(
        "GET", "/system/v1/agent/{}/metrics/v0/containers/{}/app".format(agent_id, task_container_id), retry=False)
    app_json = json.loads(app_response.text)
    if app_json['dimensions']['executor_id'] == executor_id:
        return app_json['datapoints']

    raise Exception("No metrics found")
def get_hello_world_agent_sets():
    all_tasks = shakedown.get_service_tasks(config.SERVICE_NAME)
    hello_agents = []
    world_agents = []
    for task in all_tasks:
        if task['name'].startswith('hello-'):
            hello_agents.append(task['slave_id'])
        elif task['name'].startswith('world-'):
            world_agents.append(task['slave_id'])
        else:
            assert False, "Unknown task: " + task['name']
    return hello_agents, world_agents
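
A typical consumer of this helper, mirroring the colocation check in Example #6 below:

hello_agents, world_agents = get_hello_world_agent_sets()
# No two 'hello' tasks and no two 'world' tasks may share an agent.
assert len(hello_agents) == len(set(hello_agents))
assert len(world_agents) == len(set(world_agents))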
Example #6
def test_no_colocation_in_podtypes():
    # check that no two 'hellos' and no two 'worlds' are colocated on the same agent
    all_tasks = shakedown.get_service_tasks(PACKAGE_NAME)
    print(all_tasks)
    hello_agents = []
    world_agents = []
    for task in all_tasks:
        if task['name'].startswith('hello-'):
            hello_agents.append(task['slave_id'])
        elif task['name'].startswith('world-'):
            world_agents.append(task['slave_id'])
        else:
            assert False, "Unknown task: " + task['name']
    assert len(hello_agents) == len(set(hello_agents))
    assert len(world_agents) == len(set(world_agents))
Example #7
def test_bump_world_cpus():
    config.check_running(FOLDERED_SERVICE_NAME)
    world_ids = sdk_tasks.get_task_ids(FOLDERED_SERVICE_NAME, 'world')
    log.info('world ids: ' + str(world_ids))

    updated_cpus = config.bump_world_cpus(FOLDERED_SERVICE_NAME)

    sdk_tasks.check_tasks_updated(FOLDERED_SERVICE_NAME, 'world', world_ids)
    config.check_running(FOLDERED_SERVICE_NAME)

    all_tasks = shakedown.get_service_tasks(FOLDERED_SERVICE_NAME)
    running_tasks = [t for t in all_tasks if t['name'].startswith('world') and t['state'] == "TASK_RUNNING"]
    assert len(running_tasks) == config.world_task_count(FOLDERED_SERVICE_NAME)
    for t in running_tasks:
        assert close_enough(t['resources']['cpus'], updated_cpus)
Example #8
def get_job_tasks(job_id, run_id):
    client = metronome.create_client()
    run = client.get_run(job_id, run_id)
    taskids = []
    for task in run['tasks']:
        taskids.append(task['id'])

    job_tasks = []
    all_job_tasks = shakedown.get_service_tasks('metronome')
    for task in all_job_tasks:
        for taskid in taskids:
            if taskid == task['id']:
                job_tasks.append(task)

    return job_tasks
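
A usage sketch (the job ID and run ID are hypothetical):

job_tasks = get_job_tasks("example-job", "20180101000000FaKeId")
for task in job_tasks:
    print("{} on agent {}: {}".format(task['id'], task['slave_id'], task['state']))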
def test_bump_hello_cpus():
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    config.check_running(foldered_name)
    hello_ids = sdk_tasks.get_task_ids(foldered_name, 'hello')
    log.info('hello ids: ' + str(hello_ids))

    updated_cpus = config.bump_hello_cpus(foldered_name)

    sdk_tasks.check_tasks_updated(foldered_name, 'hello', hello_ids)
    config.check_running(foldered_name)

    all_tasks = shakedown.get_service_tasks(foldered_name)
    running_tasks = [t for t in all_tasks if t['name'].startswith('hello') and t['state'] == "TASK_RUNNING"]
    assert len(running_tasks) == config.hello_task_count(foldered_name)
    for t in running_tasks:
        assert config.close_enough(t['resources']['cpus'], updated_cpus)
def test_config_update_while_partitioned():
    world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    host = sdk_hosts.system_host(config.SERVICE_NAME, "world-0-server")
    shakedown.partition_agent(host)

    service_config = sdk_marathon.get_config(config.SERVICE_NAME)
    updated_cpus = float(service_config['env']['WORLD_CPUS']) + 0.1
    service_config['env']['WORLD_CPUS'] = str(updated_cpus)
    sdk_marathon.update_app(config.SERVICE_NAME, service_config, wait_for_completed_deployment=False)

    shakedown.reconnect_agent(host)
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', world_ids)
    config.check_running()
    all_tasks = shakedown.get_service_tasks(config.SERVICE_NAME)
    running_tasks = [t for t in all_tasks if t['name'].startswith('world') and t['state'] == "TASK_RUNNING"]
    assert len(running_tasks) == config.world_task_count(config.SERVICE_NAME)
    for t in running_tasks:
        assert config.close_enough(t['resources']['cpus'], updated_cpus)
def test_bump_hello_cpus():
    def close_enough(val0, val1):
        epsilon = 0.00001
        diff = abs(val0 - val1)
        return diff < epsilon

    config.check_running(config.SERVICE_NAME)
    hello_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello')
    log.info('hello ids: ' + str(hello_ids))

    updated_cpus = config.bump_hello_cpus(config.SERVICE_NAME)

    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'hello', hello_ids)
    config.check_running(config.SERVICE_NAME)

    all_tasks = shakedown.get_service_tasks(config.SERVICE_NAME)
    running_tasks = [t for t in all_tasks if t['name'].startswith('hello') and t['state'] == "TASK_RUNNING"]
    for t in running_tasks:
        assert close_enough(t['resources']['cpus'], updated_cpus)
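
The inline close_enough helper is an absolute-tolerance float comparison; the standard library's math.isclose can express the same check (isclose uses <=, which differs only exactly at the tolerance boundary):

import math

def close_enough(val0, val1, epsilon=0.00001):
    # rel_tol=0.0 makes the comparison purely absolute,
    # matching abs(val0 - val1) < epsilon above.
    return math.isclose(val0, val1, rel_tol=0.0, abs_tol=epsilon)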
Example #12
 def fn():
     try:
         tasks = shakedown.get_service_tasks(service_name)
     except dcos.errors.DCOSHTTPException:
         print('Failed to get tasks for service {}'.format(service_name))
         tasks = []
     running_task_names = []
     other_tasks = []
     for t in tasks:
         if t['state'] == 'TASK_RUNNING':
             running_task_names.append(t['name'])
         else:
             other_tasks.append('{}={}'.format(t['name'], t['state']))
     print('Waiting for {} running tasks, got {} running/{} total:\n- running: {}\n- other: {}'.format(
         expected_task_count,
         len(running_task_names), len(tasks),
         running_task_names,
         other_tasks))
     return len(running_task_names) >= expected_task_count
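
Predicates like fn are meant to be driven by a polling loop until they return True; a generic sketch (the helper name and timings are assumptions):

import time

def spin_until(predicate, timeout_seconds=600, poll_seconds=5):
    # Re-evaluate the predicate until it succeeds or the deadline passes.
    deadline = time.time() + timeout_seconds
    while time.time() < deadline:
        if predicate():
            return
        time.sleep(poll_seconds)
    raise Exception("Timed out waiting for predicate")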
def test_bump_world_cpus():
    check_running()
    world_ids = tasks.get_task_ids(PACKAGE_NAME, 'world')
    print('world ids: ' + str(world_ids))

    config = marathon.get_config(PACKAGE_NAME)
    cpus = float(config['env']['WORLD_CPUS'])
    updated_cpus = cpus + 0.1
    config['env']['WORLD_CPUS'] = str(updated_cpus)
    marathon.update_app(PACKAGE_NAME, config)

    tasks.check_tasks_updated(PACKAGE_NAME, 'world', world_ids)
    check_running()

    all_tasks = shakedown.get_service_tasks(PACKAGE_NAME)
    running_tasks = [t for t in all_tasks if t['name'].startswith('world') and t['state'] == "TASK_RUNNING"]
    assert len(running_tasks) == world_task_count()
    for t in running_tasks:
        assert close_enough(t['resources']['cpus'], updated_cpus)
Example #14
def test_bump_world_cpus():
    check_running()
    world_ids = tasks.get_task_ids(PACKAGE_NAME, 'world')
    print('world ids: ' + str(world_ids))

    config = marathon.get_config(PACKAGE_NAME)
    cpus = float(config['env']['WORLD_CPUS'])
    updated_cpus = cpus + 0.1
    config['env']['WORLD_CPUS'] = str(updated_cpus)
    marathon.update_app(PACKAGE_NAME, config)

    tasks.check_tasks_updated(PACKAGE_NAME, 'world', world_ids)
    check_running()

    all_tasks = shakedown.get_service_tasks(PACKAGE_NAME)
    running_tasks = [
        t for t in all_tasks
        if t['name'].startswith('world') and t['state'] == "TASK_RUNNING"
    ]
    assert len(running_tasks) == world_task_count()
    for t in running_tasks:
        assert close_enough(t['resources']['cpus'], updated_cpus)
Example #15
def _submit_job_and_verify_users(user, use_ucr_for_spark_submit, extra_args=[]):
    app_name = "MockTaskRunner"

    submit_args = ["--conf spark.cores.max=1",
                   "--class {}".format(app_name)] + extra_args

    driver_task_id = utils.submit_job(service_name=SERVICE_NAME,
                                      app_url=utils.dcos_test_jar_url(),
                                      app_args="1 300",
                                      args=submit_args)
    try:
        sdk_tasks.check_running(app_name, 1, timeout_seconds=300)
        driver_task = shakedown.get_task(driver_task_id, completed=False)
        executor_tasks = shakedown.get_service_tasks(app_name)

        for task in [driver_task] + executor_tasks:
            log.info(f"Checking task '{task['id']}'")
            _check_task_user(task, user, use_ucr_for_spark_submit)

    finally:
        log.info(f"Cleaning up. Attempting to kill driver: {driver_task_id}")
        utils.kill_driver(driver_task_id, service_name=SERVICE_NAME)
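
A call-site sketch ("nobody" is a common DC/OS service user, but the actual user and extra submit args depend on the test matrix):

_submit_job_and_verify_users(user="nobody", use_ucr_for_spark_submit=True)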
Example #16
def get_metrics(service_name, task_name):
    """Return a list of metrics datapoints.

    Keyword arguments:
    service_name -- the name of the service to get metrics for
    task_name -- the name of the task whose agent to run metrics commands from
    """
    tasks = shakedown.get_service_tasks(service_name)
    task_to_check = None
    for task in tasks:
        if task['name'] == task_name:
            task_to_check = task

    if task_to_check is None:
        raise Exception("Could not find task: {}".format(task_name))

    agent_id = task_to_check['slave_id']
    executor_id = task_to_check['executor_id']

    # Fetch the list of containers for the agent
    containers_url = "{}/system/v1/agent/{}/metrics/v0/containers".format(
        shakedown.dcos_url(), agent_id)
    containers_response = cmd.request("GET", containers_url, retry=False)
    if not containers_response.ok:
        log.info("Unable to fetch containers list")
        raise Exception(
            "Unable to fetch containers list: {}".format(containers_url))

    for container in json.loads(containers_response.text):
        app_url = "{}/system/v1/agent/{}/metrics/v0/containers/{}/app".format(
            shakedown.dcos_url(), agent_id, container)
        app_response = cmd.request("GET", app_url, retry=False)
        if not app_response.ok:
            continue

        app_json = json.loads(app_response.text)
        if app_json['dimensions']['executor_id'] == executor_id:
            return app_json['datapoints']

    raise Exception("No metrics found")
def test_config_update_while_partitioned():
    world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    host = sdk_hosts.system_host(config.SERVICE_NAME, "world-0-server")
    shakedown.partition_agent(host)

    service_config = sdk_marathon.get_config(config.SERVICE_NAME)
    updated_cpus = float(service_config['env']['WORLD_CPUS']) + 0.1
    service_config['env']['WORLD_CPUS'] = str(updated_cpus)
    sdk_marathon.update_app(config.SERVICE_NAME,
                            service_config,
                            wait_for_completed_deployment=False)

    shakedown.reconnect_agent(host)
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', world_ids)
    config.check_running()
    all_tasks = shakedown.get_service_tasks(config.SERVICE_NAME)
    running_tasks = [
        t for t in all_tasks
        if t['name'].startswith('world') and t['state'] == "TASK_RUNNING"
    ]
    assert len(running_tasks) == config.world_task_count(config.SERVICE_NAME)
    for t in running_tasks:
        assert config.close_enough(t['resources']['cpus'], updated_cpus)
Example #18
def test_shuffle_job(submit_args=[],
                     use_ucr_for_spark_submit=True,
                     use_cli_for_spark_submit=True,
                     check_network_labels=False):

    if use_ucr_for_spark_submit:
        submit_args = submit_args + ["--conf spark.mesos.containerizer=mesos"]

    driver_task_id = _submit_shuffle_job(use_cli=use_cli_for_spark_submit,
                                         sleep=300,
                                         extra_args=submit_args)

    sdk_tasks.check_running(SHUFFLE_JOB_FW_NAME,
                            SHUFFLE_JOB_NUM_EXECUTORS,
                            timeout_seconds=600)
    driver_task = shakedown.get_task(driver_task_id, completed=False)
    _check_task_network(driver_task, is_ucr=use_ucr_for_spark_submit)

    if check_network_labels and use_ucr_for_spark_submit:
        _check_task_network_labels(driver_task)

    executor_tasks = shakedown.get_service_tasks(SHUFFLE_JOB_FW_NAME)
    for task in executor_tasks:
        _check_task_network(task, is_ucr=use_ucr_for_spark_submit)
        if check_network_labels and use_ucr_for_spark_submit:
            _check_task_network_labels(task)

    try:
        utils.wait_for_running_job_output(
            driver_task_id,
            "Groups count: {}".format(SHUFFLE_JOB_EXPECTED_GROUPS_COUNT))
    finally:
        log.info("Cleaning up. Attempting to kill driver: {}".format(
            driver_task_id))
        utils.kill_driver(driver_task_id,
                          service_name=CNI_DISPATCHER_SERVICE_NAME)
Example #19
def tasks_running_success_predicate(task_count):
    tasks = shakedown.get_service_tasks(PACKAGE_NAME)
    running_tasks = [t for t in tasks if t['state'] == TASK_RUNNING_STATE]
    print('Waiting for {} healthy tasks, got {}/{}'.format(
        task_count, len(running_tasks), len(tasks)))
    return len(running_tasks) == task_count
Example #20
def test_overlay_network():
    """Verify that the current deploy plan matches the expected plan from the spec."""

    deployment_plan = plan.wait_for_completed_deployment(PACKAGE_NAME)
    utils.out("deployment_plan: " + str(deployment_plan))

    # test that the deployment plan is correct
    assert (len(deployment_plan['phases']) == 5)
    assert (deployment_plan['phases'][0]['name'] == 'hello-overlay-vip-deploy')
    assert (deployment_plan['phases'][1]['name'] == 'hello-overlay-deploy')
    assert (deployment_plan['phases'][2]['name'] == 'hello-host-vip-deploy')
    assert (deployment_plan['phases'][3]['name'] == 'hello-host-deploy')
    assert (deployment_plan["phases"][4]["name"] == "getter-deploy")
    assert (len(deployment_plan['phases'][0]['steps']) == 1)
    assert (len(deployment_plan["phases"][1]["steps"]) == 1)
    assert (len(deployment_plan["phases"][2]["steps"]) == 1)
    assert (len(deployment_plan["phases"][3]["steps"]) == 1)
    assert (len(deployment_plan["phases"][4]["steps"]) == 4)

    # test that the tasks are all up, which tests the overlay DNS
    framework_tasks = shakedown.get_service_tasks(PACKAGE_NAME, completed=False)
    framework_task_names = [t["name"] for t in framework_tasks]
    expected_tasks = [
        'getter-0-get-host', 'getter-0-get-overlay',
        'getter-0-get-overlay-vip', 'getter-0-get-host-vip',
        'hello-host-vip-0-server', 'hello-overlay-vip-0-server',
        'hello-host-0-server', 'hello-overlay-0-server'
    ]

    for expected_task in expected_tasks:
        assert (expected_task
                in framework_task_names), "Missing {expected}".format(
                    expected=expected_task)

    for task in framework_tasks:
        name = task["name"]
        if "getter" in name:  # don't check the "getter" tasks because they don't use ports
            continue
        resources = task["resources"]
        if "host" in name:
            assert "ports" in resources.keys(
            ), "Task {} should have port resources".format(name)
        if "overlay" in name:
            assert "ports" not in resources.keys(
            ), "Task {} should NOT have port resources".format(name)

    networks.check_task_network("hello-overlay-0-server")
    networks.check_task_network("hello-overlay-vip-0-server")
    networks.check_task_network("hello-host-0-server",
                                expected_network_name=None)
    networks.check_task_network("hello-host-vip-0-server",
                                expected_network_name=None)

    endpoints_result, _, rc = shakedown.run_dcos_command(
        "{pkg} endpoints".format(pkg=PACKAGE_NAME))
    assert rc == 0, "Getting endpoints failed"
    endpoints_result = json.loads(endpoints_result)
    assert len(endpoints_result) == 2, \
        "Wrong number of endpoints: got {}, expected 2".format(len(endpoints_result))

    overlay_endpoints_result, _, rc = shakedown.run_dcos_command(
        "{pkg} endpoints overlay-vip".format(pkg=PACKAGE_NAME))
    assert rc == 0, "Getting overlay endpoints failed"
    overlay_endpoints_result = json.loads(overlay_endpoints_result)
    assert "address" in overlay_endpoints_result.keys(), "overlay endpoints missing 'address'"\
           "{}".format(overlay_endpoints_result)
    assert len(overlay_endpoints_result["address"]) == 1
    assert overlay_endpoints_result["address"][0].startswith("9")
    overlay_port = overlay_endpoints_result["address"][0].split(":")[-1]
    assert overlay_port == "4044"
    assert "dns" in overlay_endpoints_result.keys()
    assert len(overlay_endpoints_result["dns"]) == 1
    assert overlay_endpoints_result["dns"][0] == hosts.autoip_host(
        PACKAGE_NAME, "hello-overlay-vip-0-server", 4044)

    host_endpoints_result, _, rc = shakedown.run_dcos_command(
        "{pkg} endpoints host-vip".format(pkg=PACKAGE_NAME))
    assert rc == 0, "Getting host endpoints failed"
    host_endpoints_result = json.loads(host_endpoints_result)
    assert "address" in host_endpoints_result.keys(), "overlay endpoints missing 'address'"\
           "{}".format(host_endpoints_result)
    assert len(host_endpoints_result["address"]) == 1
    assert host_endpoints_result["address"][0].startswith("10")
    host_port = host_endpoints_result["address"][0].split(":")[-1]
    assert host_port == "4044"
    assert "dns" in host_endpoints_result.keys()
    assert len(host_endpoints_result["dns"]) == 1
    assert host_endpoints_result["dns"][0] == hosts.autoip_host(
        PACKAGE_NAME, "hello-host-vip-0-server", 4044)
Example #21
def get_task_ids(prefix):
    tasks = shakedown.get_service_tasks(PACKAGE_NAME)
    prefixed_tasks = [t for t in tasks if t['name'].startswith(prefix)]
    task_ids = [t['id'] for t in prefixed_tasks]
    return task_ids
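
These IDs are typically captured before a config change and compared afterwards to confirm the tasks were relaunched; a sketch of that pattern:

old_ids = get_task_ids('world')
# ... apply a config change that relaunches the 'world' tasks ...
new_ids = get_task_ids('world')
assert set(old_ids).isdisjoint(new_ids), "Expected all 'world' tasks to be replaced"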
Example #22
def get_task_ids():
    tasks = shakedown.get_service_tasks(PACKAGE_NAME)
    return [t['id'] for t in tasks]
Example #23
def test_overlay_network():
    """Verify that the current deploy plan matches the expected plan from the spec."""

    deployment_plan = sdk_plan.wait_for_completed_deployment(
        config.SERVICE_NAME)
    log.info("deployment_plan: " + str(deployment_plan))

    # test that the deployment plan is correct
    assert (len(deployment_plan['phases']) == 5)
    assert (deployment_plan['phases'][0]['name'] == 'hello-overlay-deploy')
    assert (deployment_plan['phases'][1]['name'] == 'hello-overlay-vip-deploy')
    assert (deployment_plan['phases'][2]['name'] == 'hello-host-vip-deploy')
    assert (deployment_plan['phases'][3]['name'] == 'hello-host-deploy')
    assert (deployment_plan["phases"][4]["name"] == "getter-deploy")
    assert (len(deployment_plan['phases'][0]['steps']) == 1)
    assert (len(deployment_plan["phases"][1]["steps"]) == 1)
    assert (len(deployment_plan["phases"][2]["steps"]) == 1)
    assert (len(deployment_plan["phases"][3]["steps"]) == 1)
    assert (len(deployment_plan["phases"][4]["steps"]) == 1)

    # Due to DNS resolution flakiness, some of the deployed tasks can fail. If so,
    # we wait for them to redeploy, but if they don't fail we still want to proceed.
    try:
        sdk_plan.wait_for_in_progress_recovery(config.SERVICE_NAME,
                                               timeout_seconds=60)
        sdk_plan.wait_for_completed_recovery(config.SERVICE_NAME,
                                             timeout_seconds=60)
    except retrying.RetryError:
        pass

    # test that the tasks are all up, which tests the overlay DNS
    framework_tasks = shakedown.get_service_tasks(config.SERVICE_NAME, completed=False)
    framework_task_names = [t["name"] for t in framework_tasks]

    for expected_task in EXPECTED_TASKS:
        assert (expected_task
                in framework_task_names), "Missing {expected}".format(
                    expected=expected_task)

    for task in framework_tasks:
        name = task["name"]
        if "getter" in name:  # don't check the "getter" tasks because they don't use ports
            continue
        resources = task["resources"]
        if "host" in name:
            assert "ports" in resources.keys(
            ), "Task {} should have port resources".format(name)
        if "overlay" in name:
            assert "ports" not in resources.keys(
            ), "Task {} should NOT have port resources".format(name)

    sdk_networks.check_task_network("hello-overlay-0-server")
    sdk_networks.check_task_network("hello-overlay-vip-0-server")
    sdk_networks.check_task_network("hello-host-0-server",
                                    expected_network_name=None)
    sdk_networks.check_task_network("hello-host-vip-0-server",
                                    expected_network_name=None)

    endpoints_result = sdk_cmd.svc_cli(config.PACKAGE_NAME,
                                       config.SERVICE_NAME,
                                       'endpoints',
                                       json=True)
    assert len(endpoints_result) == 2, \
        "Wrong number of endpoints: got {}, expected 2".format(len(endpoints_result))

    overlay_endpoints_result = sdk_cmd.svc_cli(config.PACKAGE_NAME,
                                               config.SERVICE_NAME,
                                               'endpoints overlay-vip',
                                               json=True)
    assert "address" in overlay_endpoints_result.keys(), "overlay endpoints missing 'address'"\
           "{}".format(overlay_endpoints_result)
    assert len(overlay_endpoints_result["address"]) == 1
    assert overlay_endpoints_result["address"][0].startswith("9")
    overlay_port = overlay_endpoints_result["address"][0].split(":")[-1]
    assert overlay_port == "4044"
    assert "dns" in overlay_endpoints_result.keys()
    assert len(overlay_endpoints_result["dns"]) == 1
    assert overlay_endpoints_result["dns"][0] == sdk_hosts.autoip_host(
        config.SERVICE_NAME, "hello-overlay-vip-0-server", 4044)

    host_endpoints_result = sdk_cmd.svc_cli(config.PACKAGE_NAME,
                                            config.SERVICE_NAME,
                                            'endpoints host-vip',
                                            json=True)
    assert "address" in host_endpoints_result.keys(), "overlay endpoints missing 'address'"\
           "{}".format(host_endpoints_result)
    assert len(host_endpoints_result["address"]) == 1
    assert host_endpoints_result["address"][0].startswith("10")
    host_port = host_endpoints_result["address"][0].split(":")[-1]
    assert host_port == "4044"
    assert "dns" in host_endpoints_result.keys()
    assert len(host_endpoints_result["dns"]) == 1
    assert host_endpoints_result["dns"][0] == sdk_hosts.autoip_host(
        config.SERVICE_NAME, "hello-host-vip-0-server", 4044)
Example #24
 def fn():
     try:
         tasks = shakedown.get_service_tasks(PACKAGE_NAME)
         return [t for t in tasks if t['state'] == TASK_RUNNING_STATE and t['name'] == broker_name]
     except dcos.errors.DCOSHTTPException:
         return []
Example #25
def _is_hdfs_ready(expected_tasks=DEFAULT_HDFS_TASK_COUNT):
    running_tasks = [t for t in shakedown.get_service_tasks(HDFS_SERVICE_NAME)
                     if t['state'] == 'TASK_RUNNING']
    return len(running_tasks) >= expected_tasks
 def fn():
     try:
         return shakedown.get_service_tasks(PACKAGE_NAME)
     except dcos.errors.DCOSHTTPException:
         return []
Example #27
def get_metrics(package_name, service_name, task_name):
    """Return a list of metrics datapoints.

    Keyword arguments:
    package_name -- the name of the package the service is using
    service_name -- the name of the service to get metrics for
    task_name -- the name of the task whose agent to run metrics commands from
    """
    tasks = shakedown.get_service_tasks(service_name)
    task_to_check = None
    for task in tasks:
        if task['name'] == task_name:
            task_to_check = task

    if task_to_check is None:
        raise Exception("Could not find task: {}".format(task_name))

    agent_id = task_to_check['slave_id']
    executor_id = task_to_check['executor_id']

    pod_name = '-'.join(task_name.split("-")[:2])
    pod_info = sdk_cmd.svc_cli(package_name,
                               service_name,
                               "pod info {}".format(pod_name),
                               json=True)
    task_info = None
    for task in pod_info:
        if task["info"]["name"] == task_name:
            task_info = task
            break

    if not task_info:
        return []

    task_container_id = task_info["status"]["containerStatus"]["containerId"][
        "value"]

    # Not needed to fetch the metrics themselves, but consume the /containers
    # endpoint to verify that it reports this task's container ID.
    containers_url = "{}/system/v1/agent/{}/metrics/v0/containers".format(
        shakedown.dcos_url(), agent_id)
    containers_response = sdk_cmd.request("GET", containers_url, retry=False)
    if not containers_response.ok:
        log.info("Unable to fetch containers list")
        raise Exception(
            "Unable to fetch containers list: {}".format(containers_url))
    reported_container_ids = json.loads(containers_response.text)

    container_id_reported = task_container_id in reported_container_ids

    if not container_id_reported:
        raise ValueError(
            "The metrics /container endpoint returned {}, expecting {} to be returned as well"
            .format(reported_container_ids, task_container_id))

    app_url = "{}/system/v1/agent/{}/metrics/v0/containers/{}/app".format(
        shakedown.dcos_url(), agent_id, task_container_id)
    app_response = sdk_cmd.request("GET", app_url, retry=False)
    if not app_response.ok:
        raise ValueError("Failed to get metrics from container")

    app_json = json.loads(app_response.text)
    if app_json['dimensions']['executor_id'] == executor_id:
        return app_json['datapoints']

    raise Exception("No metrics found")
 def fn():
     return shakedown.get_service_tasks(PACKAGE_NAME)
Example #29
def get_task_ids(service_name, task_prefix):
    tasks = shakedown.get_service_tasks(service_name)
    matching_tasks = [t for t in tasks if t['name'].startswith(task_prefix)]
    return [t['id'] for t in matching_tasks]
 def fn():
     try:
         tasks = shakedown.get_service_tasks(PACKAGE_NAME)
         return [t for t in tasks if t['state'] == TASK_RUNNING_STATE and t['name'] == broker_name]
     except dcos.errors.DCOSHTTPException:
         return []
Example #31
 def fn():
     try:
         return shakedown.get_service_tasks(PACKAGE_NAME)
     except dcos.errors.DCOSHTTPException:
         return []
def get_task_ids(service_name, task_prefix):
    tasks = shakedown.get_service_tasks(service_name)
    matching_tasks = [t for t in tasks if t['name'].startswith(task_prefix)]
    return [t['id'] for t in matching_tasks]
Example #33
def test_overlay_network():
    """Verify that the current deploy plan matches the expected plan from the spec."""

    deployment_plan = sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
    log.info("deployment_plan: " + str(deployment_plan))

    # test that the deployment plan is correct
    assert(len(deployment_plan['phases']) == 5)
    assert(deployment_plan['phases'][0]['name'] == 'hello-overlay-deploy')
    assert(deployment_plan['phases'][1]['name'] == 'hello-overlay-vip-deploy')
    assert(deployment_plan['phases'][2]['name'] == 'hello-host-vip-deploy')
    assert(deployment_plan['phases'][3]['name'] == 'hello-host-deploy')
    assert(deployment_plan["phases"][4]["name"] == "getter-deploy")
    assert(len(deployment_plan['phases'][0]['steps']) == 1)
    assert(len(deployment_plan["phases"][1]["steps"]) == 1)
    assert(len(deployment_plan["phases"][2]["steps"]) == 1)
    assert(len(deployment_plan["phases"][3]["steps"]) == 1)
    assert(len(deployment_plan["phases"][4]["steps"]) == 1)

    # Due to DNS resolution flakiness, some of the deployed tasks can fail. If so,
    # we wait for them to redeploy, but if they don't fail we still want to proceed.
    try:
        sdk_plan.wait_for_in_progress_recovery(config.SERVICE_NAME, timeout_seconds=60)
        sdk_plan.wait_for_completed_recovery(config.SERVICE_NAME, timeout_seconds=60)
    except TimeoutExpired:
        pass

    # test that the tasks are all up, which tests the overlay DNS
    framework_tasks = shakedown.get_service_tasks(config.SERVICE_NAME, completed=False)
    framework_task_names = [t["name"] for t in framework_tasks]

    for expected_task in EXPECTED_TASKS:
        assert(expected_task in framework_task_names), "Missing {expected}".format(expected=expected_task)

    for task in framework_tasks:
        name = task["name"]
        if "getter" in name:  # don't check the "getter" tasks because they don't use ports
            continue
        resources = task["resources"]
        if "host" in name:
            assert "ports" in resources.keys(), "Task {} should have port resources".format(name)
        if "overlay" in name:
            assert "ports" not in resources.keys(), "Task {} should NOT have port resources".format(name)

    sdk_networks.check_task_network("hello-overlay-0-server")
    sdk_networks.check_task_network("hello-overlay-vip-0-server")
    sdk_networks.check_task_network("hello-host-0-server", expected_network_name=None)
    sdk_networks.check_task_network("hello-host-vip-0-server", expected_network_name=None)

    endpoints_result = sdk_cmd.svc_cli(config.PACKAGE_NAME, config.SERVICE_NAME, 'endpoints', json=True)
    assert len(endpoints_result) == 2, "Wrong number of endpoints: got {}, expected 2".format(len(endpoints_result))

    overlay_endpoints_result = sdk_cmd.svc_cli(config.PACKAGE_NAME, config.SERVICE_NAME, 'endpoints overlay-vip', json=True)
    assert "address" in overlay_endpoints_result.keys(), "overlay endpoints missing 'address'"\
           "{}".format(overlay_endpoints_result)
    assert len(overlay_endpoints_result["address"]) == 1
    assert overlay_endpoints_result["address"][0].startswith("9")
    overlay_port = overlay_endpoints_result["address"][0].split(":")[-1]
    assert overlay_port == "4044"
    assert "dns" in overlay_endpoints_result.keys()
    assert len(overlay_endpoints_result["dns"]) == 1
    assert overlay_endpoints_result["dns"][0] == sdk_hosts.autoip_host(config.SERVICE_NAME, "hello-overlay-vip-0-server", 4044)

    host_endpoints_result = sdk_cmd.svc_cli(config.PACKAGE_NAME, config.SERVICE_NAME, 'endpoints host-vip', json=True)
    assert "address" in host_endpoints_result.keys(), "overlay endpoints missing 'address'"\
           "{}".format(host_endpoints_result)
    assert len(host_endpoints_result["address"]) == 1
    assert host_endpoints_result["address"][0].startswith("10")
    host_port = host_endpoints_result["address"][0].split(":")[-1]
    assert host_port == "4044"
    assert "dns" in host_endpoints_result.keys()
    assert len(host_endpoints_result["dns"]) == 1
    assert host_endpoints_result["dns"][0] == sdk_hosts.autoip_host(config.SERVICE_NAME, "hello-host-vip-0-server", 4044)
Example #34
def is_service_ready(service_name, expected_tasks):
    running_tasks = [t for t in shakedown.get_service_tasks(service_name)
                     if t['state'] == 'TASK_RUNNING']
    LOGGER.info("Waiting for {n} tasks got {m} for service {s}".format(
        n=expected_tasks, m=len(running_tasks), s=service_name))
    return len(running_tasks) >= expected_tasks
Example #35
 def fn():
     return shakedown.get_service_tasks(PACKAGE_NAME)