Esempio n. 1
0
def destroy_app(app_name):
    """Request deletion of an app and wait on its service disappearing.

    Sends a 'delete' request to the URL built by api_url_with_param('apps',
    app_name), then hands sdk_spin.time_wait_noisy a predicate that is true
    once shakedown.get_service(app_name) returns None.
    """
    delete_url = api_url_with_param('apps', app_name)
    sdk_cmd.request('delete', delete_url)

    # Make sure the scheduler has been destroyed
    sdk_spin.time_wait_noisy(
        lambda: shakedown.get_service(app_name) is None)
Esempio n. 2
0
def test_deploy():
    """Expect launch failures with the default env, then fix the env and expect success.

    For `wait_time` seconds the task states are polled; TASK_RUNNING may show
    up in at most 3 consecutive polls. Afterwards the Marathon env is patched
    and check_running() is called.
    """
    wait_time = 30
    # taskcfg.yml will initially fail to deploy because several options are missing in the default
    # marathon.json.mustache. verify that tasks are failing for 30s before continuing.
    print('Checking that tasks are failing to launch for at least {}s'.format(wait_time))

    # we can get brief blips of TASK_RUNNING but they shouldnt last more than 2-3s:
    running_streak = 0

    def tasks_keep_failing():
        nonlocal running_streak
        states = [task['state']
                  for task in shakedown.get_service_tasks(PACKAGE_NAME)]
        print('Task states: {}'.format(states))
        if 'TASK_RUNNING' not in states:
            running_streak = 0
            return False
        running_streak += 1
        assert running_streak <= 3
        # Never report success: the wait is supposed to run into its timeout.
        return False

    try:
        spin.time_wait_noisy(lambda: tasks_keep_failing(), timeout_seconds=wait_time)
    except shakedown.TimeoutExpired:
        print('Timeout reached as expected')

    # add the needed envvars in marathon and confirm that the deployment succeeds:
    app_config = marathon.get_config(PACKAGE_NAME)
    app_env = app_config['env']
    del app_env['SLEEP_DURATION']
    app_env['TASKCFG_ALL_OUTPUT_FILENAME'] = 'output'
    app_env['TASKCFG_ALL_SLEEP_DURATION'] = '1000'
    app_url = marathon.api_url('apps/' + PACKAGE_NAME)
    cmd.request('put', app_url, json=app_config)

    check_running()
Esempio n. 3
0
def test_bump_world_cpus():
    """Raise WORLD_CPUS by 0.1 and expect the 'world' tasks to be updated.

    The task ids captured before the change are passed to
    tasks.check_tasks_updated after the new config has been PUT.
    """
    check_running()
    ids_before = tasks.get_task_ids(PACKAGE_NAME, 'world')
    print('world ids: ' + str(ids_before))

    app_config = marathon.get_config(PACKAGE_NAME)
    env = app_config['env']
    env['WORLD_CPUS'] = str(float(env['WORLD_CPUS']) + 0.1)
    app_url = marathon.api_url('apps/' + PACKAGE_NAME)
    cmd.request('put', app_url, json=app_config)

    tasks.check_tasks_updated(PACKAGE_NAME, 'world', ids_before)
    check_running()
Esempio n. 4
0
def test_bump_hello_cpus():
    """Raise HELLO_CPUS by 0.1 and expect the 'hello' tasks to be updated.

    The task ids captured before the change are passed to
    tasks.check_tasks_updated after the new config has been PUT.
    """
    check_running()
    ids_before = tasks.get_task_ids(PACKAGE_NAME, 'hello')
    print('hello ids: ' + str(ids_before))

    app_config = marathon.get_config(PACKAGE_NAME)
    env = app_config['env']
    env['HELLO_CPUS'] = str(float(env['HELLO_CPUS']) + 0.1)
    app_url = marathon.api_url('apps/' + PACKAGE_NAME)
    cmd.request('put', app_url, json=app_config)

    tasks.check_tasks_updated(PACKAGE_NAME, 'hello', ids_before)
    check_running()
Esempio n. 5
0
def test_bump_hello_cpus():
    """Bump the HELLO_CPUS env value by 0.1 and check the 'hello' tasks changed.

    Reads the current Marathon config, PUTs it back with the larger value,
    and then calls tasks.check_tasks_updated with the ids recorded up front.
    """
    check_running()
    original_ids = tasks.get_task_ids(PACKAGE_NAME, 'hello')
    print('hello ids: ' + str(original_ids))

    marathon_config = marathon.get_config(PACKAGE_NAME)
    current_cpus = float(marathon_config['env']['HELLO_CPUS'])
    bumped_cpus = current_cpus + 0.1
    marathon_config['env']['HELLO_CPUS'] = str(bumped_cpus)
    cmd.request(
        'put',
        marathon.api_url('apps/' + PACKAGE_NAME),
        json=marathon_config)

    tasks.check_tasks_updated(PACKAGE_NAME, 'hello', original_ids)
    check_running()
Esempio n. 6
0
def test_bump_data_nodes():
    """Increase DATA_COUNT by one and expect the existing 'data' tasks to be left alone.

    After the PUT, check_healthy is called with DEFAULT_TASK_COUNT + 1 and the
    previously captured 'data' ids are passed to tasks.check_tasks_not_updated.
    """
    check_healthy()

    ids_before = tasks.get_task_ids(PACKAGE_NAME, 'data')
    print('data ids: ' + str(ids_before))

    app_config = marathon.get_config(PACKAGE_NAME)
    env = app_config['env']
    env['DATA_COUNT'] = str(int(env['DATA_COUNT']) + 1)
    app_url = marathon.api_url('apps/' + PACKAGE_NAME)
    cmd.request('put', app_url, json=app_config)

    check_healthy(DEFAULT_TASK_COUNT + 1)
    tasks.check_tasks_not_updated(PACKAGE_NAME, 'data', ids_before)
Esempio n. 7
0
def test_bump_hello_nodes():
    """Increase HELLO_COUNT by one and expect the existing 'hello' tasks to be left alone.

    After the PUT, check_running is called and the previously captured
    'hello' ids are passed to tasks.check_tasks_not_updated.
    """
    check_running()

    ids_before = tasks.get_task_ids(PACKAGE_NAME, 'hello')
    print('hello ids: ' + str(ids_before))

    app_config = marathon.get_config(PACKAGE_NAME)
    env = app_config['env']
    env['HELLO_COUNT'] = str(int(env['HELLO_COUNT']) + 1)
    app_url = marathon.api_url('apps/' + PACKAGE_NAME)
    cmd.request('put', app_url, json=app_config)

    check_running()
    tasks.check_tasks_not_updated(PACKAGE_NAME, 'hello', ids_before)
Esempio n. 8
0
def test_bump_hello_nodes():
    """Add one 'hello' node via HELLO_COUNT and check the old tasks were not touched.

    Reads the current Marathon config, PUTs it back with the count raised by
    one, then calls check_running and tasks.check_tasks_not_updated with the
    ids recorded up front.
    """
    check_running()

    original_ids = tasks.get_task_ids(PACKAGE_NAME, 'hello')
    print('hello ids: ' + str(original_ids))

    marathon_config = marathon.get_config(PACKAGE_NAME)
    current_count = int(marathon_config['env']['HELLO_COUNT'])
    marathon_config['env']['HELLO_COUNT'] = str(current_count + 1)
    cmd.request(
        'put',
        marathon.api_url('apps/' + PACKAGE_NAME),
        json=marathon_config)

    check_running()
    tasks.check_tasks_not_updated(PACKAGE_NAME, 'hello', original_ids)
Esempio n. 9
0
def test_bump_journal_cpus():
    """Raise JOURNAL_CPUS by 0.1 and expect the 'journal' tasks to be updated.

    The task ids captured before the change are passed to
    tasks.check_tasks_updated after the new config has been PUT.
    """
    check_healthy()
    ids_before = tasks.get_task_ids(PACKAGE_NAME, 'journal')
    print('journal ids: ' + str(ids_before))

    app_config = marathon.get_config(PACKAGE_NAME)
    # Dump the config before it is modified.
    print('marathon config: ')
    print(app_config)
    env = app_config['env']
    env['JOURNAL_CPUS'] = str(float(env['JOURNAL_CPUS']) + 0.1)
    app_url = marathon.api_url('apps/' + PACKAGE_NAME)
    cmd.request('put', app_url, json=app_config)

    tasks.check_tasks_updated(PACKAGE_NAME, 'journal', ids_before)
    check_healthy()
Esempio n. 10
0
def restart_app(app_name):
    """POST to the app's 'restart' endpoint and assert the response is ok.

    The response object itself is logged between the start and end messages.
    """
    log.info("Restarting {}...".format(app_name))
    restart_url = api_url('apps/{}/restart'.format(app_name))
    response = sdk_cmd.request('post', restart_url)
    log.info(response)
    assert response.ok
    log.info("Restarted {}.".format(app_name))
Esempio n. 11
0
def test_tls_basic_artifacts(hello_world_service):
    """Verify the TLS artifacts present in the first 'artifacts' task.

    Checks that the certificate's common name is contained in the autoip host
    name of 'artifacts-0-node', that it carries exactly one subject
    alternative name, and that the 'dcos-root' truststore entry has the same
    signature as the cluster CA certificate served at /ca/dcos-ca.crt.

    hello_world_service -- not used in the body; presumably a pytest fixture
    that provisions the service (confirm against the test module).
    """
    task_id = sdk_tasks.get_task_ids(PACKAGE_NAME, 'artifacts')[0]
    assert task_id

    # Load the end-entity certificate from the task's PEM file and the root
    # CA cert from the truststore
    end_entity_cert = x509.load_pem_x509_certificate(
        task_exec(task_id, 'cat secure-tls-pod.crt').encode('ascii'),
        DEFAULT_BACKEND)

    root_ca_cert_in_truststore = _export_cert_from_task_keystore(
        task_id, 'keystore.truststore', 'dcos-root')

    # Check that certificate subject matches the service name
    common_name = end_entity_cert.subject.get_attributes_for_oid(
        NameOID.COMMON_NAME)[0].value
    assert common_name in sdk_hosts.autoip_host(PACKAGE_NAME, 'artifacts-0-node')

    san_extension = end_entity_cert.extensions.get_extension_for_oid(
        ExtensionOID.SUBJECT_ALTERNATIVE_NAME)
    # SubjectAlternativeName is iterable over its general names; use that
    # public interface instead of reaching into private attributes.
    sans = list(san_extension.value)
    assert len(sans) == 1

    cluster_root_ca_cert = x509.load_pem_x509_certificate(
        sdk_cmd.request(
            'get', shakedown.dcos_url_path('/ca/dcos-ca.crt')).content,
        DEFAULT_BACKEND)

    assert root_ca_cert_in_truststore.signature == cluster_root_ca_cert.signature
Esempio n. 12
0
def test_tls_basic_artifacts(hello_world_service):
    """Verify the TLS artifacts present in the first 'artifacts' task.

    Checks that the certificate's common name is contained in the autoip host
    name of 'artifacts-0-node', that it carries exactly one subject
    alternative name, and that the 'dcos-root' truststore entry has the same
    signature as the cluster CA certificate served at /ca/dcos-ca.crt.

    hello_world_service -- not used in the body; presumably a pytest fixture
    that provisions the service (confirm against the test module).
    """
    task_id = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'artifacts')[0]
    assert task_id

    # Load the end-entity certificate from the task's PEM file and the root
    # CA cert from the truststore
    end_entity_cert = x509.load_pem_x509_certificate(
        task_exec(task_id, 'cat secure-tls-pod.crt').encode('ascii'),
        DEFAULT_BACKEND)

    root_ca_cert_in_truststore = _export_cert_from_task_keystore(
        task_id, 'keystore.truststore', 'dcos-root')

    # Check that certificate subject matches the service name
    common_name = end_entity_cert.subject.get_attributes_for_oid(
        NameOID.COMMON_NAME)[0].value
    assert common_name in sdk_hosts.autoip_host(config.SERVICE_NAME, 'artifacts-0-node')

    san_extension = end_entity_cert.extensions.get_extension_for_oid(
        ExtensionOID.SUBJECT_ALTERNATIVE_NAME)
    # SubjectAlternativeName is iterable over its general names; use that
    # public interface instead of reaching into private attributes.
    sans = list(san_extension.value)
    assert len(sans) == 1

    cluster_root_ca_cert = x509.load_pem_x509_certificate(
        sdk_cmd.request(
            'get', shakedown.dcos_url_path('/ca/dcos-ca.crt')).content,
        DEFAULT_BACKEND)

    assert root_ca_cert_in_truststore.signature == cluster_root_ca_cert.signature
Esempio n. 13
0
def test_modify_app_config():
    """Change an app-level config value and expect every pod type to be updated.

    The expiry setting is raised by one in the Marathon env; afterwards the
    ids captured beforehand for 'journal', 'name', 'zkfc' and 'data' are each
    passed to tasks.check_tasks_updated.
    """
    check_healthy()
    app_config_field = 'TASKCFG_ALL_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_EXPIRY_MS'

    journal_ids = tasks.get_task_ids(PACKAGE_NAME, 'journal')
    name_ids = tasks.get_task_ids(PACKAGE_NAME, 'name')
    zkfc_ids = tasks.get_task_ids(PACKAGE_NAME, 'zkfc')
    data_ids = tasks.get_task_ids(PACKAGE_NAME, 'data')
    print('journal ids: ' + str(journal_ids))
    print('name ids: ' + str(name_ids))
    print('zkfc ids: ' + str(zkfc_ids))
    print('data ids: ' + str(data_ids))

    config = marathon.get_config(PACKAGE_NAME)
    print('marathon config: ')
    print(config)
    expiry_ms = int(config['env'][app_config_field])
    config['env'][app_config_field] = str(expiry_ms + 1)
    cmd.request('put',
                marathon.api_url('apps/' + PACKAGE_NAME),
                json=config)

    # All tasks should be updated because hdfs-site.xml has changed
    tasks.check_tasks_updated(PACKAGE_NAME, 'journal', journal_ids)
    tasks.check_tasks_updated(PACKAGE_NAME, 'name', name_ids)
    tasks.check_tasks_updated(PACKAGE_NAME, 'zkfc', zkfc_ids)
    # Compare the data tasks against their own old ids (was journal_ids).
    tasks.check_tasks_updated(PACKAGE_NAME, 'data', data_ids)

    check_healthy()
Esempio n. 14
0
def test_modify_app_config_rollback():
    """Change an app-level config value, roll it back, and check the outcome.

    The expiry setting is raised by one, the test waits for the journal tasks
    to change, then the untouched config is PUT back. At the end the setting
    must equal its initial value and the 'zkfc' and 'data' ids captured at the
    start are passed to tasks.check_tasks_not_updated.
    """
    def put_config(marathon_config):
        # Push a full app definition to Marathon.
        return cmd.request('put',
                           marathon.api_url('apps/' + PACKAGE_NAME),
                           json=marathon_config)

    check_healthy()
    app_config_field = 'TASKCFG_ALL_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_EXPIRY_MS'

    journal_ids = tasks.get_task_ids(PACKAGE_NAME, 'journal')
    name_ids = tasks.get_task_ids(PACKAGE_NAME, 'name')
    zkfc_ids = tasks.get_task_ids(PACKAGE_NAME, 'zkfc')
    data_ids = tasks.get_task_ids(PACKAGE_NAME, 'data')
    print('journal ids: ' + str(journal_ids))
    print('name ids: ' + str(name_ids))
    print('zkfc ids: ' + str(zkfc_ids))
    print('data ids: ' + str(data_ids))

    # Two separate fetches: one copy stays pristine for the rollback, the
    # other one is modified.
    pristine_config = marathon.get_config(PACKAGE_NAME)
    modified_config = marathon.get_config(PACKAGE_NAME)
    print('marathon config: ')
    print(modified_config)
    initial_expiry_ms = int(modified_config['env'][app_config_field])
    print('expiry ms: ' + str(initial_expiry_ms))
    modified_config['env'][app_config_field] = str(initial_expiry_ms + 1)
    put_config(modified_config)

    # Wait for journal nodes to be affected by the change
    tasks.check_tasks_updated(PACKAGE_NAME, 'journal', journal_ids)
    journal_ids = tasks.get_task_ids(PACKAGE_NAME, 'journal')

    print('old config: ')
    print(pristine_config)
    # Put the old config back (rollback)
    put_config(pristine_config)

    # Wait for the journal nodes to return to their old configuration
    tasks.check_tasks_updated(PACKAGE_NAME, 'journal', journal_ids)
    check_healthy()

    final_config = marathon.get_config(PACKAGE_NAME)
    assert int(final_config['env'][app_config_field]) == initial_expiry_ms

    # ZKFC and Data tasks should not have been affected
    tasks.check_tasks_not_updated(PACKAGE_NAME, 'zkfc', zkfc_ids)
    tasks.check_tasks_not_updated(PACKAGE_NAME, 'data', data_ids)
Esempio n. 15
0
def test_state_refresh_disable_cache():
    '''Disables caching via a scheduler envvar

    Sets DISABLE_STATE_CACHE in the Marathon env and waits for
    'state refresh_cache' to fail with a 409, then removes the variable and
    waits for the command to succeed again. After each config change the task
    ids captured at the start are passed to tasks.check_tasks_not_updated.
    '''
    check_running()
    task_ids = tasks.get_task_ids(PACKAGE_NAME, '')

    # caching enabled by default:
    stdout = cmd.run_cli('hello-world state refresh_cache')
    assert "Received cmd: refresh" in stdout

    config = marathon.get_config(PACKAGE_NAME)
    config['env']['DISABLE_STATE_CACHE'] = 'any-text-here'
    cmd.request('put', marathon.api_url('apps/' + PACKAGE_NAME), json=config)

    tasks.check_tasks_not_updated(PACKAGE_NAME, '', task_ids)
    check_running()

    # caching disabled, refresh_cache should fail with a 409 error (eventually, once scheduler is up):
    def check_cache_refresh_fails_409conflict():
        try:
            cmd.run_cli('hello-world state refresh_cache')
        except Exception as e:
            # str(e) instead of e.args[0]: also copes with exceptions that
            # carry no args or a non-string first arg.
            return "failed: 409 Conflict" in str(e)
        return False

    spin.time_wait_noisy(lambda: check_cache_refresh_fails_409conflict(),
                         timeout_seconds=120.)

    config = marathon.get_config(PACKAGE_NAME)
    del config['env']['DISABLE_STATE_CACHE']
    cmd.request('put', marathon.api_url('apps/' + PACKAGE_NAME), json=config)

    tasks.check_tasks_not_updated(PACKAGE_NAME, '', task_ids)
    check_running()

    # caching reenabled, refresh_cache should succeed (eventually, once scheduler is up):
    def check_cache_refresh():
        return cmd.run_cli('hello-world state refresh_cache')

    stdout = spin.time_wait_return(lambda: check_cache_refresh(),
                                   timeout_seconds=120.)
    assert "Received cmd: refresh" in stdout
Esempio n. 16
0
def get_metrics(package_name, service_name, task_name):
    """Return a list of metrics datapoints.

    Keyword arguments:
    package_name -- the package name passed to sdk_cmd.svc_cli for 'pod info'
    service_name -- the name of the service to get metrics for
    task_name -- the name of the task whose agent to run metrics commands from

    Returns the 'datapoints' list of the task container's app metrics, or an
    empty list when 'pod info' has no entry named task_name.

    Raises Exception when the task is not among the service tasks, when the
    metrics request fails, or when the reported executor id does not match.
    """
    # Start from None so that a missing task reaches the check below instead
    # of failing with UnboundLocalError.
    task_to_check = None
    for task in shakedown.get_service_tasks(service_name):
        if task['name'] == task_name:
            task_to_check = task

    if task_to_check is None:
        raise Exception("Could not find task")

    agent_id = task_to_check['slave_id']
    executor_id = task_to_check['executor_id']

    # TODO: uncomment the following block of comments when the /containers endpoint reports the correct container IDs
    # and remove the code following the comments that gets the correct container ID via 'pod info'
    ## Fetch the list of containers for the agent
    #containers_url = "{}/system/v1/agent/{}/metrics/v0/containers".format(shakedown.dcos_url(), agent_id)
    #containers_response = sdk_cmd.request("GET", containers_url, retry=False)
    #if containers_response.ok is None:
    #    log.info("Unable to fetch containers list")
    #    raise Exception("Unable to fetch containers list: {}".format(containers_url))

    # instead of receiving the pod name in this function's parameter list, extract
    # the name of the pod from the task name to not break the code when the
    # above comment-block is uncommented
    pod_name = '-'.join(task_name.split("-")[:2])
    pod_info = sdk_cmd.svc_cli(package_name, service_name, "pod info {}".format(pod_name), json=True)
    task_info = next(
        (task for task in pod_info if task["info"]["name"] == task_name),
        None)

    if not task_info:
        return []

    container_id = task_info["status"]["containerStatus"]["containerId"]["value"]

    #for container_id in json.loads(containers_response.text):
    app_url = "{}/system/v1/agent/{}/metrics/v0/containers/{}/app".format(
        shakedown.dcos_url(), agent_id, container_id)
    app_response = sdk_cmd.request("GET", app_url, retry=False)
    # `ok` is a boolean, so test its truthiness; raise a real exception
    # instance (raising a bare string is a TypeError).
    if not app_response.ok:
        raise Exception("Failed to get metrics from container")
        #continue

    app_json = json.loads(app_response.text)
    if app_json['dimensions']['executor_id'] == executor_id:
        return app_json['datapoints']

    raise Exception("No metrics found")
Esempio n. 17
0
def update_app(app_name, config, timeout=600):
    """PUT a new Marathon app definition and wait for the deployment.

    When the config has an "env" entry its values are logged in sorted key
    order first. The response must be ok; afterwards
    shakedown.deployment_wait is called with the given timeout.
    """
    if "env" in config:
        env = config["env"]
        log.info("Environment for marathon app {} ({} values):".format(app_name, len(env)))
        for key in sorted(env):
            log.info("  {}={}".format(key, env[key]))

    app_url = api_url('apps/{}'.format(app_name))
    response = sdk_cmd.request('put', app_url, log_args=False, json=config)

    assert response.ok, "Marathon configuration update failed for {} with config {}".format(app_name, config)

    log.info("Waiting for Marathon deployment of {} to complete...".format(app_name))
    shakedown.deployment_wait(app_id=app_name, timeout=timeout)
Esempio n. 18
0
def update_app(app_name, config, timeout=TIMEOUT_SECONDS, wait_for_completed_deployment=True):
    """PUT a new Marathon app definition and optionally wait for the deployment.

    When the config has an "env" entry its values are logged in sorted key
    order first. The response must be ok. Unless
    wait_for_completed_deployment is falsy, shakedown.deployment_wait is
    called with the given timeout.
    """
    if "env" in config:
        env = config["env"]
        log.info("Environment for marathon app {} ({} values):".format(app_name, len(env)))
        for key in sorted(env):
            log.info("  {}={}".format(key, env[key]))

    app_url = api_url('apps/{}'.format(app_name))
    response = sdk_cmd.request('put', app_url, log_args=False, json=config)

    assert response.ok, "Marathon configuration update failed for {} with config {}".format(app_name, config)

    if not wait_for_completed_deployment:
        return

    log.info("Waiting for Marathon deployment of {} to complete...".format(app_name))
    shakedown.deployment_wait(app_id=app_name, timeout=timeout)
Esempio n. 19
0
def test_state_refresh_disable_cache():
    '''Disables caching via a scheduler envvar

    Sets DISABLE_STATE_CACHE in the Marathon env and waits for
    'state refresh_cache' to fail with a 409, then removes the variable and
    waits for the command to succeed again. After each config change the task
    ids captured at the start are passed to tasks.check_tasks_not_updated.
    '''
    check_running()
    task_ids = tasks.get_task_ids(PACKAGE_NAME, '')

    # caching enabled by default:
    stdout = cmd.run_cli('hello-world state refresh_cache')
    assert "Received cmd: refresh" in stdout

    config = marathon.get_config(PACKAGE_NAME)
    config['env']['DISABLE_STATE_CACHE'] = 'any-text-here'
    cmd.request('put', marathon.api_url('apps/' + PACKAGE_NAME), json=config)

    tasks.check_tasks_not_updated(PACKAGE_NAME, '', task_ids)
    check_running()

    # caching disabled, refresh_cache should fail with a 409 error (eventually, once scheduler is up):
    def check_cache_refresh_fails_409conflict():
        try:
            cmd.run_cli('hello-world state refresh_cache')
        except Exception as e:
            # str(e) instead of e.args[0]: also copes with exceptions that
            # carry no args or a non-string first arg.
            return "failed: 409 Conflict" in str(e)
        return False
    spin.time_wait_noisy(lambda: check_cache_refresh_fails_409conflict(), timeout_seconds=120.)

    config = marathon.get_config(PACKAGE_NAME)
    del config['env']['DISABLE_STATE_CACHE']
    cmd.request('put', marathon.api_url('apps/' + PACKAGE_NAME), json=config)

    tasks.check_tasks_not_updated(PACKAGE_NAME, '', task_ids)
    check_running()

    # caching reenabled, refresh_cache should succeed (eventually, once scheduler is up):
    def check_cache_refresh():
        return cmd.run_cli('hello-world state refresh_cache')
    stdout = spin.time_wait_return(lambda: check_cache_refresh(), timeout_seconds=120.)
    assert "Received cmd: refresh" in stdout
Esempio n. 20
0
def get_metrics(service_name, task_name):
    """Return a list of metrics datapoints.

    Keyword arguments:
    service_name -- the name of the service to get metrics for
    task_name -- the name of the task whose agent to run metrics commands from

    Walks the containers listed by the agent's metrics endpoint and returns
    the 'datapoints' of the first one whose executor id matches the task's.

    Raises Exception when the task is not among the service tasks, when the
    containers list cannot be fetched, or when no container matches.
    """
    # Start from None so that a missing task reaches the check below instead
    # of failing with UnboundLocalError.
    task_to_check = None
    for task in shakedown.get_service_tasks(service_name):
        if task['name'] == task_name:
            task_to_check = task

    if task_to_check is None:
        raise Exception("Could not find task")

    agent_id = task_to_check['slave_id']
    executor_id = task_to_check['executor_id']

    # Fetch the list of containers for the agent
    containers_url = "{}/system/v1/agent/{}/metrics/v0/containers".format(
        shakedown.dcos_url(), agent_id)
    containers_response = cmd.request("GET", containers_url, retry=False)
    # `ok` is a boolean, so test its truthiness rather than comparing to None.
    if not containers_response.ok:
        log.info("Unable to fetch containers list")
        raise Exception(
            "Unable to fetch containers list: {}".format(containers_url))

    for container in json.loads(containers_response.text):
        app_url = "{}/system/v1/agent/{}/metrics/v0/containers/{}/app".format(
            shakedown.dcos_url(), agent_id, container)
        app_response = cmd.request("GET", app_url, retry=False)
        # Skip containers whose app metrics cannot be fetched.
        if not app_response.ok:
            continue

        app_json = json.loads(app_response.text)
        if app_json['dimensions']['executor_id'] == executor_id:
            return app_json['datapoints']

    raise Exception("No metrics found")
Esempio n. 21
0
def test_deploy():
    """Expect launch failures with the default env, then fix the env and expect success.

    For `wait_time` seconds the task states are polled; TASK_RUNNING may show
    up in at most 3 consecutive polls. Afterwards the Marathon env is patched
    and check_running() is called.
    """
    wait_time = 30
    # taskcfg.yml will initially fail to deploy because several options are missing in the default
    # marathon.json.mustache. verify that tasks are failing for 30s before continuing.
    banner = 'Checking that tasks are failing to launch for at least {}s'
    print(banner.format(wait_time))

    # we can get brief blips of TASK_RUNNING but they shouldnt last more than 2-3s:
    running_polls_in_a_row = 0

    def poll_task_states():
        nonlocal running_polls_in_a_row
        service_tasks = shakedown.get_service_tasks(PACKAGE_NAME)
        states = [entry['state'] for entry in service_tasks]
        print('Task states: {}'.format(states))
        saw_running = 'TASK_RUNNING' in states
        if not saw_running:
            running_polls_in_a_row = 0
            return False
        running_polls_in_a_row += 1
        assert running_polls_in_a_row <= 3
        # Never report success: the wait is supposed to run into its timeout.
        return False

    try:
        spin.time_wait_noisy(lambda: poll_task_states(), timeout_seconds=wait_time)
    except shakedown.TimeoutExpired:
        print('Timeout reached as expected')

    # add the needed envvars in marathon and confirm that the deployment succeeds:
    marathon_config = marathon.get_config(PACKAGE_NAME)
    marathon_env = marathon_config['env']
    del marathon_env['SLEEP_DURATION']
    marathon_env['TASKCFG_ALL_OUTPUT_FILENAME'] = 'output'
    marathon_env['TASKCFG_ALL_SLEEP_DURATION'] = '1000'
    cmd.request(
        'put',
        marathon.api_url('apps/' + PACKAGE_NAME),
        json=marathon_config)

    check_running()
Esempio n. 22
0
def _get_master_public_ip() -> str:
    """
    Look up the "PUBLIC_IPV4" entry of the cluster's /metadata endpoint.

    :return (str): The public IP of the master node in the DC/OS cluster.
    :raises RuntimeError: if the metadata request does not succeed.
    :raises KeyError: if the metadata has no "PUBLIC_IPV4" entry.
    """
    # Only the URL is needed; the second element of the pair is not used.
    dcos_url, _ = sdk_security.get_dcos_credentials()
    metadata_url = "{cluster_url}/metadata".format(cluster_url=dcos_url)
    response = sdk_cmd.request("GET", metadata_url, verify=False)
    if not response.ok:
        raise RuntimeError("Unable to get the master node's public IP address: {err}".format(err=repr(response)))

    metadata = response.json()
    if "PUBLIC_IPV4" not in metadata:
        raise KeyError("Cluster metadata does not include master's public ip: {response}".format(
            response=repr(metadata)))

    master_ip = metadata["PUBLIC_IPV4"]
    log.info("Master public ip is {public_ip}".format(public_ip=master_ip))
    return master_ip
Esempio n. 23
0
def test_httpd():
    """Issue a GET for the 'pyhttpsd' path below the 'proxylite' service URL.

    The response is not inspected here.
    """
    proxy_url = shakedown.dcos_service_url('proxylite')
    cmd.request('get', '{}/pyhttpsd'.format(proxy_url))
Esempio n. 24
0
def _get_config_once(app_name):
    """Send one 'get' request for the app's definition and return the response.

    The request is made with retry=False and log_args=False.
    """
    app_url = api_url('apps/{}'.format(app_name))
    response = sdk_cmd.request('get', app_url, retry=False, log_args=False)
    return response
Esempio n. 25
0
def test_google():
    """Issue a GET for the 'google' path below the 'proxylite' service URL.

    The response is not inspected here.
    """
    proxy_url = shakedown.dcos_service_url('proxylite')
    cmd.request('get', '{}/google'.format(proxy_url))
Esempio n. 26
0
 def fn():
     # Single, non-retried 'get' of the app definition; `app_name` comes from
     # the enclosing scope, which is not part of this fragment.
     app_url = api_url('apps/{}'.format(app_name))
     return sdk_cmd.request('get', app_url, retry=False)
Esempio n. 27
0
def update_app(app_name, config):
    """PUT a new Marathon app definition and assert that the response is ok."""
    app_url = api_url('apps/{}'.format(app_name))
    response = sdk_cmd.request('put', app_url, json=config)
    assert response.ok, "Marathon configuration update failed for {} with config {}".format(
        app_name, config)
Esempio n. 28
0
def test_httpd():
    """Request the 'pyhttpsd' path through the 'proxylite' service.

    Only the request is made; nothing is asserted on the response.
    """
    target_url = '{}/pyhttpsd'.format(shakedown.dcos_service_url('proxylite'))
    cmd.request('get', target_url)
Esempio n. 29
0
def get_metrics(package_name, service_name, task_name):
    """Return a list of metrics datapoints.

    Keyword arguments:
    package_name -- the name of the package the service is using
    service_name -- the name of the service to get metrics for
    task_name -- the name of the task whose agent to run metrics commands from

    Returns the 'datapoints' list of the task container's app metrics, or an
    empty list when 'pod info' has no entry named task_name.

    Raises Exception when the task is not among the service tasks, when the
    containers list cannot be fetched, or when the reported executor id does
    not match. Raises ValueError when the containers endpoint does not list
    the task's container or the container's metrics request fails.
    """
    # Start from None so that a missing task reaches the check below instead
    # of failing with UnboundLocalError.
    task_to_check = None
    for task in shakedown.get_service_tasks(service_name):
        if task['name'] == task_name:
            task_to_check = task

    if task_to_check is None:
        raise Exception("Could not find task")

    agent_id = task_to_check['slave_id']
    executor_id = task_to_check['executor_id']

    # The pod name is the first two dash-separated parts of the task name.
    pod_name = '-'.join(task_name.split("-")[:2])
    pod_info = sdk_cmd.svc_cli(package_name,
                               service_name,
                               "pod info {}".format(pod_name),
                               json=True)
    task_info = next(
        (task for task in pod_info if task["info"]["name"] == task_name),
        None)

    if not task_info:
        return []

    task_container_id = task_info["status"]["containerStatus"]["containerId"][
        "value"]

    # Not related to functionality but consuming this
    # endpoint to verify downstream integrity
    containers_url = "{}/system/v1/agent/{}/metrics/v0/containers".format(
        shakedown.dcos_url(), agent_id)
    containers_response = sdk_cmd.request("GET", containers_url, retry=False)
    # `ok` is a boolean, so test its truthiness rather than comparing to None.
    if not containers_response.ok:
        log.info("Unable to fetch containers list")
        raise Exception(
            "Unable to fetch containers list: {}".format(containers_url))
    reported_container_ids = json.loads(containers_response.text)

    if task_container_id not in reported_container_ids:
        raise ValueError(
            "The metrics /container endpoint returned {}, expecting {} to be returned as well"
            .format(reported_container_ids, task_container_id))

    app_url = "{}/system/v1/agent/{}/metrics/v0/containers/{}/app".format(
        shakedown.dcos_url(), agent_id, task_container_id)
    app_response = sdk_cmd.request("GET", app_url, retry=False)
    if not app_response.ok:
        raise ValueError("Failed to get metrics from container")

    app_json = json.loads(app_response.text)
    if app_json['dimensions']['executor_id'] == executor_id:
        return app_json['datapoints']

    raise Exception("No metrics found")
Esempio n. 30
0
def test_google():
    """Request the 'google' path through the 'proxylite' service.

    Only the request is made; nothing is asserted on the response.
    """
    target_url = '{}/google'.format(shakedown.dcos_service_url('proxylite'))
    cmd.request('get', target_url)
Esempio n. 31
0
def update_app(app_name, config):
    """Send the config to the app's Marathon endpoint via 'put'; the response must be ok."""
    endpoint = api_url('apps/{}'.format(app_name))
    response = sdk_cmd.request('put', endpoint, json=config)
    assert response.ok, "Marathon configuration update failed for {} with config {}".format(app_name, config)
Esempio n. 32
0
def restart_app(app_name):
    """Trigger the app's 'restart' endpoint with a 'post' and require an ok response.

    Logs a message before and after, and the response object in between.
    """
    log.info("Restarting {}...".format(app_name))
    endpoint = api_url('apps/{}/restart'.format(app_name))
    response = sdk_cmd.request('post', endpoint)
    log.info(response)
    assert response.ok
    log.info("Restarted {}.".format(app_name))
Esempio n. 33
0
 def fn():
     # Single, non-retried 'get' of the app definition without argument
     # logging; `app_name` comes from the enclosing scope, which is not part
     # of this fragment.
     app_url = api_url('apps/{}'.format(app_name))
     return sdk_cmd.request('get', app_url, retry=False, log_args=False)