Example #1
def test_create_pod_with_private_image():
    """Deploys a pod with a private Docker image, using Mesos containerizer."""

    if not common.is_enterprise_cli_package_installed():
        common.install_enterprise_cli_package()

    username = os.environ['DOCKER_HUB_USERNAME']
    password = os.environ['DOCKER_HUB_PASSWORD']

    secret_name = "pullconfig"
    secret_value_json = common.create_docker_pull_config_json(
        username, password)
    secret_value = json.dumps(secret_value_json)

    pod_def = pods.private_docker_pod()
    pod_id = pod_def['id']
    common.create_secret(secret_name, secret_value)
    client = marathon.create_client()

    try:
        client.add_pod(pod_def)
        common.deployment_wait(timeout=timedelta(minutes=5).total_seconds(),
                               service_id=pod_id)
        pod = client.show_pod(pod_id)
        assert pod is not None, "The pod has not been created"
    finally:
        common.delete_secret(secret_name)
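
For context, the pull-config secret created above follows Docker's standard config.json layout. A minimal sketch of what a helper like common.create_docker_pull_config_json might return, assuming that layout (the project's actual helper may differ):

import base64


def create_docker_pull_config_json(username, password):
    # Base64-encode "user:password" the same way `docker login` does and wrap it
    # in the "auths" structure that the Mesos fetcher expects in the pull secret.
    auth_token = base64.b64encode('{}:{}'.format(username, password).encode()).decode()
    return {
        "auths": {
            "https://index.docker.io/v1/": {
                "auth": auth_token
            }
        }
    }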
Example #2
def test_pod_health_failed_check():
    """Deploys a pod with correct health checks, then partitions the network and verifies that
       the tasks get restarted with new task IDs.
    """

    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    host = common.ip_other_than_mom()
    common.pin_pod_to_host(pod_def, host)

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    initial_id1 = tasks[0]['id']
    initial_id2 = tasks[1]['id']

    pod = client.list_pod()[0]
    container1 = pod['instances'][0]['containers'][0]
    port = container1['endpoints'][0]['allocatedHostPort']

    common.save_iptables(host)
    common.block_port(host, port)
    time.sleep(7)
    common.restore_iptables(host)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    for task in tasks:
        assert task['id'] != initial_id1, "One of the tasks has not been restarted"
        assert task['id'] != initial_id2, "One of the tasks has not been restarted"
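
The iptables helpers used here (save_iptables, block_port, restore_iptables) come from the test suite's common module. Purely as an illustration of the idea, and assuming shakedown's run_command_on_agent, a block_port-style helper could drop inbound traffic like this (the real implementation may differ):

import shakedown


def block_port(host, port, direction='INPUT'):
    # Insert a DROP rule for the given TCP port on the agent; requires sudo on the node.
    cmd = 'sudo iptables -I {} -p tcp --dport {} -j DROP'.format(direction, port)
    status, output = shakedown.run_command_on_agent(host, cmd)
    assert status, "Failed to block port {} on {}: {}".format(port, host, output)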
Example #3
def test_pod_with_persistent_volume():
    pod_def = pods.persistent_volume_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)

    host = tasks[0]['statuses'][0]['container_status']['network_infos'][0][
        'ip_addresses'][0]['ip_address']
    port1 = tasks[0]['discovery']['ports']['ports'][0]["number"]
    port2 = tasks[1]['discovery']['ports']['ports'][0]["number"]
    dir1 = tasks[0]['container']['volumes'][0]['container_path']
    dir2 = tasks[1]['container']['volumes'][0]['container_path']
    print(host, port1, port2, dir1, dir2)

    time.sleep(1)

    cmd = "curl {}:{}/{}/foo".format(host, port1, dir1)
    run, data = shakedown.run_command_on_master(cmd)
    assert run, "{} did not succeed".format(cmd)
    assert data == 'hello\n', "'{}' was not equal to hello\\n".format(data)

    cmd = "curl {}:{}/{}/foo".format(host, port2, dir2)
    run, data = shakedown.run_command_on_master(cmd)
    assert run, "{} did not succeed".format(cmd)
    assert data == 'hello\n', "'{}' was not equal to hello\\n".format(data)
Example #4
def test_pod_with_persistent_volume():
    pod_def = pods.persistent_volume_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)

    host = common.running_status_network_info(tasks[0]['statuses'])['ip_addresses'][0]['ip_address']
    port1 = tasks[0]['discovery']['ports']['ports'][0]["number"]
    port2 = tasks[1]['discovery']['ports']['ports'][0]["number"]
    path1 = tasks[0]['container']['volumes'][0]['container_path']
    path2 = tasks[1]['container']['volumes'][0]['container_path']
    logger.info('Deployed two containers on {}:{}/{} and {}:{}/{}'.format(host, port1, path1, host, port2, path2))

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=60, retry_on_exception=common.ignore_exception)
    def check_http_endpoint(port, path):
        cmd = "curl {}:{}/{}/foo".format(host, port, path)
        run, data = shakedown.run_command_on_master(cmd)
        assert run, "{} did not succeed".format(cmd)
        assert data == 'hello\n', "'{}' was not equal to hello\\n".format(data)

    check_http_endpoint(port1, path1)
    check_http_endpoint(port2, path2)
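
Many examples below pass common.ignore_exception to retrying's retry_on_exception. That argument expects a predicate that receives the raised exception and returns True if the call should be retried; the simplest such predicate, which is presumably what the helper does, retries on everything:

def ignore_exception(exc):
    # Treat every exception (failed curl, missing key in a partial response, ...)
    # as retryable, so the assertion is simply re-evaluated on the next attempt.
    return True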
Example #5
def test_pod_with_container_bridge_network():
    """Tests creation of a pod with a "container/bridge" network, and its HTTP endpoint accessibility."""

    pod_def = pods.container_bridge_pod()
    pod_id = pod_def['id']

    # In strict mode all tasks are started as user `nobody` by default and `nobody`
    # doesn't have permissions to write to /var/log within the container.
    if shakedown.ee_version() == 'strict':
        pod_def['user'] = 'root'
        common.add_dcos_marathon_user_acls()

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    task = common.task_by_name(common.get_pod_tasks(pod_id), "nginx")
    network_info = common.running_status_network_info(task['statuses'])
    assert network_info['name'] == "mesos-bridge", \
        "The network is {}, but mesos-bridge was expected".format(network_info['name'])

    # get the port on the host
    port = task['discovery']['ports']['ports'][0]['number']

    # the agent IP:port will be routed to the bridge IP:port
    # test against the agent IP, which is hard to get directly, so translate it from the slave_id
    agent_ip = common.agent_hostname_by_id(task['slave_id'])
    assert agent_ip is not None, "Failed to get the agent IP address"
    container_ip = network_info['ip_addresses'][0]['ip_address']
    assert agent_ip != container_ip, "The container IP address is the same as the agent one"

    url = "http://{}:{}/".format(agent_ip, port)
    common.assert_http_code(url)
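
common.assert_http_code is used here and in later examples to probe an endpoint. One plausible shape for it, assuming it performs a plain GET and compares the status code (the real helper may retry or use a different HTTP client):

import requests


def assert_http_code(url, http_code=200):
    response = requests.get(url, timeout=5)
    assert response.status_code == http_code, \
        "GET {} returned {}, but {} was expected".format(url, response.status_code, http_code)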
Example #6
def test_pod_health_failed_check():
    """Deploys a pod with correct health checks, then partitions the network and verifies that
       the tasks get restarted with new task IDs.
    """

    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    host = common.ip_other_than_mom()
    common.pin_pod_to_host(pod_def, host)

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    initial_id1 = tasks[0]['id']
    initial_id2 = tasks[1]['id']

    pod = client.list_pod()[0]
    container1 = pod['instances'][0]['containers'][0]
    port = container1['endpoints'][0]['allocatedHostPort']

    common.block_iptable_rules_for_seconds(host, port, 7, block_input=True, block_output=False)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    for new_task in tasks:
        new_task_id = new_task['id']
        assert new_task_id != initial_id1, f"Task {new_task_id} has not been restarted" # NOQA E999
        assert new_task_id != initial_id2, f"Task {new_task_id} has not been restarted"
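
This variant replaces the manual save/block/sleep/restore sequence from Example #2 with a single call. A hypothetical composition of those same helpers, just to show the intent (the actual common.block_iptable_rules_for_seconds may be implemented differently):

import time


def block_iptable_rules_for_seconds(host, port, sleep_seconds, block_input=True, block_output=False):
    # save_iptables, block_port and restore_iptables are the common-module helpers from Example #2.
    save_iptables(host)                       # snapshot the current rules
    try:
        if block_input:
            block_port(host, port, 'INPUT')   # drop inbound traffic to the port
        if block_output:
            block_port(host, port, 'OUTPUT')  # drop outbound traffic toward the port
        time.sleep(sleep_seconds)             # hold the partition for a while
    finally:
        restore_iptables(host)                # always restore the snapshot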
Example #7
def test_mom_when_mom_agent_bounced():
    """Launch an app from MoM and restart the node MoM is on."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]
    mom_ip = common.ip_of_mom()
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        common.deployment_wait(service_id=app_id)
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]['id']

        shakedown.restart_agent(mom_ip)

        @retrying.retry(wait_fixed=1000,
                        stop_max_attempt_number=30,
                        retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0]['id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
Example #8
def test_mom_when_mom_process_killed():
    """Launched a task from MoM then killed MoM."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        common.deployment_wait(service_id=app_id)
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]['id']

        common.kill_process_on_host(common.ip_of_mom(), 'marathon-assembly')
        shakedown.wait_for_task('marathon', 'marathon-user', 300)
        common.wait_for_service_endpoint('marathon-user', path="ping")

        @retrying.retry(wait_fixed=1000,
                        stop_max_attempt_number=30,
                        retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0]['id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
Example #9
def test_pod_secret_env_var(secret_fixture):

    secret_name, secret_value = secret_fixture

    pod_id = '/{}'.format(uuid.uuid4().hex)
    pod_def = {
        "id":
        pod_id,
        "containers": [{
            "name":
            "container-1",
            "resources": {
                "cpus": 0.5,
                "mem": 64
            },
            "endpoints": [{
                "name": "http",
                "hostPort": 0,
                "protocol": ["tcp"]
            }],
            "exec": {
                "command": {
                    "shell":
                    "echo $SECRET_ENV && "
                    "echo $SECRET_ENV >> $MESOS_SANDBOX/secret-env && "
                    "/opt/mesosphere/bin/python -m http.server $ENDPOINT_HTTP"
                }
            }
        }],
        "environment": {
            "SECRET_ENV": {
                "secret": "secret1"
            }
        },
        "networks": [{
            "mode": "host"
        }],
        "secrets": {
            "secret1": {
                "source": secret_name
            }
        }
    }

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    instances = client.show_pod(pod_id)['instances']
    assert len(instances) == 1, 'Failed to start the secret environment variable pod'

    port = instances[0]['containers'][0]['endpoints'][0]['allocatedHostPort']
    host = instances[0]['networks'][0]['addresses'][0]
    cmd = "curl {}:{}/secret-env".format(host, port)
    status, data = shakedown.run_command_on_master(cmd)

    assert status, "{} did not succeed. status = {}, data = {}".format(
        cmd, status, data)
    assert data.rstrip() == secret_value, "Got unexpected secret data"
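
The secret_fixture argument is presumably a pytest fixture along the following lines, built from the create_secret/delete_secret helpers seen in other examples (the name, scope and placeholder value here are assumptions, not the project's actual code):

import uuid

import pytest


@pytest.fixture(scope='function')
def secret_fixture():
    # `common` is the test suite's helper module used throughout these examples.
    # Create a uniquely named secret for the test and clean it up afterwards.
    secret_name = '/mysecret-{}'.format(uuid.uuid4().hex)
    secret_value = 'super_secret_value'
    common.create_secret(secret_name, secret_value)
    try:
        yield secret_name, secret_value
    finally:
        common.delete_secret(secret_name)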
Example #10
def test_two_pods_with_shared_volume():
    """Confirms that 1 container can read data in a volume that was written from the other container.
       The reading container fails if it can't read the file. So if there are 2 tasks after
       4 seconds we are good.
    """

    pod_def = pods.ephemeral_volume_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    assert len(tasks) == 2, "The number of tasks is {} after deployment, but 2 was expected".format(len(tasks))

    time.sleep(4)

    tasks = common.get_pod_tasks(pod_id)
    assert len(tasks) == 2, "The number of tasks is {} after sleeping, but 2 was expected".format(len(tasks))
Example #11
def test_create_pod_with_private_image():
    """Deploys a pod with a private Docker image, using Mesos containerizer.
        This method relies on the global `install_enterprise_cli` fixture to install the
        enterprise-cli-package.
    """

    username = os.environ['DOCKER_HUB_USERNAME']
    password = os.environ['DOCKER_HUB_PASSWORD']

    secret_name = "pullconfig"
    secret_value_json = common.create_docker_pull_config_json(
        username, password)
    secret_value = json.dumps(secret_value_json)

    pod_def = pods.private_docker_pod()
    pod_id = pod_def['id']
    common.create_secret(secret_name, secret_value)
    client = marathon.create_client()

    try:
        client.add_pod(pod_def)
        common.deployment_wait(service_id=pod_id, max_attempts=300)
        pod = client.show_pod(pod_id)
        assert pod is not None, "The pod has not been created"
    finally:
        common.delete_secret(secret_name)
Example #12
def test_create_and_update_pod():
    """Versions and reverting with pods"""

    pod_def = pods.simple_pod()
    pod_def["scaling"]["instances"] = 1
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    pod_def["scaling"]["instances"] = 3
    client.update_pod(pod_id, pod_def)
    common.deployment_wait(service_id=pod_id)

    versions = get_pod_versions(pod_id)
    assert len(versions) == 2, "The number of versions is {}, but 2 was expected".format(len(versions))

    version1 = get_pod_version(pod_id, versions[0])
    version2 = get_pod_version(pod_id, versions[1])
    assert version1["scaling"]["instances"] != version2["scaling"]["instances"], \
        "Two pod versions have the same number of instances: {}, but they should not".format(
            version1["scaling"]["instances"])
Example #13
def test_launch_docker_grace_period(marathon_service_name):
    """Tests 'taskKillGracePeriodSeconds' option using a Docker container in a Marathon environment.
       Read more details about this test in `test_root_marathon.py::test_launch_mesos_root_marathon_grace_period`
    """

    app_id = '/launch-docker-grace-period-app'
    app_def = apps.docker_http_server(app_id)
    app_def['container']['docker']['image'] = 'kensipe/python-test'

    default_grace_period = 3
    grace_period = 20
    app_def['taskKillGracePeriodSeconds'] = grace_period
    app_def['cmd'] = 'python test.py'
    task_name = app_id.lstrip('/')

    client = marathon.create_client()
    client.add_app(app_def)
    common.deployment_wait(service_id=app_id)

    tasks = get_service_task(marathon_service_name, task_name)
    assert tasks is not None

    client.scale_app(app_id, 0)
    tasks = get_service_task(marathon_service_name, task_name)
    assert tasks is not None

    # tasks should still be here after the default_grace_period
    time.sleep(default_grace_period + 1)
    tasks = get_service_task(marathon_service_name, task_name)
    assert tasks is not None

    # but not after the set grace_period
    time.sleep(grace_period)
    assert_that(lambda: get_service_task(marathon_service_name, task_name),
                eventually(equal_to(None), max_attempts=30))
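
For the grace-period behaviour above to be observable, the container's test.py presumably ignores SIGTERM so the task only disappears once Mesos escalates to SIGKILL after taskKillGracePeriodSeconds. This is an assumption about the kensipe/python-test image; a minimal script with that behaviour would look like:

import signal
import http.server
import socketserver

# Swallow the graceful-kill signal; the process now survives until SIGKILL.
signal.signal(signal.SIGTERM, signal.SIG_IGN)

with socketserver.TCPServer(("", 8080), http.server.SimpleHTTPRequestHandler) as httpd:
    httpd.serve_forever()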
Example #14
def test_pod_with_container_network():
    """Tests creation of a pod with a "container" network, and its HTTP endpoint accessibility."""

    pod_def = pods.container_net_pod()
    pod_id = pod_def['id']

    # In strict mode all tasks are started as user `nobody` by default and `nobody`
    # doesn't have permissions to write to /var/log within the container.
    if shakedown.ee_version() == 'strict':
        pod_def['user'] = 'root'
        common.add_dcos_marathon_user_acls()

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    task = common.task_by_name(common.get_pod_tasks(pod_id), "nginx")

    network_info = common.running_status_network_info(task['statuses'])
    assert network_info['name'] == "dcos", \
        "The network name is {}, but 'dcos' was expected".format(network_info['name'])

    container_ip = network_info['ip_addresses'][0]['ip_address']
    assert container_ip is not None, "No IP address has been assigned to the pod's container"

    url = "http://{}:80/".format(container_ip)
    common.assert_http_code(url)
Example #15
async def test_event_channel_for_pods(sse_events):
    """Tests the Marathon event channel specific to pod events."""

    await common.assert_event('event_stream_attached', sse_events)

    pod_def = pods.simple_pod()
    pod_id = pod_def['id']

    # In strict mode all tasks are started as user `nobody` by default and `nobody`
    # doesn't have permissions to write files.
    if shakedown.ee_version() == 'strict':
        pod_def['user'] = 'root'
        common.add_dcos_marathon_user_acls()

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    await common.assert_event('pod_created_event', sse_events)
    await common.assert_event('deployment_step_success', sse_events)

    pod_def["scaling"]["instances"] = 3
    client.update_pod(pod_id, pod_def)
    common.deployment_wait(service_id=pod_id)

    await common.assert_event('pod_updated_event', sse_events)
Example #16
def assert_mom_ee(version, security_mode='permissive'):
    ensure_service_account()
    ensure_permissions()
    ensure_sa_secret(strict=True if security_mode == 'strict' else False)
    ensure_docker_config_secret()

    # In strict mode all tasks are started as user `nobody` by default. However we start
    # MoM-EE as 'root' and for that we need to give root marathon ACLs to start
    # tasks as 'root'.
    if security_mode == 'strict':
        common.add_dcos_marathon_user_acls()

    # Deploy MoM-EE in the requested security mode
    app_def_file = '{}/mom-ee-{}-{}.json'.format(fixtures.fixtures_dir(),
                                                 security_mode, version)
    assert os.path.isfile(app_def_file), \
        "Couldn't find appropriate MoM-EE definition: {}".format(app_def_file)

    image = mom_ee_image(version)
    logger.info('Deploying {} definition with {} image'.format(
        app_def_file, image))

    app_def = get_resource(app_def_file)
    app_def['container']['docker']['image'] = 'mesosphere/marathon-dcos-ee:{}'.format(image)
    app_id = app_def["id"]

    client = marathon.create_client()
    client.add_app(app_def)
    common.deployment_wait(service_id=app_id)
    common.wait_for_service_endpoint(mom_ee_endpoint(version, security_mode),
                                     path="ping")
Example #17
def test_pod_health_failed_check():
    """Deploys a pod with correct health checks, then partitions the network and verifies that
       the tasks get restarted with new task IDs.
    """

    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    host = common.ip_other_than_mom()
    common.pin_pod_to_host(pod_def, host)

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    initial_id1 = tasks[0]['id']
    initial_id2 = tasks[1]['id']

    pod = client.list_pod()[0]
    container1 = pod['instances'][0]['containers'][0]
    port = container1['endpoints'][0]['allocatedHostPort']

    common.block_iptable_rules_for_seconds(host, port, 7, block_input=True, block_output=False)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    for new_task in tasks:
        new_task_id = new_task['id']
        assert new_task_id != initial_id1, f"Task {new_task_id} has not been restarted" # NOQA E999
        assert new_task_id != initial_id2, f"Task {new_task_id} has not been restarted"
Example #18
def test_task_gets_restarted_due_to_network_split():
    """Verifies that a health check fails in presence of a network partition."""

    app_def = apps.http_server()
    app_id = app_def["id"]
    app_def['healthChecks'] = [common.health_check()]
    common.pin_to_host(app_def, common.ip_other_than_mom())

    client = marathon.create_client()
    client.add_app(app_def)

    common.deployment_wait(service_id=app_id)

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1, \
        "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
    assert app['tasksHealthy'] == 1, \
        "The number of healthy tasks is {}, but 1 was expected".format(app['tasksHealthy'])

    tasks = client.get_tasks(app_id)
    task_id = tasks[0]['id']
    host = tasks[0]['host']
    port = tasks[0]['ports'][0]

    # introduce a network partition
    common.block_iptable_rules_for_seconds(host,
                                           port,
                                           sleep_seconds=10,
                                           block_input=True,
                                           block_output=False)

    common.deployment_wait(service_id=app_id)

    app = client.get_app(app_id)
    tasks = client.get_tasks(app_id)
    new_task_id = tasks[0]['id']
    assert task_id != new_task_id, "The task didn't get killed because of a failed health check"

    assert app['tasksRunning'] == 1, \
        "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
    assert app['tasksHealthy'] == 1, \
        "The number of healthy tasks is {}, but 0 was expected".format(app['tasksHealthy'])

    # network partition should cause a task restart
    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_health_message():
        tasks = client.get_tasks(app_id)
        new_task_id = tasks[0]['id']
        assert task_id != new_task_id, "The task has not been restarted: {}".format(
            task_id)

        app = client.get_app(app_id)
        assert app['tasksRunning'] == 1, \
            "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
        assert app['tasksHealthy'] == 1, \
            "The number of healthy tasks is {}, but 1 was expected".format(app['tasksHealthy'])

    check_health_message()
Example #19
async def test_event_channel_for_pods(sse_events):
    """Tests the Marathon event channel specific to pod events."""

    await common.assert_event('event_stream_attached', sse_events)

    pod_def = pods.simple_pod()
    pod_id = pod_def['id']

    # In strict mode all tasks are started as user `nobody` by default and `nobody`
    # doesn't have permissions to write files.
    if shakedown.ee_version() == 'strict':
        pod_def['user'] = 'root'
        common.add_dcos_marathon_user_acls()

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    await common.assert_event('pod_created_event', sse_events)
    await common.assert_event('deployment_step_success', sse_events)

    pod_def["scaling"]["instances"] = 3
    client.update_pod(pod_id, pod_def)
    common.deployment_wait(service_id=pod_id)

    await common.assert_event('pod_updated_event', sse_events)
Example #20
def test_vip_mesos_cmd(marathon_service_name):
    """Validates the creation of an app with a VIP label and the accessibility of the service via the VIP."""

    app_def = apps.http_server()
    app_id = app_def["id"]

    vip_name = app_id.lstrip("/")
    fqn = '{}.{}.l4lb.thisdcos.directory'.format(vip_name,
                                                 marathon_service_name)

    app_def['portDefinitions'] = [{
        "port": 0,
        "protocol": "tcp",
        "name": "{}".format(vip_name),
        "labels": {
            "VIP_0": "/{}:10000".format(vip_name)
        }
    }]

    client = marathon.create_client()
    client.add_app(app_def)

    common.deployment_wait(service_id=app_id)

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def http_output_check():
        time.sleep(1)
        common.assert_http_code('{}:{}'.format(fqn, 10000))

    http_output_check()
Example #21
def test_docker_dns_mapping(marathon_service_name):
    """Tests that a running Docker task is accessible via DNS."""

    app_def = apps.docker_http_server(app_id='/docker-dns-mapping-app')
    app_id = app_def["id"]

    client = marathon.create_client()
    client.add_app(app_def)
    common.deployment_wait(service_id=app_id)

    bad_cmd = 'ping -c 1 docker-test.marathon-user.mesos-bad'
    status, output = shakedown.run_command_on_master(bad_cmd)
    assert not status

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_dns():
        dnsname = '{}.{}.mesos'.format(app_id.lstrip('/'),
                                       marathon_service_name)
        cmd = 'ping -c 1 {}'.format(dnsname)
        shakedown.wait_for_dns(dnsname)
        status, output = shakedown.run_command_on_master(cmd)
        assert status, "ping failed for app using DNS lookup: {}".format(
            dnsname)

    check_dns()
Example #22
def test_marathon_when_task_agent_bounced():
    """Launch an app and restart the node the task is running on."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]

    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)

    common.deployment_wait(service_id=app_id)
    tasks = client.get_tasks(app_id)
    original_task_id = tasks[0]['id']
    shakedown.restart_agent(host)

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_task_is_back():
        tasks = client.get_tasks(app_id)
        assert tasks[0]['id'] == original_task_id, \
            "The task {} got replaced with {}".format(original_task_id, tasks[0]['id'])

    check_task_is_back()
Example #23
def test_marathon_when_disconnected_from_zk():
    """Launches an app from Marathon, then knocks out access to ZK from Marathon.
       Verifies the task is preserved.
    """

    app_def = apps.sleep_app()
    app_id = app_def["id"]

    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)

    common.deployment_wait(service_id=app_id)
    tasks = client.get_tasks(app_id)
    original_task_id = tasks[0]['id']

    common.block_iptable_rules_for_seconds(host,
                                           2181,
                                           sleep_seconds=10,
                                           block_input=True,
                                           block_output=False)

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_task_is_back():
        tasks = client.get_tasks(app_id)
        assert tasks[0]['id'] == original_task_id, \
            "The task {} got replaced with {}".format(original_task_id, tasks[0]['id'])

    check_task_is_back()
Example #24
def test_marathon_with_master_process_failure(marathon_service_name):
    """Launches an app and restarts the master. It is expected that the service endpoint eventually comes back and
       the task ID stays the same.
    """

    app_def = apps.sleep_app()
    app_id = app_def["id"]

    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)
    common.deployment_wait(service_id=app_id)

    tasks = client.get_tasks(app_id)
    original_task_id = tasks[0]['id']

    common.systemctl_master('restart')
    common.wait_for_service_endpoint(marathon_service_name, path="ping")

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_task_recovery():
        tasks = client.get_tasks(app_id)
        assert len(tasks) == 1, \
            "The number of tasks is {} after master restart, but 1 was expected".format(len(tasks))
        assert tasks[0]['id'] == original_task_id, \
            "Task {} has not recovered, it got replaced with another one: {}".format(original_task_id, tasks[0]['id'])

    check_task_recovery()
Example #25
def test_pod_restarts_on_nonzero_exit_code():
    """Verifies that a pod get restarted in case one of its containers exits with a non-zero code.
       As a result, after restart, there should be two new tasks for different IDs.
    """

    pod_def = pods.simple_pod()
    pod_id = pod_def['id']
    pod_def["scaling"]["instances"] = 1
    pod_def['containers'][0]['exec']['command']['shell'] = 'sleep 5; echo -n leaving; exit 2'

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    initial_id1 = tasks[0]['id']
    initial_id2 = tasks[1]['id']

    time.sleep(6)  # 1 sec past the 5 sec sleep in one of the containers' commands
    tasks = common.get_pod_tasks(pod_id)
    for task in tasks:
        assert task['id'] != initial_id1, "Got the same task ID"
        assert task['id'] != initial_id2, "Got the same task ID"
Example #26
def test_pinned_task_scales_on_host_only():
    """Tests that a pinned app scales only on the pinned node."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)

    common.deployment_wait(service_id=app_id)

    tasks = client.get_tasks(app_id)
    assert len(tasks) == 1, "The number of tasks is {} after deployment, but 1 was expected".format(len(tasks))
    assert tasks[0]['host'] == host, \
        "The task is on {}, but it is supposed to be on {}".format(tasks[0]['host'], host)

    client.scale_app(app_id, 10)
    common.deployment_wait(service_id=app_id)

    tasks = client.get_tasks(app_id)
    assert len(tasks) == 10, "The number of tasks is {} after scale, but 10 was expected".format(len(tasks))
    for task in tasks:
        assert task['host'] == host, \
            "The task is on {}, but it is supposed to be on {}".format(task['host'], host)
Example #27
def test_pod_with_container_network():
    """Tests creation of a pod with a "container" network, and its HTTP endpoint accessibility."""

    pod_def = pods.container_net_pod()
    pod_id = pod_def['id']

    # In strict mode all tasks are started as user `nobody` by default and `nobody`
    # doesn't have permissions to write to /var/log within the container.
    if shakedown.ee_version() == 'strict':
        pod_def['user'] = 'root'
        common.add_dcos_marathon_user_acls()

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    task = common.task_by_name(common.get_pod_tasks(pod_id), "nginx")

    network_info = common.running_status_network_info(task['statuses'])
    assert network_info['name'] == "dcos", \
        "The network name is {}, but 'dcos' was expected".format(network_info['name'])

    container_ip = network_info['ip_addresses'][0]['ip_address']
    assert container_ip is not None, "No IP address has been assigned to the pod's container"

    url = "http://{}:80/".format(container_ip)
    common.assert_http_code(url)
Example #28
def test_pinned_task_recovers_on_host():
    """Tests that when a pinned task gets killed, it recovers on the node it was pinned to."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)

    common.deployment_wait(service_id=app_id)
    tasks = client.get_tasks(app_id)

    common.kill_process_on_host(host, '[s]leep')
    common.deployment_wait(service_id=app_id)

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_for_new_task():
        new_tasks = client.get_tasks(app_id)
        assert tasks[0]['id'] != new_tasks[0]['id'], \
            "The task did not get killed: {}".format(tasks[0]['id'])
        assert new_tasks[0]['host'] == host, \
            "The task got restarted on {}, but it was supposed to stay on {}".format(new_tasks[0]['host'], host)

    check_for_new_task()
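
common.kill_process_on_host shows up in several examples (later ones also expect it to return the killed PIDs). A rough, assumed implementation on top of shakedown's run_command_on_agent; the suite's real helper may differ:

import shakedown


def kill_process_on_host(host, pattern):
    # The '[s]leep'-style pattern keeps pgrep from matching its own command line.
    status, output = shakedown.run_command_on_agent(host, "pgrep -f '{}'".format(pattern))
    pids = output.split() if status else []
    for pid in pids:
        shakedown.run_command_on_agent(host, 'sudo kill -9 {}'.format(pid))
    return pids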
Example #29
def test_create_pod_with_private_image():
    """Deploys a pod with a private Docker image, using Mesos containerizer."""

    if not common.is_enterprise_cli_package_installed():
        common.install_enterprise_cli_package()

    username = os.environ['DOCKER_HUB_USERNAME']
    password = os.environ['DOCKER_HUB_PASSWORD']

    secret_name = "pullconfig"
    secret_value_json = common.create_docker_pull_config_json(username, password)
    secret_value = json.dumps(secret_value_json)

    pod_def = pods.private_docker_pod()
    pod_id = pod_def['id']
    common.create_secret(secret_name, secret_value)
    client = marathon.create_client()

    try:
        client.add_pod(pod_def)
        common.deployment_wait(timeout=timedelta(minutes=5).total_seconds(), service_id=pod_id)
        pod = client.show_pod(pod_id)
        assert pod is not None, "The pod has not been created"
    finally:
        common.delete_secret(secret_name)
Example #30
def test_pinned_task_does_not_scale_to_unpinned_host():
    """Tests when a task lands on a pinned node (and barely fits) and it is asked to scale past
       the resources of that node, no tasks will be launched on any other node.
    """

    app_def = apps.sleep_app()
    app_id = app_def['id']

    host = common.ip_other_than_mom()
    print('Constraint set to host: {}'.format(host))
    # the size of cpus is designed to be greater than 1/2 of a node
    # such that only 1 task can land on the node.
    cores = common.cpus_on_agent(host)
    app_def['cpus'] = max(0.6, cores - 0.5)
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)

    common.deployment_wait(service_id=app_id)
    client.scale_app(app_id, 2)

    time.sleep(5)
    deployments = client.get_deployments(app_id=app_id)
    tasks = client.get_tasks(app_id)

    # still deploying
    assert len(deployments) == 1, "The number of deployments is {}, but 1 was expected".format(len(deployments))
    assert len(tasks) == 1, "The number of tasks is {}, but 1 was expected".format(len(tasks))
Example #31
def test_pod_with_persistent_volume():
    pod_def = pods.persistent_volume_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)

    host = common.running_status_network_info(tasks[0]['statuses'])['ip_addresses'][0]['ip_address']
    port1 = tasks[0]['discovery']['ports']['ports'][0]["number"]
    port2 = tasks[1]['discovery']['ports']['ports'][0]["number"]
    path1 = tasks[0]['container']['volumes'][0]['container_path']
    path2 = tasks[1]['container']['volumes'][0]['container_path']
    print(host, port1, port2, path1, path2)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=60, retry_on_exception=common.ignore_exception)
    def check_http_endpoint(port, path):
        cmd = "curl {}:{}/{}/foo".format(host, port, path)
        run, data = shakedown.run_command_on_master(cmd)
        assert run, "{} did not succeed".format(cmd)
        assert data == 'hello\n', "'{}' was not equal to hello\\n".format(data)

    check_http_endpoint(port1, path1)
    check_http_endpoint(port2, path2)
Example #32
def test_app_update():
    """Tests that an app gets successfully updated."""

    app_def = apps.mesos_app(app_id='/update-app')
    app_id = app_def["id"]

    client = marathon.create_client()
    client.add_app(app_def)

    common.deployment_wait(service_id=app_id)

    tasks = client.get_tasks(app_id)
    assert len(tasks) == 1, "The number of tasks is {} after deployment, but 1 was expected".format(len(tasks))

    app_def['cpus'] = 1
    app_def['instances'] = 2

    client.update_app(app_id, app_def)
    common.deployment_wait(service_id=app_id)

    tasks = client.get_tasks(app_id)
    assert len(tasks) == 2, "The number of tasks is {} after deployment, but 2 was expected".format(len(tasks))
Example #33
def test_pod_with_container_bridge_network():
    """Tests creation of a pod with a "container/bridge" network, and its HTTP endpoint accessibility."""

    pod_def = pods.container_bridge_pod()
    pod_id = pod_def['id']

    # In strict mode all tasks are started as user `nobody` by default and `nobody`
    # doesn't have permissions to write to /var/log within the container.
    if shakedown.ee_version() == 'strict':
        pod_def['user'] = 'root'
        common.add_dcos_marathon_user_acls()

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    task = common.task_by_name(common.get_pod_tasks(pod_id), "nginx")
    network_info = common.running_status_network_info(task['statuses'])
    assert network_info['name'] == "mesos-bridge", \
        "The network is {}, but mesos-bridge was expected".format(network_info['name'])

    # get the port on the host
    port = task['discovery']['ports']['ports'][0]['number']

    # the agent IP:port will be routed to the bridge IP:port
    # test against the agent IP, which is hard to get directly, so translate it from the slave_id
    agent_ip = common.agent_hostname_by_id(task['slave_id'])
    assert agent_ip is not None, "Failed to get the agent IP address"
    container_ip = network_info['ip_addresses'][0]['ip_address']
    assert agent_ip != container_ip, "The container IP address is the same as the agent one"

    url = "http://{}:{}/".format(agent_ip, port)
    common.assert_http_code(url)
Example #34
def test_app_with_persistent_volume_recovers():
    """Tests that when an app task with a persistent volume gets killed,
       it recovers on the node it was launched on, and it gets attached
       to the same persistent-volume."""

    app_def = apps.persistent_volume_app()
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)

    common.deployment_wait(service_id=app_id)

    tasks = client.get_tasks(app_id)
    assert len(tasks) == 1, "The number of tasks is {} after deployment, but 1 was expected".format(len(tasks))

    task_id = tasks[0]['id']
    port = tasks[0]['ports'][0]
    host = tasks[0]['host']
    cmd = "curl {}:{}/data/foo".format(host, port)

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_task(cmd, target_data):
        run, data = shakedown.run_command_on_master(cmd)

        assert run, "{} did not succeed".format(cmd)
        assert target_data in data, "'{}' not found in {}".format(target_data, data)

    check_task(cmd, target_data='hello\n')

    shakedown.kill_process_on_host(host, '[h]ttp.server')

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_task_recovery():
        tasks = client.get_tasks(app_id)
        assert len(tasks) == 1, "The number of tasks is {} after recovery, but 1 was expected".format(len(tasks))

        new_task_id = tasks[0]['id']
        assert task_id != new_task_id, "The task ID has not changed, and is still {}".format(task_id)

    check_task_recovery()

    port = tasks[0]['ports'][0]
    host = tasks[0]['host']
    cmd = "curl {}:{}/data/foo".format(host, port)

    check_task(cmd, target_data='hello\nhello\n')
Example #35
def test_install_universe_package(package):
    """ Marathon is responsible for installing packages from the universe.
        This test confirms that several packages are installed into a healthy state.
    """

    install_package_and_wait(package)
    assert package_installed(package), 'Package failed to install'

    common.deployment_wait(max_attempts=300)
    assert service_healthy(package)
Example #36
def test_app_secret_env_var(secret_fixture):

    secret_name, secret_value = secret_fixture

    app_id = '/app-secret-env-var-{}'.format(uuid.uuid4().hex)
    app_def = {
        "id":
        app_id,
        "instances":
        1,
        "cpus":
        0.5,
        "mem":
        64,
        "cmd":
        "echo $SECRET_ENV >> $MESOS_SANDBOX/secret-env && /opt/mesosphere/bin/python -m http.server $PORT_API",
        "env": {
            "SECRET_ENV": {
                "secret": "secret1"
            }
        },
        "portDefinitions": [{
            "port": 0,
            "protocol": "tcp",
            "name": "api",
            "labels": {}
        }],
        "secrets": {
            "secret1": {
                "source": secret_name
            }
        }
    }

    client = marathon.create_client()
    client.add_app(app_def)
    common.deployment_wait(service_id=app_id)

    tasks = client.get_tasks(app_id)
    assert len(tasks) == 1, 'Failed to start the secret environment variable app'

    port = tasks[0]['ports'][0]
    host = tasks[0]['host']
    cmd = "curl {}:{}/secret-env".format(host, port)

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def value_check():
        status, data = run_command_on_master(cmd)
        assert status, "{} did not succeed".format(cmd)
        assert data.rstrip() == secret_value

    value_check()
Example #37
def test_create_pod():
    """Launch simple pod in DC/OS root marathon."""

    pod_def = pods.simple_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    pod = client.show_pod(pod_id)
    assert pod is not None, "The pod has not been created"
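
Nearly every example calls common.deployment_wait. In spirit it is a polling loop over Marathon's deployments endpoint, roughly like the sketch below (the client argument, key names and timing parameters are assumptions; the suite's real helper takes different arguments):

import time


def deployment_wait(client, service_id, max_attempts=120, interval=1):
    for _ in range(max_attempts):
        # A deployment lists the apps/pods it affects; wait until none mention our service.
        active = [d for d in client.get_deployments()
                  if service_id in d.get('affectedApps', []) + d.get('affectedPods', [])]
        if not active:
            return
        time.sleep(interval)
    raise AssertionError('Deployment of {} did not finish in time'.format(service_id))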
Example #38
def test_app_with_persistent_volume_recovers():
    """Tests that when an app task with a persistent volume gets killed,
       it recovers on the node it was launched on, and it gets attached
       to the same persistent-volume."""

    app_def = apps.persistent_volume_app()
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)

    common.deployment_wait(service_id=app_id)

    tasks = client.get_tasks(app_id)
    assert len(tasks) == 1, "The number of tasks is {} after deployment, but 1 was expected".format(len(tasks))

    task_id = tasks[0]['id']
    port = tasks[0]['ports'][0]
    host = tasks[0]['host']
    cmd = "curl {}:{}/data/foo".format(host, port)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_task(cmd, target_data):
        run, data = shakedown.run_command_on_master(cmd)

        assert run, "{} did not succeed".format(cmd)
        assert target_data in data, "'{}' not found in {}".format(target_data, data)

    check_task(cmd, target_data='hello\n')

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def kill_task(host, pattern):
        pids = common.kill_process_on_host(host, pattern)
        assert len(pids) != 0, "no task got killed on {} for pattern {}".format(host, pattern)

    kill_task(host, '[h]ttp\\.server')

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_task_recovery():
        tasks = client.get_tasks(app_id)
        assert len(tasks) == 1, "The number of tasks is {} after recovery, but 1 was expected".format(len(tasks))

        new_task_id = tasks[0]['id']
        assert task_id != new_task_id, "The task ID has not changed, and is still {}".format(task_id)

    check_task_recovery()

    port = tasks[0]['ports'][0]
    host = tasks[0]['host']
    cmd = "curl {}:{}/data/foo".format(host, port)

    check_task(cmd, target_data='hello\nhello\n')
Example #39
def test_multi_instance_pod():
    """Launches a pod with multiple instances."""

    pod_def = pods.simple_pod()
    pod_id = pod_def['id']
    pod_def["scaling"]["instances"] = 3

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    status = get_pod_status(pod_id)
    assert len(status["instances"]) == 3, \
        "The number of instances is {}, but 3 was expected".format(len(status["instances"]))
Example #40
def test_pod_health_check():
    """Tests that health checks work for pods."""

    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    for task in tasks:
        health = common.running_task_status(task['statuses'])['healthy']
        assert health, "One of the pod's tasks (%s) is unhealthy" % (task['name'])
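
common.running_task_status and the running_status_network_info helper used in earlier examples both operate on a task's Mesos status list. Judging by the raw path used in Example #3 (statuses[...]['container_status']['network_infos'][...]), they plausibly look like this (an assumption, not the suite's actual code):

def running_task_status(statuses):
    # Pick the TASK_RUNNING entry out of the task's status history.
    return next(s for s in statuses if s['state'] == 'TASK_RUNNING')


def running_status_network_info(statuses):
    # The running status carries the container's network info, including its IP addresses.
    return running_task_status(statuses)['container_status']['network_infos'][0]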
Example #41
def test_pin_pod():
    """Tests that a pod can be pinned to a specific host."""

    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    host = common.ip_other_than_mom()
    common.pin_pod_to_host(pod_def, host)

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    assert len(tasks) == 2, "The number of tasks is {} after deployment, but 2 was expected".format(len(tasks))

    pod = client.list_pod()[0]
    assert pod['instances'][0]['agentHostname'] == host, "The pod didn't get pinned to {}".format(host)
Example #42
def test_pod_multi_port():
    """A pod with two containers is properly provisioned so that each container has a unique port."""

    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    pod = client.show_pod(pod_id)

    container1 = pod['instances'][0]['containers'][0]
    port1 = container1['endpoints'][0]['allocatedHostPort']
    container2 = pod['instances'][0]['containers'][1]
    port2 = container2['endpoints'][0]['allocatedHostPort']

    assert port1 != port2, "Containers' ports are equal, but they should be different"
Example #43
def test_restart_container_with_persistent_volume():
    """A task with a persistent volume, which writes to a file in the persistent volume, is launched.
       The app is then restarted, and what was previously written can still be read from the persistent volume.
    """

    app_def = apps.persistent_volume_app()
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)

    common.deployment_wait(service_id=app_id)

    tasks = client.get_tasks(app_id)
    assert len(tasks) == 1, "The number of tasks is {} after deployment, but 1 was expected".format(len(tasks))

    host = tasks[0]['host']
    port = tasks[0]['ports'][0]
    cmd = "curl {}:{}/data/foo".format(host, port)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_task(cmd, target_data):
        run, data = shakedown.run_command_on_master(cmd)

        assert run, "{} did not succeed".format(cmd)
        assert data == target_data, "'{}' was not equal to {}".format(data, target_data)

    check_task(cmd, target_data='hello\n')

    client.restart_app(app_id)
    common.deployment_wait(service_id=app_id)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_task_recovery():
        tasks = client.get_tasks(app_id)
        assert len(tasks) == 1, "The number of tasks is {} after recovery, but 1 was expected".format(len(tasks))

    check_task_recovery()

    host = tasks[0]['host']
    port = tasks[0]['ports'][0]
    cmd = "curl {}:{}/data/foo".format(host, port)

    check_task(cmd, target_data='hello\nhello\n')
Example #44
def test_remove_pod():
    """Launches a pod and then removes it."""

    pod_def = pods.simple_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    client.remove_pod(pod_id)
    common.deployment_wait(service_id=pod_id)

    try:
        client.show_pod(pod_id)
    except Exception:
        pass
    else:
        assert False, "The pod has not been removed"
Example #45
def test_pod_port_communication():
    """ Test that 1 container can establish a socket connection to the other container in the same pod.
    """

    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    cmd = 'sleep 2; ' \
          'curl -m 2 localhost:$ENDPOINT_HTTPENDPOINT; ' \
          'if [ $? -eq 7 ]; then exit; fi; ' \
          '/opt/mesosphere/bin/python -m http.server $ENDPOINT_HTTPENDPOINT2'
    pod_def['containers'][1]['exec']['command']['shell'] = cmd

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    assert len(tasks) == 2, "The number of tasks is {} after deployment, but 2 was expected".format(len(tasks))
Example #46
def test_two_pods_with_shared_volume():
    """Confirms that 1 container can read data in a volume that was written from the other container.
       The reading container fails if it can't read the file. So if there are 2 tasks after
       4 seconds we are good.
    """

    pod_def = pods.ephemeral_volume_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    assert len(tasks) == 2, "The number of tasks is {} after deployment, but 2 was expected".format(len(tasks))

    time.sleep(4)

    tasks = common.get_pod_tasks(pod_id)
    assert len(tasks) == 2, "The number of tasks is {} after sleeping, but 2 was expected".format(len(tasks))
Example #47
def test_scale_down_pod():
    """Scales down a pod from 3 to 1 instance."""

    pod_def = pods.simple_pod()
    pod_def["scaling"]["instances"] = 3
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    status = get_pod_status(pod_id)
    assert len(status["instances"]) == 3, \
        "The number of instances is {}, but 3 was expected".format(len(status["instances"]))

    pod_def["scaling"]["instances"] = 1
    client.update_pod(pod_id, pod_def)
    common.deployment_wait(service_id=pod_id)

    status = get_pod_status(pod_id)
    assert len(status["instances"]) == 1, \
        "The number of instances is {}, but 1 was expected".format(len(status["instances"]))
Example #48
def test_create_and_update_pod():
    """Versions and reverting with pods"""

    pod_def = pods.simple_pod()
    pod_def["scaling"]["instances"] = 1
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    pod_def["scaling"]["instances"] = 3
    client.update_pod(pod_id, pod_def)
    common.deployment_wait(service_id=pod_id)

    versions = get_pod_versions(pod_id)
    assert len(versions) == 2, "The number of versions is {}, but 2 was expected".format(len(versions))

    version1 = get_pod_version(pod_id, versions[0])
    version2 = get_pod_version(pod_id, versions[1])
    assert version1["scaling"]["instances"] != version2["scaling"]["instances"], \
        "Two pod versions have the same number of instances: {}, but they should not".format(
            version1["scaling"]["instances"])
Example #49
def test_pod_restarts_on_nonzero_exit_code():
    """Verifies that a pod get restarted in case one of its containers exits with a non-zero code.
       As a result, after restart, there should be two new tasks for different IDs.
    """

    pod_def = pods.simple_pod()
    pod_id = pod_def['id']
    pod_def["scaling"]["instances"] = 1
    pod_def['containers'][0]['exec']['command']['shell'] = 'sleep 5; echo -n leaving; exit 2'

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    initial_id1 = tasks[0]['id']
    initial_id2 = tasks[1]['id']

    time.sleep(6)  # 1 sec past the 5 sec sleep in one of the containers' commands
    tasks = common.get_pod_tasks(pod_id)
    for task in tasks:
        assert task['id'] != initial_id1, "Got the same task ID"
        assert task['id'] != initial_id2, "Got the same task ID"
Example #50
def test_pod_with_persistent_volume_recovers():
    pod_def = pods.persistent_volume_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    assert len(tasks) == 2, "The number of pod tasks is {}, but is expected to be 2".format(len(tasks))

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def wait_for_status_network_info():
        tasks = common.get_pod_tasks(pod_id)
        # the following command throws exceptions if there are no tasks in TASK_RUNNING state
        common.running_status_network_info(tasks[0]['statuses'])

    wait_for_status_network_info()
    host = common.running_status_network_info(tasks[0]['statuses'])['ip_addresses'][0]['ip_address']

    task_id1 = tasks[0]['id']
    task_id2 = tasks[1]['id']

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def kill_task(host, pattern):
        pids = common.kill_process_on_host(host, pattern)
        assert len(pids) != 0, "no task got killed on {} for pattern {}".format(host, pattern)

    kill_task(host, '[h]ttp\\.server')

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def wait_for_pod_recovery():
        tasks = common.get_pod_tasks(pod_id)
        assert len(tasks) == 2, "The number of tasks is {} after recovery, but 2 was expected".format(len(tasks))

        old_task_ids = [task_id1, task_id2]
        new_task_id1 = tasks[0]['id']
        new_task_id2 = tasks[1]['id']

        assert new_task_id1 not in old_task_ids, \
            "The task ID has not changed, and is still {}".format(new_task_id1)
        assert new_task_id2 not in old_task_ids, \
            "The task ID has not changed, and is still {}".format(new_task_id2)

    wait_for_pod_recovery()
    wait_for_status_network_info()

    tasks = common.get_pod_tasks(pod_id)
    assert host == common.running_status_network_info(tasks[0]['statuses'])['ip_addresses'][0]['ip_address'], \
        "the pod has been restarted on another host"

    port1 = tasks[0]['discovery']['ports']['ports'][0]["number"]
    port2 = tasks[1]['discovery']['ports']['ports'][0]["number"]
    path1 = tasks[0]['container']['volumes'][0]['container_path']
    path2 = tasks[1]['container']['volumes'][0]['container_path']
    print(host, port1, port2, path1, path2)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_data(port, path):
        cmd = "curl {}:{}/{}/foo".format(host, port, path)
        run, data = shakedown.run_command_on_master(cmd)
        assert run, "{} did not succeed".format(cmd)
        assert 'hello\nhello\n' in data, "'hello\nhello\n' not found in '{}'".format(data)

    check_data(port1, path1)
    check_data(port2, path2)