Esempio n. 1
0
def test_pod_health_failed_check():
    """Checks that the tasks of a pod get replaced after its port has been blocked.

    The ports pod is pinned to the host returned by ``common.ip_other_than_mom()``. Once it
    is deployed, iptables rules are applied for the first container's allocated host port
    (``common.block_iptable_rules_for_seconds`` with 7, input only), and after the following
    deployment none of the pod's tasks may have one of the two initial task IDs.
    """

    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    host = common.ip_other_than_mom()
    common.pin_pod_to_host(pod_def, host)

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    # Remember the IDs of the two tasks that were started by the initial deployment.
    tasks_before = common.get_pod_tasks(pod_id)
    initial_ids = (tasks_before[0]['id'], tasks_before[1]['id'])

    first_instance = client.list_pod()[0]['instances'][0]
    port = first_instance['containers'][0]['endpoints'][0]['allocatedHostPort']

    common.block_iptable_rules_for_seconds(
        host, port, 7, block_input=True, block_output=False)
    common.deployment_wait(service_id=pod_id)

    for current_id in (task['id'] for task in common.get_pod_tasks(pod_id)):
        assert current_id not in initial_ids, f"Task {current_id} has not been restarted"  # NOQA E999
Esempio n. 2
0
def test_two_pods_with_shared_volume():
    """Verifies that both containers of the ephemeral-volume pod keep running.

    One container writes data into a volume and the other one reads it; the reader fails
    when it cannot read the file. Two tasks therefore have to exist right after the
    deployment and again 4 seconds later.
    """

    pod_def = pods.ephemeral_volume_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    task_count = len(common.get_pod_tasks(pod_id))
    assert task_count == 2, \
        "The number of tasks is {} after deployment, but 2 was expected".format(task_count)

    # Leave the reading container some time to fail if the file is not there.
    time.sleep(4)

    task_count = len(common.get_pod_tasks(pod_id))
    assert task_count == 2, \
        "The number of tasks is {} after sleeping, but 2 was expected".format(task_count)
Esempio n. 3
0
def test_health_failed_check():
    """Deploys a pod with good health checks, blocks its port and verifies that
    the tasks return with new task IDs.

    The pod ID contains a random UUID, so every run deploys a uniquely named pod.
    The iptables rules saved before the port is blocked are restored in a
    ``finally`` clause, so they are put back even if blocking or sleeping raises.
    """
    client = marathon.create_client()

    # The placeholder is required: without it str.format() silently drops the UUID.
    pod_id = "/pod-ken-{}".format(uuid.uuid4().hex)

    pod_json = _pods_json('pod-ports.json')
    pod_json["id"] = pod_id
    host = ip_other_than_mom()
    pin_pod_to_host(pod_json, host)
    client.add_pod(pod_json)
    shakedown.deployment_wait()

    tasks = get_pod_tasks(pod_id)
    initial_ids = (tasks[0]['id'], tasks[1]['id'])

    pod = client.list_pod()[0]
    container1 = pod['instances'][0]['containers'][0]
    port = container1['endpoints'][0]['allocatedHostPort']

    save_iptables(host)
    try:
        block_port(host, port)
        time.sleep(7)
    finally:
        # Always undo the firewall change, otherwise later tests inherit the blocked port.
        restore_iptables(host)
    shakedown.deployment_wait()

    for task in get_pod_tasks(pod_id):
        assert task['id'] not in initial_ids, \
            "Task {} has not been restarted".format(task['id'])
Esempio n. 4
0
def test_pod_restarts_on_nonzero_exit_code():
    """Checks that a pod is relaunched when one of its containers exits with a non-zero code.

    The first container's command sleeps for 5 seconds and then exits with code 2. After a
    6 second wait, no task of the pod may still have one of the two initial task IDs.
    """

    pod_def = pods.simple_pod()
    pod_id = pod_def['id']
    pod_def["scaling"]["instances"] = 1
    failing_command = 'sleep 5; echo -n leaving; exit 2'
    pod_def['containers'][0]['exec']['command']['shell'] = failing_command

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks_before = common.get_pod_tasks(pod_id)
    initial_ids = (tasks_before[0]['id'], tasks_before[1]['id'])

    # Wait 1 second longer than the 5 second sleep in the container's command.
    time.sleep(6)
    for task in common.get_pod_tasks(pod_id):
        assert task['id'] not in initial_ids, "Got the same task ID"
Esempio n. 5
0
def test_pod_health_failed_check():
    """Deploys a pod with correct health checks, then blocks its port and verifies that
       the tasks get restarted with new task IDs.

       The iptables rules saved before the port is blocked are restored in a ``finally``
       clause, so they are put back even if blocking the port or sleeping raises.
    """

    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    host = common.ip_other_than_mom()
    common.pin_pod_to_host(pod_def, host)

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    initial_ids = (tasks[0]['id'], tasks[1]['id'])

    pod = client.list_pod()[0]
    container1 = pod['instances'][0]['containers'][0]
    port = container1['endpoints'][0]['allocatedHostPort']

    common.save_iptables(host)
    try:
        common.block_port(host, port)
        time.sleep(7)
    finally:
        # Always undo the firewall change, otherwise later tests inherit the blocked port.
        common.restore_iptables(host)
    common.deployment_wait(service_id=pod_id)

    for task in common.get_pod_tasks(pod_id):
        assert task['id'] not in initial_ids, "One of the tasks has not been restarted"
Esempio n. 6
0
def test_pod_restarts_on_nonzero_exit():
    """Confirms that a pod is relaunched when one of its containers exits non-zero.

    The first container sleeps for 5 seconds and exits with code 2; 6 seconds after the
    deployment no task may still have one of the two initial task IDs.
    """
    client = marathon.create_client()

    pod_id = "/pod-{}".format(uuid.uuid4().hex)

    pod_json = _pods_json()
    pod_json["id"] = pod_id
    pod_json["scaling"]["instances"] = 1
    failing_command = 'sleep 5; echo -n leaving; exit 2'
    pod_json['containers'][0]['exec']['command']['shell'] = failing_command
    client.add_pod(pod_json)
    shakedown.deployment_wait()

    tasks_before = get_pod_tasks(pod_id)
    initial_ids = (tasks_before[0]['id'], tasks_before[1]['id'])

    # Wait 1 second longer than the 5 second sleep in the container's command.
    time.sleep(6)
    for task in get_pod_tasks(pod_id):
        assert task['id'] not in initial_ids
Esempio n. 7
0
def test_pod_health_failed_check():
    """Checks that the tasks of a pod get replaced after its port has been blocked.

    The ports pod is pinned to the host returned by ``common.ip_other_than_mom()``. Once it
    is deployed, iptables rules are applied for the first container's allocated host port
    (``common.block_iptable_rules_for_seconds`` with 7, input only), and after the following
    deployment none of the pod's tasks may have one of the two initial task IDs.
    """

    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    host = common.ip_other_than_mom()
    common.pin_pod_to_host(pod_def, host)

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    # Remember the IDs of the two tasks that were started by the initial deployment.
    tasks_before = common.get_pod_tasks(pod_id)
    initial_ids = (tasks_before[0]['id'], tasks_before[1]['id'])

    first_instance = client.list_pod()[0]['instances'][0]
    port = first_instance['containers'][0]['endpoints'][0]['allocatedHostPort']

    common.block_iptable_rules_for_seconds(
        host, port, 7, block_input=True, block_output=False)
    common.deployment_wait(service_id=pod_id)

    for current_id in (task['id'] for task in common.get_pod_tasks(pod_id)):
        assert current_id not in initial_ids, f"Task {current_id} has not been restarted"  # NOQA E999
Esempio n. 8
0
def test_health_failed_check():
    """Deploys a pod with good health checks, blocks its port and verifies that
    the tasks return with new task IDs.

    The pod ID contains a random UUID, so every run deploys a uniquely named pod.
    The iptables rules saved before the port is blocked are restored in a
    ``finally`` clause, so they are put back even if blocking or sleeping raises.
    """
    client = marathon.create_client()

    # The placeholder is required: without it str.format() silently drops the UUID.
    pod_id = "/pod-ken-{}".format(uuid.uuid4().hex)

    pod_json = _pods_json('pod-ports.json')
    pod_json["id"] = pod_id
    host = ip_other_than_mom()
    pin_pod_to_host(pod_json, host)
    client.add_pod(pod_json)
    shakedown.deployment_wait()

    tasks = get_pod_tasks(pod_id)
    initial_ids = (tasks[0]['id'], tasks[1]['id'])

    pod = client.list_pod()[0]
    container1 = pod['instances'][0]['containers'][0]
    port = container1['endpoints'][0]['allocatedHostPort']

    save_iptables(host)
    try:
        block_port(host, port)
        time.sleep(7)
    finally:
        # Always undo the firewall change, otherwise later tests inherit the blocked port.
        restore_iptables(host)
    shakedown.deployment_wait()

    for task in get_pod_tasks(pod_id):
        assert task['id'] not in initial_ids, \
            "Task {} has not been restarted".format(task['id'])
Esempio n. 9
0
def test_pod_with_persistent_volume():
    """Deploys the persistent-volume pod and fetches the file ``foo`` through both tasks.

    For each of the first two tasks, the first discovery port and the first volume's
    container path are used to ``curl`` the file from the master. The output has to be
    ``hello`` followed by a newline; every check is retried once a second, up to 60 times.
    """
    pod_def = pods.persistent_volume_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)

    # The address reported for the first task is used for both requests.
    network_info = common.running_status_network_info(tasks[0]['statuses'])
    host = network_info['ip_addresses'][0]['ip_address']
    port1 = tasks[0]['discovery']['ports']['ports'][0]["number"]
    port2 = tasks[1]['discovery']['ports']['ports'][0]["number"]
    path1 = tasks[0]['container']['volumes'][0]['container_path']
    path2 = tasks[1]['container']['volumes'][0]['container_path']
    print(host, port1, port2, path1, path2)

    @retrying.retry(
        wait_fixed=1000,
        stop_max_attempt_number=60,
        retry_on_exception=common.ignore_exception,
    )
    def check_http_endpoint(port, path):
        # Runs curl on the master and checks both its success flag and its output.
        cmd = "curl {}:{}/{}/foo".format(host, port, path)
        succeeded, output = shakedown.run_command_on_master(cmd)
        assert succeeded, "{} did not succeed".format(cmd)
        assert output == 'hello\n', "'{}' was not equal to hello\\n".format(output)

    check_http_endpoint(port1, path1)
    check_http_endpoint(port2, path2)
Esempio n. 10
0
def test_pod_with_container_network():
    """Deploys a pod on the "container" network and requests its HTTP endpoint on port 80."""

    pod_def = pods.container_net_pod()
    pod_id = pod_def['id']

    # Tasks run as `nobody` by default in strict mode, and that user has no permission to
    # write to /var/log within the container, so a user is set and ACLs are added.
    strict_mode = shakedown.ee_version() == 'strict'
    if strict_mode:
        pod_def['user'] = '******'
        common.add_dcos_marathon_user_acls()

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    first_task = common.get_pod_tasks(pod_id)[0]

    container_status = first_task['statuses'][0]['container_status']
    network_info = container_status['network_infos'][0]
    network_name = network_info['name']
    assert network_name == "dcos", \
        "The network name is {}, but 'dcos' was expected".format(network_name)

    container_ip = network_info['ip_addresses'][0]['ip_address']
    assert container_ip is not None, "No IP address has been assigned to the pod's container"

    common.assert_http_code("http://{}:80/".format(container_ip))
Esempio n. 11
0
def test_pod_with_container_bridge_network():
    """Deploys a pod on the "container/bridge" network and requests its HTTP endpoint via the agent."""

    pod_def = pods.container_bridge_pod()
    pod_id = pod_def['id']

    # Tasks run as `nobody` by default in strict mode, and that user has no permission to
    # write to /var/log within the container, so a user is set and ACLs are added.
    strict_mode = shakedown.ee_version() == 'strict'
    if strict_mode:
        pod_def['user'] = '******'
        common.add_dcos_marathon_user_acls()

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    task = common.get_pod_tasks(pod_id)[0]
    network_info = task['statuses'][0]['container_status']['network_infos'][0]
    network_name = network_info['name']
    assert network_name == "mesos-bridge", \
        "The network is {}, but mesos-bridge was expected".format(network_name)

    # Port on the host; agent IP:port gets routed to the bridge IP:port.
    host_port = task['discovery']['ports']['ports'][0]['number']

    # The agent address is not part of the task, so it is looked up by the slave ID.
    agent_ip = common.agent_hostname_by_id(task['slave_id'])
    assert agent_ip is not None, "Failed to get the agent IP address"
    container_ip = network_info['ip_addresses'][0]['ip_address']
    assert agent_ip != container_ip, "The container IP address is the same as the agent one"

    common.assert_http_code("http://{}:{}/".format(agent_ip, host_port))
Esempio n. 12
0
def test_pod_container_bridge():
    """Deploys the pod from 'pod-container-bridge.json' and checks that its first task is
    attached to the "mesos-bridge" network and answers HTTP requests via the agent.
    """
    client = marathon.create_client()
    pod_id = "/pod-container-bridge-{}".format(uuid.uuid4().hex)
    pod_json = _pods_json('pod-container-bridge.json')
    pod_json["id"] = pod_id

    client.add_pod(pod_json)
    shakedown.deployment_wait()

    first_task = get_pod_tasks(pod_id)[0]

    container_status = first_task['statuses'][0]['container_status']
    network_info = container_status['network_infos'][0]
    assert network_info['name'] == "mesos-bridge"

    # Port on the host; agent IP:port gets routed to the bridge IP:port.
    host_port = first_task['discovery']['ports']['ports'][0]['number']

    # The agent address is not part of the task, so it is looked up by the slave ID.
    agent_ip = common.agent_hostname_by_id(first_task['slave_id'])
    assert agent_ip is not None
    container_ip = network_info['ip_addresses'][0]['ip_address']
    assert agent_ip != container_ip

    common.assert_http_code("http://{}:{}/".format(agent_ip, host_port))
Esempio n. 13
0
def test_pod_with_container_bridge_network():
    """Deploys a pod on the "container/bridge" network and requests the HTTP endpoint of its
    "nginx" task via the agent.
    """

    pod_def = pods.container_bridge_pod()
    pod_id = pod_def['id']

    # Tasks run as `nobody` by default in strict mode, and that user has no permission to
    # write to /var/log within the container, so a user is set and ACLs are added.
    strict_mode = shakedown.ee_version() == 'strict'
    if strict_mode:
        pod_def['user'] = '******'
        common.add_dcos_marathon_user_acls()

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    pod_tasks = common.get_pod_tasks(pod_id)
    task = common.task_by_name(pod_tasks, "nginx")
    network_info = common.running_status_network_info(task['statuses'])
    network_name = network_info['name']
    assert network_name == "mesos-bridge", \
        "The network is {}, but mesos-bridge was expected".format(network_name)

    # Port on the host; agent IP:port gets routed to the bridge IP:port.
    host_port = task['discovery']['ports']['ports'][0]['number']

    # The agent address is not part of the task, so it is looked up by the slave ID.
    agent_ip = common.agent_hostname_by_id(task['slave_id'])
    assert agent_ip is not None, "Failed to get the agent IP address"
    container_ip = network_info['ip_addresses'][0]['ip_address']
    assert agent_ip != container_ip, "The container IP address is the same as the agent one"

    common.assert_http_code("http://{}:{}/".format(agent_ip, host_port))
Esempio n. 14
0
def test_pod_with_persistent_volume():
    """Deploys the persistent-volume pod and checks that both tasks serve the shared file.

    The ID of the task named 'container1' is expected to show up in the file ``foo``, which
    is fetched with ``curl`` from the master through the first discovery port and the first
    volume path of each of the first two tasks. Every check is retried once a second, up to
    60 times.
    """
    pod_def = pods.persistent_volume_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)

    host = common.running_status_network_info(tasks[0]['statuses'])['ip_addresses'][0]['ip_address']

    # Container with the name 'container1' appends its taskId to the file. So we search for the
    # taskId of that container which is not always the tasks[0]
    expected_data = next((t['id'] for t in tasks if t['name'] == 'container1'), None)
    assert expected_data, f"Hasn't found a container with the name 'container1' in the pod {tasks}"

    port1 = tasks[0]['discovery']['ports']['ports'][0]["number"]
    port2 = tasks[1]['discovery']['ports']['ports'][0]["number"]
    path1 = tasks[0]['container']['volumes'][0]['container_path']
    path2 = tasks[1]['container']['volumes'][0]['container_path']
    logger.info('Deployd two containers on {}:{}/{} and {}:{}/{}'.format(host, port1, path1, host, port2, path2))

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=60, retry_on_exception=common.ignore_exception)
    def check_http_endpoint(port, path, expected):
        cmd = "curl {}:{}/{}/foo".format(host, port, path)
        run, data = run_command_on_master(cmd)
        assert run, "{} did not succeed".format(cmd)
        # The message names the missing text first and the searched response second.
        assert expected in data, "'{}' was not found in '{}'".format(expected, data)

    check_http_endpoint(port1, path1, expected_data)
    check_http_endpoint(port2, path2, expected_data)
Esempio n. 15
0
def test_pod_with_container_network():
    """Deploys a pod on the "container" network and requests port 80 of its "nginx" task."""

    pod_def = pods.container_net_pod()
    pod_id = pod_def['id']

    # Tasks run as `nobody` by default in strict mode, and that user has no permission to
    # write to /var/log within the container, so a user is set and ACLs are added.
    strict_mode = shakedown.ee_version() == 'strict'
    if strict_mode:
        pod_def['user'] = '******'
        common.add_dcos_marathon_user_acls()

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    pod_tasks = common.get_pod_tasks(pod_id)
    task = common.task_by_name(pod_tasks, "nginx")

    network_info = common.running_status_network_info(task['statuses'])
    network_name = network_info['name']
    assert network_name == "dcos", \
        "The network name is {}, but 'dcos' was expected".format(network_name)

    container_ip = network_info['ip_addresses'][0]['ip_address']
    assert container_ip is not None, "No IP address has been assigned to the pod's container"

    common.assert_http_code("http://{}:80/".format(container_ip))
Esempio n. 16
0
def test_pod_with_persistent_volume():
    """Deploys the persistent-volume pod and fetches the file ``foo`` through both tasks.

    For each of the first two tasks, the first discovery port and the first volume's
    container path are used to ``curl`` the file from the master. The output has to be
    ``hello`` followed by a newline; every check is retried once a second, up to 60 times.
    """
    pod_def = pods.persistent_volume_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)

    # The address reported for the first task is used for both requests.
    network_info = common.running_status_network_info(tasks[0]['statuses'])
    host = network_info['ip_addresses'][0]['ip_address']
    port1 = tasks[0]['discovery']['ports']['ports'][0]["number"]
    port2 = tasks[1]['discovery']['ports']['ports'][0]["number"]
    path1 = tasks[0]['container']['volumes'][0]['container_path']
    path2 = tasks[1]['container']['volumes'][0]['container_path']
    print(host, port1, port2, path1, path2)

    @retrying.retry(
        wait_fixed=1000,
        stop_max_attempt_number=60,
        retry_on_exception=common.ignore_exception,
    )
    def check_http_endpoint(port, path):
        # Runs curl on the master and checks both its success flag and its output.
        cmd = "curl {}:{}/{}/foo".format(host, port, path)
        succeeded, output = shakedown.run_command_on_master(cmd)
        assert succeeded, "{} did not succeed".format(cmd)
        assert output == 'hello\n', "'{}' was not equal to hello\\n".format(output)

    check_http_endpoint(port1, path1)
    check_http_endpoint(port2, path2)
Esempio n. 17
0
def test_pod_with_persistent_volume():
    """Deploys the persistent-volume pod and fetches the file ``foo`` through both tasks.

    After a 1 second pause, the file is requested with ``curl`` from the master for each of
    the first two tasks (first discovery port, first volume's container path). Each command
    has to succeed and print ``hello`` followed by a newline.
    """
    pod_def = pods.persistent_volume_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)

    # The address reported for the first task is used for both requests.
    network_info = common.running_status_network_info(tasks[0]['statuses'])
    host = network_info['ip_addresses'][0]['ip_address']
    port1 = tasks[0]['discovery']['ports']['ports'][0]["number"]
    port2 = tasks[1]['discovery']['ports']['ports'][0]["number"]
    dir1 = tasks[0]['container']['volumes'][0]['container_path']
    dir2 = tasks[1]['container']['volumes'][0]['container_path']
    print(host, port1, port2, dir1, dir2)

    time.sleep(1)

    for port, directory in ((port1, dir1), (port2, dir2)):
        cmd = "curl {}:{}/{}/foo".format(host, port, directory)
        succeeded, output = shakedown.run_command_on_master(cmd)
        assert succeeded, "{} did not succeed".format(cmd)
        assert output == 'hello\n', "'{}' was not equal to hello\\n".format(output)
Esempio n. 18
0
def test_pod_with_container_bridge_network():
    """Deploys a pod on the "container/bridge" network and requests its HTTP endpoint via the agent."""

    pod_def = pods.container_bridge_pod()

    client = marathon.create_client()
    client.add_pod(pod_def)
    shakedown.deployment_wait()

    pod_tasks = common.get_pod_tasks(pod_def["id"])
    task = pod_tasks[0]
    network_info = task['statuses'][0]['container_status']['network_infos'][0]
    network_name = network_info['name']
    assert network_name == "mesos-bridge", \
        "The network is {}, but mesos-bridge was expected".format(network_name)

    # Port on the host; agent IP:port gets routed to the bridge IP:port.
    host_port = task['discovery']['ports']['ports'][0]['number']

    # The agent address is not part of the task, so it is looked up by the slave ID.
    agent_ip = common.agent_hostname_by_id(task['slave_id'])
    assert agent_ip is not None, "Failed to get the agent IP address"
    container_ip = network_info['ip_addresses'][0]['ip_address']
    assert agent_ip != container_ip, "The container IP address is the same as the agent one"

    common.assert_http_code("http://{}:{}/".format(agent_ip, host_port))
Esempio n. 19
0
def test_two_pods_with_shared_volume():
    """Verifies that both containers of the ephemeral-volume pod keep running.

    One container writes data into a volume and the other one reads it; the reader fails
    when it cannot read the file. Two tasks therefore have to exist right after the
    deployment and again 4 seconds later.
    """

    pod_def = pods.ephemeral_volume_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    task_count = len(common.get_pod_tasks(pod_id))
    assert task_count == 2, \
        "The number of tasks is {} after deployment, but 2 was expected".format(task_count)

    # Leave the reading container some time to fail if the file is not there.
    time.sleep(4)

    task_count = len(common.get_pod_tasks(pod_id))
    assert task_count == 2, \
        "The number of tasks is {} after sleeping, but 2 was expected".format(task_count)
Esempio n. 20
0
    def wait_for_pod_recovery():
        """Asserts that the pod has two tasks again and that neither uses an old task ID."""
        tasks = common.get_pod_tasks(pod_id)
        task_count = len(tasks)
        assert task_count == 2, "The number of tasks is {} after recovery, but 2 was expected".format(task_count)

        # task_id1 and task_id2 come from the enclosing scope.
        old_task_ids = [task_id1, task_id2]
        for new_task_id in (tasks[0]['id'], tasks[1]['id']):
            assert new_task_id not in old_task_ids, \
                "The task ID has not changed, and is still {}".format(new_task_id)
Esempio n. 21
0
    def wait_for_pod_recovery():
        """Asserts that the pod has two tasks again and that neither uses an old task ID."""
        tasks = common.get_pod_tasks(pod_id)
        task_count = len(tasks)
        assert task_count == 2, "The number of tasks is {} after recovery, but 2 was expected".format(task_count)

        # task_id1 and task_id2 come from the enclosing scope.
        old_task_ids = [task_id1, task_id2]
        for new_task_id in (tasks[0]['id'], tasks[1]['id']):
            assert new_task_id not in old_task_ids, \
                "The task ID has not changed, and is still {}".format(new_task_id)
Esempio n. 22
0
def test_pod_health_check():
    """Deploys the ports pod and asserts that each of its tasks is reported as healthy."""

    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    for task in common.get_pod_tasks(pod_id):
        # The 'healthy' flag is read from the status picked by common.running_task_status.
        running_status = common.running_task_status(task['statuses'])
        is_healthy = running_status['healthy']
        assert is_healthy, "One of the pod's tasks (%s) is unhealthy" % (task['name'])
Esempio n. 23
0
def test_pod_health_check():
    """Deploys the ports pod and asserts that each of its tasks is reported as healthy."""

    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    for task in common.get_pod_tasks(pod_id):
        # The 'healthy' flag is read from the status picked by common.running_task_status.
        running_status = common.running_task_status(task['statuses'])
        is_healthy = running_status['healthy']
        assert is_healthy, "One of the pod's tasks (%s) is unhealthy" % (task['name'])
Esempio n. 24
0
def test_pod_restarts_on_nonzero_exit_code():
    """Checks that a pod is relaunched when one of its containers exits with a non-zero code.

    The first container's command sleeps for 5 seconds and then exits with code 2. After a
    6 second wait, no task of the pod may still have one of the two initial task IDs.
    """

    pod_def = pods.simple_pod()
    pod_id = pod_def['id']
    pod_def["scaling"]["instances"] = 1
    failing_command = 'sleep 5; echo -n leaving; exit 2'
    pod_def['containers'][0]['exec']['command']['shell'] = failing_command

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks_before = common.get_pod_tasks(pod_id)
    initial_ids = (tasks_before[0]['id'], tasks_before[1]['id'])

    # Wait 1 second longer than the 5 second sleep in the container's command.
    time.sleep(6)
    for task in common.get_pod_tasks(pod_id):
        assert task['id'] not in initial_ids, "Got the same task ID"
Esempio n. 25
0
def test_pod_health_check():
    """Deploys the ports pod and asserts that the first status of its first two tasks is healthy."""

    pod_def = pods.ports_pod()

    client = marathon.create_client()
    client.add_pod(pod_def)
    shakedown.deployment_wait()

    tasks = common.get_pod_tasks(pod_def["id"])
    # Both flags are read before any of them is asserted.
    health_flags = (
        tasks[0]['statuses'][0]['healthy'],
        tasks[1]['statuses'][0]['healthy'],
    )

    for is_healthy in health_flags:
        assert is_healthy, "One of the pod's tasks is unhealthy"
Esempio n. 26
0
def test_pod_restarts_on_nonzero_exit():
    """Confirms that a pod is relaunched when one of its containers exits non-zero.

    The first container sleeps for 5 seconds and exits with code 2; 6 seconds after the
    deployment no task may still have one of the two initial task IDs.
    """
    client = marathon.create_client()

    pod_id = "/pod-{}".format(uuid.uuid4().hex)

    pod_json = _pods_json()
    pod_json["id"] = pod_id
    pod_json["scaling"]["instances"] = 1
    failing_command = 'sleep 5; echo -n leaving; exit 2'
    pod_json['containers'][0]['exec']['command']['shell'] = failing_command
    client.add_pod(pod_json)
    shakedown.deployment_wait()

    tasks_before = get_pod_tasks(pod_id)
    initial_ids = (tasks_before[0]['id'], tasks_before[1]['id'])

    # Wait 1 second longer than the 5 second sleep in the container's command.
    time.sleep(6)
    for task in get_pod_tasks(pod_id):
        assert task['id'] not in initial_ids
Esempio n. 27
0
def test_pin_pod():
    """Pins the ports pod to a host and checks that its first instance reports that host."""

    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    host = common.ip_other_than_mom()
    common.pin_pod_to_host(pod_def, host)

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    task_count = len(common.get_pod_tasks(pod_id))
    assert task_count == 2, \
        "The number of tasks is {} after deployment, but 2 was expected".format(task_count)

    first_instance = client.list_pod()[0]['instances'][0]
    assert first_instance['agentHostname'] == host, "The pod didn't get pinned to {}".format(host)
Esempio n. 28
0
def test_pin_pod():
    """Pins the ports pod to a host and checks that its first instance reports that host."""

    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    host = common.ip_other_than_mom()
    common.pin_pod_to_host(pod_def, host)

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    task_count = len(common.get_pod_tasks(pod_id))
    assert task_count == 2, \
        "The number of tasks is {} after deployment, but 2 was expected".format(task_count)

    first_instance = client.list_pod()[0]['instances'][0]
    assert first_instance['agentHostname'] == host, "The pod didn't get pinned to {}".format(host)
Esempio n. 29
0
def test_pod_port_communication():
    """Tests that one container can open a socket connection to the other container of the pod."""
    client = marathon.create_client()

    pod_id = "/pod-{}".format(uuid.uuid4().hex)

    pod_json = _pods_json('pod-ports.json')
    pod_json["id"] = pod_id

    # Container 2 sleeps for 2 seconds and then curls container 1. If curl exits with
    # code 7 the task completes, which leaves only 1 container running; otherwise it
    # starts its own HTTP server and 2 containers are expected to be running.
    probe_cmd = (
        'sleep 2; '
        'curl -m 2 localhost:$ENDPOINT_HTTPENDPOINT; '
        'if [ $? -eq 7 ]; then exit; fi; '
        '/opt/mesosphere/bin/python -m http.server $ENDPOINT_HTTPENDPOINT2'
    )
    pod_json['containers'][1]['exec']['command']['shell'] = probe_cmd
    client.add_pod(pod_json)
    shakedown.deployment_wait()

    task_count = len(get_pod_tasks(pod_id))
    assert task_count == 2
Esempio n. 30
0
def test_health_check():
    """Deploys the pod from 'pod-ports.json' and asserts that the first status of its first
    two tasks is healthy.
    """
    client = marathon.create_client()

    pod_id = "/pod-{}".format(uuid.uuid4().hex)

    pod_json = _pods_json('pod-ports.json')
    pod_json["id"] = pod_id

    client.add_pod(pod_json)
    shakedown.deployment_wait()

    tasks = get_pod_tasks(pod_id)
    # Both flags are read before any of them is asserted.
    health_flags = (
        tasks[0]['statuses'][0]['healthy'],
        tasks[1]['statuses'][0]['healthy'],
    )

    for is_healthy in health_flags:
        assert is_healthy
Esempio n. 31
0
def test_pod_port_communication():
    """Tests that one container can open a socket connection to the other container of the pod."""
    client = marathon.create_client()

    pod_id = "/pod-{}".format(uuid.uuid4().hex)

    pod_json = _pods_json('pod-ports.json')
    pod_json["id"] = pod_id

    # Container 2 sleeps for 2 seconds and then curls container 1. If curl exits with
    # code 7 the task completes, which leaves only 1 container running; otherwise it
    # starts its own HTTP server and 2 containers are expected to be running.
    probe_cmd = (
        'sleep 2; '
        'curl -m 2 localhost:$ENDPOINT_HTTPENDPOINT; '
        'if [ $? -eq 7 ]; then exit; fi; '
        '/opt/mesosphere/bin/python -m http.server $ENDPOINT_HTTPENDPOINT2'
    )
    pod_json['containers'][1]['exec']['command']['shell'] = probe_cmd
    client.add_pod(pod_json)
    shakedown.deployment_wait()

    task_count = len(get_pod_tasks(pod_id))
    assert task_count == 2
Esempio n. 32
0
def test_pod_port_communication():
    """Tests that one container can open a socket connection to the other container of the pod."""

    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    # The second container curls the first one after 2 seconds, exits when curl returns
    # code 7, and otherwise starts an HTTP server of its own.
    probe_cmd = (
        'sleep 2; '
        'curl -m 2 localhost:$ENDPOINT_HTTPENDPOINT; '
        'if [ $? -eq 7 ]; then exit; fi; '
        '/opt/mesosphere/bin/python -m http.server $ENDPOINT_HTTPENDPOINT2'
    )
    pod_def['containers'][1]['exec']['command']['shell'] = probe_cmd

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    task_count = len(common.get_pod_tasks(pod_id))
    assert task_count == 2, \
        "The number of tasks is {} after deployment, but 2 was expected".format(task_count)
Esempio n. 33
0
def test_pod_port_communication():
    """Tests that one container can open a socket connection to the other container of the pod."""

    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    # The second container curls the first one after 2 seconds, exits when curl returns
    # code 7, and otherwise starts an HTTP server of its own.
    probe_cmd = (
        'sleep 2; '
        'curl -m 2 localhost:$ENDPOINT_HTTPENDPOINT; '
        'if [ $? -eq 7 ]; then exit; fi; '
        '/opt/mesosphere/bin/python -m http.server $ENDPOINT_HTTPENDPOINT2'
    )
    pod_def['containers'][1]['exec']['command']['shell'] = probe_cmd

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    task_count = len(common.get_pod_tasks(pod_id))
    assert task_count == 2, \
        "The number of tasks is {} after deployment, but 2 was expected".format(task_count)
Esempio n. 34
0
def test_health_check():
    """Deploys the pod from 'pod-ports.json' and asserts that the first status of its first
    two tasks is healthy.
    """
    client = marathon.create_client()

    pod_id = "/pod-{}".format(uuid.uuid4().hex)

    pod_json = _pods_json('pod-ports.json')
    pod_json["id"] = pod_id

    client.add_pod(pod_json)
    shakedown.deployment_wait()

    tasks = get_pod_tasks(pod_id)
    # Both flags are read before any of them is asserted.
    health_flags = (
        tasks[0]['statuses'][0]['healthy'],
        tasks[1]['statuses'][0]['healthy'],
    )

    for is_healthy in health_flags:
        assert is_healthy
Esempio n. 35
0
def test_pod_comm_via_volume():
    """Confirm that one container can read data from a volume written by the other.

    Most of the scenario lives in `vol-pods.json`. The reading container dies if it
    cannot read the file, so the test passes when there are still 2 tasks roughly
    4 seconds after the deployment has finished.
    """
    client = marathon.create_client()

    pod_id = "/pod-{}".format(uuid.uuid4().hex)

    # The pod is set up so that c1 writes the file and c2 reads it after 2 seconds.
    # There are 2 tasks, unless the file doesn't exist, in which case there is 1.
    pod_json = _pods_json('vol-pods.json')
    pod_json["id"] = pod_id
    client.add_pod(pod_json)
    shakedown.deployment_wait()

    tasks = get_pod_tasks(pod_id)
    assert len(tasks) == 2, \
        "The number of tasks is {} after deployment, but 2 was expected".format(len(tasks))

    time.sleep(4)

    # Query the tasks again: re-checking the list fetched before the sleep could
    # never notice a reader container that died in the meantime.
    tasks = get_pod_tasks(pod_id)
    assert len(tasks) == 2, \
        "The number of tasks is {} after sleeping, but 2 was expected".format(len(tasks))
Esempio n. 36
0
def test_pin_pod():
    """Verify that a pod can be pinned to a host.

    The pod from `pod-ports.json` is pinned to the host returned by
    `ip_other_than_mom()`; after deployment the pod must have two tasks and its
    first instance must report that host as its agent hostname.
    """
    marathon_client = marathon.create_client()

    pod_id = "/pod-{}".format(uuid.uuid4().hex)

    pod_definition = _pods_json('pod-ports.json')
    pod_definition["id"] = pod_id

    target_host = ip_other_than_mom()
    pin_pod_to_host(pod_definition, target_host)
    marathon_client.add_pod(pod_definition)
    shakedown.deployment_wait()

    task_count = len(get_pod_tasks(pod_id))
    assert task_count == 2

    # The first pod in the listing is taken to be the one deployed above.
    first_instance = marathon_client.list_pod()[0]['instances'][0]
    assert first_instance['agentHostname'] == target_host
Esempio n. 37
0
def test_pod_comm_via_volume():
    """Confirm that one container can read data from a volume written by the other.

    Most of the scenario lives in `vol-pods.json`. The reading container dies if it
    cannot read the file, so the test passes when there are still 2 tasks roughly
    4 seconds after the deployment has finished.
    """
    client = marathon.create_client()

    pod_id = "/pod-{}".format(uuid.uuid4().hex)

    # The pod is set up so that c1 writes the file and c2 reads it after 2 seconds.
    # There are 2 tasks, unless the file doesn't exist, in which case there is 1.
    pod_json = _pods_json('vol-pods.json')
    pod_json["id"] = pod_id
    client.add_pod(pod_json)
    shakedown.deployment_wait()

    tasks = get_pod_tasks(pod_id)
    assert len(tasks) == 2, \
        "The number of tasks is {} after deployment, but 2 was expected".format(len(tasks))

    time.sleep(4)

    # Query the tasks again: re-checking the list fetched before the sleep could
    # never notice a reader container that died in the meantime.
    tasks = get_pod_tasks(pod_id)
    assert len(tasks) == 2, \
        "The number of tasks is {} after sleeping, but 2 was expected".format(len(tasks))
Esempio n. 38
0
def test_pod_container_network():
    """Verify a pod that uses a "container" network (the default network "dcos").

    After deployment the first task's first status must report a network named
    "dcos" with an IP address, and an HTTP request to port 80 on that address is
    checked through `common.assert_http_code`.
    """
    marathon_client = marathon.create_client()
    pod_id = "/pod-container-net-{}".format(uuid.uuid4().hex)
    pod_definition = _pods_json('pod-container-net.json')
    pod_definition["id"] = pod_id

    marathon_client.add_pod(pod_definition)
    shakedown.deployment_wait()

    first_task = get_pod_tasks(pod_id)[0]
    container_status = first_task['statuses'][0]['container_status']
    network_info = container_status['network_infos'][0]

    assert network_info['name'] == "dcos"

    container_ip = network_info['ip_addresses'][0]['ip_address']
    assert container_ip is not None

    common.assert_http_code("http://{}:80/".format(container_ip))
Esempio n. 39
0
def test_pin_pod():
    """Verify that a pod can be pinned to a host.

    The pod from `pod-ports.json` is pinned to the host returned by
    `ip_other_than_mom()`; after deployment the pod must have two tasks and its
    first instance must report that host as its agent hostname.
    """
    marathon_client = marathon.create_client()

    pod_id = "/pod-{}".format(uuid.uuid4().hex)

    pod_definition = _pods_json('pod-ports.json')
    pod_definition["id"] = pod_id

    target_host = ip_other_than_mom()
    pin_pod_to_host(pod_definition, target_host)
    marathon_client.add_pod(pod_definition)
    shakedown.deployment_wait()

    task_count = len(get_pod_tasks(pod_id))
    assert task_count == 2

    # The first pod in the listing is taken to be the one deployed above.
    first_instance = marathon_client.list_pod()[0]['instances'][0]
    assert first_instance['agentHostname'] == target_host
Esempio n. 40
0
def test_pod_with_container_network():
    """Deploy a pod on a "container" network and check that its HTTP endpoint is accessible."""

    pod_def = pods.container_net_pod()

    marathon_client = marathon.create_client()
    marathon_client.add_pod(pod_def)
    shakedown.deployment_wait()

    first_task = common.get_pod_tasks(pod_def["id"])[0]
    container_status = first_task['statuses'][0]['container_status']
    network_info = container_status['network_infos'][0]

    network_name = network_info['name']
    assert network_name == "dcos", \
        "The network name is {}, but 'dcos' was expected".format(network_name)

    container_ip = network_info['ip_addresses'][0]['ip_address']
    assert container_ip is not None, "No IP address has been assigned to the pod's container"

    # Probe port 80 on the container's own IP address.
    common.assert_http_code("http://{}:80/".format(container_ip))
Esempio n. 41
0
 def wait_for_status_network_info():
     """Look up the running-status network info of the pod's first task.

     `pod_id` is not a parameter; it is resolved from the enclosing scope.
     The looked-up value is discarded: the call only matters for the exception
     it raises when no status is in TASK_RUNNING state.
     """
     first_task = common.get_pod_tasks(pod_id)[0]
     # the following command throws exceptions if there are no tasks in TASK_RUNNING state
     common.running_status_network_info(first_task['statuses'])
Esempio n. 42
0
 def assert_all_pods_healthy(pod_id):
     """Assert that the running status of every task of pod `pod_id` is healthy."""
     for task in common.get_pod_tasks(pod_id):
         running_status = common.running_task_status(task['statuses'])
         assert running_status['healthy'], \
             "One of the pod's tasks (%s) is unhealthy" % (task['name'])
Esempio n. 43
0
 def wait_for_status_network_info():
     """Look up the running-status network info of the pod's first task.

     `pod_id` is not a parameter; it is resolved from the enclosing scope.
     The looked-up value is discarded: the call only matters for the exception
     it raises when no status is in TASK_RUNNING state.
     """
     first_task = common.get_pod_tasks(pod_id)[0]
     # the following command throws exceptions if there are no tasks in TASK_RUNNING state
     common.running_status_network_info(first_task['statuses'])
Esempio n. 44
0
def test_pod_with_persistent_volume_recovers():
    """Verify that a pod with persistent volumes recovers in place with its data intact.

    The pod is deployed, processes matching the `http.server` pattern are killed on
    the pod's host, and the test waits until two tasks with new task IDs exist.
    It then checks that the pod still reports the same IP address and that the file
    `foo` served from each container's volume path contains two `hello` lines.
    """
    pod_def = pods.persistent_volume_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    assert len(tasks) == 2, "The number of pod tasks is {}, but is expected to be 2".format(len(tasks))

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def wait_for_status_network_info():
        """Retry until the first task has running-status network info; return that task list."""
        current_tasks = common.get_pod_tasks(pod_id)
        # the following command throws exceptions if there are no tasks in TASK_RUNNING state
        common.running_status_network_info(current_tasks[0]['statuses'])
        return current_tasks

    # Read the host from the snapshot the retry loop just validated; the snapshot
    # taken right after deployment may not contain a TASK_RUNNING status yet.
    running_tasks = wait_for_status_network_info()
    host = common.running_status_network_info(running_tasks[0]['statuses'])['ip_addresses'][0]['ip_address']

    task_id1 = tasks[0]['id']
    task_id2 = tasks[1]['id']

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def kill_task(host, pattern):
        """Kill processes matching `pattern` on `host`; fail (and retry) if none was killed."""
        pids = common.kill_process_on_host(host, pattern)
        assert len(pids) != 0, "no task got killed on {} for pattern {}".format(host, pattern)

    kill_task(host, '[h]ttp\\.server')

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def wait_for_pod_recovery():
        """Retry until the pod has two tasks whose IDs both differ from the original ones."""
        tasks = common.get_pod_tasks(pod_id)
        assert len(tasks) == 2, "The number of tasks is {} after recovery, but 2 was expected".format(len(tasks))

        old_task_ids = [task_id1, task_id2]
        new_task_id1 = tasks[0]['id']
        new_task_id2 = tasks[1]['id']

        assert new_task_id1 not in old_task_ids, \
            "The task ID has not changed, and is still {}".format(new_task_id1)
        assert new_task_id2 not in old_task_ids, \
            "The task ID has not changed, and is still {}".format(new_task_id2)

    wait_for_pod_recovery()

    # Continue with the exact snapshot that passed the running-status check.
    tasks = wait_for_status_network_info()
    assert host == common.running_status_network_info(tasks[0]['statuses'])['ip_addresses'][0]['ip_address'], \
        "the pod has been restarted on another host"

    port1 = tasks[0]['discovery']['ports']['ports'][0]["number"]
    port2 = tasks[1]['discovery']['ports']['ports'][0]["number"]
    path1 = tasks[0]['container']['volumes'][0]['container_path']
    path2 = tasks[1]['container']['volumes'][0]['container_path']
    print(host, port1, port2, path1, path2)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_data(port, path):
        """Fetch `<path>/foo` from `host:port` via the master and expect two `hello` lines."""
        cmd = "curl {}:{}/{}/foo".format(host, port, path)
        run, data = shakedown.run_command_on_master(cmd)
        assert run, "{} did not succeed".format(cmd)
        assert 'hello\nhello\n' in data, "'hello\nhello\n' not found in '{}'".format(data)

    check_data(port1, path1)
    check_data(port2, path2)
Esempio n. 45
0
 def assert_all_pods_healthy(pod_id):
     """Assert that the running status of every task of pod `pod_id` is healthy."""
     for task in common.get_pod_tasks(pod_id):
         running_status = common.running_task_status(task['statuses'])
         assert running_status['healthy'], \
             "One of the pod's tasks (%s) is unhealthy" % (task['name'])
Esempio n. 46
0
def test_pod_with_persistent_volume_recovers():
    """Verify that a pod with persistent volumes recovers in place with its data intact.

    The container named 'container1' appends its task ID to the file `foo`. The test
    checks the file for the first task ID, kills processes matching the
    `http.server` pattern on the pod's host, waits until two tasks with new task IDs
    exist, and then checks that the pod still reports the same IP address and that
    the file contains the old task ID followed by the new one.
    """
    pod_def = pods.persistent_volume_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    assert len(tasks) == 2, "The number of pod tasks is {}, but is expected to be 2".format(len(tasks))

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def wait_for_status_network_info():
        """Retry until the first task has running-status network info; return that task list."""
        current_tasks = common.get_pod_tasks(pod_id)
        # the following command throws exceptions if there are no tasks in TASK_RUNNING state
        common.running_status_network_info(current_tasks[0]['statuses'])
        return current_tasks

    # Read the host from the snapshot the retry loop just validated; the snapshot
    # taken right after deployment may not contain a TASK_RUNNING status yet.
    running_tasks = wait_for_status_network_info()
    host = common.running_status_network_info(running_tasks[0]['statuses'])['ip_addresses'][0]['ip_address']

    task_id1 = tasks[0]['id']
    task_id2 = tasks[1]['id']
    port1 = tasks[0]['discovery']['ports']['ports'][0]["number"]
    path1 = tasks[0]['container']['volumes'][0]['container_path']

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_data(port, path, expected):
        """Fetch `<path>/foo` from `host:port` via the master and expect `expected` in it."""
        cmd = "curl {}:{}/{}/foo".format(host, port, path)
        run, data = run_command_on_master(cmd)
        assert run, "{} did not succeed".format(cmd)
        assert expected in data, "{} not found in '{}'".format(expected, data)

    # Container with the name 'container1' appends its taskId to the file. So we search for the
    # taskId of that container which is not always the tasks[0]
    expected_data1 = next((t['id'] for t in tasks if t['name'] == 'container1'), None)
    assert expected_data1, f"Hasn't found a container with the name 'container1' in the pod {tasks}"

    check_data(port1, path1, expected_data1)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def kill_task(host, pattern):
        """Kill processes matching `pattern` on `host`; fail (and retry) if none was killed."""
        pids = common.kill_process_on_host(host, pattern)
        assert len(pids) != 0, "no task got killed on {} for pattern {}".format(host, pattern)

    kill_task(host, '[h]ttp\\.server')

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def wait_for_pod_recovery():
        """Retry until the pod has two tasks whose IDs both differ from the original ones."""
        tasks = common.get_pod_tasks(pod_id)
        assert len(tasks) == 2, "The number of tasks is {} after recovery, but 2 was expected".format(len(tasks))

        old_task_ids = [task_id1, task_id2]
        new_task_id1 = tasks[0]['id']
        new_task_id2 = tasks[1]['id']

        assert new_task_id1 not in old_task_ids, \
            "The task ID has not changed, and is still {}".format(new_task_id1)
        assert new_task_id2 not in old_task_ids, \
            "The task ID has not changed, and is still {}".format(new_task_id2)

    wait_for_pod_recovery()

    # Continue with the exact snapshot that passed the running-status check.
    tasks = wait_for_status_network_info()
    assert host == common.running_status_network_info(tasks[0]['statuses'])['ip_addresses'][0]['ip_address'], \
        "the pod has been restarted on another host"

    port1 = tasks[0]['discovery']['ports']['ports'][0]["number"]
    port2 = tasks[1]['discovery']['ports']['ports'][0]["number"]
    path1 = tasks[0]['container']['volumes'][0]['container_path']
    path2 = tasks[1]['container']['volumes'][0]['container_path']
    logger.info('Deployd two containers on {}:{}/{} and {}:{}/{}'.format(host, port1, path1, host, port2, path2))

    # Container with the name 'container1' appends its taskId to the file. So we search for the
    # taskId of that container which is not always the tasks[0]
    expected_data2 = next((t['id'] for t in tasks if t['name'] == 'container1'), None)
    assert expected_data2, f"Hasn't found a container with the name 'container1' in the pod {tasks}"

    check_data(port1, path1, f"{expected_data1}\n{expected_data2}\n")
    check_data(port2, path2, f"{expected_data1}\n{expected_data2}\n")
Esempio n. 47
0
def test_pod_with_persistent_volume_recovers():
    """Verify that a pod with persistent volumes recovers in place with its data intact.

    The pod is deployed, processes matching the `http.server` pattern are killed on
    the pod's host, and the test waits until two tasks with new task IDs exist.
    It then checks that the pod still reports the same IP address and that the file
    `foo` served from each container's volume path contains two `hello` lines.
    """
    pod_def = pods.persistent_volume_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    assert len(tasks) == 2, \
        "The number of pod tasks is {}, but is expected to be 2".format(len(tasks))

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def wait_for_status_network_info():
        """Retry until the first task has running-status network info; return that task list."""
        current_tasks = common.get_pod_tasks(pod_id)
        # the following command throws exceptions if there are no tasks in TASK_RUNNING state
        common.running_status_network_info(current_tasks[0]['statuses'])
        return current_tasks

    # Read the host from the snapshot the retry loop just validated; the snapshot
    # taken right after deployment may not contain a TASK_RUNNING status yet.
    running_tasks = wait_for_status_network_info()
    host = common.running_status_network_info(
        running_tasks[0]['statuses'])['ip_addresses'][0]['ip_address']

    task_id1 = tasks[0]['id']
    task_id2 = tasks[1]['id']

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def kill_task(host, pattern):
        """Kill processes matching `pattern` on `host`; fail (and retry) if none was killed."""
        pids = common.kill_process_on_host(host, pattern)
        assert len(pids) != 0, \
            "no task got killed on {} for pattern {}".format(host, pattern)

    kill_task(host, '[h]ttp\\.server')

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def wait_for_pod_recovery():
        """Retry until the pod has two tasks whose IDs both differ from the original ones."""
        tasks = common.get_pod_tasks(pod_id)
        assert len(tasks) == 2, \
            "The number of tasks is {} after recovery, but 2 was expected".format(len(tasks))

        old_task_ids = [task_id1, task_id2]
        new_task_id1 = tasks[0]['id']
        new_task_id2 = tasks[1]['id']

        assert new_task_id1 not in old_task_ids, \
            "The task ID has not changed, and is still {}".format(new_task_id1)
        assert new_task_id2 not in old_task_ids, \
            "The task ID has not changed, and is still {}".format(new_task_id2)

    wait_for_pod_recovery()

    # Continue with the exact snapshot that passed the running-status check.
    tasks = wait_for_status_network_info()
    assert host == common.running_status_network_info(tasks[0]['statuses'])['ip_addresses'][0]['ip_address'], \
        "the pod has been restarted on another host"

    port1 = tasks[0]['discovery']['ports']['ports'][0]["number"]
    port2 = tasks[1]['discovery']['ports']['ports'][0]["number"]
    path1 = tasks[0]['container']['volumes'][0]['container_path']
    path2 = tasks[1]['container']['volumes'][0]['container_path']
    print(host, port1, port2, path1, path2)

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_data(port, path):
        """Fetch `<path>/foo` from `host:port` via the master and expect two `hello` lines."""
        cmd = "curl {}:{}/{}/foo".format(host, port, path)
        run, data = shakedown.run_command_on_master(cmd)
        assert run, "{} did not succeed".format(cmd)
        assert 'hello\nhello\n' in data, \
            "'hello\nhello\n' not found in '{}'".format(data)

    check_data(port1, path1)
    check_data(port2, path2)