def test_pod_health_failed_check():
    """Deploys a pod with correct health checks, then partitions the network
    and verifies that the tasks get restarted with new task IDs.
    """
    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    # Pin the pod to a fixed agent so the iptables manipulation below
    # targets the host that actually runs the containers.
    target_host = common.ip_other_than_mom()
    common.pin_pod_to_host(pod_def, target_host)

    marathon_client = marathon.create_client()
    marathon_client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    original_tasks = common.get_pod_tasks(pod_id)
    initial_id1 = original_tasks[0]['id']
    initial_id2 = original_tasks[1]['id']

    # Look up the host port allocated to the first container's endpoint.
    first_pod = marathon_client.list_pod()[0]
    first_container = first_pod['instances'][0]['containers'][0]
    health_port = first_container['endpoints'][0]['allocatedHostPort']

    # Drop inbound traffic on that port for 7 seconds so the health checks
    # fail and Marathon restarts the pod.
    common.block_iptable_rules_for_seconds(target_host, health_port, 7,
                                           block_input=True, block_output=False)
    common.deployment_wait(service_id=pod_id)

    for new_task in common.get_pod_tasks(pod_id):
        new_task_id = new_task['id']
        assert new_task_id != initial_id1, f"Task {new_task_id} has not been restarted"  # NOQA E999
        assert new_task_id != initial_id2, f"Task {new_task_id} has not been restarted"
def test_two_pods_with_shared_volume():
    """Checks that one container can read data from a shared ephemeral
    volume written by the sibling container.

    The reading container exits if the file is unreadable, so observing
    both tasks still present 4 seconds after deployment means the read
    succeeded.
    """
    pod_def = pods.ephemeral_volume_pod()
    pod_id = pod_def['id']

    marathon.create_client().add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    task_count = len(common.get_pod_tasks(pod_id))
    assert task_count == 2, "The number of tasks is {} after deployment, but 2 was expected".format(task_count)

    # Give the reading container time to die if it could not see the file.
    time.sleep(4)

    task_count = len(common.get_pod_tasks(pod_id))
    assert task_count == 2, "The number of tasks is {} after sleeping, but 2 was expected".format(task_count)
def test_health_failed_check():
    """Deploys a pod with good health checks, then partitions the network
    and verifies the tasks come back with new task IDs.
    """
    client = marathon.create_client()

    # BUG FIX: the original used "/pod-ken".format(uuid.uuid4().hex) — the
    # literal has no {} placeholder, so every run reused the same pod id.
    pod_id = "/pod-{}".format(uuid.uuid4().hex)

    pod_json = _pods_json('pod-ports.json')
    pod_json["id"] = pod_id
    host = ip_other_than_mom()
    pin_pod_to_host(pod_json, host)
    client.add_pod(pod_json)
    shakedown.deployment_wait()

    tasks = get_pod_tasks(pod_id)
    initial_id1 = tasks[0]['id']
    initial_id2 = tasks[1]['id']

    # Find the allocated host port of the first container so we can block it.
    pod = client.list_pod()[0]
    container1 = pod['instances'][0]['containers'][0]
    port = container1['endpoints'][0]['allocatedHostPort']

    # Block the health-check port long enough for the checks to fail, then
    # restore connectivity and wait for Marathon to redeploy the pod.
    save_iptables(host)
    block_port(host, port)
    time.sleep(7)
    restore_iptables(host)
    shakedown.deployment_wait()

    tasks = get_pod_tasks(pod_id)
    for task in tasks:
        assert task['id'] != initial_id1, "Task {} has not been restarted".format(task['id'])
        assert task['id'] != initial_id2, "Task {} has not been restarted".format(task['id'])
def test_pod_restarts_on_nonzero_exit_code():
    """Verifies that a pod gets restarted when one of its containers exits
    with a non-zero code: after the restart both tasks carry new IDs.
    """
    pod_def = pods.simple_pod()
    pod_id = pod_def['id']
    pod_def["scaling"]["instances"] = 1
    # Make one container exit non-zero shortly after start.
    pod_def['containers'][0]['exec']['command']['shell'] = 'sleep 5; echo -n leaving; exit 2'

    marathon.create_client().add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    deployed_tasks = common.get_pod_tasks(pod_id)
    initial_id1 = deployed_tasks[0]['id']
    initial_id2 = deployed_tasks[1]['id']

    time.sleep(6)  # 1 sec past the 5 sec sleep in one of the container's command

    for task in common.get_pod_tasks(pod_id):
        assert task['id'] != initial_id1, "Got the same task ID"
        assert task['id'] != initial_id2, "Got the same task ID"
def test_pod_health_failed_check():
    """Deploys a pod with correct health checks, then partitions the network
    and verifies that the tasks get restarted with new task IDs.
    """
    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    agent = common.ip_other_than_mom()
    common.pin_pod_to_host(pod_def, agent)

    marathon_client = marathon.create_client()
    marathon_client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    deployed_tasks = common.get_pod_tasks(pod_id)
    initial_id1 = deployed_tasks[0]['id']
    initial_id2 = deployed_tasks[1]['id']

    # Host port allocated to the first container's endpoint.
    pod_state = marathon_client.list_pod()[0]
    endpoint = pod_state['instances'][0]['containers'][0]['endpoints'][0]
    port = endpoint['allocatedHostPort']

    # Block the health-check port for 7 seconds so the checks fail and
    # Marathon restarts the pod, then restore connectivity.
    common.save_iptables(agent)
    common.block_port(agent, port)
    time.sleep(7)
    common.restore_iptables(agent)
    common.deployment_wait(service_id=pod_id)

    for task in common.get_pod_tasks(pod_id):
        assert task['id'] != initial_id1, "One of the tasks has not been restarted"
        assert task['id'] != initial_id2, "One of the tasks has not been restarted"
def test_pod_restarts_on_nonzero_exit():
    """ Confirm that pods will relaunch if 1 of the containers exits non-zero.
    2 new tasks with new task_ids will result.
    """
    client = marathon.create_client()
    pod_id = "/pod-{}".format(uuid.uuid4().hex)

    pod_json = _pods_json()
    pod_json["id"] = pod_id
    pod_json["scaling"]["instances"] = 1
    # Make one container exit non-zero shortly after start.
    pod_json['containers'][0]['exec']['command']['shell'] = 'sleep 5; echo -n leaving; exit 2'
    client.add_pod(pod_json)
    shakedown.deployment_wait()

    # BUG FIX: this assignment was commented out, so `tasks` was referenced
    # below before ever being defined (NameError) and nothing was tested.
    tasks = get_pod_tasks(pod_id)
    initial_id1 = tasks[0]['id']
    initial_id2 = tasks[1]['id']

    time.sleep(6)  # 1 sec past the 5 sec sleep in test containers command

    tasks = get_pod_tasks(pod_id)
    for task in tasks:
        assert task['id'] != initial_id1, "Got the same task ID"
        assert task['id'] != initial_id2, "Got the same task ID"
def test_pod_with_persistent_volume():
    """Deploys a two-container pod sharing a persistent volume and verifies
    the written data is served over HTTP by both containers."""
    pod_def = pods.persistent_volume_pod()
    pod_id = pod_def['id']

    marathon.create_client().add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    pod_tasks = common.get_pod_tasks(pod_id)
    network_info = common.running_status_network_info(pod_tasks[0]['statuses'])
    host = network_info['ip_addresses'][0]['ip_address']

    ports = [t['discovery']['ports']['ports'][0]["number"] for t in pod_tasks[:2]]
    paths = [t['container']['volumes'][0]['container_path'] for t in pod_tasks[:2]]
    print(host, ports[0], ports[1], paths[0], paths[1])

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=60,
                    retry_on_exception=common.ignore_exception)
    def check_http_endpoint(port, path):
        cmd = "curl {}:{}/{}/foo".format(host, port, path)
        succeeded, data = shakedown.run_command_on_master(cmd)
        assert succeeded, "{} did not succeed".format(cmd)
        assert data == 'hello\n', "'{}' was not equal to hello\\n".format(data)

    for endpoint_port, endpoint_path in zip(ports, paths):
        check_http_endpoint(endpoint_port, endpoint_path)
def test_pod_with_container_network():
    """Tests creation of a pod with a "container" network, and its HTTP
    endpoint accessibility."""
    pod_def = pods.container_net_pod()
    pod_id = pod_def['id']

    # In strict mode all tasks are started as user `nobody` by default and
    # `nobody` doesn't have permissions to write to /var/log within the container.
    if shakedown.ee_version() == 'strict':
        pod_def['user'] = '******'
        common.add_dcos_marathon_user_acls()

    marathon.create_client().add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    first_task = common.get_pod_tasks(pod_id)[0]
    net_info = first_task['statuses'][0]['container_status']['network_infos'][0]
    assert net_info['name'] == "dcos", \
        "The network name is {}, but 'dcos' was expected".format(net_info['name'])

    container_ip = net_info['ip_addresses'][0]['ip_address']
    assert container_ip is not None, "No IP address has been assigned to the pod's container"

    common.assert_http_code("http://{}:80/".format(container_ip))
def test_pod_with_container_bridge_network():
    """Tests creation of a pod with a "container/bridge" network, and its
    HTTP endpoint accessibility."""
    pod_def = pods.container_bridge_pod()
    pod_id = pod_def['id']

    # In strict mode all tasks are started as user `nobody` by default and
    # `nobody` doesn't have permissions to write to /var/log within the container.
    if shakedown.ee_version() == 'strict':
        pod_def['user'] = '******'
        common.add_dcos_marathon_user_acls()

    marathon_client = marathon.create_client()
    marathon_client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    bridge_task = common.get_pod_tasks(pod_id)[0]
    net_info = bridge_task['statuses'][0]['container_status']['network_infos'][0]
    assert net_info['name'] == "mesos-bridge", \
        "The network is {}, but mesos-bridge was expected".format(net_info['name'])

    # The agent IP:port is routed to the bridge IP:port; the agent IP is hard
    # to get directly, so translate it from the slave_id.
    host_port = bridge_task['discovery']['ports']['ports'][0]['number']
    agent_ip = common.agent_hostname_by_id(bridge_task['slave_id'])
    assert agent_ip is not None, "Failed to get the agent IP address"

    container_ip = net_info['ip_addresses'][0]['ip_address']
    assert agent_ip != container_ip, "The container IP address is the same as the agent one"

    common.assert_http_code("http://{}:{}/".format(agent_ip, host_port))
def test_pod_container_bridge():
    """ Tests a pod using the "container/bridge" network, and its HTTP
    endpoint accessibility.
    """
    client = marathon.create_client()
    pod_id = "/pod-container-bridge-{}".format(uuid.uuid4().hex)

    pod_json = _pods_json('pod-container-bridge.json')
    pod_json["id"] = pod_id
    client.add_pod(pod_json)
    shakedown.deployment_wait()

    task = get_pod_tasks(pod_id)[0]
    network_info = task['statuses'][0]['container_status']['network_infos'][0]
    assert network_info['name'] == "mesos-bridge"

    # port on the host
    port = task['discovery']['ports']['ports'][0]['number']

    # the agent IP:port will be routed to the bridge IP:port
    # test against the agent_ip, however it is hard to get.. translating from
    # slave_id
    agent_ip = common.agent_hostname_by_id(task['slave_id'])
    assert agent_ip is not None

    container_ip = network_info['ip_addresses'][0]['ip_address']
    assert container_ip is not None
    assert agent_ip != container_ip

    # BUG FIX: the url assignment was commented out, so assert_http_code(url)
    # raised a NameError before the endpoint was ever checked. (The docstring
    # also wrongly described this as a "container"/"dcos" network test.)
    url = "http://{}:{}/".format(agent_ip, port)
    common.assert_http_code(url)
def test_pod_with_container_bridge_network():
    """Tests creation of a pod with a "container/bridge" network, and its
    HTTP endpoint accessibility."""
    pod_def = pods.container_bridge_pod()
    pod_id = pod_def['id']

    # In strict mode all tasks are started as user `nobody` by default and
    # `nobody` doesn't have permissions to write to /var/log within the container.
    if shakedown.ee_version() == 'strict':
        pod_def['user'] = '******'
        common.add_dcos_marathon_user_acls()

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    nginx_task = common.task_by_name(common.get_pod_tasks(pod_id), "nginx")
    net_info = common.running_status_network_info(nginx_task['statuses'])
    assert net_info['name'] == "mesos-bridge", \
        "The network is {}, but mesos-bridge was expected".format(net_info['name'])

    # get the port on the host
    host_port = nginx_task['discovery']['ports']['ports'][0]['number']

    # the agent IP:port will be routed to the bridge IP:port; the agent IP is
    # hard to get directly, so translate it from the slave_id
    agent_ip = common.agent_hostname_by_id(nginx_task['slave_id'])
    assert agent_ip is not None, "Failed to get the agent IP address"

    bridge_ip = net_info['ip_addresses'][0]['ip_address']
    assert agent_ip != bridge_ip, "The container IP address is the same as the agent one"

    common.assert_http_code("http://{}:{}/".format(agent_ip, host_port))
def test_pod_with_persistent_volume():
    """Deploys a persistent-volume pod and verifies that the data written by
    'container1' is served over HTTP from both containers."""
    pod_def = pods.persistent_volume_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    host = common.running_status_network_info(tasks[0]['statuses'])['ip_addresses'][0]['ip_address']

    # Container with the name 'container1' appends its taskId to the file. So we search for the
    # taskId of that container which is not always the tasks[0]
    expected_data = next((t['id'] for t in tasks if t['name'] == 'container1'), None)
    assert expected_data, f"Hasn't found a container with the name 'container1' in the pod {tasks}"

    port1 = tasks[0]['discovery']['ports']['ports'][0]["number"]
    port2 = tasks[1]['discovery']['ports']['ports'][0]["number"]
    path1 = tasks[0]['container']['volumes'][0]['container_path']
    path2 = tasks[1]['container']['volumes'][0]['container_path']
    logger.info('Deployd two containers on {}:{}/{} and {}:{}/{}'.format(host, port1, path1, host, port2, path2))

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=60, retry_on_exception=common.ignore_exception)
    def check_http_endpoint(port, path, expected):
        cmd = "curl {}:{}/{}/foo".format(host, port, path)
        run, data = run_command_on_master(cmd)
        assert run, "{} did not succeed".format(cmd)
        # BUG FIX: the format arguments were swapped, producing a backwards
        # failure message ("'<data>' was not found in '<expected>'").
        assert expected in data, "'{}' was not found in '{}'".format(expected, data)

    check_http_endpoint(port1, path1, expected_data)
    check_http_endpoint(port2, path2, expected_data)
def test_pod_with_container_network():
    """Tests creation of a pod with a "container" network, and its HTTP
    endpoint accessibility."""
    pod_def = pods.container_net_pod()
    pod_id = pod_def['id']

    # In strict mode all tasks are started as user `nobody` by default and
    # `nobody` doesn't have permissions to write to /var/log within the container.
    if shakedown.ee_version() == 'strict':
        pod_def['user'] = '******'
        common.add_dcos_marathon_user_acls()

    marathon.create_client().add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    nginx_task = common.task_by_name(common.get_pod_tasks(pod_id), "nginx")
    net_info = common.running_status_network_info(nginx_task['statuses'])
    assert net_info['name'] == "dcos", \
        "The network name is {}, but 'dcos' was expected".format(net_info['name'])

    pod_ip = net_info['ip_addresses'][0]['ip_address']
    assert pod_ip is not None, "No IP address has been assigned to the pod's container"

    common.assert_http_code("http://{}:80/".format(pod_ip))
def test_pod_with_persistent_volume():
    """Verifies that both containers of a persistent-volume pod expose the
    volume's data over HTTP."""
    pod_def = pods.persistent_volume_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    first, second = tasks[0], tasks[1]
    host = common.running_status_network_info(first['statuses'])['ip_addresses'][0]['ip_address']

    port1 = first['discovery']['ports']['ports'][0]["number"]
    port2 = second['discovery']['ports']['ports'][0]["number"]
    path1 = first['container']['volumes'][0]['container_path']
    path2 = second['container']['volumes'][0]['container_path']
    print(host, port1, port2, path1, path2)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=60,
                    retry_on_exception=common.ignore_exception)
    def check_http_endpoint(port, path):
        cmd = "curl {}:{}/{}/foo".format(host, port, path)
        ok, data = shakedown.run_command_on_master(cmd)
        assert ok, "{} did not succeed".format(cmd)
        assert data == 'hello\n', "'{}' was not equal to hello\\n".format(data)

    check_http_endpoint(port1, path1)
    check_http_endpoint(port2, path2)
def test_pod_with_persistent_volume():
    """Deploys a persistent-volume pod and curls both containers' HTTP
    endpoints for the written file."""
    pod_def = pods.persistent_volume_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    host = common.running_status_network_info(tasks[0]['statuses'])['ip_addresses'][0]['ip_address']

    ports = (tasks[0]['discovery']['ports']['ports'][0]["number"],
             tasks[1]['discovery']['ports']['ports'][0]["number"])
    dirs = (tasks[0]['container']['volumes'][0]['container_path'],
            tasks[1]['container']['volumes'][0]['container_path'])
    print(host, ports[0], ports[1], dirs[0], dirs[1])

    # Short grace period before curling the endpoints.
    time.sleep(1)

    for port, directory in zip(ports, dirs):
        cmd = "curl {}:{}/{}/foo".format(host, port, directory)
        ok, data = shakedown.run_command_on_master(cmd)
        assert ok, "{} did not succeed".format(cmd)
        assert data == 'hello\n', "'{}' was not equal to hello\\n".format(data)
def test_pod_with_container_bridge_network():
    """Tests creation of a pod with a "container/bridge" network, and its
    HTTP endpoint accessibility."""
    pod_def = pods.container_bridge_pod()

    client = marathon.create_client()
    client.add_pod(pod_def)
    shakedown.deployment_wait()

    task = common.get_pod_tasks(pod_def["id"])[0]
    net = task['statuses'][0]['container_status']['network_infos'][0]
    assert net['name'] == "mesos-bridge", \
        "The network is {}, but mesos-bridge was expected".format(net['name'])

    # get the port on the host
    port = task['discovery']['ports']['ports'][0]['number']

    # the agent IP:port will be routed to the bridge IP:port; the agent IP
    # is hard to get directly, so translate it from the slave_id
    agent_ip = common.agent_hostname_by_id(task['slave_id'])
    assert agent_ip is not None, "Failed to get the agent IP address"

    container_ip = net['ip_addresses'][0]['ip_address']
    assert agent_ip != container_ip, "The container IP address is the same as the agent one"

    url = "http://{}:{}/".format(agent_ip, port)
    common.assert_http_code(url)
def test_two_pods_with_shared_volume():
    """Confirms one container reads data a sibling container wrote to a
    shared ephemeral volume.  The reader dies if it cannot read the file,
    so both tasks surviving 4 seconds means success."""
    pod_def = pods.ephemeral_volume_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    deployed = common.get_pod_tasks(pod_id)
    assert len(deployed) == 2, "The number of tasks is {} after deployment, but 2 was expected".format(len(deployed))

    time.sleep(4)

    survivors = common.get_pod_tasks(pod_id)
    assert len(survivors) == 2, "The number of tasks is {} after sleeping, but 2 was expected".format(len(survivors))
def wait_for_pod_recovery():
    """Asserts the pod came back with 2 tasks whose IDs are both new.

    NOTE(review): relies on `pod_id`, `task_id1` and `task_id2` from the
    enclosing scope — confirm it stays nested in the test that defines them.
    """
    tasks = common.get_pod_tasks(pod_id)
    task_count = len(tasks)
    assert task_count == 2, "The number of tasks is {} after recovery, but 2 was expected".format(task_count)

    old_task_ids = [task_id1, task_id2]
    new_task_id1, new_task_id2 = tasks[0]['id'], tasks[1]['id']

    assert new_task_id1 not in old_task_ids, \
        "The task ID has not changed, and is still {}".format(new_task_id1)
    assert new_task_id2 not in old_task_ids, \
        "The task ID has not changed, and is still {}".format(new_task_id2)
def test_pod_health_check():
    """Tests that health checks work for pods."""
    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    marathon.create_client().add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    for task in common.get_pod_tasks(pod_id):
        is_healthy = common.running_task_status(task['statuses'])['healthy']
        assert is_healthy, "One of the pod's tasks (%s) is unhealthy" % (task['name'])
def test_pod_restarts_on_nonzero_exit_code():
    """Verifies that a pod gets restarted when one of its containers exits
    with a non-zero code, producing two tasks with new IDs."""
    pod_def = pods.simple_pod()
    pod_id = pod_def['id']
    pod_def["scaling"]["instances"] = 1
    failing_cmd = 'sleep 5; echo -n leaving; exit 2'
    pod_def['containers'][0]['exec']['command']['shell'] = failing_cmd

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    before = common.get_pod_tasks(pod_id)
    initial_id1, initial_id2 = before[0]['id'], before[1]['id']

    # 1 sec past the 5 sec sleep in one of the container's command
    time.sleep(6)

    for task in common.get_pod_tasks(pod_id):
        assert task['id'] != initial_id1, "Got the same task ID"
        assert task['id'] != initial_id2, "Got the same task ID"
def test_pod_health_check():
    """Tests that health checks work for pods."""
    pod_def = pods.ports_pod()

    client = marathon.create_client()
    client.add_pod(pod_def)
    shakedown.deployment_wait()

    tasks = common.get_pod_tasks(pod_def["id"])
    # Evaluate both containers' health flags before asserting either.
    health_flags = [tasks[0]['statuses'][0]['healthy'],
                    tasks[1]['statuses'][0]['healthy']]
    for flag in health_flags:
        assert flag, "One of the pod's tasks is unhealthy"
def test_pod_restarts_on_nonzero_exit():
    """ Confirm that pods will relaunch if 1 of the containers exits non-zero.
    2 new tasks with new task_ids will result.
    """
    client = marathon.create_client()
    pod_id = "/pod-{}".format(uuid.uuid4().hex)

    pod_json = _pods_json()
    pod_json["id"] = pod_id
    pod_json["scaling"]["instances"] = 1
    # Make one container exit non-zero shortly after start.
    pod_json['containers'][0]['exec']['command']['shell'] = 'sleep 5; echo -n leaving; exit 2'
    client.add_pod(pod_json)
    shakedown.deployment_wait()

    # BUG FIX: this assignment was commented out, so `tasks` was referenced
    # below before ever being defined (NameError) and nothing was tested.
    tasks = get_pod_tasks(pod_id)
    initial_id1 = tasks[0]['id']
    initial_id2 = tasks[1]['id']

    time.sleep(6)  # 1 sec past the 5 sec sleep in test containers command

    tasks = get_pod_tasks(pod_id)
    for task in tasks:
        assert task['id'] != initial_id1, "Got the same task ID"
        assert task['id'] != initial_id2, "Got the same task ID"
def test_pin_pod():
    """Tests that a pod can be pinned to a specific host."""
    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    target_host = common.ip_other_than_mom()
    common.pin_pod_to_host(pod_def, target_host)

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    assert len(tasks) == 2, "The number of tasks is {} after deployment, but 2 was expected".format(len(tasks))

    pinned = client.list_pod()[0]
    assert pinned['instances'][0]['agentHostname'] == target_host, \
        "The pod didn't get pinned to {}".format(target_host)
def test_pod_port_communication():
    """ Test that 1 container can establish a socket connection to the other
    container in the same pod.
    """
    client = marathon.create_client()
    pod_id = "/pod-{}".format(uuid.uuid4().hex)

    pod_json = _pods_json('pod-ports.json')
    pod_json["id"] = pod_id
    # sleeps 2, then container 2 checks communication with container 1.
    # if that timesout, the task completes resulting in 1 container running
    # otherwise it is expected that 2 containers are running.
    pod_json['containers'][1]['exec']['command']['shell'] = 'sleep 2; curl -m 2 localhost:$ENDPOINT_HTTPENDPOINT; if [ $? -eq 7 ]; then exit; fi; /opt/mesosphere/bin/python -m http.server $ENDPOINT_HTTPENDPOINT2'  # NOQA
    client.add_pod(pod_json)
    shakedown.deployment_wait()

    running_tasks = get_pod_tasks(pod_id)
    assert len(running_tasks) == 2
def test_health_check():
    """ Tests that health checks work in pods. """
    client = marathon.create_client()
    pod_id = "/pod-{}".format(uuid.uuid4().hex)

    pod_json = _pods_json('pod-ports.json')
    pod_json["id"] = pod_id
    client.add_pod(pod_json)
    shakedown.deployment_wait()

    tasks = get_pod_tasks(pod_id)
    # Evaluate both containers' health flags before asserting either.
    health_flags = [tasks[0]['statuses'][0]['healthy'],
                    tasks[1]['statuses'][0]['healthy']]
    assert health_flags[0]
    assert health_flags[1]
def test_pod_port_communication():
    """ Test that 1 container can establish a socket connection to the other
    container in the same pod.
    """
    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    # Container 2 sleeps, probes container 1 over its endpoint and exits if
    # the connection fails (curl exit code 7); otherwise it starts serving.
    probe_cmd = ('sleep 2; '
                 'curl -m 2 localhost:$ENDPOINT_HTTPENDPOINT; '
                 'if [ $? -eq 7 ]; then exit; fi; '
                 '/opt/mesosphere/bin/python -m http.server $ENDPOINT_HTTPENDPOINT2')
    pod_def['containers'][1]['exec']['command']['shell'] = probe_cmd

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    assert len(tasks) == 2, "The number of tasks is {} after deployment, but 2 was expected".format(len(tasks))
def test_pod_comm_via_volume():
    """ Confirms that 1 container can read data from a volume that was
    written from the other container.  Most of the test is in the
    `vol-pods.json`.  The reading container will die if it can't read
    the file. So if there are 2 tasks after 4 secs were are good.
    """
    client = marathon.create_client()
    pod_id = "/pod-{}".format(uuid.uuid4().hex)

    # pods setup to have c1 write, ct2 read after 2 sec
    # there are 2 tasks, unless the file doesnt' exist, then there is 1
    pod_json = _pods_json('vol-pods.json')
    pod_json["id"] = pod_id
    client.add_pod(pod_json)
    shakedown.deployment_wait()

    tasks = get_pod_tasks(pod_id)
    assert len(tasks) == 2

    time.sleep(4)

    # BUG FIX: re-fetch the task list after the sleep; the original re-checked
    # the stale pre-sleep list, so a reader dying during the 4s window could
    # never be detected.
    tasks = get_pod_tasks(pod_id)
    assert len(tasks) == 2
def test_pin_pod():
    """ Tests that we can pin a pod to a host. """
    client = marathon.create_client()
    pod_id = "/pod-{}".format(uuid.uuid4().hex)

    pod_json = _pods_json('pod-ports.json')
    pod_json["id"] = pod_id

    # Constrain the pod to a concrete agent and deploy it there.
    chosen_host = ip_other_than_mom()
    pin_pod_to_host(pod_json, chosen_host)
    client.add_pod(pod_json)
    shakedown.deployment_wait()

    assert len(get_pod_tasks(pod_id)) == 2

    deployed = client.list_pod()[0]
    assert deployed['instances'][0]['agentHostname'] == chosen_host
def test_pod_container_network():
    """ Tests using "container" network (using default network "dcos") """
    client = marathon.create_client()
    pod_id = "/pod-container-net-{}".format(uuid.uuid4().hex)

    pod_json = _pods_json('pod-container-net.json')
    pod_json["id"] = pod_id
    client.add_pod(pod_json)
    shakedown.deployment_wait()

    first_task = get_pod_tasks(pod_id)[0]
    net_info = first_task['statuses'][0]['container_status']['network_infos'][0]
    assert net_info['name'] == "dcos"

    container_ip = net_info['ip_addresses'][0]['ip_address']
    assert container_ip is not None

    common.assert_http_code("http://{}:80/".format(container_ip))
def test_pod_with_container_network():
    """Tests creation of a pod with a "container" network, and its HTTP
    endpoint accessibility."""
    pod_def = pods.container_net_pod()

    client = marathon.create_client()
    client.add_pod(pod_def)
    shakedown.deployment_wait()

    first_task = common.get_pod_tasks(pod_def["id"])[0]
    net_info = first_task['statuses'][0]['container_status']['network_infos'][0]
    assert net_info['name'] == "dcos", \
        "The network name is {}, but 'dcos' was expected".format(net_info['name'])

    container_ip = net_info['ip_addresses'][0]['ip_address']
    assert container_ip is not None, "No IP address has been assigned to the pod's container"

    common.assert_http_code("http://{}:80/".format(container_ip))
def wait_for_status_network_info():
    """Raises until the pod's first task reports TASK_RUNNING network info.

    NOTE(review): relies on `pod_id` from the enclosing scope — confirm it
    stays nested in the test that defines it.
    """
    statuses = common.get_pod_tasks(pod_id)[0]['statuses']
    # running_status_network_info throws if no task is in TASK_RUNNING state,
    # which makes this helper suitable for retry polling.
    common.running_status_network_info(statuses)
def assert_all_pods_healthy(pod_id):
    """Asserts that every task of the given pod reports a healthy status."""
    for task in common.get_pod_tasks(pod_id):
        is_healthy = common.running_task_status(task['statuses'])['healthy']
        assert is_healthy, "One of the pod's tasks (%s) is unhealthy" % (task['name'])
def test_pod_with_persistent_volume_recovers():
    """Kills the pod's http.server process and verifies that the pod recovers
    on the same host with two fresh task IDs, and that the persistent volume
    still holds the previously written data.
    """
    pod_def = pods.persistent_volume_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    assert len(tasks) == 2, "The number of pod tasks is {}, but is expected to be 2".format(len(tasks))

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def wait_for_status_network_info():
        tasks = common.get_pod_tasks(pod_id)
        # the following command throws exceptions if there are no tasks in TASK_RUNNING state
        common.running_status_network_info(tasks[0]['statuses'])

    wait_for_status_network_info()
    host = common.running_status_network_info(tasks[0]['statuses'])['ip_addresses'][0]['ip_address']
    # Remember the pre-kill task IDs so recovery can be detected below.
    task_id1 = tasks[0]['id']
    task_id2 = tasks[1]['id']

    # Retry killing: covers the window where the server process has not
    # started yet and no PID matches the pattern.
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def kill_task(host, pattern):
        pids = common.kill_process_on_host(host, pattern)
        assert len(pids) != 0, "no task got killed on {} for pattern {}".format(host, pattern)

    kill_task(host, '[h]ttp\\.server')

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def wait_for_pod_recovery():
        # Recovered means: 2 tasks again, and neither carries an old ID.
        tasks = common.get_pod_tasks(pod_id)
        assert len(tasks) == 2, "The number of tasks is {} after recovery, but 2 was expected".format(len(tasks))
        old_task_ids = [task_id1, task_id2]
        new_task_id1 = tasks[0]['id']
        new_task_id2 = tasks[1]['id']
        assert new_task_id1 not in old_task_ids, \
            "The task ID has not changed, and is still {}".format(new_task_id1)
        assert new_task_id2 not in old_task_ids, \
            "The task ID has not changed, and is still {}".format(new_task_id2)

    wait_for_pod_recovery()
    wait_for_status_network_info()

    tasks = common.get_pod_tasks(pod_id)
    assert host == common.running_status_network_info(tasks[0]['statuses'])['ip_addresses'][0]['ip_address'], \
        "the pod has been restarted on another host"

    port1 = tasks[0]['discovery']['ports']['ports'][0]["number"]
    port2 = tasks[1]['discovery']['ports']['ports'][0]["number"]
    path1 = tasks[0]['container']['volumes'][0]['container_path']
    path2 = tasks[1]['container']['volumes'][0]['container_path']
    print(host, port1, port2, path1, path2)

    # Expects 'hello' twice — presumably the writer appends once per start,
    # so a second occurrence proves the volume survived the restart; confirm
    # against the pod definition.
    # NOTE(review): the failure message ends with a stray "n" — looks like a
    # typo for "\n"; confirm before changing it.
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_data(port, path):
        cmd = "curl {}:{}/{}/foo".format(host, port, path)
        run, data = shakedown.run_command_on_master(cmd)
        assert run, "{} did not succeed".format(cmd)
        assert 'hello\nhello\n' in data, "'hello\nhello\n' not found in '{}'n".format(data)

    check_data(port1, path1)
    check_data(port2, path2)
def test_pod_with_persistent_volume_recovers():
    """Kills the pod's http.server process and verifies that the pod recovers
    on the same host with fresh task IDs, and that the persistent volume
    accumulates the task IDs written before and after the restart.
    """
    pod_def = pods.persistent_volume_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    assert len(tasks) == 2, "The number of pod tasks is {}, but is expected to be 2".format(len(tasks))

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def wait_for_status_network_info():
        tasks = common.get_pod_tasks(pod_id)
        # the following command throws exceptions if there are no tasks in TASK_RUNNING state
        common.running_status_network_info(tasks[0]['statuses'])

    wait_for_status_network_info()
    host = common.running_status_network_info(tasks[0]['statuses'])['ip_addresses'][0]['ip_address']
    # Remember the pre-kill task IDs so recovery can be detected below.
    task_id1 = tasks[0]['id']
    task_id2 = tasks[1]['id']

    port1 = tasks[0]['discovery']['ports']['ports'][0]["number"]
    path1 = tasks[0]['container']['volumes'][0]['container_path']

    # NOTE(review): the failure message ends with a stray "n" — looks like a
    # typo for "\n"; confirm before changing it.
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_data(port, path, expected):
        cmd = "curl {}:{}/{}/foo".format(host, port, path)
        run, data = run_command_on_master(cmd)
        assert run, "{} did not succeed".format(cmd)
        assert expected in data, "{} not found in '{}'n".format(expected, data)

    # Container with the name 'container1' appends its taskId to the file. So we search for the
    # taskId of that container which is not always the tasks[0]
    expected_data1 = next((t['id'] for t in tasks if t['name'] == 'container1'), None)
    assert expected_data1, f"Hasn't found a container with the name 'container1' in the pod {tasks}"
    check_data(port1, path1, expected_data1)

    # Retry killing: covers the window where the server process has not
    # started yet and no PID matches the pattern.
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def kill_task(host, pattern):
        pids = common.kill_process_on_host(host, pattern)
        assert len(pids) != 0, "no task got killed on {} for pattern {}".format(host, pattern)

    kill_task(host, '[h]ttp\\.server')

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def wait_for_pod_recovery():
        # Recovered means: 2 tasks again, and neither carries an old ID.
        tasks = common.get_pod_tasks(pod_id)
        assert len(tasks) == 2, "The number of tasks is {} after recovery, but 2 was expected".format(len(tasks))
        old_task_ids = [task_id1, task_id2]
        new_task_id1 = tasks[0]['id']
        new_task_id2 = tasks[1]['id']
        assert new_task_id1 not in old_task_ids, \
            "The task ID has not changed, and is still {}".format(new_task_id1)
        assert new_task_id2 not in old_task_ids, \
            "The task ID has not changed, and is still {}".format(new_task_id2)

    wait_for_pod_recovery()
    wait_for_status_network_info()

    tasks = common.get_pod_tasks(pod_id)
    assert host == common.running_status_network_info(tasks[0]['statuses'])['ip_addresses'][0]['ip_address'], \
        "the pod has been restarted on another host"

    port1 = tasks[0]['discovery']['ports']['ports'][0]["number"]
    port2 = tasks[1]['discovery']['ports']['ports'][0]["number"]
    path1 = tasks[0]['container']['volumes'][0]['container_path']
    path2 = tasks[1]['container']['volumes'][0]['container_path']
    logger.info('Deployd two containers on {}:{}/{} and {}:{}/{}'.format(host, port1, path1, host, port2, path2))

    # Container with the name 'container1' appends its taskId to the file. So we search for the
    # taskId of that container which is not always the tasks[0]
    expected_data2 = next((t['id'] for t in tasks if t['name'] == 'container1'), None)
    assert expected_data2, f"Hasn't found a container with the name 'container1' in the pod {tasks}"

    # The file now holds the pre-kill task ID followed by the post-recovery
    # one — proof the volume's contents survived the restart.
    check_data(port1, path1, f"{expected_data1}\n{expected_data2}\n")
    check_data(port2, path2, f"{expected_data1}\n{expected_data2}\n")
def test_pod_with_persistent_volume_recovers():
    """Kills the pod's http.server process, then checks that the pod comes
    back on the same host with two new task IDs and that the data written to
    the persistent volume is still served.
    """
    pod_def = pods.persistent_volume_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    assert len(tasks) == 2, "The number of pod tasks is {}, but is expected to be 2".format(len(tasks))

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def wait_for_status_network_info():
        tasks = common.get_pod_tasks(pod_id)
        # the following command throws exceptions if there are no tasks in TASK_RUNNING state
        common.running_status_network_info(tasks[0]['statuses'])

    wait_for_status_network_info()
    host = common.running_status_network_info(tasks[0]['statuses'])['ip_addresses'][0]['ip_address']
    # Pre-kill task IDs, used to detect the restart below.
    task_id1 = tasks[0]['id']
    task_id2 = tasks[1]['id']

    # Retried because the server process may not have started (no matching
    # PID) at the first attempt.
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def kill_task(host, pattern):
        pids = common.kill_process_on_host(host, pattern)
        assert len(pids) != 0, "no task got killed on {} for pattern {}".format(host, pattern)

    kill_task(host, '[h]ttp\\.server')

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def wait_for_pod_recovery():
        # Recovery is complete when 2 tasks exist and both IDs are new.
        tasks = common.get_pod_tasks(pod_id)
        assert len(tasks) == 2, "The number of tasks is {} after recovery, but 2 was expected".format(len(tasks))
        old_task_ids = [task_id1, task_id2]
        new_task_id1 = tasks[0]['id']
        new_task_id2 = tasks[1]['id']
        assert new_task_id1 not in old_task_ids, \
            "The task ID has not changed, and is still {}".format(new_task_id1)
        assert new_task_id2 not in old_task_ids, \
            "The task ID has not changed, and is still {}".format(new_task_id2)

    wait_for_pod_recovery()
    wait_for_status_network_info()

    tasks = common.get_pod_tasks(pod_id)
    assert host == common.running_status_network_info(tasks[0]['statuses'])['ip_addresses'][0]['ip_address'], \
        "the pod has been restarted on another host"

    port1 = tasks[0]['discovery']['ports']['ports'][0]["number"]
    port2 = tasks[1]['discovery']['ports']['ports'][0]["number"]
    path1 = tasks[0]['container']['volumes'][0]['container_path']
    path2 = tasks[1]['container']['volumes'][0]['container_path']
    print(host, port1, port2, path1, path2)

    # Expects 'hello' twice — presumably written once per container start, so
    # the second occurrence proves the volume survived the restart; confirm
    # against the pod definition.
    # NOTE(review): the failure message ends with a stray "n" — looks like a
    # typo for "\n"; confirm before changing it.
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_data(port, path):
        cmd = "curl {}:{}/{}/foo".format(host, port, path)
        run, data = shakedown.run_command_on_master(cmd)
        assert run, "{} did not succeed".format(cmd)
        assert 'hello\nhello\n' in data, "'hello\nhello\n' not found in '{}'n".format(data)

    check_data(port1, path1)
    check_data(port2, path2)