Example #1
0
def test_health_failed_check():
    """Deploy a pod, break its network briefly, and check its tasks are replaced.

    The pod definition is loaded from 'pod-ports.json', pinned to the host
    returned by ``ip_other_than_mom()`` and deployed.  The host port allocated
    to the first endpoint of the first container is then blocked for a few
    seconds (the network partition) before the saved iptables state is
    restored.  After the follow-up deployment settles, no task of the pod may
    still carry one of the two task ids recorded before the partition.
    """
    client = marathon.create_client()

    # The template needs a '{}' placeholder: without one, str.format() drops
    # its argument and every run would reuse the same pod id.
    pod_id = "/pod-ken-{}".format(uuid.uuid4().hex)

    pod_json = _pods_json('pod-ports.json')
    pod_json["id"] = pod_id
    host = ip_other_than_mom()
    pin_pod_to_host(pod_json, host)
    client.add_pod(pod_json)
    shakedown.deployment_wait()

    tasks = get_pod_tasks(pod_id)
    # Two tasks are expected; indexing fails loudly if fewer came up.
    initial_ids = {tasks[0]['id'], tasks[1]['id']}

    # NOTE(review): takes the first pod Marathon lists -- assumes it is the
    # pod deployed above; confirm no other pods exist when this test runs.
    pod = client.list_pod()[0]
    container1 = pod['instances'][0]['containers'][0]
    port = container1['endpoints'][0]['allocatedHostPort']

    save_iptables(host)
    try:
        block_port(host, port)
        # Keep the port blocked long enough for the health check to fail.
        time.sleep(7)
    finally:
        # Always undo the firewall change, even if blocking or sleeping
        # failed, so the host is not left partitioned for later tests.
        restore_iptables(host)
    shakedown.deployment_wait()

    tasks = get_pod_tasks(pod_id)
    for task in tasks:
        assert task['id'] not in initial_ids, \
            "One of the tasks has not been restarted"
Example #2
0
def test_pod_health_failed_check():
    """Deploy a pod, break its network briefly, and check its tasks are restarted.

    The pod definition comes from ``pods.ports_pod()`` and is pinned to the
    host returned by ``common.ip_other_than_mom()``.  After deployment, the
    host port allocated to the first endpoint of the first container is
    blocked for a few seconds (the network partition) and the saved iptables
    state is restored.  Once the follow-up deployment settles, no task may
    still carry one of the two task ids recorded before the partition.
    """

    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    host = common.ip_other_than_mom()
    common.pin_pod_to_host(pod_def, host)

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    # Two tasks are expected; indexing fails loudly if fewer came up.
    initial_ids = {tasks[0]['id'], tasks[1]['id']}

    # NOTE(review): takes the first pod Marathon lists -- assumes it is the
    # pod deployed above; confirm no other pods exist when this test runs.
    pod = client.list_pod()[0]
    container1 = pod['instances'][0]['containers'][0]
    port = container1['endpoints'][0]['allocatedHostPort']

    common.save_iptables(host)
    try:
        common.block_port(host, port)
        # Keep the port blocked long enough for the health check to fail.
        time.sleep(7)
    finally:
        # Always undo the firewall change, even if blocking or sleeping
        # failed, so the host is not left partitioned for later tests.
        common.restore_iptables(host)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    for task in tasks:
        assert task['id'] not in initial_ids, \
            "One of the tasks has not been restarted"
Example #3
0
def test_health_failed_check():
    """Deploy a pod, break its network briefly, and check its tasks are replaced.

    The pod definition is loaded from 'pod-ports.json', pinned to the host
    returned by ``ip_other_than_mom()`` and deployed.  The host port allocated
    to the first endpoint of the first container is then blocked for a few
    seconds (the network partition) before the saved iptables state is
    restored.  After the follow-up deployment settles, no task of the pod may
    still carry one of the two task ids recorded before the partition.
    """
    client = marathon.create_client()

    # The template needs a '{}' placeholder: without one, str.format() drops
    # its argument and every run would reuse the same pod id.
    pod_id = "/pod-ken-{}".format(uuid.uuid4().hex)

    pod_json = _pods_json('pod-ports.json')
    pod_json["id"] = pod_id
    host = ip_other_than_mom()
    pin_pod_to_host(pod_json, host)
    client.add_pod(pod_json)
    shakedown.deployment_wait()

    tasks = get_pod_tasks(pod_id)
    # Two tasks are expected; indexing fails loudly if fewer came up.
    initial_ids = {tasks[0]['id'], tasks[1]['id']}

    # NOTE(review): takes the first pod Marathon lists -- assumes it is the
    # pod deployed above; confirm no other pods exist when this test runs.
    pod = client.list_pod()[0]
    container1 = pod['instances'][0]['containers'][0]
    port = container1['endpoints'][0]['allocatedHostPort']

    save_iptables(host)
    try:
        block_port(host, port)
        # Keep the port blocked long enough for the health check to fail.
        time.sleep(7)
    finally:
        # Always undo the firewall change, even if blocking or sleeping
        # failed, so the host is not left partitioned for later tests.
        restore_iptables(host)
    shakedown.deployment_wait()

    tasks = get_pod_tasks(pod_id)
    for task in tasks:
        assert task['id'] not in initial_ids, \
            "One of the tasks has not been restarted"
Example #4
0
def test_health_failed_check():
    """Check that an app's task is replaced after its health check fails.

    Deploys the app from ``python_http_app()`` under the id 'healthy' with a
    single health check, pinned to the host returned by
    ``ip_other_than_mom()``.  Once the app reports one running and one
    healthy task, the task's first port is blocked on its host for a few
    seconds (the network partition) and the iptables state is restored.
    The test then expects a task with a new id that is running and healthy.
    """
    client = marathon.create_client()
    app_def = python_http_app()
    app_def['id'] = 'healthy'
    app_def['healthChecks'] = [health_check()]

    pin_to_host(app_def, ip_other_than_mom())

    client.add_app(app_def)
    shakedown.deployment_wait()

    # healthy
    app = client.get_app('/healthy')
    assert app['tasksRunning'] == 1
    assert app['tasksHealthy'] == 1

    tasks = client.get_tasks('/healthy')
    host = tasks[0]['host']
    port = tasks[0]['ports'][0]

    # prefer to break at the agent (having issues)
    shakedown.save_iptables(host)
    try:
        block_port(host, port)
        # Keep the port blocked long enough for the health check to fail.
        time.sleep(7)
    finally:
        # Always undo the firewall change, even if blocking or sleeping
        # failed, so the host is not left partitioned for later tests.
        restore_iptables(host)
    shakedown.deployment_wait()

    # After the network failure is repaired the task comes back under a new
    # task id; poll once per second for up to three seconds until it does.
    @retrying.retry(wait_fixed=1000,
                    stop_max_delay=3000,
                    retry_on_exception=ignore_on_exception)
    def check_health_message():
        new_tasks = client.get_tasks('/healthy')
        assert new_tasks[0]['id'] != tasks[0]['id']
        app = client.get_app('/healthy')
        assert app['tasksRunning'] == 1
        assert app['tasksHealthy'] == 1

    check_health_message()
def test_health_failed_check():
    """Check that an app's task is replaced after its health check fails.

    Deploys the app from ``python_http_app()`` under the id 'healthy' with a
    single health check, pinned to the host returned by
    ``ip_other_than_mom()``.  Once the app reports one running and one
    healthy task, the task's first port is blocked on its host for a few
    seconds (the network partition) and the iptables state is restored.
    The test then expects a task with a new id that is running and healthy.
    """
    client = marathon.create_client()
    app_def = python_http_app()
    app_def['id'] = 'healthy'
    app_def['healthChecks'] = [health_check()]

    pin_to_host(app_def, ip_other_than_mom())

    client.add_app(app_def)
    shakedown.deployment_wait()

    # healthy
    app = client.get_app('/healthy')
    assert app['tasksRunning'] == 1
    assert app['tasksHealthy'] == 1

    tasks = client.get_tasks('/healthy')
    host = tasks[0]['host']
    port = tasks[0]['ports'][0]

    # prefer to break at the agent (having issues)
    shakedown.save_iptables(host)
    try:
        block_port(host, port)
        # Keep the port blocked long enough for the health check to fail.
        time.sleep(7)
    finally:
        # Always undo the firewall change, even if blocking or sleeping
        # failed, so the host is not left partitioned for later tests.
        restore_iptables(host)
    shakedown.deployment_wait()

    # After the network failure is repaired the task comes back under a new
    # task id; poll once per second for up to three seconds until it does.
    @retrying.retry(wait_fixed=1000, stop_max_delay=3000)
    def check_health_message():
        new_tasks = client.get_tasks('/healthy')
        assert new_tasks[0]['id'] != tasks[0]['id']
        app = client.get_app('/healthy')
        assert app['tasksRunning'] == 1
        assert app['tasksHealthy'] == 1

    check_health_message()