Python ip_other_than_momの例、common.ip_other_than_mom Pythonの例

コード例 #1

0

ファイルを表示

def test_marathon_when_disconnected_from_zk():
    """ Launch an app from Marathon.  Then knock out access to zk from the MoM.
        Verify the task is still good.
    """
    app_def = app('zk-failure')
    host = ip_other_than_mom()
    pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()
    tasks = client.get_tasks('/zk-failure')
    original_task_id = tasks[0]['id']

    with shakedown.iptable_rules(host):
        block_port(host, 2181)
        #  time of the zk block
        time.sleep(10)

    # after access to zk is restored.
    @retrying.retry(wait_fixed=1000, stop_max_delay=3000)
    def check_task_is_back():
        tasks = client.get_tasks('/zk-failure')
        tasks[0]['id'] == original_task_id

    check_task_is_back()

コード例 #2

0

ファイルを表示

def test_pod_health_failed_check():
    """Deploys a pod with correct health checks, then partitions the network and verifies that
       the tasks get restarted with new task IDs.
    """

    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    host = common.ip_other_than_mom()
    common.pin_pod_to_host(pod_def, host)

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    initial_id1 = tasks[0]['id']
    initial_id2 = tasks[1]['id']

    pod = client.list_pod()[0]
    container1 = pod['instances'][0]['containers'][0]
    port = container1['endpoints'][0]['allocatedHostPort']

    common.save_iptables(host)
    common.block_port(host, port)
    time.sleep(7)
    common.restore_iptables(host)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    for task in tasks:
        assert task[
            'id'] != initial_id1, "One of the tasks has not been restarted"
        assert task[
            'id'] != initial_id2, "One of the tasks has not been restarted"

コード例 #3

0

ファイルを表示

def test_task_gets_restarted_due_to_network_split():
    """Verifies that a health check fails in presence of a network partition."""

    app_def = apps.http_server()
    app_id = app_def["id"]
    app_def['healthChecks'] = [common.health_check()]
    common.pin_to_host(app_def, common.ip_other_than_mom())

    client = marathon.create_client()
    client.add_app(app_def)

    common.deployment_wait(service_id=app_id)

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1, \
        "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
    assert app['tasksHealthy'] == 1, \
        "The number of healthy tasks is {}, but 1 was expected".format(app['tasksHealthy'])

    tasks = client.get_tasks(app_id)
    task_id = tasks[0]['id']
    host = tasks[0]['host']
    port = tasks[0]['ports'][0]

    # introduce a network partition
    common.block_iptable_rules_for_seconds(host,
                                           port,
                                           sleep_seconds=10,
                                           block_input=True,
                                           block_output=False)

    common.deployment_wait(service_id=app_id)

    app = client.get_app(app_id)
    tasks = client.get_tasks(app_id)
    new_task_id = tasks[0]['id']
    assert task_id != new_task_id, "The task didn't get killed because of a failed health check"

    assert app['tasksRunning'] == 1, \
        "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
    assert app['tasksHealthy'] == 1, \
        "The number of healthy tasks is {}, but 0 was expected".format(app['tasksHealthy'])

    # network partition should cause a task restart
    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_health_message():
        tasks = client.get_tasks(app_id)
        new_task_id = tasks[0]['id']
        assert task_id != new_task_id, "The task has not been restarted: {}".format(
            task_id)

        app = client.get_app(app_id)
        assert app['tasksRunning'] == 1, \
            "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
        assert app['tasksHealthy'] == 1, \
            "The number of healthy tasks is {}, but 1 was expected".format(app['tasksHealthy'])

    check_health_message()

コード例 #4

0

ファイルを表示

def test_health_failed_check():
    """ Deploys a pod with good health checks, then partitions the network and verifies
        the tasks return with new task ids.
    """
    client = marathon.create_client()

    pod_id = "/pod-ken".format(uuid.uuid4().hex)

    pod_json = _pods_json('pod-ports.json')
    pod_json["id"] = pod_id
    host = ip_other_than_mom()
    pin_pod_to_host(pod_json, host)
    client.add_pod(pod_json)
    shakedown.deployment_wait()

    tasks = get_pod_tasks(pod_id)
    initial_id1 = tasks[0]['id']
    initial_id2 = tasks[1]['id']

    pod = client.list_pod()[0]
    container1 = pod['instances'][0]['containers'][0]
    port = container1['endpoints'][0]['allocatedHostPort']

    save_iptables(host)
    block_port(host, port)
    time.sleep(7)
    restore_iptables(host)
    shakedown.deployment_wait()

    tasks = get_pod_tasks(pod_id)
    for task in tasks:
        assert task['id'] != initial_id1
        assert task['id'] != initial_id2

コード例 #5

0

ファイルを表示

ファイル: test_marathon_on_marathon.py プロジェクト: blacklab/marathon

def test_mom_when_mom_process_killed():
    """Launched a task from MoM then killed MoM."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]['id']

        shakedown.kill_process_on_host(common.ip_of_mom(), 'marathon-assembly')
        shakedown.wait_for_task('marathon', 'marathon-user', 300)
        shakedown.wait_for_service_endpoint('marathon-user')

        @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0]['id'] == original_task_id, "The task ID has changed"

        check_task_is_back()

コード例 #6

0

ファイルを表示

ファイル: marathon_pods_tests.py プロジェクト: guenter/marathon

def test_pod_health_failed_check():
    """Deploys a pod with correct health checks, then partitions the network and verifies that
       the tasks get restarted with new task IDs.
    """

    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    host = common.ip_other_than_mom()
    common.pin_pod_to_host(pod_def, host)

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    initial_id1 = tasks[0]['id']
    initial_id2 = tasks[1]['id']

    pod = client.list_pod()[0]
    container1 = pod['instances'][0]['containers'][0]
    port = container1['endpoints'][0]['allocatedHostPort']

    common.block_iptable_rules_for_seconds(host, port, 7, block_input=True, block_output=False)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    for new_task in tasks:
        new_task_id = new_task['id']
        assert new_task_id != initial_id1, f"Task {new_task_id} has not been restarted" # NOQA E999
        assert new_task_id != initial_id2, f"Task {new_task_id} has not been restarted"

コード例 #7

0

ファイルを表示

def test_pinned_task_does_not_scale_to_unpinned_host():
    """Tests when a task lands on a pinned node (and barely fits) and it is asked to scale past
       the resources of that node, no tasks will be launched on any other node.
    """

    app_def = apps.sleep_app()
    app_id = app_def['id']

    host = common.ip_other_than_mom()
    print('Constraint set to host: {}'.format(host))
    # the size of cpus is designed to be greater than 1/2 of a node
    # such that only 1 task can land on the node.
    cores = common.cpus_on_agent(host)
    app_def['cpus'] = max(0.6, cores - 0.5)
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)

    shakedown.deployment_wait(app_id=app_id)
    client.scale_app(app_id, 2)

    time.sleep(5)
    deployments = client.get_deployments(app_id=app_id)
    tasks = client.get_tasks(app_id)

    # still deploying
    assert len(
        deployments
    ) == 1, "The number of deployments is {}, but 1 was expected".format(
        len(deployments))
    assert len(
        tasks) == 1, "The number of tasks is {}, but 1 was expected".format(
            len(tasks))

コード例 #8

0

ファイルを表示

def test_marathon_when_disconnected_from_zk():
    """Launches an app from Marathon, then knocks out access to ZK from Marathon.
       Verifies the task is preserved.
    """

    app_def = apps.sleep_app()
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)

    shakedown.deployment_wait()
    tasks = client.get_tasks(app_def["id"])
    original_task_id = tasks[0]['id']

    common.block_iptable_rules_for_seconds(host,
                                           2181,
                                           sleep_seconds=10,
                                           block_input=True,
                                           block_output=False)

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_task_is_back():
        tasks = client.get_tasks(app_def["id"])
        assert tasks[0]['id'] == original_task_id, \
            "The task {} got replaced with {}".format(original_task_id, tasks[0]['id'])

    check_task_is_back()

コード例 #9

0

ファイルを表示

def test_marathon_with_master_process_failure(marathon_service_name):
    """Launches an app and restarts the master. It is expected that the service endpoint eventually comes back and
       the task ID stays the same.
    """

    app_def = apps.sleep_app()
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    tasks = client.get_tasks(app_def["id"])
    original_task_id = tasks[0]['id']

    common.systemctl_master('restart')
    common.wait_for_service_endpoint(marathon_service_name, path="ping")

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_task_recovery():
        tasks = client.get_tasks(app_def["id"])
        assert len(
            tasks
        ) == 1, "The number of tasks is {} after master restart, but 1 was expected".format(
            len(tasks))
        assert tasks[0]['id'] == original_task_id, \
            "Task {} has not recovered, it got replaced with another one: {}".format(original_task_id, tasks[0]['id'])

    check_task_recovery()

コード例 #10

0

ファイルを表示

ファイル: marathon_common_tests.py プロジェクト: slungzr/marathon

def test_marathon_with_master_process_failure(marathon_service_name):
    """ Launches an app from Marathon and restarts the master.
        It is expected that the service endpoint will come back and that the
        task_id is the original task_id
    """

    app_def = app('master-failure')
    host = ip_other_than_mom()
    pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()
    tasks = client.get_tasks('/master-failure')
    original_task_id = tasks[0]['id']
    common.systemctl_master()
    shakedown.wait_for_service_endpoint(marathon_service_name)

    @retrying.retry(wait_fixed=1000,
                    stop_max_delay=10000,
                    retry_on_exception=retry_on_exception)
    def check_task_recovery():
        tasks = client.get_tasks('/master-failure')
        tasks[0]['id'] == original_task_id

    check_task_recovery()

コード例 #11

0

ファイルを表示

ファイル: marathon_pods_tests.py プロジェクト: anywhy/marathon

def test_health_failed_check():
    """ Deploys a pod with good health checks, then partitions the network and verifies
        the tasks return with new task ids.
    """
    client = marathon.create_client()

    pod_id = "/pod-ken".format(uuid.uuid4().hex)

    pod_json = _pods_json('pod-ports.json')
    pod_json["id"] = pod_id
    host = ip_other_than_mom()
    pin_pod_to_host(pod_json, host)
    client.add_pod(pod_json)
    shakedown.deployment_wait()

    tasks = get_pod_tasks(pod_id)
    initial_id1 = tasks[0]['id']
    initial_id2 = tasks[1]['id']

    pod = client.list_pod()[0]
    container1 = pod['instances'][0]['containers'][0]
    port = container1['endpoints'][0]['allocatedHostPort']

    save_iptables(host)
    block_port(host, port)
    time.sleep(7)
    restore_iptables(host)
    shakedown.deployment_wait()

    tasks = get_pod_tasks(pod_id)
    for task in tasks:
        assert task['id'] != initial_id1
        assert task['id'] != initial_id2

コード例 #12

0

ファイルを表示

def test_pinned_task_does_not_scale_to_unpinned_host():
    """Tests when a task lands on a pinned node (and barely fits) and it is asked to scale past
       the resources of that node, no tasks will be launched on any other node.
    """

    app_def = apps.sleep_app()
    app_def['cpus'] = 3.5
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)

    shakedown.deployment_wait()
    client.scale_app(app_def["id"], 2)

    time.sleep(5)
    deployments = client.get_deployments()
    tasks = client.get_tasks(app_def["id"])

    # still deploying
    assert len(
        deployments
    ) == 1, "The number of deployments is {}, but 1 was expected".format(
        len(deployments))
    assert len(
        tasks) == 1, "The number of tasks is {}, but 1 was expected".format(
            len(tasks))

コード例 #13

0

ファイルを表示

def test_mom_when_mom_process_killed():
    """Launched a task from MoM then killed MoM."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]['id']

        common.kill_process_on_host(common.ip_of_mom(), 'marathon-assembly')
        shakedown.wait_for_task('marathon', 'marathon-user', 300)
        common.wait_for_service_endpoint('marathon-user', path="ping")

        @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0]['id'] == original_task_id, "The task ID has changed"

        check_task_is_back()

コード例 #14

0

ファイルを表示

def test_pod_health_failed_check():
    """Deploys a pod with correct health checks, then partitions the network and verifies that
       the tasks get restarted with new task IDs.
    """

    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    host = common.ip_other_than_mom()
    common.pin_pod_to_host(pod_def, host)

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    initial_id1 = tasks[0]['id']
    initial_id2 = tasks[1]['id']

    pod = client.list_pod()[0]
    container1 = pod['instances'][0]['containers'][0]
    port = container1['endpoints'][0]['allocatedHostPort']

    common.block_iptable_rules_for_seconds(host,
                                           port,
                                           7,
                                           block_input=True,
                                           block_output=False)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    for new_task in tasks:
        new_task_id = new_task['id']
        assert new_task_id != initial_id1, f"Task {new_task_id} has not been restarted"  # NOQA E999
        assert new_task_id != initial_id2, f"Task {new_task_id} has not been restarted"

コード例 #15

0

ファイルを表示

def test_marathon_when_task_agent_bounced():
    """Launch an app and restart the node the task is running on."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]

    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)

    deployment_wait(service_id=app_id)
    tasks = client.get_tasks(app_id)
    original_task_id = tasks[0]['id']
    restart_agent(host)

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_task_is_back():
        tasks = client.get_tasks(app_id)
        assert tasks[0]['id'] == original_task_id, \
            "The task {} got replaced with {}".format(original_task_id, tasks[0]['id'])

    check_task_is_back()

コード例 #16

0

ファイルを表示

def test_pinned_task_recovers_on_host():
    """Tests that when a pinned task gets killed, it recovers on the node it was pinned to."""

    app_def = apps.sleep_app()
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)

    shakedown.deployment_wait()
    tasks = client.get_tasks(app_def["id"])

    common.kill_process_on_host(host, '[s]leep')
    shakedown.deployment_wait()

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_for_new_task():
        new_tasks = client.get_tasks(app_def["id"])
        assert tasks[0]['id'] != new_tasks[0][
            'id'], "The task did not get killed: {}".format(tasks[0]['id'])
        assert new_tasks[0]['host'] == host, \
            "The task got restarted on {}, but it was supposed to stay on {}".format(new_tasks[0]['host'], host)

    check_for_new_task()

コード例 #17

0

ファイルを表示

ファイル: test_marathon_on_marathon.py プロジェクト: davchamoli82/marathon

def test_mom_when_mom_agent_bounced():
    """Launch an app from MoM and restart the node MoM is on."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]
    mom_ip = common.ip_of_mom()
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        common.deployment_wait(service_id=app_id)
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]['id']

        shakedown.restart_agent(mom_ip)

        @retrying.retry(wait_fixed=1000,
                        stop_max_attempt_number=30,
                        retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0][
                'id'] == original_task_id, "The task ID has changed"

        check_task_is_back()

コード例 #18

0

ファイルを表示

ファイル: marathon_common_tests.py プロジェクト: mesosphere/marathon

def test_pinned_task_recovers_on_host():
    """Tests that when a pinned task gets killed, it recovers on the node it was pinned to."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)

    deployment_wait(service_id=app_id)
    tasks = client.get_tasks(app_id)

    common.kill_process_on_host(host, '[s]leep')
    deployment_wait(service_id=app_id)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_for_new_task():
        new_tasks = client.get_tasks(app_id)
        assert tasks[0]['id'] != new_tasks[0]['id'], "The task did not get killed: {}".format(tasks[0]['id'])
        assert new_tasks[0]['host'] == host, \
            "The task got restarted on {}, but it was supposed to stay on {}".format(new_tasks[0]['host'], host)

    check_for_new_task()

コード例 #19

0

ファイルを表示

ファイル: marathon_common_tests.py プロジェクト: mesosphere/marathon

def test_pinned_task_scales_on_host_only():
    """Tests that a pinned app scales only on the pinned node."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)

    deployment_wait(service_id=app_id)

    tasks = client.get_tasks(app_id)
    assert len(tasks) == 1, "The number of tasks is {} after deployment, but 1 was expected".format(len(tasks))
    assert tasks[0]['host'] == host, \
        "The task is on {}, but it is supposed to be on {}".format(tasks[0]['host'], host)

    client.scale_app(app_id, 10)
    deployment_wait(service_id=app_id)

    tasks = client.get_tasks(app_id)
    assert len(tasks) == 10, "The number of tasks is {} after scale, but 10 was expected".format(len(tasks))
    for task in tasks:
        assert task['host'] == host, "The task is on {}, but it is supposed to be on {}".format(task['host'], host)

コード例 #20

0

ファイルを表示

def test_pinned_task_scales_on_host_only():
    """Tests that a pinned app scales only on the pinned node."""

    app_def = apps.sleep_app()
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)

    shakedown.deployment_wait()

    tasks = client.get_tasks(app_def["id"])
    assert len(
        tasks
    ) == 1, "The number of tasks is {} after deployment, but 1 was expected".format(
        len(tasks))
    assert tasks[0]['host'] == host, \
        "The task is on {}, but it is supposed to be on {}".format(tasks[0]['host'], host)

    client.scale_app(app_def["id"], 10)
    shakedown.deployment_wait()

    tasks = client.get_tasks(app_def["id"])
    assert len(
        tasks
    ) == 10, "The number of tasks is {} after scale, but 10 was expected".format(
        len(tasks))
    for task in tasks:
        assert task[
            'host'] == host, "The task is on {}, but it is supposed to be on {}".format(
                task['host'], host)

コード例 #21

0

ファイルを表示

ファイル: marathon_common_tests.py プロジェクト: mesosphere/marathon

def test_pinned_task_does_not_scale_to_unpinned_host():
    """Tests when a task lands on a pinned node (and barely fits) and it is asked to scale past
       the resources of that node, no tasks will be launched on any other node.
    """

    app_def = apps.sleep_app()
    app_id = app_def['id']

    host = common.ip_other_than_mom()
    logger.info('Constraint set to host: {}'.format(host))
    # the size of cpus is designed to be greater than 1/2 of a node
    # such that only 1 task can land on the node.
    cores = common.cpus_on_agent(host)
    app_def['cpus'] = max(0.6, cores - 0.5)
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)

    deployment_wait(service_id=app_id)
    client.scale_app(app_id, 2)

    time.sleep(5)
    deployments = client.get_deployments(app_id=app_id)
    tasks = client.get_tasks(app_id)

    # still deploying
    assert len(deployments) == 1, "The number of deployments is {}, but 1 was expected".format(len(deployments))
    assert len(tasks) == 1, "The number of tasks is {}, but 1 was expected".format(len(tasks))

コード例 #22

0

ファイルを表示

ファイル: marathon_common_tests.py プロジェクト: mesosphere/marathon

def test_marathon_with_master_process_failure(marathon_service_name):
    """Launches an app and restarts the master. It is expected that the service endpoint eventually comes back and
       the task ID stays the same.
    """

    app_def = apps.sleep_app()
    app_id = app_def["id"]

    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)
    deployment_wait(service_id=app_id)

    tasks = client.get_tasks(app_id)
    original_task_id = tasks[0]['id']

    common.systemctl_master('restart')
    shakedown.dcos.service.wait_for_service_endpoint(marathon_service_name, path="ping")

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_task_recovery():
        tasks = client.get_tasks(app_id)
        assert len(tasks) == 1, "The number of tasks is {} after master restart, but 1 was expected".format(len(tasks))
        assert tasks[0]['id'] == original_task_id, \
            "Task {} has not recovered, it got replaced with another one: {}".format(original_task_id, tasks[0]['id'])

    check_task_recovery()

コード例 #23

0

ファイルを表示

ファイル: marathon_common_tests.py プロジェクト: mesosphere/marathon

def test_marathon_when_disconnected_from_zk():
    """Launches an app from Marathon, then knocks out access to ZK from Marathon.
       Verifies the task is preserved.
    """

    app_def = apps.sleep_app()
    app_id = app_def["id"]

    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)

    deployment_wait(service_id=app_id)
    tasks = client.get_tasks(app_id)
    original_task_id = tasks[0]['id']

    common.block_iptable_rules_for_seconds(host, 2181, sleep_seconds=10, block_input=True, block_output=False)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_task_is_back():
        tasks = client.get_tasks(app_id)
        assert tasks[0]['id'] == original_task_id, \
            "The task {} got replaced with {}".format(original_task_id, tasks[0]['id'])

    check_task_is_back()

コード例 #24

0

ファイルを表示

ファイル: marathon_common_tests.py プロジェクト: blacklab/marathon

def test_task_gets_restarted_due_to_network_split():
    """Verifies that a health check fails in presence of a network partition."""

    app_def = apps.http_server()
    app_def['healthChecks'] = [common.health_check()]
    common.pin_to_host(app_def, common.ip_other_than_mom())

    client = marathon.create_client()
    client.add_app(app_def)

    shakedown.deployment_wait()

    app = client.get_app(app_def["id"])
    assert app['tasksRunning'] == 1, \
        "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
    assert app['tasksHealthy'] == 1, \
        "The number of healthy tasks is {}, but 1 was expected".format(app['tasksHealthy'])

    tasks = client.get_tasks(app_def["id"])
    task_id = tasks[0]['id']
    host = tasks[0]['host']
    port = tasks[0]['ports'][0]

    # introduce a network partition
    with shakedown.iptable_rules(host):
        common.block_port(host, port)
        time.sleep(10)

    shakedown.deployment_wait()

    app = client.get_app(app_def["id"])
    tasks = client.get_tasks(app_def["id"])
    new_task_id = tasks[0]['id']
    assert task_id != new_task_id, "The task didn't get killed because of a failed health check"

    assert app['tasksRunning'] == 1, \
        "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
    assert app['tasksHealthy'] == 1, \
        "The number of healthy tasks is {}, but 0 was expected".format(app['tasksHealthy'])

    # network partition should cause a task restart
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_health_message():
        tasks = client.get_tasks(app_def["id"])
        new_task_id = tasks[0]['id']
        assert task_id != new_task_id, "The task has not been restarted: {}".format(task_id)

        app = client.get_app(app_def["id"])
        assert app['tasksRunning'] == 1, \
            "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
        assert app['tasksHealthy'] == 1, \
            "The number of healthy tasks is {}, but 1 was expected".format(app['tasksHealthy'])

    check_health_message()

コード例 #25

0

ファイルを表示

def test_health_failed_check():
    """ Tests a health check of an app launched by marathon.
        The health check succeeded, then failed due to a network partition.
    """
    client = marathon.create_client()
    app_def = python_http_app()
    health_list = []
    health_list.append(health_check())
    app_def['id'] = 'healthy'
    app_def['healthChecks'] = health_list

    pin_to_host(app_def, ip_other_than_mom())

    client.add_app(app_def)
    shakedown.deployment_wait()

    # healthy
    app = client.get_app('/healthy')
    assert app['tasksRunning'] == 1
    assert app['tasksHealthy'] == 1

    tasks = client.get_tasks('/healthy')
    host = tasks[0]['host']
    port = tasks[0]['ports'][0]

    # prefer to break at the agent (having issues)
    mom_ip = ip_of_mom()
    shakedown.save_iptables(host)
    block_port(host, port)
    time.sleep(7)
    restore_iptables(host)
    shakedown.deployment_wait()

    # after network failure is restored.  The task returns and is a new task ID
    @retrying.retry(wait_fixed=1000,
                    stop_max_delay=3000,
                    retry_on_exception=ignore_on_exception)
    def check_health_message():
        new_tasks = client.get_tasks('/healthy')
        assert new_tasks[0]['id'] != tasks[0]['id']
        app = client.get_app('/healthy')
        assert app['tasksRunning'] == 1
        assert app['tasksHealthy'] == 1

    check_health_message()

コード例 #26

0

ファイルを表示

ファイル: marathon_common_tests.py プロジェクト: redshirtrob/marathon

def test_marathon_when_task_agent_bounced():
    """ Launch an app and restart the node the task is on.
    """
    app_def = app('agent-failure')
    host = ip_other_than_mom()
    pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()
    tasks = client.get_tasks('/agent-failure')
    original_task_id = tasks[0]['id']
    shakedown.restart_agent(host)

    @retrying.retry(wait_fixed=1000, stop_max_delay=3000)
    def check_task_is_back():
        tasks = client.get_tasks('/agent-failure')
        tasks[0]['id'] == original_task_id

コード例 #27

0

ファイルを表示

ファイル: marathon_common_tests.py プロジェクト: natemurthy/marathon

def test_marathon_when_task_agent_bounced():
    """ Launch an app and restart the node the task is on.
    """
    app_def = app('agent-failure')
    host = ip_other_than_mom()
    pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()
    tasks = client.get_tasks('/agent-failure')
    original_task_id = tasks[0]['id']
    shakedown.restart_agent(host)

    @retrying.retry(wait_fixed=1000, stop_max_delay=3000)
    def check_task_is_back():
        tasks = client.get_tasks('/agent-failure')
        tasks[0]['id'] == original_task_id

コード例 #28

0

ファイルを表示

ファイル: marathon_pods_tests.py プロジェクト: guenter/marathon

def test_pin_pod():
    """Tests that a pod can be pinned to a specific host."""

    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    host = common.ip_other_than_mom()
    common.pin_pod_to_host(pod_def, host)

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    assert len(tasks) == 2, "The number of tasks is {} after deployment, but 2 was expected".format(len(tasks))

    pod = client.list_pod()[0]
    assert pod['instances'][0]['agentHostname'] == host, "The pod didn't get pinned to {}".format(host)

コード例 #29

0

ファイルを表示

def test_pin_pod():
    """Tests that a pod can be pinned to a specific host."""

    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    host = common.ip_other_than_mom()
    common.pin_pod_to_host(pod_def, host)

    client = marathon.create_client()
    client.add_pod(pod_def)
    common.deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    assert len(tasks) == 2, "The number of tasks is {} after deployment, but 2 was expected".format(len(tasks))

    pod = client.list_pod()[0]
    assert pod['instances'][0]['agentHostname'] == host, "The pod didn't get pinned to {}".format(host)

コード例 #30

0

ファイルを表示

ファイル: test_marathon_basics.py プロジェクト: laurencechan/marathon

def test_pinned_task_does_not_find_unknown_host():
    """ Tests that a task pinned to an unknown host will not launch.
        within 10 secs it is still in deployment and 0 tasks are running.
    """
    app_def = app('pinned')
    host = ip_other_than_mom()
    pin_to_host(app_def, '10.255.255.254')
    # only 1 can fit on the node
    app_def['cpus'] = 3.5
    with marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        # deploys are within secs
        # assuming after 10 no tasks meets criteria
        time.sleep(10)

        tasks = client.get_tasks('/pinned')
        assert len(tasks) == 0

コード例 #31

0

ファイルを表示

ファイル: marathon_common_tests.py プロジェクト: joel-hamill/marathon

def test_pinned_task_does_not_find_unknown_host():
    """ Tests that a task pinned to an unknown host will not launch.
        within 10 secs it is still in deployment and 0 tasks are running.
    """

    app_def = app('pinned')
    host = ip_other_than_mom()
    pin_to_host(app_def, '10.255.255.254')
    # only 1 can fit on the node
    app_def['cpus'] = 3.5
    client = marathon.create_client()
    client.add_app(app_def)
    # deploys are within secs
    # assuming after 10 no tasks meets criteria
    time.sleep(10)

    tasks = client.get_tasks('/pinned')
    assert len(tasks) == 0

コード例 #32

0

ファイルを表示

def test_mom_when_mom_process_killed():
    """ Launched a task from MoM then killed MoM.
    """
    app_def = app('agent-failure')
    host = ip_other_than_mom()
    pin_to_host(app_def, host)
    with marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()
        tasks = client.get_tasks('/agent-failure')
        original_task_id = tasks[0]['id']

        shakedown.kill_process_on_host(ip_of_mom(), 'marathon-assembly')
        shakedown.wait_for_task('marathon', 'marathon-user', 300)
        shakedown.wait_for_service_endpoint('marathon-user')

        tasks = client.get_tasks('/agent-failure')
        tasks[0]['id'] == original_task_id

コード例 #33

0

ファイルを表示

ファイル: marathon_common_tests.py プロジェクト: joel-hamill/marathon

def test_health_failed_check():
    """ Tests a health check of an app launched by marathon.
        The health check succeeded, then failed due to a network partition.
    """
    client = marathon.create_client()
    app_def = python_http_app()
    health_list = []
    health_list.append(health_check())
    app_def['id'] = 'healthy'
    app_def['healthChecks'] = health_list

    pin_to_host(app_def, ip_other_than_mom())

    client.add_app(app_def)
    shakedown.deployment_wait()

    # healthy
    app = client.get_app('/healthy')
    assert app['tasksRunning'] == 1
    assert app['tasksHealthy'] == 1

    tasks = client.get_tasks('/healthy')
    host = tasks[0]['host']
    port = tasks[0]['ports'][0]

    # prefer to break at the agent (having issues)
    mom_ip = ip_of_mom()
    shakedown.save_iptables(host)
    block_port(host, port)
    time.sleep(7)
    restore_iptables(host)
    shakedown.deployment_wait()

    # after network failure is restored.  The task returns and is a new task ID
    @retrying.retry(wait_fixed=1000, stop_max_delay=3000)
    def check_health_message():
        new_tasks = client.get_tasks('/healthy')
        assert new_tasks[0]['id'] != tasks[0]['id']
        app = client.get_app('/healthy')
        assert app['tasksRunning'] == 1
        assert app['tasksHealthy'] == 1

    check_health_message()

コード例 #34

0

ファイルを表示

ファイル: marathon_common_tests.py プロジェクト: mesosphere/marathon

def test_task_gets_restarted_due_to_network_split():
    """Verifies that a health check fails in presence of a network partition."""

    app_def = apps.http_server("app-network-split")
    app_id = app_def["id"]
    app_def['healthChecks'] = [common.health_check()]
    common.pin_to_host(app_def, common.ip_other_than_mom())

    client = marathon.create_client()
    client.add_app(app_def)

    deployment_wait(service_id=app_id)

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1, \
        "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
    assert app['tasksHealthy'] == 1, \
        "The number of healthy tasks is {}, but 1 was expected".format(app['tasksHealthy'])

    tasks = client.get_tasks(app_id)
    task_id = tasks[0]['id']
    host = tasks[0]['host']
    port = tasks[0]['ports'][0]

    # introduce a network partition
    common.block_iptable_rules_for_seconds(host, port, sleep_seconds=10, block_input=True, block_output=False)

    # Network partition should cause the task to restart N times until the partition is resolved (since we
    # pinned the task to the split agent). A new task with a new taskId should eventually be running and healthy.
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_health_message():
        tasks = client.get_tasks(app_id)
        new_task_id = tasks[0]['id']
        assert task_id != new_task_id, "The task has not been restarted: {}".format(task_id)

        app = client.get_app(app_id)
        assert app['tasksRunning'] == 1, \
            "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
        assert app['tasksHealthy'] == 1, \
            "The number of healthy tasks is {}, but 1 was expected".format(app['tasksHealthy'])

    check_health_message()

コード例 #35

0

ファイルを表示

ファイル: marathon_pods_tests.py プロジェクト: anywhy/marathon

def test_pin_pod():
    """ Tests that we can pin a pod to a host.
    """
    client = marathon.create_client()

    pod_id = "/pod-{}".format(uuid.uuid4().hex)

    pod_json = _pods_json('pod-ports.json')
    pod_json["id"] = pod_id

    host = ip_other_than_mom()
    pin_pod_to_host(pod_json, host)
    client.add_pod(pod_json)
    shakedown.deployment_wait()

    tasks = get_pod_tasks(pod_id)
    assert len(tasks) == 2

    pod = client.list_pod()[0]
    assert pod['instances'][0]['agentHostname'] == host

コード例 #36

0

ファイルを表示

def test_pin_pod():
    """ Tests that we can pin a pod to a host.
    """
    client = marathon.create_client()

    pod_id = "/pod-{}".format(uuid.uuid4().hex)

    pod_json = _pods_json('pod-ports.json')
    pod_json["id"] = pod_id

    host = ip_other_than_mom()
    pin_pod_to_host(pod_json, host)
    client.add_pod(pod_json)
    shakedown.deployment_wait()

    tasks = get_pod_tasks(pod_id)
    assert len(tasks) == 2

    pod = client.list_pod()[0]
    assert pod['instances'][0]['agentHostname'] == host

コード例 #37

0

ファイルを表示

ファイル: test_marathon_basics.py プロジェクト: laurencechan/marathon

def test_pinned_task_recovers_on_host():
    """ Tests that a killed pinned task will recover on the pinned node.
    """
    app_def = app('pinned')
    host = ip_other_than_mom()
    pin_to_host(app_def, host)

    with marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()
        tasks = client.get_tasks('/pinned')

        shakedown.kill_process_on_host(host, '[s]leep')
        shakedown.deployment_wait()

        @retrying.retry(wait_fixed=1000, stop_max_delay=3000)
        def check_for_new_task():
            new_tasks = client.get_tasks('/pinned')
            assert tasks[0]['id'] != new_tasks[0]['id']
            assert new_tasks[0]['host'] == host

コード例 #38

0

ファイルを表示

ファイル: marathon_common_tests.py プロジェクト: natemurthy/marathon

def test_pinned_task_recovers_on_host():
    """ Tests that a killed pinned task will recover on the pinned node.
    """

    app_def = app('pinned')
    host = ip_other_than_mom()
    pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()
    tasks = client.get_tasks('/pinned')

    shakedown.kill_process_on_host(host, '[s]leep')
    shakedown.deployment_wait()

    @retrying.retry(wait_fixed=1000, stop_max_delay=3000)
    def check_for_new_task():
        new_tasks = client.get_tasks('/pinned')
        assert tasks[0]['id'] != new_tasks[0]['id']
        assert new_tasks[0]['host'] == host

コード例 #39

0

ファイルを表示

def test_pinned_task_scales_on_host_only():
    """ Tests that scaling a pinned app scales only on the pinned node.
    """
    app_def = app('pinned')
    host = ip_other_than_mom()
    pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    tasks = client.get_tasks('/pinned')
    assert len(tasks) == 1
    assert tasks[0]['host'] == host

    client.scale_app('pinned', 10)
    shakedown.deployment_wait()

    tasks = client.get_tasks('/pinned')
    assert len(tasks) == 10
    for task in tasks:
        assert task['host'] == host

コード例 #40

0

ファイルを表示

ファイル: marathon_common_tests.py プロジェクト: natemurthy/marathon

def test_marathon_with_master_process_failure(marathon_service_name):
    """ Launches an app from Marathon and restarts the master.
        It is expected that the service endpoint will come back and that the
        task_id is the original task_id
    """

    app_def = app('master-failure')
    host = ip_other_than_mom()
    pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()
    tasks = client.get_tasks('/master-failure')
    original_task_id = tasks[0]['id']
    common.systemctl_master()
    shakedown.wait_for_service_endpoint(marathon_service_name)

    @retrying.retry(wait_fixed=1000, stop_max_delay=10000)
    def check_task_recovery():
        tasks = client.get_tasks('/master-failure')
        tasks[0]['id'] == original_task_id

コード例 #41

0

ファイルを表示

ファイル: marathon_common_tests.py プロジェクト: guenter/marathon

def test_marathon_when_task_agent_bounced():
    """Launch an app and restart the node the task is running on."""

    app_def = apps.sleep_app()
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)

    shakedown.deployment_wait()
    tasks = client.get_tasks(app_def["id"])
    original_task_id = tasks[0]['id']
    shakedown.restart_agent(host)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_task_is_back():
        tasks = client.get_tasks(app_def["id"])
        assert tasks[0]['id'] == original_task_id, \
            "The task {} got replaced with {}".format(original_task_id, tasks[0]['id'])

    check_task_is_back()

コード例 #42

0

ファイルを表示

ファイル: test_marathon_basics.py プロジェクト: laurencechan/marathon

def test_pinned_task_does_not_scale_to_unpinned_host():
    """ Tests when a task lands on a pinned node (and barely fits) when asked to
        scale past the resources of that node will not scale.
    """
    app_def = app('pinned')
    host = ip_other_than_mom()
    pin_to_host(app_def, host)
    # only 1 can fit on the node
    app_def['cpus'] = 3.5
    with marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()
        tasks = client.get_tasks('/pinned')
        client.scale_app('pinned', 2)
        # typical deployments are sub 3 secs
        time.sleep(5)
        deployments = client.get_deployments()
        tasks = client.get_tasks('/pinned')

        # still deploying
        assert len(deployments) == 1
        assert len(tasks) == 1

コード例 #43

0

ファイルを表示

ファイル: test_marathon_on_marathon.py プロジェクト: mesosphere/marathon

def test_mom_when_mom_agent_bounced():
    """Launch an app from MoM and restart the node MoM is on."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]
    mom_ip = common.ip_of_mom()
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    with marathon_on_marathon() as client:
        client.add_app(app_def)
        deployment_wait(service_id=app_id, client=client)
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]['id']

        restart_agent(mom_ip)

        @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0]['id'] == original_task_id, "The task ID has changed"

        check_task_is_back()

コード例 #44

0

ファイルを表示

ファイル: marathon_common_tests.py プロジェクト: joel-hamill/marathon

def test_pinned_task_does_not_scale_to_unpinned_host():
    """ Tests when a task lands on a pinned node (and barely fits) when asked to
        scale past the resources of that node will not scale.
    """

    app_def = app('pinned')
    host = ip_other_than_mom()
    pin_to_host(app_def, host)
    # only 1 can fit on the node
    app_def['cpus'] = 3.5
    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()
    tasks = client.get_tasks('/pinned')
    client.scale_app('pinned', 2)
    # typical deployments are sub 3 secs
    time.sleep(5)
    deployments = client.get_deployments()
    tasks = client.get_tasks('/pinned')

    # still deploying
    assert len(deployments) == 1
    assert len(tasks) == 1

コード例 #45

0

ファイルを表示

ファイル: marathon_common_tests.py プロジェクト: blacklab/marathon

def test_pinned_task_does_not_scale_to_unpinned_host():
    """Tests when a task lands on a pinned node (and barely fits) and it is asked to scale past
       the resources of that node, no tasks will be launched on any other node.
    """

    app_def = apps.sleep_app()
    app_def['cpus'] = 3.5
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)

    shakedown.deployment_wait()
    client.scale_app(app_def["id"], 2)

    time.sleep(5)
    deployments = client.get_deployments()
    tasks = client.get_tasks(app_def["id"])

    # still deploying
    assert len(deployments) == 1, "The number of deployments is {}, but 1 was expected".format(len(deployments))
    assert len(tasks) == 1, "The number of tasks is {}, but 1 was expected".format(len(tasks))