def test_mom_when_mom_agent_bounced():
    """Launch an app from MoM and restart the node MoM is on."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]
    mom_ip = common.ip_of_mom()
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        common.deployment_wait(service_id=app_id)
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]['id']

        shakedown.restart_agent(mom_ip)

        @retrying.retry(wait_fixed=1000,
                        stop_max_attempt_number=30,
                        retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0][
                'id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
def recover_agents(hosts):
    get_and_verify_plan(
        lambda p: p['status'] == infinity_commons.PlanState.COMPLETE.value,
        assert_success=False)
    for h in hosts:
        print("Restarting mesos agent on {}".format(h))
        shakedown.restart_agent(h)
Beispiel #3
0
def test_marathon_when_task_agent_bounced():
    """Launch an app and restart the node the task is running on."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]

    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)

    common.deployment_wait(service_id=app_id)
    tasks = client.get_tasks(app_id)
    original_task_id = tasks[0]['id']
    shakedown.restart_agent(host)

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_task_is_back():
        tasks = client.get_tasks(app_id)
        assert tasks[0]['id'] == original_task_id, \
            "The task {} got replaced with {}".format(original_task_id, tasks[0]['id'])

    check_task_is_back()
Beispiel #4
0
def recover_failed_agents(hosts):
    log.info("Recover failed agents- {}".format(str(hosts)))
    tasks = {}
    try:
        tasks = check_health(wait_time=ONE_MINUTE, assert_success=False)
        log.info("Failed_tasks- " + str(tasks))
        failed_hosts = find_failed_hosts(hosts, tasks)
        log.info("Failed_hosts- " + failed_hosts)
        for h in failed_hosts:
            log.info("Restarting mesos agent on {}".format(h))
            shakedown.restart_agent(h)
    except Exception as e:
        log.error("error in recover_failed_agents")
        log.error(str(e))
def test_marathon_when_task_agent_bounced():
    """ Launch an app and restart the node the task is on.
    """
    app_def = app('agent-failure')
    host = ip_other_than_mom()
    pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()
    tasks = client.get_tasks('/agent-failure')
    original_task_id = tasks[0]['id']
    shakedown.restart_agent(host)

    @retrying.retry(wait_fixed=1000, stop_max_delay=3000)
    def check_task_is_back():
        tasks = client.get_tasks('/agent-failure')
        tasks[0]['id'] == original_task_id
def test_marathon_when_task_agent_bounced():
    """ Launch an app and restart the node the task is on.
    """
    app_def = app('agent-failure')
    host = ip_other_than_mom()
    pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()
    tasks = client.get_tasks('/agent-failure')
    original_task_id = tasks[0]['id']
    shakedown.restart_agent(host)

    @retrying.retry(wait_fixed=1000, stop_max_delay=3000)
    def check_task_is_back():
        tasks = client.get_tasks('/agent-failure')
        tasks[0]['id'] == original_task_id
def test_marathon_when_task_agent_bounced():
    """Launch an app and restart the node the task is running on."""

    app_def = apps.sleep_app()
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)

    shakedown.deployment_wait()
    tasks = client.get_tasks(app_def["id"])
    original_task_id = tasks[0]['id']
    shakedown.restart_agent(host)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_task_is_back():
        tasks = client.get_tasks(app_def["id"])
        assert tasks[0]['id'] == original_task_id, \
            "The task {} got replaced with {}".format(original_task_id, tasks[0]['id'])

    check_task_is_back()
Beispiel #8
0
def test_mom_when_mom_agent_bounced():
    """ Launch an app from MoM and restart the node MoM is on.
    """
    app_def = app('agent-failure')
    mom_ip = ip_of_mom()
    host = ip_other_than_mom()
    pin_to_host(app_def, host)
    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()
        tasks = client.get_tasks('/agent-failure')
        original_task_id = tasks[0]['id']

        shakedown.restart_agent(mom_ip)

        @retrying.retry(wait_fixed=1000,
                        stop_max_delay=3000,
                        retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks('/agent-failure')
            tasks[0]['id'] == original_task_id
def test_mom_when_mom_agent_bounced():
    """Launch an app from MoM and restart the node MoM is on."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]
    mom_ip = common.ip_of_mom()
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]['id']

        shakedown.restart_agent(mom_ip)

        @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0]['id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
Beispiel #10
0
def recover_host_from_partitioning(host):
    # if is_dns_healthy_for_node(host):
    log.info("Restarting erlang and mesos on {}".format(host))
    restart_erlang_on_host(host)
    shakedown.restart_agent(host)
def recover_failed_agents(hosts):
    tasks = check_health(wait_time=HEALTH_WAIT_TIME, assert_success=False)
    failed_hosts = find_failed_hosts(hosts, tasks)
    for h in failed_hosts:
        print("Restarting mesos agent on {}".format(h))
        shakedown.restart_agent(h)