def test_mom_with_network_failure_bounce_master():
    """Check that a MoM task keeps its ID across a network failure plus a master restart.

    Marathon on Marathon (MoM) test for DC/OS with network failures simulated
    by knocking out ports. The sequence is: deploy a sleep app through MoM,
    partition the MoM agent and the task's agent, restart the Mesos master
    while they are partitioned, reconnect both agents, and finally verify that
    the first task MoM reports for the app still carries the original task ID.
    """

    # get MoM ip
    mom_ip = common.ip_of_mom()
    logger.info("MoM IP: %s", mom_ip)

    app_def = apps.sleep_app()
    app_id = app_def["id"]

    with marathon_on_marathon() as client:
        client.add_app(app_def)
        wait_for_task("marathon-user", app_id.lstrip('/'))
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]["id"]
        task_ip = tasks[0]['host']
        logger.info("\nTask IP: %s", task_ip)

    # PR for network partitioning in shakedown makes this better
    # take out the net
    partition_agent(mom_ip)
    partition_agent(task_ip)

    try:
        # wait for a min
        time.sleep(timedelta(minutes=1).total_seconds())

        # bounce master
        run_command_on_master("sudo systemctl restart dcos-mesos-master")
    finally:
        # bring the net up -- always, even when the steps above fail, so a
        # failing run does not leave the agents partitioned for later tests.
        try:
            reconnect_agent(mom_ip)
        finally:
            reconnect_agent(task_ip)

    time.sleep(timedelta(minutes=1).total_seconds())
    wait_for_service_endpoint('marathon-user',
                              timedelta(minutes=10).total_seconds(),
                              path="ping")

    with marathon_on_marathon() as client:
        wait_for_task("marathon-user", app_id.lstrip('/'),
                      timedelta(minutes=10).total_seconds())

        # MoM may need a moment to report the task again: retry up to 30
        # times, 1000 ms apart, as long as common.ignore_exception allows it.
        @retrying.retry(wait_fixed=1000,
                        stop_max_attempt_number=30,
                        retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0][
                'id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
def test_mom_when_mom_agent_bounced():
    """Deploy an app via MoM, restart the agent MoM runs on, and expect the task ID to be unchanged."""

    sleep_app = apps.sleep_app()
    sleep_app_id = sleep_app["id"]
    mom_agent_ip = common.ip_of_mom()
    # Pin the app to an agent other than MoM's (presumably so the restart
    # below does not hit the node the app itself runs on).
    common.pin_to_host(sleep_app, common.ip_other_than_mom())

    # Retry policy for the final check: up to 30 attempts, 1000 ms apart.
    retry_policy = dict(wait_fixed=1000,
                        stop_max_attempt_number=30,
                        retry_on_exception=common.ignore_exception)

    with marathon_on_marathon() as mom_client:
        mom_client.add_app(sleep_app)
        deployment_wait(service_id=sleep_app_id, client=mom_client)
        task_id_before = mom_client.get_tasks(sleep_app_id)[0]['id']

        restart_agent(mom_agent_ip)

        @retrying.retry(**retry_policy)
        def check_task_is_back():
            first_task = mom_client.get_tasks(sleep_app_id)[0]
            assert first_task['id'] == task_id_before, "The task ID has changed"

        check_task_is_back()
def test_mom_when_mom_process_killed():
    """Deploy an app via MoM, kill the MoM process, and expect the task ID to be unchanged."""

    sleep_app = apps.sleep_app()
    sleep_app_id = sleep_app["id"]
    # Pin the app to an agent other than the one MoM runs on.
    common.pin_to_host(sleep_app, common.ip_other_than_mom())

    # Retry policy for the final check: up to 30 attempts, 1000 ms apart.
    retry_policy = dict(wait_fixed=1000,
                        stop_max_attempt_number=30,
                        retry_on_exception=common.ignore_exception)

    with marathon_on_marathon() as mom_client:
        mom_client.add_app(sleep_app)
        deployment_wait(service_id=sleep_app_id, client=mom_client)
        task_id_before = mom_client.get_tasks(sleep_app_id)[0]['id']

        # Kill the 'marathon-assembly' process on MoM's host, then wait for
        # the 'marathon-user' task and its ping endpoint to come back.
        common.kill_process_on_host(common.ip_of_mom(), 'marathon-assembly')
        wait_for_task('marathon', 'marathon-user', 300)
        wait_for_service_endpoint('marathon-user', path="ping")

        @retrying.retry(**retry_policy)
        def check_task_is_back():
            first_task = mom_client.get_tasks(sleep_app_id)[0]
            assert first_task['id'] == task_id_before, "The task ID has changed"

        check_task_is_back()
def test_mom_with_network_failure_bounce_master():
    """Check that a MoM task keeps its ID across a network failure plus a master restart.

    Marathon on Marathon (MoM) test for DC/OS with network failures simulated
    by knocking out ports. A sleep app is deployed through MoM; the MoM agent
    and the task's agent are partitioned; the Mesos master is restarted while
    they are partitioned; both agents are reconnected; the test then asserts
    that the first task MoM reports for the app still has the original ID.
    """

    # get MoM ip
    mom_ip = common.ip_of_mom()
    logger.info("MoM IP: %s", mom_ip)

    app_def = apps.sleep_app()
    app_id = app_def["id"]

    with marathon_on_marathon() as client:
        client.add_app(app_def)
        wait_for_task("marathon-user", app_id.lstrip('/'))
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]["id"]
        task_ip = tasks[0]['host']
        logger.info("\nTask IP: %s", task_ip)

    # PR for network partitioning in shakedown makes this better
    # take out the net
    partition_agent(mom_ip)
    partition_agent(task_ip)

    try:
        # wait for a min
        time.sleep(timedelta(minutes=1).total_seconds())

        # bounce master
        run_command_on_master("sudo systemctl restart dcos-mesos-master")
    finally:
        # bring the net up -- unconditionally, so that a failure above cannot
        # leave the agents partitioned for whatever runs next.
        try:
            reconnect_agent(mom_ip)
        finally:
            reconnect_agent(task_ip)

    time.sleep(timedelta(minutes=1).total_seconds())
    wait_for_service_endpoint('marathon-user', timedelta(minutes=10).total_seconds(), path="ping")

    with marathon_on_marathon() as client:
        wait_for_task("marathon-user", app_id.lstrip('/'), timedelta(minutes=10).total_seconds())

        # Retry up to 30 times, 1000 ms apart, while MoM catches up.
        @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0]['id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
def wait_for_marathon_user_and_cleanup():
    """Generator that brackets a test with MoM readiness checks and a cleanup.

    Before yielding it waits (up to five minutes) for the 'marathon-user'
    ping endpoint. After control returns it waits for the endpoint again and
    then cleans up through the MoM client. Presumably registered as a pytest
    fixture; the decorator is not visible here.
    """
    ping_timeout = timedelta(minutes=5).total_seconds()

    wait_for_service_endpoint('marathon-user', ping_timeout, path="ping")
    with marathon_on_marathon() as mom_client:
        yield
        # MoM must be reachable again before its apps can be cleaned up.
        wait_for_service_endpoint('marathon-user', ping_timeout, path="ping")
        common.clean_up_marathon(mom_client)
def simple_sleep_app(mom_endpoint):
    """Deploy a simple sleep app in the MoM-EE reachable under ``mom_endpoint``.

    Returns:
        True when the service-task lookup for the deployed app yields
        something other than None, False otherwise.
    """
    with marathon_on_marathon(name=mom_endpoint) as mom_client:
        sleep_app = apps.sleep_app()
        sleep_app_id = sleep_app["id"]

        mom_client.add_app(sleep_app)
        common.deployment_wait(service_id=sleep_app_id, client=mom_client)

        # The task lookup takes the app name without its leading slash.
        task_name = sleep_app_id.lstrip("/")
        found_tasks = get_service_task(mom_endpoint, task_name)
        logger.info('MoM-EE tasks: {}'.format(found_tasks))
        return found_tasks is not None
def simple_sleep_app(mom_endpoint):
    """Deploy a simple sleep app in the MoM-EE reachable under ``mom_endpoint``.

    Returns:
        True when the service-task lookup for the deployed app yields
        something other than None, False otherwise.
    """
    with marathon_on_marathon(name=mom_endpoint) as mom_client:
        sleep_app = apps.sleep_app()
        sleep_app_id = sleep_app["id"]

        mom_client.add_app(sleep_app)
        deployment_wait(service_id=sleep_app_id, client=mom_client)

        # The task lookup takes the app name without its leading slash.
        task_name = sleep_app_id.lstrip("/")
        found_tasks = get_service_task(mom_endpoint, task_name)
        logger.info('MoM-EE tasks: {}'.format(found_tasks))
        return found_tasks is not None
def teardown_module(module):
    """Module-level teardown: clean up MoM, uninstall it, then clean the root Marathon.

    Args:
        module: the module object handed in by the test runner (unused here).
    """
    import logging

    with marathon_on_marathon() as client:
        try:
            common.clean_up_marathon(client=client)
        except Exception:
            # Best effort: a failed MoM cleanup must not stop the rest of the
            # teardown, but record it instead of discarding it silently.
            logging.getLogger(__name__).warning(
                "Cleaning up MoM failed; continuing with teardown", exc_info=True)

    uninstall_package_and_wait('marathon')
    delete_zk_node('universe/marathon-user')

    # Remove everything from root marathon
    common.clean_up_marathon()
def teardown_module(module):
    """Module-level teardown: clean up MoM, uninstall it, then clean the root Marathon.

    Args:
        module: the module object handed in by the test runner (unused here).
    """
    import logging

    with marathon_on_marathon() as client:
        try:
            common.clean_up_marathon(client=client)
        except Exception:
            # Best effort: keep tearing down even if the MoM cleanup fails,
            # but leave a trace of the failure in the log.
            logging.getLogger(__name__).warning(
                "Cleaning up MoM failed; continuing with teardown", exc_info=True)

    uninstall_package_and_wait('marathon')
    delete_zk_node('universe/marathon-user')

    # Remove everything from root marathon
    common.clean_up_marathon()
def remove_mom_ee():
    """Delete the apps of every available MoM-EE endpoint, then remove the MoM-EE app.

    Each (version, security mode) combination is probed in turn. Endpoints
    that are not available are skipped. Afterwards the app named MOM_EE_NAME
    is removed through a freshly created Marathon client and its deployment
    is awaited.
    """
    versions = ('1.6', '1.5', '1.4')
    security_modes = ('strict', 'permissive')

    for version in versions:
        for security_mode in security_modes:
            endpoint = mom_ee_endpoint(version, security_mode)
            logger.info('Checking endpoint: {}'.format(endpoint))
            if not service_available_predicate(endpoint):
                continue
            logger.info('Removing {}...'.format(endpoint))
            with marathon_on_marathon(name=endpoint) as mom_client:
                delete_all_apps(client=mom_client)

    root_client = marathon.create_client()
    root_client.remove_app(MOM_EE_NAME)
    common.deployment_wait(MOM_EE_NAME)
    logger.info('Successfully removed {}'.format(MOM_EE_NAME))
def test_framework_unavailable_on_mom():
    """Launches an app that has elements necessary to create a service endpoint in DCOS.
       This test confirms that the endpoint is not created when launched with MoM.
    """

    app_def = apps.fake_framework()
    app_id = app_def["id"]

    with marathon_on_marathon() as client:
        client.add_app(app_def)
        deployment_wait(service_id=app_id, client=client)

    try:
        wait_for_service_endpoint('pyfw', 15)
    except Exception:
        # Expected outcome: waiting for the 'pyfw' endpoint failed, so the
        # endpoint was not created.
        return

    # Raise explicitly instead of `assert False`, which is removed when
    # Python runs with -O and would let the test pass vacuously.
    raise AssertionError('MoM should NOT create a service endpoint')
def test_framework_unavailable_on_mom():
    """Launches an app that has elements necessary to create a service endpoint in DCOS.
       This test confirms that the endpoint is not created when launched with MoM.
    """

    app_def = apps.fake_framework()
    app_id = app_def["id"]

    with marathon_on_marathon() as client:
        client.add_app(app_def)
        deployment_wait(service_id=app_id, client=client)

    try:
        wait_for_service_endpoint('pyfw', 15)
    except Exception:
        # Expected outcome: the wait for the 'pyfw' endpoint did not succeed,
        # so no endpoint was created.
        return

    # An explicit raise survives `python -O`, unlike `assert False`.
    raise AssertionError('MoM should NOT create a service endpoint')
# Example #13
# 0
def cluster_info(mom_name='marathon-user'):
    """Log version information about the cluster and its Marathon instances.

    Logs the DC/OS version and mode, the number of private agents, the version
    reported by the default Marathon client and, when the ``mom_name`` service
    is available, the version reported by that MoM.

    Args:
        mom_name: service name of the Marathon-on-Marathon instance to probe.
    """
    logger.info("DC/OS: %s, in %s mode", dcos_version(), ee_version())
    agents = get_private_agents()
    logger.info("Agents: %d", len(agents))
    client = marathon.create_client()
    about = client.get_about()
    logger.info("Marathon version: %s", about.get("version"))

    if not service_available_predicate(mom_name):
        logger.info("Marathon MoM not present")
        return

    # Separate name for the MoM client so the root client is not rebound.
    with marathon_on_marathon(mom_name) as mom_client:
        try:
            mom_about = mom_client.get_about()
            # Lazy %-style arguments, consistent with the other log calls here.
            logger.info("Marathon MoM version: %s", mom_about.get("version"))
        except Exception:
            logger.info("Marathon MoM not present")
# Example #14
# 0
def cluster_info(mom_name='marathon-user'):
    """Log version information about the cluster and its Marathon instances.

    Logs the DC/OS version and mode, the number of private agents, the version
    reported by the default Marathon client and, when the ``mom_name`` service
    is available, the version reported by that MoM.

    Args:
        mom_name: service name of the Marathon-on-Marathon instance to probe.
    """
    logger.info("DC/OS: %s, in %s mode", dcos_version(), ee_version())
    agents = get_private_agents()
    logger.info("Agents: %d", len(agents))
    client = marathon.create_client()
    about = client.get_about()
    logger.info("Marathon version: %s", about.get("version"))

    if not service_available_predicate(mom_name):
        logger.info("Marathon MoM not present")
        return

    # Use a distinct name for the MoM client instead of rebinding `client`.
    with marathon_on_marathon(mom_name) as mom_client:
        try:
            mom_about = mom_client.get_about()
            # Lazy %-style arguments, matching the other log calls above.
            logger.info("Marathon MoM version: %s", mom_about.get("version"))
        except Exception:
            logger.info("Marathon MoM not present")
def remove_mom_ee():
    """Delete the apps of every available MoM-EE endpoint, then remove the MoM-EE app.

    Each (version, security mode) combination is probed in turn. Endpoints
    that are not available are skipped. Afterwards the app named MOM_EE_NAME
    is removed through a freshly created Marathon client and its deployment
    is awaited.
    """
    candidates = [
        (version, security_mode)
        for version in ('1.7', '1.6', '1.5')
        for security_mode in ('strict', 'permissive')
    ]

    for version, security_mode in candidates:
        endpoint = mom_ee_endpoint(version, security_mode)
        logger.info('Checking endpoint: {}'.format(endpoint))
        if not service_available_predicate(endpoint):
            continue
        logger.info('Removing {}...'.format(endpoint))
        with marathon_on_marathon(name=endpoint) as mom_client:
            delete_all_apps(client=mom_client)

    root_client = marathon.create_client()
    root_client.remove_app(MOM_EE_NAME)
    deployment_wait(MOM_EE_NAME)
    logger.info('Successfully removed {}'.format(MOM_EE_NAME))
def test_mom_when_mom_agent_bounced():
    """Start an app through MoM, bounce the agent hosting MoM, and verify the task ID is unchanged."""

    sleep_app = apps.sleep_app()
    sleep_app_id = sleep_app["id"]
    mom_agent_ip = common.ip_of_mom()
    other_agent_ip = common.ip_other_than_mom()
    # The app is pinned to an agent other than the one that gets restarted.
    common.pin_to_host(sleep_app, other_agent_ip)

    with marathon_on_marathon() as mom_client:
        mom_client.add_app(sleep_app)
        deployment_wait(service_id=sleep_app_id, client=mom_client)
        tasks_before = mom_client.get_tasks(sleep_app_id)
        task_id_before = tasks_before[0]['id']

        restart_agent(mom_agent_ip)

        # Up to 30 attempts, 1000 ms apart, while MoM comes back.
        @retrying.retry(
            wait_fixed=1000,
            stop_max_attempt_number=30,
            retry_on_exception=common.ignore_exception,
        )
        def check_task_is_back():
            tasks_after = mom_client.get_tasks(sleep_app_id)
            task_id_after = tasks_after[0]['id']
            assert task_id_after == task_id_before, "The task ID has changed"

        check_task_is_back()
def test_mom_when_mom_process_killed():
    """Start an app through MoM, kill the MoM process, and verify the task ID is unchanged."""

    sleep_app = apps.sleep_app()
    sleep_app_id = sleep_app["id"]
    other_agent_ip = common.ip_other_than_mom()
    # The app is pinned to an agent other than the one MoM runs on.
    common.pin_to_host(sleep_app, other_agent_ip)

    with marathon_on_marathon() as mom_client:
        mom_client.add_app(sleep_app)
        deployment_wait(service_id=sleep_app_id, client=mom_client)
        tasks_before = mom_client.get_tasks(sleep_app_id)
        task_id_before = tasks_before[0]['id']

        # Kill 'marathon-assembly' on MoM's host, then wait until the
        # 'marathon-user' task and its ping endpoint are available again.
        mom_host = common.ip_of_mom()
        common.kill_process_on_host(mom_host, 'marathon-assembly')
        wait_for_task('marathon', 'marathon-user', 300)
        wait_for_service_endpoint('marathon-user', path="ping")

        # Up to 30 attempts, 1000 ms apart.
        @retrying.retry(
            wait_fixed=1000,
            stop_max_attempt_number=30,
            retry_on_exception=common.ignore_exception,
        )
        def check_task_is_back():
            tasks_after = mom_client.get_tasks(sleep_app_id)
            task_id_after = tasks_after[0]['id']
            assert task_id_after == task_id_before, "The task ID has changed"

        check_task_is_back()
# Example #18
# 0
def wait_for_marathon_user_and_cleanup():
    """Generator that waits for MoM, yields to the caller, then waits again and cleans up.

    Both waits poll the 'marathon-user' ping endpoint with a five-minute
    timeout. Cleanup goes through the MoM client. Presumably registered as a
    pytest fixture; the decorator is not visible here.
    """
    five_minutes = timedelta(minutes=5).total_seconds()

    wait_for_service_endpoint('marathon-user', five_minutes, path="ping")
    with marathon_on_marathon() as mom_client:
        yield
        # Make sure MoM answers again before removing what the test left behind.
        wait_for_service_endpoint('marathon-user', five_minutes, path="ping")
        common.clean_up_marathon(mom_client)
def setup_module(module):
    """Module-level setup: ensure MoM exists, log cluster info, and start from a clean MoM.

    Args:
        module: the module object handed in by the test runner (unused here).
    """
    common.ensure_mom()
    common.cluster_info()

    # Clear whatever is currently on MoM before the tests run.
    with marathon_on_marathon() as mom_client:
        common.clean_up_marathon(client=mom_client)
def setup_module(module):
    """Module-level setup: ensure MoM exists, log cluster info, and start from a clean MoM.

    Args:
        module: the module object handed in by the test runner (unused here).
    """
    common.ensure_mom()
    common.cluster_info()

    # Clear whatever is currently on MoM before the tests run.
    with marathon_on_marathon() as mom_client:
        common.clean_up_marathon(client=mom_client)