コード例 #1
0
def test_mom_with_network_failure_bounce_master():
    """Check that a MoM-launched task keeps its ID across a partition plus master restart.

    Steps: launch a sleep app through Marathon on Marathon (MoM), partition the
    MoM agent and the task's agent, restart the Mesos master while they are
    partitioned, reconnect both agents, then assert that the first task MoM
    reports for the app still has the original task ID.
    """

    # get MoM ip
    mom_ip = common.ip_of_mom()
    logger.info("MoM IP: {}".format(mom_ip))

    app_def = apps.sleep_app()
    app_id = app_def["id"]

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.wait_for_task("marathon-user", app_id.lstrip('/'))
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]["id"]
        task_ip = tasks[0]['host']
        logger.info("\nTask IP: " + task_ip)

    # PR for network partitioning in shakedown makes this better
    # take out the net
    partition_agent(mom_ip)
    partition_agent(task_ip)

    try:
        # wait for a min
        time.sleep(timedelta(minutes=1).total_seconds())

        # bounce master
        shakedown.run_command_on_master("sudo systemctl restart dcos-mesos-master")
    finally:
        # bring the net up -- in a finally block so that a failure while the
        # agents are partitioned does not leave them cut off afterwards
        reconnect_agent(mom_ip)
        reconnect_agent(task_ip)

    time.sleep(timedelta(minutes=1).total_seconds())
    common.wait_for_service_endpoint('marathon-user',
                                     timedelta(minutes=10).total_seconds(),
                                     path="ping")

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        shakedown.wait_for_task("marathon-user", app_id.lstrip('/'),
                                timedelta(minutes=10).total_seconds())

        # Retried up to 30 times, one second apart, because the task list may
        # not be available immediately after the outage.
        @retrying.retry(wait_fixed=1000,
                        stop_max_attempt_number=30,
                        retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0][
                'id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
コード例 #2
0
def assert_mom_ee(version, security_mode='permissive'):
    """Deploy the MoM-EE app for `version` / `security_mode` and wait for its endpoint.

    Args:
        version: version string used to pick the fixture file, the image and
            the service endpoint name.
        security_mode: security mode used to pick the fixture file and the
            endpoint name. When it equals 'strict', the service-account secret
            is requested with ``strict=True`` and the Marathon user ACLs are added.

    Raises:
        AssertionError: if the fixture file for this mode/version does not exist.
    """
    ensure_prerequisites_installed()
    ensure_service_account()
    ensure_permissions()
    ensure_sa_secret(strict=(security_mode == 'strict'))
    ensure_docker_config_secret()

    # In strict mode all tasks are started as user `nobody` by default. However we start
    # MoM-EE as 'root' and for that we need to give root marathon ACLs to start
    # tasks as 'root'.
    if security_mode == 'strict':
        common.add_dcos_marathon_user_acls()

    # Deploy MoM-EE from the fixture matching the requested security mode and version
    app_def_file = '{}/mom-ee-{}-{}.json'.format(fixtures.fixtures_dir(),
                                                 security_mode, version)
    assert os.path.isfile(
        app_def_file
    ), "Couldn't find appropriate MoM-EE definition: {}".format(app_def_file)

    image = mom_ee_image(version)
    print('Deploying {} definition with {} image'.format(app_def_file, image))

    # Override the image tag in the fixture with the one resolved for this version.
    app_def = get_resource(app_def_file)
    app_def['container']['docker'][
        'image'] = 'mesosphere/marathon-dcos-ee:{}'.format(image)
    app_id = app_def["id"]

    client = marathon.create_client()
    client.add_app(app_def)
    common.deployment_wait(service_id=app_id)
    common.wait_for_service_endpoint(mom_ee_endpoint(version, security_mode),
                                     path="ping")
コード例 #3
0
def test_mom_when_mom_agent_bounced():
    """Start an app through MoM, restart the agent MoM runs on, and expect the same task ID."""

    sleep_app = apps.sleep_app()
    app_id = sleep_app["id"]
    mom_ip = common.ip_of_mom()
    # Pin the app to an agent other than MoM's, so only MoM's agent is restarted.
    common.pin_to_host(sleep_app, common.ip_other_than_mom())

    with shakedown.marathon_on_marathon():
        mom_client = marathon.create_client()
        mom_client.add_app(sleep_app)
        common.deployment_wait(service_id=app_id)
        original_task_id = mom_client.get_tasks(app_id)[0]['id']

        shakedown.restart_agent(mom_ip)

        # Up to 30 attempts, one second apart.
        @retrying.retry(wait_fixed=1000,
                        stop_max_attempt_number=30,
                        retry_on_exception=common.ignore_exception)
        def assert_task_id_unchanged():
            current_id = mom_client.get_tasks(app_id)[0]['id']
            assert current_id == original_task_id, "The task ID has changed"

        assert_task_id_unchanged()
コード例 #4
0
def assert_mom_ee(version, security_mode='permissive'):
    """Deploy the MoM-EE app for `version` / `security_mode` and wait for its endpoint.

    Args:
        version: version string used to pick the fixture file, the image and
            the service endpoint name.
        security_mode: security mode used to pick the fixture file and the
            endpoint name. When it equals 'strict', the secret is requested
            with ``strict=True``.

    Raises:
        AssertionError: if the fixture file for this mode/version does not exist.
    """
    ensure_prerequisites_installed()
    ensure_service_account()
    ensure_permissions()
    ensure_secret(strict=(security_mode == 'strict'))
    ensure_docker_credentials()

    # Deploy MoM-EE from the fixture matching the requested security mode and version
    app_def_file = '{}/mom-ee-{}-{}.json'.format(fixtures.fixtures_dir(),
                                                 security_mode, version)
    assert os.path.isfile(
        app_def_file
    ), "Couldn't find appropriate MoM-EE definition: {}".format(app_def_file)

    image = mom_ee_image(version)
    print('Deploying {} definition with {} image'.format(app_def_file, image))

    # Override the image tag in the fixture with the one resolved for this version.
    app_def = get_resource(app_def_file)
    app_def['container']['docker'][
        'image'] = 'mesosphere/marathon-dcos-ee:{}'.format(image)

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()
    shakedown.wait_for_service_endpoint(mom_ee_endpoint(
        version, security_mode))
0
def test_mom_when_mom_process_killed():
    """Start a task through MoM, kill the MoM process, and expect the same task ID afterwards."""

    sleep_app = apps.sleep_app()
    app_id = sleep_app["id"]
    # Pin the app to an agent other than the one MoM runs on.
    common.pin_to_host(sleep_app, common.ip_other_than_mom())

    with shakedown.marathon_on_marathon():
        mom_client = marathon.create_client()
        mom_client.add_app(sleep_app)
        shakedown.deployment_wait()
        original_task_id = mom_client.get_tasks(app_id)[0]['id']

        common.kill_process_on_host(common.ip_of_mom(), 'marathon-assembly')
        shakedown.wait_for_task('marathon', 'marathon-user', 300)
        common.wait_for_service_endpoint('marathon-user', path="ping")

        # Up to 30 attempts, one second apart.
        @retrying.retry(
            wait_fixed=1000,
            stop_max_attempt_number=30,
            retry_on_exception=common.ignore_exception,
        )
        def assert_task_id_unchanged():
            current_id = mom_client.get_tasks(app_id)[0]['id']
            assert current_id == original_task_id, "The task ID has changed"

        assert_task_id_unchanged()
コード例 #6
0
def test_mom_when_mom_process_killed():
    """Start a task through MoM, kill the MoM process, and expect the same task ID afterwards."""

    sleep_app = apps.sleep_app()
    app_id = sleep_app["id"]
    # Pin the app to an agent other than the one MoM runs on.
    common.pin_to_host(sleep_app, common.ip_other_than_mom())

    with shakedown.marathon_on_marathon():
        mom_client = marathon.create_client()
        mom_client.add_app(sleep_app)
        shakedown.deployment_wait()
        original_task_id = mom_client.get_tasks(app_id)[0]['id']

        shakedown.kill_process_on_host(common.ip_of_mom(), 'marathon-assembly')
        shakedown.wait_for_task('marathon', 'marathon-user', 300)
        shakedown.wait_for_service_endpoint('marathon-user')

        # Up to 30 attempts, one second apart.
        @retrying.retry(
            wait_fixed=1000,
            stop_max_attempt_number=30,
            retry_on_exception=common.ignore_exception,
        )
        def assert_task_id_unchanged():
            current_id = mom_client.get_tasks(app_id)[0]['id']
            assert current_id == original_task_id, "The task ID has changed"

        assert_task_id_unchanged()
コード例 #7
0
def test_mom_with_network_failure_bounce_master():
    """Check that a MoM-launched task keeps its ID across a partition plus master restart.

    Steps: launch a sleep app through Marathon on Marathon (MoM), partition the
    MoM agent and the task's agent, restart the Mesos master while they are
    partitioned, reconnect both agents, then assert that the first task MoM
    reports for the app still has the original task ID.
    """

    # get MoM ip
    mom_ip = common.ip_of_mom()
    print("MoM IP: {}".format(mom_ip))

    app_def = apps.sleep_app()
    app_id = app_def["id"]

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.wait_for_task("marathon-user", app_id.lstrip('/'))
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]["id"]
        task_ip = tasks[0]['host']
        print("\nTask IP: " + task_ip)

    # PR for network partitioning in shakedown makes this better
    # take out the net
    partition_agent(mom_ip)
    partition_agent(task_ip)

    try:
        # wait for a min
        time.sleep(timedelta(minutes=1).total_seconds())

        # bounce master
        shakedown.run_command_on_master("sudo systemctl restart dcos-mesos-master")
    finally:
        # bring the net up -- in a finally block so that a failure while the
        # agents are partitioned does not leave them cut off afterwards
        reconnect_agent(mom_ip)
        reconnect_agent(task_ip)

    time.sleep(timedelta(minutes=1).total_seconds())
    shakedown.wait_for_service_endpoint('marathon-user', timedelta(minutes=10).total_seconds())

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        shakedown.wait_for_task("marathon-user", app_id.lstrip('/'), timedelta(minutes=10).total_seconds())

        # Retried up to 30 times, one second apart, because the task list may
        # not be available immediately after the outage.
        @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0]['id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
コード例 #8
0
ファイル: common.py プロジェクト: guenter/marathon
def delete_all_apps():
    """Remove every app the Marathon client lists, except '/marathon-user'.

    '/marathon-user' is skipped with a printed warning; all other apps are
    removed with the second argument of ``remove_app`` set to True.
    """
    marathon_client = marathon.create_client()
    for app_info in marathon_client.get_apps():
        if app_info['id'] != '/marathon-user':
            marathon_client.remove_app(app_info['id'], True)
            continue
        print('WARNING: not removing marathon-user, because it is special')
コード例 #9
0
def delete_all_apps():
    """Remove every app the Marathon client lists, except '/marathon-user'.

    '/marathon-user' is skipped with a printed warning; all other apps are
    removed with the second argument of ``remove_app`` set to True.
    """
    marathon_client = marathon.create_client()
    for app_info in marathon_client.get_apps():
        if app_info['id'] != '/marathon-user':
            marathon_client.remove_app(app_info['id'], True)
            continue
        print('WARNING: not removing marathon-user, because it is special')
コード例 #10
0
def __marathon_leadership_changed_in_marathon_api(original_leader):
    """Assert that the leader reported by the Marathon API differs from `original_leader`.

    Leader election takes time, and some nodes may not yet know about the new
    leader (resulting in HTTP 502), so this check is meant to be retried.
    NOTE(review): no retry happens inside this function -- presumably the
    caller or a decorator outside this view supplies it; confirm.
    """
    marathon_client = marathon.create_client()
    leader_now = marathon_client.get_leader()
    print('leader according to marathon API: {}'.format(leader_now))
    assert original_leader != leader_now
コード例 #11
0
def stop_all_deployments(noisy=False):
    """Request a stop for every deployment the Marathon client lists.

    A failure to stop one deployment does not abort the loop.

    Args:
        noisy: when truthy, print the exception raised by a failed stop request;
            otherwise the exception is dropped silently.
    """
    marathon_client = marathon.create_client()
    for running in marathon_client.get_deployments():
        try:
            marathon_client.stop_deployment(running['id'])
        except Exception as error:
            if not noisy:
                continue
            print(error)
コード例 #12
0
def cluster_info(mom_name='marathon-user'):
    """Print a short cluster summary.

    Prints the DC/OS version and mode, the number of private agents, the
    Marathon version, and the MoM version when the `mom_name` service is available.

    Args:
        mom_name: service name of the Marathon-on-Marathon instance to probe.
    """
    print("DC/OS: {}, in {} mode".format(shakedown.dcos_version(), shakedown.ee_version()))
    private_agents = shakedown.get_private_agents()
    print("Agents: {}".format(len(private_agents)))
    root_about = marathon.create_client().get_about()
    print("Marathon version: {}".format(root_about.get("version")))

    # Guard clause: nothing more to report when the MoM service is unavailable.
    if not shakedown.service_available_predicate(mom_name):
        print("Marathon MoM not present")
        return

    with shakedown.marathon_on_marathon(mom_name):
        try:
            mom_about = marathon.create_client().get_about()
            print("Marathon MoM version: {}".format(mom_about.get("version")))
        except Exception:
            # Any failure while querying MoM is reported as MoM being absent.
            print("Marathon MoM not present")
コード例 #13
0
def clear_pods():
    """Best-effort removal of all pods the Marathon client lists.

    Each pod is removed with the second argument of ``remove_pod`` set to True,
    followed by ``shakedown.deployment_wait()``. Any exception raised along the
    way is deliberately swallowed.
    """
    try:
        marathon_client = marathon.create_client()
        for pod_info in marathon_client.list_pod():
            marathon_client.remove_pod(pod_info["id"], True)
        shakedown.deployment_wait()
    except Exception:
        return
コード例 #14
0
ファイル: common.py プロジェクト: guenter/marathon
def stop_all_deployments(noisy=False):
    """Request a stop for every deployment the Marathon client lists.

    A failure to stop one deployment does not abort the loop.

    Args:
        noisy: when truthy, print the exception raised by a failed stop request;
            otherwise the exception is dropped silently.
    """
    marathon_client = marathon.create_client()
    for running in marathon_client.get_deployments():
        try:
            marathon_client.stop_deployment(running['id'])
        except Exception as error:
            if not noisy:
                continue
            print(error)
コード例 #15
0
ファイル: common.py プロジェクト: guenter/marathon
def cluster_info(mom_name='marathon-user'):
    """Print a short cluster summary.

    Prints the DC/OS version and mode, the number of private agents, the
    Marathon version, and the MoM version when the `mom_name` service is available.

    Args:
        mom_name: service name of the Marathon-on-Marathon instance to probe.
    """
    print("DC/OS: {}, in {} mode".format(shakedown.dcos_version(), shakedown.ee_version()))
    private_agents = shakedown.get_private_agents()
    print("Agents: {}".format(len(private_agents)))
    root_about = marathon.create_client().get_about()
    print("Marathon version: {}".format(root_about.get("version")))

    # Guard clause: nothing more to report when the MoM service is unavailable.
    if not shakedown.service_available_predicate(mom_name):
        print("Marathon MoM not present")
        return

    with shakedown.marathon_on_marathon(mom_name):
        try:
            mom_about = marathon.create_client().get_about()
            print("Marathon MoM version: {}".format(mom_about.get("version")))
        except Exception:
            # Any failure while querying MoM is reported as MoM being absent.
            print("Marathon MoM not present")
コード例 #16
0
ファイル: common.py プロジェクト: guenter/marathon
def clear_pods():
    """Best-effort removal of all pods the Marathon client lists.

    Each pod is removed with the second argument of ``remove_pod`` set to True,
    followed by ``shakedown.deployment_wait()``. Any exception raised along the
    way is deliberately swallowed.
    """
    try:
        marathon_client = marathon.create_client()
        for pod_info in marathon_client.list_pod():
            marathon_client.remove_pod(pod_info["id"], True)
        shakedown.deployment_wait()
    except Exception:
        return
コード例 #17
0
def deployment_predicate(service_id=None):
    """Return True when there is no (matching) Marathon deployment.

    Args:
        service_id: optional ID to filter on. When None, any deployment counts.
            Otherwise only deployments that list the ID under 'affectedApps'
            or 'affectedPods' count.

    Returns:
        bool: True if no deployment (or no deployment matching `service_id`)
        is reported by the Marathon client.
    """
    deployments = marathon.create_client().get_deployments()
    if service_id is None:
        return not deployments

    # Use .get so an entry lacking one of the keys counts as "does not affect
    # the service" instead of raising KeyError.
    # NOTE(review): presumably Marathon versions without pod support omit
    # 'affectedPods' -- confirm against the API of the versions under test.
    return not any(
        service_id in deployment.get('affectedApps', ())
        or service_id in deployment.get('affectedPods', ())
        for deployment in deployments
    )
コード例 #18
0
def __marathon_leadership_changed_in_marathon_api(original_leader):
    """Assert that the Marathon API reports a leader other than `original_leader`.

    Leader election takes time, and some nodes may not yet know about the new
    leader (resulting in HTTP 502), so this check is meant to be retried.
    NOTE(review): no retry happens inside this function -- presumably the
    caller or a decorator outside this view supplies it; confirm.

    Returns:
        The current leader with everything from the first ':' onwards removed.
    """
    # Leader is returned like this 10.0.6.88:8080 - we want just the IP
    leader_address = marathon.create_client().get_leader()
    current_leader, _, _ = leader_address.partition(':')
    print('leader according to marathon API: {}'.format(current_leader))
    assert original_leader != current_leader
    return current_leader
コード例 #19
0
ファイル: common.py プロジェクト: guenter/marathon
def deployment_predicate(service_id=None):
    """Return True when there is no (matching) Marathon deployment.

    Args:
        service_id: optional ID to filter on. When None, any deployment counts.
            Otherwise only deployments that list the ID under 'affectedApps'
            or 'affectedPods' count.

    Returns:
        bool: True if no deployment (or no deployment matching `service_id`)
        is reported by the Marathon client.
    """
    deployments = marathon.create_client().get_deployments()
    if service_id is None:
        return not deployments

    # Use .get so an entry lacking one of the keys counts as "does not affect
    # the service" instead of raising KeyError.
    # NOTE(review): presumably Marathon versions without pod support omit
    # 'affectedPods' -- confirm against the API of the versions under test.
    return not any(
        service_id in deployment.get('affectedApps', ())
        or service_id in deployment.get('affectedPods', ())
        for deployment in deployments
    )
コード例 #20
0
ファイル: common.py プロジェクト: guenter/marathon
def __marathon_leadership_changed_in_marathon_api(original_leader):
    """Assert that the Marathon API reports a leader other than `original_leader`.

    Leader election takes time, and some nodes may not yet know about the new
    leader (resulting in HTTP 502), so this check is meant to be retried.
    NOTE(review): no retry happens inside this function -- presumably the
    caller or a decorator outside this view supplies it; confirm.

    Returns:
        The current leader with everything from the first ':' onwards removed.
    """
    # Leader is returned like this 10.0.6.88:8080 - we want just the IP
    leader_address = marathon.create_client().get_leader()
    current_leader, _, _ = leader_address.partition(':')
    print('leader according to marathon API: {}'.format(current_leader))
    assert original_leader != current_leader
    return current_leader
コード例 #21
0
ファイル: common.py プロジェクト: wesleyhuang2014/marathon
def deployments_for(service_id=None):
    """Return the Marathon deployments, optionally restricted to one service.

    Args:
        service_id: optional ID to filter on. When None, every deployment is
            returned. Otherwise only deployments that list the ID under
            'affectedApps' or 'affectedPods' are returned.

    Returns:
        The deployments reported by the Marathon client, or a filtered list.
    """
    deployments = marathon.create_client().get_deployments()
    if service_id is None:
        return deployments

    # Use .get so an entry lacking one of the keys is skipped instead of
    # raising KeyError.
    # NOTE(review): presumably Marathon versions without pod support omit
    # 'affectedPods' -- confirm against the API of the versions under test.
    return [
        deployment for deployment in deployments
        if (service_id in deployment.get('affectedApps', ())
            or service_id in deployment.get('affectedPods', ()))
    ]
コード例 #22
0
def simple_sleep_app(name):
    """Deploy the sleep app on the MoM named `name` and report whether its task is found.

    Args:
        name: MoM service name, used both to select the Marathon instance and
            to look up the service task.

    Returns:
        bool: True when ``shakedown.get_service_task`` returns something other than None.
    """
    # Deploy a simple sleep app in the MoM-EE
    with shakedown.marathon_on_marathon(name=name):
        mom_client = marathon.create_client()

        sleep_app = apps.sleep_app()
        mom_client.add_app(sleep_app)
        shakedown.deployment_wait()

        # The task lookup uses the app ID without its leading slash.
        task_name = sleep_app["id"].lstrip("/")
        found = shakedown.get_service_task(name, task_name)
        print('MoM-EE tasks: {}'.format(found))
        return found is not None
コード例 #23
0
def remove_mom_ee():
    """Delete the apps of every available MoM-EE variant, then remove the MoM-EE app.

    Versions '1.4' and '1.3' are checked in 'strict', 'permissive' and
    'disabled' mode. For each variant whose endpoint is available, its apps
    are deleted. Afterwards the MOM_EE_NAME app is removed from Marathon.
    """
    for version in ('1.4', '1.3'):
        for mode in ('strict', 'permissive', 'disabled'):
            endpoint = mom_ee_endpoint(version, mode)
            if not shakedown.service_available_predicate(endpoint):
                continue
            print('Removing {}...'.format(endpoint))
            with shakedown.marathon_on_marathon(name=endpoint):
                shakedown.delete_all_apps()

    marathon.create_client().remove_app(MOM_EE_NAME)
    shakedown.deployment_wait()
    print('Successfully removed {}'.format(MOM_EE_NAME))
コード例 #24
0
def deployments_for(service_id=None, deployment_id=None):
    """Return the Marathon deployments, optionally filtered by deployment or service ID.

    Args:
        service_id: when truthy (and `deployment_id` is not), keep only
            deployments that list this ID under 'affectedApps' or 'affectedPods'.
        deployment_id: when truthy, keep only deployments whose 'id' equals it;
            this filter takes precedence over `service_id`.

    Returns:
        The deployments reported by the Marathon client, or a filtered list.
    """
    deployments = marathon.create_client().get_deployments()
    if deployment_id:
        return [
            deployment for deployment in deployments
            if deployment_id == deployment["id"]
        ]
    if service_id:
        # Use .get so an entry lacking one of the keys is skipped instead of
        # raising KeyError.
        # NOTE(review): presumably Marathon versions without pod support omit
        # 'affectedPods' -- confirm against the API of the versions under test.
        return [
            deployment for deployment in deployments
            if (service_id in deployment.get('affectedApps', ())
                or service_id in deployment.get('affectedPods', ()))
        ]
    return deployments
コード例 #25
0
def test_framework_unavailable_on_mom():
    """Check that no 'pyfw' service endpoint appears for a framework app launched via MoM.

    The fake-framework app has the elements necessary to create a service
    endpoint in DCOS. The test passes only when waiting for that endpoint fails.
    """

    framework_app = apps.fake_framework()
    framework_id = framework_app["id"]

    with shakedown.marathon_on_marathon():
        mom_client = marathon.create_client()
        mom_client.add_app(framework_app)
        common.deployment_wait(service_id=framework_id)

    try:
        common.wait_for_service_endpoint('pyfw', 15)
    except Exception:
        # Expected outcome: waiting for the endpoint raised.
        return
    assert False, 'MoM shoud NOT create a service endpoint'
コード例 #26
0
def test_framework_unavailable_on_mom():
    """Check that no 'pyfw' service endpoint appears for a framework app launched via MoM.

    The fake-framework app has the elements necessary to create a service
    endpoint in DCOS. The test passes only when waiting for that endpoint fails.
    """

    app_def = apps.fake_framework()

    with shakedown.marathon_on_marathon():
        common.delete_all_apps_wait()
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()

    try:
        shakedown.wait_for_service_endpoint('pyfw', 15)
    except Exception:
        # Expected outcome: waiting for the endpoint raised. Catching Exception
        # rather than using a bare except lets KeyboardInterrupt / SystemExit
        # propagate instead of being counted as a pass.
        pass
    else:
        assert False, 'MoM shoud NOT create a service endpoint'
コード例 #27
0
def test_mom_when_mom_agent_bounced():
    """Start an app through MoM, restart the agent MoM runs on, and expect the same task ID."""

    sleep_app = apps.sleep_app()
    app_id = sleep_app["id"]
    mom_ip = common.ip_of_mom()
    # Pin the app to an agent other than MoM's, so only MoM's agent is restarted.
    common.pin_to_host(sleep_app, common.ip_other_than_mom())

    with shakedown.marathon_on_marathon():
        mom_client = marathon.create_client()
        mom_client.add_app(sleep_app)
        shakedown.deployment_wait()
        original_task_id = mom_client.get_tasks(app_id)[0]['id']

        shakedown.restart_agent(mom_ip)

        # Up to 30 attempts, one second apart.
        @retrying.retry(
            wait_fixed=1000,
            stop_max_attempt_number=30,
            retry_on_exception=common.ignore_exception,
        )
        def assert_task_id_unchanged():
            current_id = mom_client.get_tasks(app_id)[0]['id']
            assert current_id == original_task_id, "The task ID has changed"

        assert_task_id_unchanged()
コード例 #28
0
def is_mom_ee_deployed():
    """Return True when an app with ID '/<MOM_EE_NAME>' is among the Marathon apps."""
    expected_id = '/{}'.format(MOM_EE_NAME)
    marathon_client = marathon.create_client()
    for app_info in marathon_client.get_apps():
        if app_info['id'] == expected_id:
            return True
    return False
コード例 #29
0
def clean_up_marathon(parent_group="/"):
    """Force-remove `parent_group` and wait on the deployment that the removal returns.

    Args:
        parent_group: ID of the group to remove; defaults to the root group "/".
    """
    removal = marathon.create_client().remove_group(parent_group, force=True)
    removal_deployment_id = removal["deploymentId"]
    deployment_wait(deployment_id=removal_deployment_id)
コード例 #30
0
ファイル: common.py プロジェクト: wesleyhuang2014/marathon
def clean_up_marathon():
    """Force-remove the root group "/" and then call ``deployment_wait()``."""
    marathon.create_client().remove_group("/", force=True)
    deployment_wait()
コード例 #31
0
def marathon_version():
    """Return the "version" entry of Marathon's about-info wrapped in a LooseVersion."""
    about_info = marathon.create_client().get_about()
    # 1.3.9 or 1.4.0-RC8
    version_string = about_info.get("version")
    return LooseVersion(version_string)
コード例 #32
0
ファイル: common.py プロジェクト: guenter/marathon
def delete_all_groups():
    """Remove each group returned by the Marathon client's ``get_groups()``, by ID."""
    marathon_client = marathon.create_client()
    for group_info in marathon_client.get_groups():
        group_id = group_info["id"]
        marathon_client.remove_group(group_id)
コード例 #33
0
def delete_all_groups():
    """Remove each group returned by the Marathon client's ``get_groups()``, by ID."""
    marathon_client = marathon.create_client()
    for group_info in marathon_client.get_groups():
        group_id = group_info["id"]
        marathon_client.remove_group(group_id)
コード例 #34
0
ファイル: common.py プロジェクト: guenter/marathon
def marathon_version():
    """Return the "version" entry of Marathon's about-info wrapped in a LooseVersion."""
    about_info = marathon.create_client().get_about()
    # 1.3.9 or 1.4.0-RC8
    version_string = about_info.get("version")
    return LooseVersion(version_string)