def install_app_from_file(app_name: str, app_def_path: str) -> (bool, str):
    """
    Installs a marathon app using the path to an app definition.

    Args:
        app_def_path: Path to app definition

    Returns:
        (bool, str) tuple: Boolean indicates success of install attempt. String indicates
        error message if install attempt failed.
    """

    cmd = "marathon app add {}".format(app_def_path)
    log.info("Running %s", cmd)
    rc, stdout, stderr = sdk_cmd.run_raw_cli(cmd)

    if rc or stderr:
        log.error("returncode=%s stdout=%s stderr=%s", rc, stdout, stderr)
        return False, stderr

    if "Created deployment" not in stdout:
        stderr = "'Created deployment' not in STDOUT"
        log.error(stderr)
        return False, stderr

    log.info("Waiting for app %s to be running...", app_name)
    shakedown.wait_for_task("marathon", app_name, TIMEOUT_SECONDS)
    return True, ""
def test_mom_when_mom_process_killed():
    """Launched a task from MoM then killed MoM."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]['id']

        shakedown.kill_process_on_host(common.ip_of_mom(), 'marathon-assembly')
        shakedown.wait_for_task('marathon', 'marathon-user', 300)
        shakedown.wait_for_service_endpoint('marathon-user')

        @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0]['id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
Example #3
0
def install_app_from_file(app_name: str, app_def_path: str) -> (bool, str):
    """
    Installs a marathon app using the path to an app definition.

    Args:
        app_def_path: Path to app definition

    Returns:
        (bool, str) tuple: Boolean indicates success of install attempt. String indicates
        error message if install attempt failed.
    """

    cmd = "marathon app add {}".format(app_def_path)
    log.info("Running %s", cmd)
    rc, stdout, stderr = sdk_cmd.run_raw_cli(cmd)

    if rc or stderr:
        log.error("returncode=%s stdout=%s stderr=%s", rc, stdout, stderr)
        return False, stderr

    if "Created deployment" not in stdout:
        stderr = "'Created deployment' not in STDOUT"
        log.error(stderr)
        return False, stderr

    log.info("Waiting for app %s to be running...", app_name)
    shakedown.wait_for_task("marathon", app_name)
    return True, ""
Example #4
0
def test_mom_when_mom_process_killed():
    """Launched a task from MoM then killed MoM."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]['id']

        common.kill_process_on_host(common.ip_of_mom(), 'marathon-assembly')
        shakedown.wait_for_task('marathon', 'marathon-user', 300)
        common.wait_for_service_endpoint('marathon-user', path="ping")

        @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0]['id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
def test_mom_with_network_failure_bounce_master():
    """Marathon on Marathon (MoM) tests for DC/OS with network failures simulated by knocking out ports."""

    # get MoM ip
    mom_ip = common.ip_of_mom()
    logger.info("MoM IP: {}".format(mom_ip))

    app_def = apps.sleep_app()
    app_id = app_def["id"]

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.wait_for_task("marathon-user", app_id.lstrip('/'))
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]["id"]
        task_ip = tasks[0]['host']
        logger.info("\nTask IP: " + task_ip)

    # PR for network partitioning in shakedown makes this better
    # take out the net
    partition_agent(mom_ip)
    partition_agent(task_ip)

    # wait for a min
    time.sleep(timedelta(minutes=1).total_seconds())

    # bounce master
    shakedown.run_command_on_master("sudo systemctl restart dcos-mesos-master")

    # bring the net up
    reconnect_agent(mom_ip)
    reconnect_agent(task_ip)

    time.sleep(timedelta(minutes=1).total_seconds())
    common.wait_for_service_endpoint('marathon-user',
                                     timedelta(minutes=10).total_seconds(),
                                     path="ping")

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        shakedown.wait_for_task("marathon-user", app_id.lstrip('/'),
                                timedelta(minutes=10).total_seconds())

        @retrying.retry(wait_fixed=1000,
                        stop_max_attempt_number=30,
                        retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0][
                'id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
def test_mom_with_network_failure_bounce_master():
    """Marathon on Marathon (MoM) tests for DC/OS with network failures simulated by
    knocking out ports
    """

    # get MoM ip
    mom_ip = ip_of_mom()
    print("MoM IP: {}".format(mom_ip))

    app_def = get_resource("{}/large-sleep.json".format(fixture_dir()))

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.wait_for_task("marathon-user", "sleep")
        tasks = client.get_tasks('sleep')
        original_sleep_task_id = tasks[0]["id"]
        task_ip = tasks[0]['host']
        print("\nTask IP: " + task_ip)

    # PR for network partitioning in shakedown makes this better
    # take out the net
    partition_agent(mom_ip)
    partition_agent(task_ip)

    # wait for a min
    time.sleep(timedelta(minutes=1).total_seconds())

    # bounce master
    shakedown.run_command_on_master("sudo systemctl restart dcos-mesos-master")

    # bring the net up
    reconnect_agent(mom_ip)
    reconnect_agent(task_ip)

    time.sleep(timedelta(minutes=1).total_seconds())
    shakedown.wait_for_service_endpoint('marathon-user',
                                        timedelta(minutes=10).total_seconds())

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        shakedown.wait_for_task("marathon-user", "sleep",
                                timedelta(minutes=10).total_seconds())
        tasks = client.get_tasks('sleep')
        current_sleep_task_id = tasks[0]["id"]

    assert current_sleep_task_id == original_sleep_task_id, "Task ID shouldn't change"
def test_mom_with_network_failure_bounce_master():
    """Marathon on Marathon (MoM) tests for DC/OS with network failures simulated by knocking out ports."""

    # get MoM ip
    mom_ip = common.ip_of_mom()
    print("MoM IP: {}".format(mom_ip))

    app_def = apps.sleep_app()
    app_id = app_def["id"]

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.wait_for_task("marathon-user", app_id.lstrip('/'))
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]["id"]
        task_ip = tasks[0]['host']
        print("\nTask IP: " + task_ip)

    # PR for network partitioning in shakedown makes this better
    # take out the net
    partition_agent(mom_ip)
    partition_agent(task_ip)

    # wait for a min
    time.sleep(timedelta(minutes=1).total_seconds())

    # bounce master
    shakedown.run_command_on_master("sudo systemctl restart dcos-mesos-master")

    # bring the net up
    reconnect_agent(mom_ip)
    reconnect_agent(task_ip)

    time.sleep(timedelta(minutes=1).total_seconds())
    shakedown.wait_for_service_endpoint('marathon-user', timedelta(minutes=10).total_seconds())

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        shakedown.wait_for_task("marathon-user", app_id.lstrip('/'), timedelta(minutes=10).total_seconds())

        @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0]['id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
def test_mom_with_network_failure_bounce_master():
    """Marathon on Marathon (MoM) tests for DC/OS with network failures simulated by
    knocking out ports
    """

    # get MoM ip
    mom_ip = ip_of_mom()
    print("MoM IP: {}".format(mom_ip))

    app_def = get_resource("{}/large-sleep.json".format(fixture_dir()))

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.wait_for_task("marathon-user", "sleep")
        tasks = client.get_tasks('sleep')
        original_sleep_task_id = tasks[0]["id"]
        task_ip = tasks[0]['host']
        print("\nTask IP: " + task_ip)

    # PR for network partitioning in shakedown makes this better
    # take out the net
    partition_agent(mom_ip)
    partition_agent(task_ip)

    # wait for a min
    time.sleep(timedelta(minutes=1).total_seconds())

    # bounce master
    shakedown.run_command_on_master("sudo systemctl restart dcos-mesos-master")

    # bring the net up
    reconnect_agent(mom_ip)
    reconnect_agent(task_ip)

    time.sleep(timedelta(minutes=1).total_seconds())
    shakedown.wait_for_service_endpoint('marathon-user', timedelta(minutes=10).total_seconds())

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        shakedown.wait_for_task("marathon-user", "sleep", timedelta(minutes=10).total_seconds())
        tasks = client.get_tasks('sleep')
        current_sleep_task_id = tasks[0]["id"]

    assert current_sleep_task_id == original_sleep_task_id, "Task ID shouldn't change"
def test_mom_when_mom_process_killed():
    """ Launched a task from MoM then killed MoM.
    """
    app_def = app('agent-failure')
    host = ip_other_than_mom()
    pin_to_host(app_def, host)
    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()
        tasks = client.get_tasks('/agent-failure')
        original_task_id = tasks[0]['id']

        shakedown.kill_process_on_host(ip_of_mom(), 'marathon-assembly')
        shakedown.wait_for_task('marathon', 'marathon-user', 300)
        shakedown.wait_for_service_endpoint('marathon-user')

        tasks = client.get_tasks('/agent-failure')
        tasks[0]['id'] == original_task_id
Example #10
0
def install_app_from_file(app_name: str, app_def_path: str) -> (bool, str):
    """
    Installs a marathon app using the path to an app definition.

    Args:
        app_def_path: Path to app definition

    Returns:
        (bool, str) tuple: Boolean indicates success of install attempt. String indicates
        error message if install attempt failed.
    """
    output = sdk_cmd.run_cli("{cmd} {file_path}".format(
        cmd="marathon app add ", file_path=app_def_path))
    if "Created deployment" not in output:
        return 1, output

    log.info("Waiting for app to be running...")
    shakedown.wait_for_task("marathon", app_name)
    return 0, ""
def test_mom_when_mom_process_killed():
    """ Launched a task from MoM then killed MoM.
    """
    app_def = app('agent-failure')
    host = ip_other_than_mom()
    pin_to_host(app_def, host)
    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()
        tasks = client.get_tasks('/agent-failure')
        original_task_id = tasks[0]['id']

        shakedown.kill_process_on_host(ip_of_mom(), 'marathon-assembly')
        shakedown.wait_for_task('marathon', 'marathon-user', 300)
        shakedown.wait_for_service_endpoint('marathon-user')

        tasks = client.get_tasks('/agent-failure')
        tasks[0]['id'] == original_task_id
Example #12
0
def test_mom_with_network_failure():
    """Marathon on Marathon (MoM) tests for DC/OS with network failures
    simulated by knocking out ports
    """

    # get MoM ip
    mom_ip = ip_of_mom()
    print("MoM IP: {}".format(mom_ip))

    app_def = get_resource("{}/large-sleep.json".format(fixture_dir()))

    with marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.wait_for_task("marathon-user", "sleep")
        tasks = client.get_tasks('sleep')
        original_sleep_task_id = tasks[0]["id"]
        task_ip = tasks[0]['host']

    # PR for network partitioning in shakedown makes this better
    # take out the net
    partition_agent(mom_ip)
    partition_agent(task_ip)

    # wait for a min
    service_delay()

    # bring the net up
    reconnect_agent(mom_ip)
    reconnect_agent(task_ip)

    service_delay()
    shakedown.wait_for_service_endpoint(PACKAGE_APP_ID)
    shakedown.wait_for_task("marathon-user", "sleep")

    with marathon_on_marathon():
        client = marathon.create_client()
        shakedown.wait_for_task("marathon-user", "sleep")
        tasks = client.get_tasks('sleep')
        current_sleep_task_id = tasks[0]["id"]

    assert current_sleep_task_id == original_sleep_task_id, "Task ID shouldn't change"