def test_mom_with_network_failure_bounce_master():
    """Marathon on Marathon (MoM) tests for DC/OS with network failures simulated by knocking out ports."""

    # get MoM ip
    mom_ip = common.ip_of_mom()
    logger.info("MoM IP: {}".format(mom_ip))

    app_def = apps.sleep_app()
    app_id = app_def["id"]

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.wait_for_task("marathon-user", app_id.lstrip('/'))
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]["id"]
        task_ip = tasks[0]['host']
        logger.info("\nTask IP: " + task_ip)

    # PR for network partitioning in shakedown makes this better
    # take out the net
    partition_agent(mom_ip)
    partition_agent(task_ip)

    # wait for a min
    time.sleep(timedelta(minutes=1).total_seconds())

    # bounce master
    shakedown.run_command_on_master("sudo systemctl restart dcos-mesos-master")

    # bring the net up
    reconnect_agent(mom_ip)
    reconnect_agent(task_ip)

    time.sleep(timedelta(minutes=1).total_seconds())
    common.wait_for_service_endpoint('marathon-user',
                                     timedelta(minutes=10).total_seconds(),
                                     path="ping")

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        shakedown.wait_for_task("marathon-user", app_id.lstrip('/'),
                                timedelta(minutes=10).total_seconds())

        @retrying.retry(wait_fixed=1000,
                        stop_max_attempt_number=30,
                        retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0][
                'id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
Ejemplo n.º 2
0
def is_mom_version(version):
    same_version = False
    max_times = 10
    check_complete = False
    while not check_complete:
        try:
            max_times == 1
            with shakedown.marathon_on_marathon():
                client = marathon.create_client()
                about = client.get_about()
                same_version = version == about.get("version")
                check_complete = True
        except DCOSException:
            # if marathon doesn't exist yet
            pass
            return False
        except Exception as e:
            if max_times > 0:
                pass
                # this failure only happens at very high scale
                # it takes a lot of time to recover
                wait_for_service_endpoint('marathon-user', 600)
            else:
                return False
    return same_version
Ejemplo n.º 3
0
def marathon_service_name():

    shakedown.wait_for_service_endpoint('marathon-user', timedelta(minutes=5).total_seconds())
    with shakedown.marathon_on_marathon():
        yield 'marathon-user'
        shakedown.wait_for_service_endpoint('marathon-user', timedelta(minutes=5).total_seconds())
        clear_marathon()
Ejemplo n.º 4
0
def is_mom_version(version):
    same_version = False
    max_times = 10
    check_complete = False
    while not check_complete:
        try:
            max_times == 1
            with shakedown.marathon_on_marathon():
                client = marathon.create_client()
                about = client.get_about()
                same_version = version == about.get("version")
                check_complete = True
        except DCOSException:
            # if marathon doesn't exist yet
            pass
            return False
        except Exception as e:
            if max_times > 0:
                pass
                # this failure only happens at very high scale
                # it takes a lot of time to recover
                wait_for_service_endpoint('marathon-user', 600)
            else:
                return False
    return same_version
def test_mom_when_mom_agent_bounced():
    """Launch an app from MoM and restart the node MoM is on."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]
    mom_ip = common.ip_of_mom()
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        common.deployment_wait(service_id=app_id)
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]['id']

        shakedown.restart_agent(mom_ip)

        @retrying.retry(wait_fixed=1000,
                        stop_max_attempt_number=30,
                        retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0][
                'id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
Ejemplo n.º 6
0
def test_mom_when_mom_process_killed():
    """Launched a task from MoM then killed MoM."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]['id']

        shakedown.kill_process_on_host(common.ip_of_mom(), 'marathon-assembly')
        shakedown.wait_for_task('marathon', 'marathon-user', 300)
        shakedown.wait_for_service_endpoint('marathon-user')

        @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0]['id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
Ejemplo n.º 7
0
def marathon_service_name():

    common.ensure_mom()
    with shakedown.marathon_on_marathon():
        yield 'marathon-user'
        shakedown.wait_for_service_endpoint('marathon-user')
        clear_marathon()
def marathon_service_name():

    shakedown.wait_for_service_endpoint('marathon-user', timedelta(minutes=5).total_seconds())
    with shakedown.marathon_on_marathon():
        yield 'marathon-user'
        shakedown.wait_for_service_endpoint('marathon-user', timedelta(minutes=5).total_seconds())
        clear_marathon()
Ejemplo n.º 9
0
def test_mom_when_mom_process_killed():
    """Launched a task from MoM then killed MoM."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]['id']

        common.kill_process_on_host(common.ip_of_mom(), 'marathon-assembly')
        shakedown.wait_for_task('marathon', 'marathon-user', 300)
        common.wait_for_service_endpoint('marathon-user', path="ping")

        @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0]['id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
Ejemplo n.º 10
0
def marathon_service_name():

    common.ensure_mom()
    with shakedown.marathon_on_marathon():
        yield 'marathon-user'
        shakedown.wait_for_service_endpoint('marathon-user')
        clear_marathon()
def setup_module(module):
    common.ensure_mom()
    shakedown.wait_for_service_endpoint('marathon-user',
                                        timedelta(minutes=5).total_seconds())
    common.cluster_info()
    with shakedown.marathon_on_marathon():
        clear_marathon()
Ejemplo n.º 12
0
def test_mom_with_network_failure_bounce_master():
    """Marathon on Marathon (MoM) tests for DC/OS with network failures simulated by knocking out ports."""

    # get MoM ip
    mom_ip = common.ip_of_mom()
    print("MoM IP: {}".format(mom_ip))

    app_def = apps.sleep_app()
    app_id = app_def["id"]

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.wait_for_task("marathon-user", app_id.lstrip('/'))
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]["id"]
        task_ip = tasks[0]['host']
        print("\nTask IP: " + task_ip)

    # PR for network partitioning in shakedown makes this better
    # take out the net
    partition_agent(mom_ip)
    partition_agent(task_ip)

    # wait for a min
    time.sleep(timedelta(minutes=1).total_seconds())

    # bounce master
    shakedown.run_command_on_master("sudo systemctl restart dcos-mesos-master")

    # bring the net up
    reconnect_agent(mom_ip)
    reconnect_agent(task_ip)

    time.sleep(timedelta(minutes=1).total_seconds())
    shakedown.wait_for_service_endpoint('marathon-user', timedelta(minutes=10).total_seconds())

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        shakedown.wait_for_task("marathon-user", app_id.lstrip('/'), timedelta(minutes=10).total_seconds())

        @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0]['id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
def test_mom_with_network_failure_bounce_master():
    """Marathon on Marathon (MoM) tests for DC/OS with network failures simulated by
    knocking out ports
    """

    # get MoM ip
    mom_ip = ip_of_mom()
    print("MoM IP: {}".format(mom_ip))

    app_def = get_resource("{}/large-sleep.json".format(fixture_dir()))

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.wait_for_task("marathon-user", "sleep")
        tasks = client.get_tasks('sleep')
        original_sleep_task_id = tasks[0]["id"]
        task_ip = tasks[0]['host']
        print("\nTask IP: " + task_ip)

    # PR for network partitioning in shakedown makes this better
    # take out the net
    partition_agent(mom_ip)
    partition_agent(task_ip)

    # wait for a min
    time.sleep(timedelta(minutes=1).total_seconds())

    # bounce master
    shakedown.run_command_on_master("sudo systemctl restart dcos-mesos-master")

    # bring the net up
    reconnect_agent(mom_ip)
    reconnect_agent(task_ip)

    time.sleep(timedelta(minutes=1).total_seconds())
    shakedown.wait_for_service_endpoint('marathon-user',
                                        timedelta(minutes=10).total_seconds())

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        shakedown.wait_for_task("marathon-user", "sleep",
                                timedelta(minutes=10).total_seconds())
        tasks = client.get_tasks('sleep')
        current_sleep_task_id = tasks[0]["id"]

    assert current_sleep_task_id == original_sleep_task_id, "Task ID shouldn't change"
Ejemplo n.º 14
0
def wait_for_marathon_user_and_cleanup():
    print("entering wait_for_marathon_user_and_cleanup fixture")
    shakedown.wait_for_service_endpoint('marathon-user', timedelta(minutes=5).total_seconds())
    with shakedown.marathon_on_marathon():
        yield
        shakedown.wait_for_service_endpoint('marathon-user', timedelta(minutes=5).total_seconds())
        common.clean_up_marathon()
    print("exiting wait_for_marathon_user_and_cleanup fixture")
def test_mom_with_network_failure_bounce_master():
    """Marathon on Marathon (MoM) tests for DC/OS with network failures simulated by
    knocking out ports
    """

    # get MoM ip
    mom_ip = ip_of_mom()
    print("MoM IP: {}".format(mom_ip))

    app_def = get_resource("{}/large-sleep.json".format(fixture_dir()))

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.wait_for_task("marathon-user", "sleep")
        tasks = client.get_tasks('sleep')
        original_sleep_task_id = tasks[0]["id"]
        task_ip = tasks[0]['host']
        print("\nTask IP: " + task_ip)

    # PR for network partitioning in shakedown makes this better
    # take out the net
    partition_agent(mom_ip)
    partition_agent(task_ip)

    # wait for a min
    time.sleep(timedelta(minutes=1).total_seconds())

    # bounce master
    shakedown.run_command_on_master("sudo systemctl restart dcos-mesos-master")

    # bring the net up
    reconnect_agent(mom_ip)
    reconnect_agent(task_ip)

    time.sleep(timedelta(minutes=1).total_seconds())
    shakedown.wait_for_service_endpoint('marathon-user')
    shakedown.wait_for_task("marathon-user", "sleep")

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        shakedown.wait_for_task("marathon-user", "sleep")
        tasks = client.get_tasks('sleep')
        current_sleep_task_id = tasks[0]["id"]

    assert current_sleep_task_id == original_sleep_task_id, "Task ID shouldn't change"
Ejemplo n.º 16
0
def wait_for_marathon_user_and_cleanup():
    common.wait_for_service_endpoint('marathon-user',
                                     timedelta(minutes=5).total_seconds(),
                                     path="ping")
    with shakedown.marathon_on_marathon():
        yield
        common.wait_for_service_endpoint('marathon-user',
                                         timedelta(minutes=5).total_seconds(),
                                         path="ping")
        common.clean_up_marathon()
def teardown_module(module):
    with shakedown.marathon_on_marathon():
        try:
            clear_marathon()
        except:
            pass
    # Uninstall MoM
    shakedown.uninstall_package_and_wait('marathon')
    shakedown.delete_zk_node('universe/marathon-user')
    # Remove everything from root marathon
    clear_marathon()
def teardown_module(module):
    with shakedown.marathon_on_marathon():
        try:
            clear_marathon()
        except:
            pass
    # Uninstall MoM
    shakedown.uninstall_package_and_wait('marathon')
    shakedown.delete_zk_node('universe/marathon-user')
    # Remove everything from root marathon
    clear_marathon()
Ejemplo n.º 19
0
def wait_for_marathon_user_and_cleanup():
    print("entering wait_for_marathon_user_and_cleanup fixture")
    shakedown.wait_for_service_endpoint('marathon-user',
                                        timedelta(minutes=5).total_seconds())
    with shakedown.marathon_on_marathon():
        yield
        shakedown.wait_for_service_endpoint(
            'marathon-user',
            timedelta(minutes=5).total_seconds())
        common.clean_up_marathon()
    print("exiting wait_for_marathon_user_and_cleanup fixture")
Ejemplo n.º 20
0
def teardown_module(module):
    with shakedown.marathon_on_marathon():
        try:
            common.clean_up_marathon()
        except Exception:
            pass

    shakedown.uninstall_package_and_wait('marathon')
    shakedown.delete_zk_node('universe/marathon-user')

    # Remove everything from root marathon
    common.clean_up_marathon()
def simple_sleep_app(name):
    # Deploy a simple sleep app in the MoM-EE
    with shakedown.marathon_on_marathon(name=name):
        client = marathon.create_client()

        app_def = apps.sleep_app()
        client.add_app(app_def)
        shakedown.deployment_wait()

        tasks = shakedown.get_service_task(name, app_def["id"].lstrip("/"))
        print('MoM-EE tasks: {}'.format(tasks))
        return tasks is not None
def remove_mom_ee():
    mom_ee_versions = [('1.4', 'strict'), ('1.4', 'permissive'),
                       ('1.4', 'disabled'), ('1.3', 'strict'),
                       ('1.3', 'permissive'), ('1.3', 'disabled')]
    for mom_ee in mom_ee_versions:
        endpoint = mom_ee_endpoint(mom_ee[0], mom_ee[1])
        if shakedown.service_available_predicate(endpoint):
            print('Removing {}...'.format(endpoint))
            with shakedown.marathon_on_marathon(name=endpoint):
                shakedown.delete_all_apps()

    client = marathon.create_client()
    client.remove_app(MOM_EE_NAME)
    shakedown.deployment_wait()
    print('Successfully removed {}'.format(MOM_EE_NAME))
Ejemplo n.º 23
0
def test_scaling_load(master_count, job_count, single_use: bool, run_delay,
                      cpu_quota, work_duration, mom, external_volume: bool,
                      scenario) -> None:
    """Launch a load test scenario. This does not verify the results
    of the test, but does ensure the instances and jobs were created.

    The installation is run in threads, but the job creation and
    launch is then done serially after all Jenkins instances have
    completed installation.

    Args:
        master_count: Number of Jenkins masters or instances
        job_count: Number of Jobs on each Jenkins master
        single_use: Mesos Single-Use Agent on (true) or off (false)
        run_delay: Jobs should run every X minute(s)
        cpu_quota: CPU quota (0.0 to disable)
        work_duration: Time, in seconds, for generated jobs to sleep
        mom: Marathon on Marathon instance name
        external_volume: External volume on rexray (true) or local volume (false)
    """
    with shakedown.marathon_on_marathon(mom):
        if cpu_quota is not 0.0:
            _setup_quota(SHARED_ROLE, cpu_quota)

    masters = [
        "jenkins{}".format(sdk_utils.random_string())
        for _ in range(0, int(master_count))
    ]
    # launch Jenkins services
    install_threads = _spawn_threads(masters,
                                     _install_jenkins,
                                     external_volume=external_volume,
                                     mom=mom,
                                     daemon=True)
    thread_failures = _wait_and_get_failures(install_threads,
                                             timeout=DEPLOY_TIMEOUT)
    thread_names = [x.name for x in thread_failures]

    # the rest of the commands require a running Jenkins instance
    deployed_masters = [x for x in masters if x not in thread_names]
    job_threads = _spawn_threads(deployed_masters,
                                 _create_jobs,
                                 jobs=job_count,
                                 single=single_use,
                                 delay=run_delay,
                                 duration=work_duration,
                                 scenario=scenario)
    _wait_on_threads(job_threads, JOB_RUN_TIMEOUT)
Ejemplo n.º 24
0
def cluster_info(mom_name='marathon-user'):
    print("DC/OS: {}, in {} mode".format(shakedown.dcos_version(), shakedown.ee_version()))
    agents = shakedown.get_private_agents()
    print("Agents: {}".format(len(agents)))
    client = marathon.create_client()
    about = client.get_about()
    print("Marathon version: {}".format(about.get("version")))

    if shakedown.service_available_predicate(mom_name):
        with shakedown.marathon_on_marathon(mom_name):
            try:
                client = marathon.create_client()
                about = client.get_about()
                print("Marathon MoM version: {}".format(about.get("version")))
            except Exception:
                print("Marathon MoM not present")
    else:
        print("Marathon MoM not present")
def test_framework_unavailable_on_mom():
    """Launches an app that has elements necessary to create a service endpoint in DCOS.
       This test confirms that the endpoint is not created when launched with MoM.
    """

    app_def = apps.fake_framework()
    app_id = app_def["id"]

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        common.deployment_wait(service_id=app_id)
    try:
        common.wait_for_service_endpoint('pyfw', 15)
    except Exception:
        pass
    else:
        assert False, 'MoM shoud NOT create a service endpoint'
Ejemplo n.º 26
0
def cluster_info(mom_name='marathon-user'):
    print("DC/OS: {}, in {} mode".format(shakedown.dcos_version(), shakedown.ee_version()))
    agents = shakedown.get_private_agents()
    print("Agents: {}".format(len(agents)))
    client = marathon.create_client()
    about = client.get_about()
    print("Marathon version: {}".format(about.get("version")))

    if shakedown.service_available_predicate(mom_name):
        with shakedown.marathon_on_marathon(mom_name):
            try:
                client = marathon.create_client()
                about = client.get_about()
                print("Marathon MoM version: {}".format(about.get("version")))
            except Exception:
                print("Marathon MoM not present")
    else:
        print("Marathon MoM not present")
Ejemplo n.º 27
0
def test_framework_unavailable_on_mom():
    """Launches an app that has elements necessary to create a service endpoint in DCOS.
       This test confirms that the endpoint is not created when launched with MoM.
    """

    app_def = apps.fake_framework()

    with shakedown.marathon_on_marathon():
        common.delete_all_apps_wait()
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()

    try:
        shakedown.wait_for_service_endpoint('pyfw', 15)
    except:
        pass
    else:
        assert False, 'MoM shoud NOT create a service endpoint'
def test_mom_when_mom_process_killed():
    """ Launched a task from MoM then killed MoM.
    """
    app_def = app('agent-failure')
    host = ip_other_than_mom()
    pin_to_host(app_def, host)
    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()
        tasks = client.get_tasks('/agent-failure')
        original_task_id = tasks[0]['id']

        shakedown.kill_process_on_host(ip_of_mom(), 'marathon-assembly')
        shakedown.wait_for_task('marathon', 'marathon-user', 300)
        shakedown.wait_for_service_endpoint('marathon-user')

        tasks = client.get_tasks('/agent-failure')
        tasks[0]['id'] == original_task_id
def test_mom_when_mom_process_killed():
    """ Launched a task from MoM then killed MoM.
    """
    app_def = app('agent-failure')
    host = ip_other_than_mom()
    pin_to_host(app_def, host)
    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()
        tasks = client.get_tasks('/agent-failure')
        original_task_id = tasks[0]['id']

        shakedown.kill_process_on_host(ip_of_mom(), 'marathon-assembly')
        shakedown.wait_for_task('marathon', 'marathon-user', 300)
        shakedown.wait_for_service_endpoint('marathon-user')

        tasks = client.get_tasks('/agent-failure')
        tasks[0]['id'] == original_task_id
Ejemplo n.º 30
0
def cluster_info(mom_name='marathon-user'):
    agents = get_private_agents()
    print("agents: {}".format(len(agents)))
    client = marathon.create_client()
    about = client.get_about()
    print("marathon version: {}".format(about.get("version")))
    # see if there is a MoM

    if service_available_predicate(mom_name):
        with shakedown.marathon_on_marathon(mom_name):
            try:
                client = marathon.create_client()
                about = client.get_about()
                print("marathon MoM version: {}".format(about.get("version")))

            except Exception as e:
                print("Marathon MoM not present")
    else:
        print("Marathon MoM not present")
Ejemplo n.º 31
0
def cluster_info(mom_name='marathon-user'):
    agents = get_private_agents()
    print("agents: {}".format(len(agents)))
    client = marathon.create_client()
    about = client.get_about()
    print("marathon version: {}".format(about.get("version")))
    # see if there is a MoM

    if service_available_predicate(mom_name):
        with shakedown.marathon_on_marathon(mom_name):
            try:
                client = marathon.create_client()
                about = client.get_about()
                print("marathon MoM version: {}".format(about.get("version")))

            except Exception as e:
                print("Marathon MoM not present")
    else:
        print("Marathon MoM not present")
def test_mom_when_mom_agent_bounced():
    """ Launch an app from MoM and restart the node MoM is on.
    """
    app_def = app('agent-failure')
    mom_ip = ip_of_mom()
    host = ip_other_than_mom()
    pin_to_host(app_def, host)
    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()
        tasks = client.get_tasks('/agent-failure')
        original_task_id = tasks[0]['id']

        shakedown.restart_agent(mom_ip)

        @retrying.retry(wait_fixed=1000, stop_max_delay=3000)
        def check_task_is_back():
            tasks = client.get_tasks('/agent-failure')
            tasks[0]['id'] == original_task_id
def test_mom_when_mom_agent_bounced():
    """ Launch an app from MoM and restart the node MoM is on.
    """
    app_def = app('agent-failure')
    mom_ip = ip_of_mom()
    host = ip_other_than_mom()
    pin_to_host(app_def, host)
    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()
        tasks = client.get_tasks('/agent-failure')
        original_task_id = tasks[0]['id']

        shakedown.restart_agent(mom_ip)

        @retrying.retry(wait_fixed=1000, stop_max_delay=3000)
        def check_task_is_back():
            tasks = client.get_tasks('/agent-failure')
            tasks[0]['id'] == original_task_id
def test_framework_unavailable_on_mom():
    """ Launches an app that has elements necessary to create a service endpoint in DCOS.
        This test confirms that the endpoint is not created when launched with MoM.
    """
    if shakedown.service_available_predicate('pyfw'):
        client = marathon.create_client()
        client.remove_app('python-http', True)
        shakedown.deployment_wait()
        shakedown.wait_for_service_endpoint_removal('pyfw')

    with shakedown.marathon_on_marathon():
        delete_all_apps_wait()
        client = marathon.create_client()
        client.add_app(common.fake_framework_app())
        shakedown.deployment_wait()

    try:
        shakedown.wait_for_service_endpoint('pyfw', 15)
        assert False, 'MoM shoud NOT create a service endpoint'
    except:
        assert True
        pass
def test_framework_unavailable_on_mom():
    """ Launches an app that has elements necessary to create a service endpoint in DCOS.
        This test confirms that the endpoint is not created when launched with MoM.
    """
    if shakedown.service_available_predicate('pyfw'):
        client = marathon.create_client()
        client.remove_app('python-http', True)
        shakedown.deployment_wait()
        shakedown.wait_for_service_endpoint_removal('pyfw')

    with shakedown.marathon_on_marathon():
        delete_all_apps_wait()
        client = marathon.create_client()
        client.add_app(common.fake_framework_app())
        shakedown.deployment_wait()

    try:
        shakedown.wait_for_service_endpoint('pyfw', 15)
        assert False, 'MoM shoud NOT create a service endpoint'
    except:
        assert True
        pass
Ejemplo n.º 36
0
def test_mom_when_mom_agent_bounced():
    """Launch an app from MoM and restart the node MoM is on."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]
    mom_ip = common.ip_of_mom()
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]['id']

        shakedown.restart_agent(mom_ip)

        @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0]['id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
Ejemplo n.º 37
0
def setup_module(module):
    common.ensure_mom()
    common.cluster_info()
    with shakedown.marathon_on_marathon():
        common.clean_up_marathon()
Ejemplo n.º 38
0
def setup_module(module):
    common.ensure_mom()
    common.wait_for_marathon_up('marathon-user')
    common.cluster_info()
    with shakedown.marathon_on_marathon():
        clear_marathon()
Ejemplo n.º 39
0
def test_scaling_load(
    master_count,
    job_count,
    single_use: bool,
    run_delay,
    cpu_quota,
    memory_quota,
    work_duration,
    mom,
    external_volume: bool,
    scenario,
    min_index,
    max_index,
    batch_size,
    enforce_quota_guarantee,
    enforce_quota_limit,
    create_framework: bool,
    create_jobs: bool,
) -> None:

    """Launch a load test scenario. This does not verify the results
    of the test, but does ensure the instances and jobs were created.

    The installation is run in threads, but the job creation and
    launch is then done serially after all Jenkins instances have
    completed installation.

    Args:
        master_count: Number of Jenkins masters or instances
        job_count: Number of Jobs on each Jenkins master
        single_use: Mesos Single-Use Agent on (true) or off (false)
        run_delay: Jobs should run every X minute(s)
        cpu_quota: CPU quota (0.0 to disable)
        work_duration: Time, in seconds, for generated jobs to sleep
        mom: Marathon on Marathon instance name
        external_volume: External volume on rexray (true) or local volume (false)
        min_index: minimum index to begin jenkins suffixes at
        max_index: maximum index to end jenkins suffixes at
        batch_size: batch size to deploy jenkins instances in
    """
    security_mode = sdk_dcos.get_security_mode()
    # DELETEME@kjoshi get rid of these two after verification
    # _setup_quota(SHARED_ROLE, cpu_quota, memory_quota)
    if mom and cpu_quota != 0.0 and memory_quota != 0.0:
        with shakedown.marathon_on_marathon(mom):
            _setup_quota(SHARED_ROLE, cpu_quota, memory_quota)

    # create marathon client
    if mom:
        _configure_admin_router(mom, SHARED_ROLE)
        with shakedown.marathon_on_marathon(mom):
            marathon_client = shakedown.marathon.create_client()
    else:
        marathon_client = shakedown.marathon.create_client()
    
    # figure out the range of masters we want to create
    if min_index == -1 or max_index == -1:
        min_index = 0
        max_index = master_count - 1

    masters = ["jenkins/jenkins{}".format(index) for index in range(min_index, max_index)]
    successful_deployments = set(masters)

    # create service accounts in parallel
    sdk_security.install_enterprise_cli()

    security_mode = sdk_dcos.get_security_mode()

    if create_framework:
        log.info(
            "\n\nCreating service accounts for: [{}]\n\n".format(successful_deployments)
        )
        service_account_creation_failures = _create_service_accounts_stage(
            masters, min_index, max_index, batch_size, security_mode
        )
        log.info(
            "\n\nService account failures: [{}]\n\n".format(
                service_account_creation_failures
            )
        )

        successful_deployments -= service_account_creation_failures
        log.info(
            "\n\nCreating jenkins frameworks for: [{}]\n\n".format(
                successful_deployments
            )
        )

        install_jenkins_failures = _install_jenkins_stage(
            [x for x in successful_deployments],
            min_index,
            max_index,
            batch_size,
            security_mode,
            marathon_client,
            external_volume,
            mom,
        )
        log.info(
            "\n\nJenkins framework creation failures: [{}]\n\n".format(
                install_jenkins_failures
            )
        )
        successful_deployments -= install_jenkins_failures

    if create_jobs:
        log.info(
            "\n\nCreating jenkins jobs for: [{}]\n\n".format(successful_deployments)
        )

        job_creation_failures = _create_jobs_stage(
            [x for x in successful_deployments],
            min_index,
            max_index,
            batch_size,
            security_mode,
            marathon_client,
            external_volume,
            mom,
            job_count,
            single_use,
            run_delay,
            work_duration,
            scenario,
        )
        successful_deployments -= job_creation_failures

    log.info("\n\nAll masters to deploy: [{}]\n\n".format(",".join(masters)))
    log.info(
        "\n\nSuccessful Jenkins deployments: [{}]\n\n".format(successful_deployments)
    )
    log.info(
        "\n\nFailed Jenkins deployments: [{}]\n\n".format(
            set(masters) - successful_deployments
        )
    )
    log.info("Timings: {}".format(json.dumps(TIMINGS)))
Ejemplo n.º 40
0
def test_scaling_load(master_count, job_count, single_use: bool, run_delay,
                      cpu_quota, work_duration, mom, external_volume: bool,
                      scenario, min_index, max_index, batch_size) -> None:
    """Launch a load test scenario. This does not verify the results
    of the test, but does ensure the instances and jobs were created.

    The installation is run in threads, but the job creation and
    launch is then done serially after all Jenkins instances have
    completed installation.

    Args:
        master_count: Number of Jenkins masters or instances
        job_count: Number of Jobs on each Jenkins master
        single_use: Mesos Single-Use Agent on (true) or off (false)
        run_delay: Jobs should run every X minute(s)
        cpu_quota: CPU quota (0.0 to disable)
        work_duration: Time, in seconds, for generated jobs to sleep
        mom: Marathon on Marathon instance name
        external_volume: External volume on rexray (true) or local volume (false)
        min_index: minimum index to begin jenkins suffixes at
        max_index: maximum index to end jenkins suffixes at
        batch_size: batch size to deploy jenkins instances in
    """
    security_mode = sdk_dcos.get_security_mode()
    if mom and cpu_quota != 0.0:
        with shakedown.marathon_on_marathon(mom):
            _setup_quota(SHARED_ROLE, cpu_quota)

    # create marathon client
    if mom:
        with shakedown.marathon_on_marathon(mom):
            marathon_client = shakedown.marathon.create_client()
    else:
        marathon_client = shakedown.marathon.create_client()

    masters = []
    if min_index == -1 or max_index == -1:
        masters = [
            "jenkins{}".format(sdk_utils.random_string())
            for _ in range(0, int(master_count))
        ]
    else:
        #max and min indexes are specified
        #NOTE: using min/max will override master count
        masters = [
            "jenkins{}".format(index) for index in range(min_index, max_index)
        ]
    # create service accounts in parallel
    sdk_security.install_enterprise_cli()
    service_account_threads = _spawn_threads(masters,
                                             _create_service_accounts,
                                             security=security_mode)

    thread_failures = _wait_and_get_failures(service_account_threads,
                                             timeout=SERVICE_ACCOUNT_TIMEOUT)
    # launch Jenkins services
    current = 0
    end = max_index - min_index
    while current + batch_size <= end:

        log.info(
            "Re-authenticating current batch load of jenkins{} - jenkins{} "
            "to prevent auth-timeouts on scale cluster.".format(
                current, current + batch_size))
        dcos_login.login_session()

        batched_masters = masters[current:current + batch_size]
        install_threads = _spawn_threads(batched_masters,
                                         _install_jenkins,
                                         event='deployments',
                                         client=marathon_client,
                                         external_volume=external_volume,
                                         security=security_mode,
                                         daemon=True,
                                         mom=mom)
        thread_failures = _wait_and_get_failures(install_threads,
                                                 timeout=DEPLOY_TIMEOUT)
        thread_names = [x.name for x in thread_failures]

        # the rest of the commands require a running Jenkins instance
        deployed_masters = [
            x for x in batched_masters if x not in thread_names
        ]
        job_threads = _spawn_threads(deployed_masters,
                                     _create_jobs,
                                     jobs=job_count,
                                     single=single_use,
                                     delay=run_delay,
                                     duration=work_duration,
                                     scenario=scenario)
        _wait_on_threads(job_threads, JOB_RUN_TIMEOUT)
        r = json.dumps(TIMINGS)
        print(r)
        current = current + batch_size
Ejemplo n.º 41
0
def setup_module(module):
    common.ensure_mom()
    common.wait_for_marathon_up('marathon-user')
    common.cluster_info()
    with shakedown.marathon_on_marathon():
        clear_marathon()
def setup_module(module):
    common.ensure_mom()
    shakedown.wait_for_service_endpoint('marathon-user', timedelta(minutes=5).total_seconds())
    common.cluster_info()
    with shakedown.marathon_on_marathon():
        clear_marathon()
Ejemplo n.º 43
0
def setup_module(module):
    common.ensure_mom()
    common.cluster_info()
    with shakedown.marathon_on_marathon():
        common.clean_up_marathon()