def test_mom_with_network_failure_bounce_master():
    """Marathon on Marathon (MoM) tests for DC/OS with network failures
    simulated by knocking out ports.
    """
    # Locate the MoM node and deploy a sleep app through the MoM instance.
    mom_ip = common.ip_of_mom()
    logger.info("MoM IP: {}".format(mom_ip))

    app_def = apps.sleep_app()
    app_id = app_def["id"]

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.wait_for_task("marathon-user", app_id.lstrip('/'))
        running = client.get_tasks(app_id)
        original_task_id = running[0]["id"]
        task_ip = running[0]['host']
        logger.info("\nTask IP: " + task_ip)

    # PR for network partitioning in shakedown makes this better
    # Cut both the MoM node and the task node off the network.
    for node_ip in (mom_ip, task_ip):
        partition_agent(node_ip)

    one_minute = timedelta(minutes=1).total_seconds()
    ten_minutes = timedelta(minutes=10).total_seconds()

    # Let the partition take effect, bounce the Mesos master, then heal the net.
    time.sleep(one_minute)
    shakedown.run_command_on_master("sudo systemctl restart dcos-mesos-master")
    for node_ip in (mom_ip, task_ip):
        reconnect_agent(node_ip)
    time.sleep(one_minute)

    common.wait_for_service_endpoint('marathon-user', ten_minutes, path="ping")

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        shakedown.wait_for_task("marathon-user", app_id.lstrip('/'), ten_minutes)

        @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30,
                        retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            # The task must have survived with the same ID (no restart).
            current = client.get_tasks(app_id)
            assert current[0]['id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
def is_mom_version(version):
    """Return True if the deployed MoM reports exactly *version*.

    Retries transient failures up to ``max_times`` attempts; returns False
    when MoM does not exist (DCOSException) or retries are exhausted.
    """
    same_version = False
    max_times = 10
    check_complete = False
    while not check_complete:
        try:
            # BUG FIX: original read `max_times == 1`, a no-op comparison, so
            # the retry budget never decreased and failures retried forever.
            max_times -= 1
            with shakedown.marathon_on_marathon():
                client = marathon.create_client()
                about = client.get_about()
                same_version = version == about.get("version")
                check_complete = True
        except DCOSException:
            # if marathon doesn't exist yet
            return False
        except Exception:
            if max_times > 0:
                # this failure only happens at very high scale
                # it takes a lot of time to recover
                wait_for_service_endpoint('marathon-user', 600)
            else:
                return False
    return same_version
def marathon_service_name():
    """Fixture: wait for the MoM endpoint, yield its service name, clean up."""
    service = 'marathon-user'
    five_minutes = timedelta(minutes=5).total_seconds()
    shakedown.wait_for_service_endpoint(service, five_minutes)
    with shakedown.marathon_on_marathon():
        yield service
        shakedown.wait_for_service_endpoint(service, five_minutes)
        clear_marathon()
def is_mom_version(version):
    """Return True if the deployed MoM reports exactly *version*.

    Retries transient failures up to ``max_times`` attempts; returns False
    when MoM does not exist (DCOSException) or retries are exhausted.
    """
    same_version = False
    max_times = 10
    check_complete = False
    while not check_complete:
        try:
            # BUG FIX: original read `max_times == 1`, a no-op comparison, so
            # the retry budget never decreased and failures retried forever.
            max_times -= 1
            with shakedown.marathon_on_marathon():
                client = marathon.create_client()
                about = client.get_about()
                same_version = version == about.get("version")
                check_complete = True
        except DCOSException:
            # if marathon doesn't exist yet
            return False
        except Exception:
            if max_times > 0:
                # this failure only happens at very high scale
                # it takes a lot of time to recover
                wait_for_service_endpoint('marathon-user', 600)
            else:
                return False
    return same_version
def test_mom_when_mom_agent_bounced():
    """Launch an app from MoM and restart the node MoM is on."""
    app_def = apps.sleep_app()
    app_id = app_def["id"]
    mom_ip = common.ip_of_mom()
    host = common.ip_other_than_mom()
    # Keep the app off the MoM node so only MoM is disturbed by the restart.
    common.pin_to_host(app_def, host)

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        common.deployment_wait(service_id=app_id)
        original_task_id = client.get_tasks(app_id)[0]['id']

        shakedown.restart_agent(mom_ip)

        @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30,
                        retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            # The pinned task must survive the MoM agent bounce unchanged.
            current = client.get_tasks(app_id)
            assert current[0]['id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
def test_mom_when_mom_process_killed():
    """Launched a task from MoM then killed MoM."""
    app_def = apps.sleep_app()
    app_id = app_def["id"]
    # Pin the app away from the MoM node so killing MoM cannot touch it.
    target_host = common.ip_other_than_mom()
    common.pin_to_host(app_def, target_host)

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()
        original_task_id = client.get_tasks(app_id)[0]['id']

        shakedown.kill_process_on_host(common.ip_of_mom(), 'marathon-assembly')
        shakedown.wait_for_task('marathon', 'marathon-user', 300)
        shakedown.wait_for_service_endpoint('marathon-user')

        @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30,
                        retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            # The task should keep running with the same ID across the MoM kill.
            current = client.get_tasks(app_id)
            assert current[0]['id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
def marathon_service_name():
    """Fixture: ensure MoM is installed, yield its name, then clean up MoM."""
    service = 'marathon-user'
    common.ensure_mom()
    with shakedown.marathon_on_marathon():
        yield service
        shakedown.wait_for_service_endpoint(service)
        clear_marathon()
def marathon_service_name():
    """Fixture: wait for the MoM endpoint, yield its service name, clean up."""
    service = 'marathon-user'
    five_minutes = timedelta(minutes=5).total_seconds()
    shakedown.wait_for_service_endpoint(service, five_minutes)
    with shakedown.marathon_on_marathon():
        yield service
        shakedown.wait_for_service_endpoint(service, five_minutes)
        clear_marathon()
def test_mom_when_mom_process_killed():
    """Launched a task from MoM then killed MoM."""
    app_def = apps.sleep_app()
    app_id = app_def["id"]
    # Pin the app away from the MoM node so killing MoM cannot touch it.
    target_host = common.ip_other_than_mom()
    common.pin_to_host(app_def, target_host)

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()
        original_task_id = client.get_tasks(app_id)[0]['id']

        common.kill_process_on_host(common.ip_of_mom(), 'marathon-assembly')
        shakedown.wait_for_task('marathon', 'marathon-user', 300)
        common.wait_for_service_endpoint('marathon-user', path="ping")

        @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30,
                        retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            # The task should keep running with the same ID across the MoM kill.
            current = client.get_tasks(app_id)
            assert current[0]['id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
def marathon_service_name():
    """Fixture: ensure MoM is installed, yield its name, then clean up MoM."""
    service = 'marathon-user'
    common.ensure_mom()
    with shakedown.marathon_on_marathon():
        yield service
        shakedown.wait_for_service_endpoint(service)
        clear_marathon()
def setup_module(module):
    """Module setup: install MoM, wait for its endpoint, start from a clean slate."""
    common.ensure_mom()
    five_minutes = timedelta(minutes=5).total_seconds()
    shakedown.wait_for_service_endpoint('marathon-user', five_minutes)
    common.cluster_info()
    with shakedown.marathon_on_marathon():
        clear_marathon()
def test_mom_with_network_failure_bounce_master():
    """Marathon on Marathon (MoM) tests for DC/OS with network failures
    simulated by knocking out ports.
    """
    # Locate the MoM node and deploy a sleep app through the MoM instance.
    mom_ip = common.ip_of_mom()
    print("MoM IP: {}".format(mom_ip))

    app_def = apps.sleep_app()
    app_id = app_def["id"]

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.wait_for_task("marathon-user", app_id.lstrip('/'))
        running = client.get_tasks(app_id)
        original_task_id = running[0]["id"]
        task_ip = running[0]['host']
        print("\nTask IP: " + task_ip)

    # PR for network partitioning in shakedown makes this better
    # Cut both the MoM node and the task node off the network.
    for node_ip in (mom_ip, task_ip):
        partition_agent(node_ip)

    one_minute = timedelta(minutes=1).total_seconds()
    ten_minutes = timedelta(minutes=10).total_seconds()

    # Let the partition take effect, bounce the Mesos master, then heal the net.
    time.sleep(one_minute)
    shakedown.run_command_on_master("sudo systemctl restart dcos-mesos-master")
    for node_ip in (mom_ip, task_ip):
        reconnect_agent(node_ip)
    time.sleep(one_minute)

    shakedown.wait_for_service_endpoint('marathon-user', ten_minutes)

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        shakedown.wait_for_task("marathon-user", app_id.lstrip('/'), ten_minutes)

        @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30,
                        retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            # The task must have survived with the same ID (no restart).
            current = client.get_tasks(app_id)
            assert current[0]['id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
def test_mom_with_network_failure_bounce_master():
    """Marathon on Marathon (MoM) tests for DC/OS with network failures
    simulated by knocking out ports
    """
    # get MoM ip
    mom_ip = ip_of_mom()
    print("MoM IP: {}".format(mom_ip))

    app_def = get_resource("{}/large-sleep.json".format(fixture_dir()))

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.wait_for_task("marathon-user", "sleep")
        running = client.get_tasks('sleep')
        original_sleep_task_id = running[0]["id"]
        task_ip = running[0]['host']
        print("\nTask IP: " + task_ip)

    # PR for network partitioning in shakedown makes this better
    # Cut both the MoM node and the task node off the network.
    for node_ip in (mom_ip, task_ip):
        partition_agent(node_ip)

    one_minute = timedelta(minutes=1).total_seconds()
    ten_minutes = timedelta(minutes=10).total_seconds()

    # Let the partition take effect, bounce the Mesos master, then heal the net.
    time.sleep(one_minute)
    shakedown.run_command_on_master("sudo systemctl restart dcos-mesos-master")
    for node_ip in (mom_ip, task_ip):
        reconnect_agent(node_ip)
    time.sleep(one_minute)

    shakedown.wait_for_service_endpoint('marathon-user', ten_minutes)

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        shakedown.wait_for_task("marathon-user", "sleep", ten_minutes)
        # The task must have survived the partition + master bounce unchanged.
        current_sleep_task_id = client.get_tasks('sleep')[0]["id"]
        assert current_sleep_task_id == original_sleep_task_id, "Task ID shouldn't change"
def wait_for_marathon_user_and_cleanup():
    """Fixture: wait for the MoM endpoint around the test, then clean up MoM."""
    print("entering wait_for_marathon_user_and_cleanup fixture")
    five_minutes = timedelta(minutes=5).total_seconds()
    shakedown.wait_for_service_endpoint('marathon-user', five_minutes)
    with shakedown.marathon_on_marathon():
        yield
        shakedown.wait_for_service_endpoint('marathon-user', five_minutes)
        common.clean_up_marathon()
    print("exiting wait_for_marathon_user_and_cleanup fixture")
def test_mom_with_network_failure_bounce_master():
    """Marathon on Marathon (MoM) tests for DC/OS with network failures
    simulated by knocking out ports
    """
    # get MoM ip
    mom_ip = ip_of_mom()
    print("MoM IP: {}".format(mom_ip))

    app_def = get_resource("{}/large-sleep.json".format(fixture_dir()))

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.wait_for_task("marathon-user", "sleep")
        running = client.get_tasks('sleep')
        original_sleep_task_id = running[0]["id"]
        task_ip = running[0]['host']
        print("\nTask IP: " + task_ip)

    # PR for network partitioning in shakedown makes this better
    # Cut both the MoM node and the task node off the network.
    for node_ip in (mom_ip, task_ip):
        partition_agent(node_ip)

    one_minute = timedelta(minutes=1).total_seconds()

    # Let the partition take effect, bounce the Mesos master, then heal the net.
    time.sleep(one_minute)
    shakedown.run_command_on_master("sudo systemctl restart dcos-mesos-master")
    for node_ip in (mom_ip, task_ip):
        reconnect_agent(node_ip)
    time.sleep(one_minute)

    shakedown.wait_for_service_endpoint('marathon-user')
    shakedown.wait_for_task("marathon-user", "sleep")

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        shakedown.wait_for_task("marathon-user", "sleep")
        # The task must have survived the partition + master bounce unchanged.
        current_sleep_task_id = client.get_tasks('sleep')[0]["id"]
        assert current_sleep_task_id == original_sleep_task_id, "Task ID shouldn't change"
def wait_for_marathon_user_and_cleanup():
    """Fixture: wait on the MoM ping endpoint around the test, then clean MoM."""
    five_minutes = timedelta(minutes=5).total_seconds()
    common.wait_for_service_endpoint('marathon-user', five_minutes, path="ping")
    with shakedown.marathon_on_marathon():
        yield
        common.wait_for_service_endpoint('marathon-user', five_minutes, path="ping")
        common.clean_up_marathon()
def teardown_module(module):
    """Module teardown: best-effort MoM cleanup, uninstall MoM, clean root Marathon."""
    with shakedown.marathon_on_marathon():
        try:
            clear_marathon()
        except Exception:
            # Narrowed from a bare `except:`, which would also swallow
            # SystemExit/KeyboardInterrupt. Cleanup stays best-effort:
            # MoM may already be gone or unreachable at teardown time.
            pass
    # Uninstall MoM
    shakedown.uninstall_package_and_wait('marathon')
    shakedown.delete_zk_node('universe/marathon-user')
    # Remove everything from root marathon
    clear_marathon()
def teardown_module(module):
    """Module teardown: best-effort MoM cleanup, uninstall MoM, clean root Marathon."""
    with shakedown.marathon_on_marathon():
        try:
            clear_marathon()
        except Exception:
            # Narrowed from a bare `except:`, which would also swallow
            # SystemExit/KeyboardInterrupt. Cleanup stays best-effort:
            # MoM may already be gone or unreachable at teardown time.
            pass
    # Uninstall MoM
    shakedown.uninstall_package_and_wait('marathon')
    shakedown.delete_zk_node('universe/marathon-user')
    # Remove everything from root marathon
    clear_marathon()
def wait_for_marathon_user_and_cleanup():
    """Fixture: wait for the MoM endpoint around the test, then clean up MoM."""
    print("entering wait_for_marathon_user_and_cleanup fixture")
    five_minutes = timedelta(minutes=5).total_seconds()
    shakedown.wait_for_service_endpoint('marathon-user', five_minutes)
    with shakedown.marathon_on_marathon():
        yield
        shakedown.wait_for_service_endpoint('marathon-user', five_minutes)
        common.clean_up_marathon()
    print("exiting wait_for_marathon_user_and_cleanup fixture")
def teardown_module(module):
    """Module teardown: best-effort MoM cleanup, uninstall MoM, clean root Marathon."""
    with shakedown.marathon_on_marathon():
        # Best effort: MoM may already be unreachable at teardown time.
        try:
            common.clean_up_marathon()
        except Exception:
            pass

    shakedown.uninstall_package_and_wait('marathon')
    shakedown.delete_zk_node('universe/marathon-user')

    # Remove everything from root marathon
    common.clean_up_marathon()
def simple_sleep_app(name):
    """Deploy a simple sleep app in the named MoM-EE; return True if its task exists."""
    with shakedown.marathon_on_marathon(name=name):
        app_def = apps.sleep_app()
        marathon.create_client().add_app(app_def)
        shakedown.deployment_wait()

        tasks = shakedown.get_service_task(name, app_def["id"].lstrip("/"))
        print('MoM-EE tasks: {}'.format(tasks))
        return tasks is not None
def remove_mom_ee():
    """Remove any deployed MoM-EE instance across all known version/mode combos."""
    # Same order as the original explicit list: 1.4 then 1.3, each in
    # strict/permissive/disabled order.
    for version in ('1.4', '1.3'):
        for security_mode in ('strict', 'permissive', 'disabled'):
            endpoint = mom_ee_endpoint(version, security_mode)
            if not shakedown.service_available_predicate(endpoint):
                continue
            print('Removing {}...'.format(endpoint))
            with shakedown.marathon_on_marathon(name=endpoint):
                shakedown.delete_all_apps()

    client = marathon.create_client()
    client.remove_app(MOM_EE_NAME)
    shakedown.deployment_wait()
    print('Successfully removed {}'.format(MOM_EE_NAME))
def test_scaling_load(master_count,
                      job_count,
                      single_use: bool,
                      run_delay,
                      cpu_quota,
                      work_duration,
                      mom,
                      external_volume: bool,
                      scenario) -> None:
    """Launch a load test scenario. This does not verify the results
    of the test, but does ensure the instances and jobs were created.

    The installation is run in threads, but the job creation and
    launch is then done serially after all Jenkins instances have
    completed installation.

    Args:
        master_count: Number of Jenkins masters or instances
        job_count: Number of Jobs on each Jenkins master
        single_use: Mesos Single-Use Agent on (true) or off (false)
        run_delay: Jobs should run every X minute(s)
        cpu_quota: CPU quota (0.0 to disable)
        work_duration: Time, in seconds, for generated jobs to sleep
        mom: Marathon on Marathon instance name
        external_volume: External volume on rexray (true) or local volume (false)
    """
    with shakedown.marathon_on_marathon(mom):
        # BUG FIX: the original used `cpu_quota is not 0.0`, an identity test
        # against a float literal. Identity of floats is an implementation
        # detail, so the quota branch could run even when cpu_quota == 0.0
        # (the documented "disable" value). Compare by value instead.
        if cpu_quota != 0.0:
            _setup_quota(SHARED_ROLE, cpu_quota)

    masters = [
        "jenkins{}".format(sdk_utils.random_string())
        for _ in range(0, int(master_count))
    ]

    # launch Jenkins services
    install_threads = _spawn_threads(masters,
                                     _install_jenkins,
                                     external_volume=external_volume,
                                     mom=mom,
                                     daemon=True)
    thread_failures = _wait_and_get_failures(install_threads,
                                             timeout=DEPLOY_TIMEOUT)
    thread_names = [x.name for x in thread_failures]

    # the rest of the commands require a running Jenkins instance
    deployed_masters = [x for x in masters if x not in thread_names]
    job_threads = _spawn_threads(deployed_masters,
                                 _create_jobs,
                                 jobs=job_count,
                                 single=single_use,
                                 delay=run_delay,
                                 duration=work_duration,
                                 scenario=scenario)
    _wait_on_threads(job_threads, JOB_RUN_TIMEOUT)
def cluster_info(mom_name='marathon-user'):
    """Print DC/OS, agent, root-Marathon and (if present) MoM version info."""
    print("DC/OS: {}, in {} mode".format(shakedown.dcos_version(), shakedown.ee_version()))
    print("Agents: {}".format(len(shakedown.get_private_agents())))

    about = marathon.create_client().get_about()
    print("Marathon version: {}".format(about.get("version")))

    # Guard clause: nothing more to report when MoM is not installed.
    if not shakedown.service_available_predicate(mom_name):
        print("Marathon MoM not present")
        return

    with shakedown.marathon_on_marathon(mom_name):
        try:
            mom_about = marathon.create_client().get_about()
            print("Marathon MoM version: {}".format(mom_about.get("version")))
        except Exception:
            print("Marathon MoM not present")
def test_framework_unavailable_on_mom():
    """Launches an app that has elements necessary to create a service
    endpoint in DCOS. This test confirms that the endpoint is not created
    when launched with MoM.
    """
    app_def = apps.fake_framework()
    app_id = app_def["id"]

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        common.deployment_wait(service_id=app_id)

        # The wait is EXPECTED to time out: a raised exception means no
        # service endpoint appeared, which is the desired outcome.
        try:
            common.wait_for_service_endpoint('pyfw', 15)
        except Exception:
            pass
        else:
            assert False, 'MoM shoud NOT create a service endpoint'
def cluster_info(mom_name='marathon-user'):
    """Print DC/OS, agent, root-Marathon and (if present) MoM version info."""
    print("DC/OS: {}, in {} mode".format(shakedown.dcos_version(), shakedown.ee_version()))
    print("Agents: {}".format(len(shakedown.get_private_agents())))

    about = marathon.create_client().get_about()
    print("Marathon version: {}".format(about.get("version")))

    # Guard clause: nothing more to report when MoM is not installed.
    if not shakedown.service_available_predicate(mom_name):
        print("Marathon MoM not present")
        return

    with shakedown.marathon_on_marathon(mom_name):
        try:
            mom_about = marathon.create_client().get_about()
            print("Marathon MoM version: {}".format(mom_about.get("version")))
        except Exception:
            print("Marathon MoM not present")
def test_framework_unavailable_on_mom():
    """Launches an app that has elements necessary to create a service
    endpoint in DCOS. This test confirms that the endpoint is not created
    when launched with MoM.
    """
    app_def = apps.fake_framework()

    with shakedown.marathon_on_marathon():
        common.delete_all_apps_wait()
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()

        # The wait is EXPECTED to time out: a raised exception means no
        # service endpoint appeared, which is the desired outcome.
        try:
            shakedown.wait_for_service_endpoint('pyfw', 15)
        except Exception:
            # Narrowed from a bare `except:` (which also traps SystemExit/
            # KeyboardInterrupt); matches the sibling variant of this test.
            pass
        else:
            # Typo fixed in the failure message ("shoud" -> "should").
            assert False, 'MoM should NOT create a service endpoint'
def test_mom_when_mom_process_killed():
    """Launched a task from MoM then killed MoM."""
    app_def = app('agent-failure')
    # Pin the app away from the MoM node so killing MoM cannot touch it.
    host = ip_other_than_mom()
    pin_to_host(app_def, host)

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()
        tasks = client.get_tasks('/agent-failure')
        original_task_id = tasks[0]['id']

        shakedown.kill_process_on_host(ip_of_mom(), 'marathon-assembly')
        shakedown.wait_for_task('marathon', 'marathon-user', 300)
        shakedown.wait_for_service_endpoint('marathon-user')

        tasks = client.get_tasks('/agent-failure')
        # BUG FIX: the original ended with a bare comparison whose result was
        # discarded, so the test could never fail. Assert it instead.
        assert tasks[0]['id'] == original_task_id, "The task ID has changed"
def test_mom_when_mom_process_killed():
    """Launched a task from MoM then killed MoM."""
    app_def = app('agent-failure')
    # Pin the app away from the MoM node so killing MoM cannot touch it.
    host = ip_other_than_mom()
    pin_to_host(app_def, host)

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()
        tasks = client.get_tasks('/agent-failure')
        original_task_id = tasks[0]['id']

        shakedown.kill_process_on_host(ip_of_mom(), 'marathon-assembly')
        shakedown.wait_for_task('marathon', 'marathon-user', 300)
        shakedown.wait_for_service_endpoint('marathon-user')

        tasks = client.get_tasks('/agent-failure')
        # BUG FIX: the original ended with a bare comparison whose result was
        # discarded, so the test could never fail. Assert it instead.
        assert tasks[0]['id'] == original_task_id, "The task ID has changed"
def cluster_info(mom_name='marathon-user'):
    """Print agent count, root-Marathon version and, if present, MoM version."""
    agents = get_private_agents()
    print("agents: {}".format(len(agents)))

    about = marathon.create_client().get_about()
    print("marathon version: {}".format(about.get("version")))

    # see if there is a MoM
    if not service_available_predicate(mom_name):
        print("Marathon MoM not present")
        return

    with shakedown.marathon_on_marathon(mom_name):
        try:
            mom_about = marathon.create_client().get_about()
            print("marathon MoM version: {}".format(mom_about.get("version")))
        except Exception:
            print("Marathon MoM not present")
def cluster_info(mom_name='marathon-user'):
    """Print agent count, root-Marathon version and, if present, MoM version."""
    agents = get_private_agents()
    print("agents: {}".format(len(agents)))

    about = marathon.create_client().get_about()
    print("marathon version: {}".format(about.get("version")))

    # see if there is a MoM
    if not service_available_predicate(mom_name):
        print("Marathon MoM not present")
        return

    with shakedown.marathon_on_marathon(mom_name):
        try:
            mom_about = marathon.create_client().get_about()
            print("marathon MoM version: {}".format(mom_about.get("version")))
        except Exception:
            print("Marathon MoM not present")
def test_mom_when_mom_agent_bounced():
    """Launch an app from MoM and restart the node MoM is on."""
    app_def = app('agent-failure')
    mom_ip = ip_of_mom()
    # Pin the app away from the MoM node so only MoM is disturbed by the restart.
    host = ip_other_than_mom()
    pin_to_host(app_def, host)

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()
        tasks = client.get_tasks('/agent-failure')
        original_task_id = tasks[0]['id']

        shakedown.restart_agent(mom_ip)

        @retrying.retry(wait_fixed=1000, stop_max_delay=3000)
        def check_task_is_back():
            current = client.get_tasks('/agent-failure')
            # BUG FIX: the original used a bare comparison whose result was
            # discarded; without the assert the retry could never fail.
            assert current[0]['id'] == original_task_id, "The task ID has changed"

        # BUG FIX: the retry helper was defined but never invoked, so the
        # test performed no verification at all.
        check_task_is_back()
def test_mom_when_mom_agent_bounced():
    """Launch an app from MoM and restart the node MoM is on."""
    app_def = app('agent-failure')
    mom_ip = ip_of_mom()
    # Pin the app away from the MoM node so only MoM is disturbed by the restart.
    host = ip_other_than_mom()
    pin_to_host(app_def, host)

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()
        tasks = client.get_tasks('/agent-failure')
        original_task_id = tasks[0]['id']

        shakedown.restart_agent(mom_ip)

        @retrying.retry(wait_fixed=1000, stop_max_delay=3000)
        def check_task_is_back():
            current = client.get_tasks('/agent-failure')
            # BUG FIX: the original used a bare comparison whose result was
            # discarded; without the assert the retry could never fail.
            assert current[0]['id'] == original_task_id, "The task ID has changed"

        # BUG FIX: the retry helper was defined but never invoked, so the
        # test performed no verification at all.
        check_task_is_back()
def test_framework_unavailable_on_mom():
    """Launches an app that has elements necessary to create a service
    endpoint in DCOS. This test confirms that the endpoint is not created
    when launched with MoM.
    """
    if shakedown.service_available_predicate('pyfw'):
        client = marathon.create_client()
        client.remove_app('python-http', True)
        shakedown.deployment_wait()
        shakedown.wait_for_service_endpoint_removal('pyfw')

    with shakedown.marathon_on_marathon():
        delete_all_apps_wait()
        client = marathon.create_client()
        client.add_app(common.fake_framework_app())
        shakedown.deployment_wait()

        # BUG FIX: the original put `assert False` INSIDE the try with a bare
        # `except:`, so the AssertionError was swallowed and the test could
        # never fail. The wait is expected to time out (raise); only when it
        # unexpectedly succeeds should the test fail, via the else branch.
        try:
            shakedown.wait_for_service_endpoint('pyfw', 15)
        except Exception:
            pass
        else:
            assert False, 'MoM shoud NOT create a service endpoint'
def test_framework_unavailable_on_mom():
    """Launches an app that has elements necessary to create a service
    endpoint in DCOS. This test confirms that the endpoint is not created
    when launched with MoM.
    """
    if shakedown.service_available_predicate('pyfw'):
        client = marathon.create_client()
        client.remove_app('python-http', True)
        shakedown.deployment_wait()
        shakedown.wait_for_service_endpoint_removal('pyfw')

    with shakedown.marathon_on_marathon():
        delete_all_apps_wait()
        client = marathon.create_client()
        client.add_app(common.fake_framework_app())
        shakedown.deployment_wait()

        # BUG FIX: the original put `assert False` INSIDE the try with a bare
        # `except:`, so the AssertionError was swallowed and the test could
        # never fail. The wait is expected to time out (raise); only when it
        # unexpectedly succeeds should the test fail, via the else branch.
        try:
            shakedown.wait_for_service_endpoint('pyfw', 15)
        except Exception:
            pass
        else:
            assert False, 'MoM shoud NOT create a service endpoint'
def test_mom_when_mom_agent_bounced():
    """Launch an app from MoM and restart the node MoM is on."""
    app_def = apps.sleep_app()
    app_id = app_def["id"]
    mom_ip = common.ip_of_mom()
    host = common.ip_other_than_mom()
    # Keep the app off the MoM node so only MoM is disturbed by the restart.
    common.pin_to_host(app_def, host)

    with shakedown.marathon_on_marathon():
        client = marathon.create_client()
        client.add_app(app_def)
        shakedown.deployment_wait()
        original_task_id = client.get_tasks(app_id)[0]['id']

        shakedown.restart_agent(mom_ip)

        @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30,
                        retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            # The pinned task must survive the MoM agent bounce unchanged.
            current = client.get_tasks(app_id)
            assert current[0]['id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
def setup_module(module):
    """Module setup: make sure MoM is installed and start with a clean MoM."""
    common.ensure_mom()
    common.cluster_info()
    # Wipe anything left over in the MoM instance before the tests run.
    with shakedown.marathon_on_marathon():
        common.clean_up_marathon()
def setup_module(module):
    """Module setup: install MoM, wait until it is up, start with a clean MoM."""
    common.ensure_mom()
    common.wait_for_marathon_up('marathon-user')
    common.cluster_info()
    # Wipe anything left over in the MoM instance before the tests run.
    with shakedown.marathon_on_marathon():
        clear_marathon()
def test_scaling_load(
    master_count,
    job_count,
    single_use: bool,
    run_delay,
    cpu_quota,
    memory_quota,
    work_duration,
    mom,
    external_volume: bool,
    scenario,
    min_index,
    max_index,
    batch_size,
    enforce_quota_guarantee,
    enforce_quota_limit,
    create_framework: bool,
    create_jobs: bool,
) -> None:
    """Launch a load test scenario.

    This does not verify the results of the test, but does ensure the
    instances and jobs were created. The installation is run in threads,
    but the job creation and launch is then done serially after all
    Jenkins instances have completed installation.

    Args:
        master_count: Number of Jenkins masters or instances
        job_count: Number of Jobs on each Jenkins master
        single_use: Mesos Single-Use Agent on (true) or off (false)
        run_delay: Jobs should run every X minute(s)
        cpu_quota: CPU quota (0.0 to disable)
        memory_quota: memory quota (0.0 disables quota setup, together with cpu_quota)
        work_duration: Time, in seconds, for generated jobs to sleep
        mom: Marathon on Marathon instance name
        external_volume: External volume on rexray (true) or local volume (false)
        min_index: minimum index to begin jenkins suffixes at
        max_index: maximum index to end jenkins suffixes at
        batch_size: batch size to deploy jenkins instances in
        enforce_quota_guarantee: accepted but not used in this body — TODO confirm intent
        enforce_quota_limit: accepted but not used in this body — TODO confirm intent
        create_framework: when True, create service accounts and install the frameworks
        create_jobs: when True, create the Jenkins jobs on deployed masters
    """
    security_mode = sdk_dcos.get_security_mode()
    # DELETEME@kjoshi get rid of these two after verification
    # _setup_quota(SHARED_ROLE, cpu_quota, memory_quota)

    # Only configure quota when running under MoM AND both quotas are enabled.
    if mom and cpu_quota != 0.0 and memory_quota != 0.0:
        with shakedown.marathon_on_marathon(mom):
            _setup_quota(SHARED_ROLE, cpu_quota, memory_quota)

    # create marathon client — the MoM context manager scopes the client
    # configuration to the named MoM instance.
    if mom:
        _configure_admin_router(mom, SHARED_ROLE)
        with shakedown.marathon_on_marathon(mom):
            marathon_client = shakedown.marathon.create_client()
    else:
        marathon_client = shakedown.marathon.create_client()

    # figure out the range of masters we want to create
    # (-1 for either bound means "derive the range from master_count")
    if min_index == -1 or max_index == -1:
        min_index = 0
        max_index = master_count - 1

    # NOTE(review): range(min_index, max_index) excludes max_index, so the
    # highest suffix is max_index - 1 — confirm this off-by-one is intended.
    masters = ["jenkins/jenkins{}".format(index) for index in range(min_index, max_index)]
    # Start optimistic: every master counts as successful until a stage fails it.
    successful_deployments = set(masters)

    # create service accounts in parallel
    sdk_security.install_enterprise_cli()
    security_mode = sdk_dcos.get_security_mode()

    if create_framework:
        log.info(
            "\n\nCreating service accounts for: [{}]\n\n".format(successful_deployments)
        )
        service_account_creation_failures = _create_service_accounts_stage(
            masters, min_index, max_index, batch_size, security_mode
        )
        log.info(
            "\n\nService account failures: [{}]\n\n".format(
                service_account_creation_failures
            )
        )
        # Drop masters whose service accounts failed before installing frameworks.
        successful_deployments -= service_account_creation_failures

        log.info(
            "\n\nCreating jenkins frameworks for: [{}]\n\n".format(
                successful_deployments
            )
        )
        install_jenkins_failures = _install_jenkins_stage(
            [x for x in successful_deployments],
            min_index,
            max_index,
            batch_size,
            security_mode,
            marathon_client,
            external_volume,
            mom,
        )
        log.info(
            "\n\nJenkins framework creation failures: [{}]\n\n".format(
                install_jenkins_failures
            )
        )
        successful_deployments -= install_jenkins_failures

    if create_jobs:
        log.info(
            "\n\nCreating jenkins jobs for: [{}]\n\n".format(successful_deployments)
        )
        job_creation_failures = _create_jobs_stage(
            [x for x in successful_deployments],
            min_index,
            max_index,
            batch_size,
            security_mode,
            marathon_client,
            external_volume,
            mom,
            job_count,
            single_use,
            run_delay,
            work_duration,
            scenario,
        )
        successful_deployments -= job_creation_failures

    # Final summary: intended set, survivors, and the difference (failures).
    log.info("\n\nAll masters to deploy: [{}]\n\n".format(",".join(masters)))
    log.info(
        "\n\nSuccessful Jenkins deployments: [{}]\n\n".format(successful_deployments)
    )
    log.info(
        "\n\nFailed Jenkins deployments: [{}]\n\n".format(
            set(masters) - successful_deployments
        )
    )
    log.info("Timings: {}".format(json.dumps(TIMINGS)))
def test_scaling_load(master_count,
                      job_count,
                      single_use: bool,
                      run_delay,
                      cpu_quota,
                      work_duration,
                      mom,
                      external_volume: bool,
                      scenario,
                      min_index,
                      max_index,
                      batch_size) -> None:
    """Launch a load test scenario.

    This does not verify the results of the test, but does ensure the
    instances and jobs were created. The installation is run in threads,
    but the job creation and launch is then done serially after all
    Jenkins instances have completed installation.

    Args:
        master_count: Number of Jenkins masters or instances
        job_count: Number of Jobs on each Jenkins master
        single_use: Mesos Single-Use Agent on (true) or off (false)
        run_delay: Jobs should run every X minute(s)
        cpu_quota: CPU quota (0.0 to disable)
        work_duration: Time, in seconds, for generated jobs to sleep
        mom: Marathon on Marathon instance name
        external_volume: External volume on rexray (true) or local volume (false)
        min_index: minimum index to begin jenkins suffixes at
        max_index: maximum index to end jenkins suffixes at
        batch_size: batch size to deploy jenkins instances in
    """
    security_mode = sdk_dcos.get_security_mode()

    # Quota is only configured when running under MoM with a non-zero quota.
    if mom and cpu_quota != 0.0:
        with shakedown.marathon_on_marathon(mom):
            _setup_quota(SHARED_ROLE, cpu_quota)

    # create marathon client — the MoM context manager scopes the client
    # configuration to the named MoM instance.
    if mom:
        with shakedown.marathon_on_marathon(mom):
            marathon_client = shakedown.marathon.create_client()
    else:
        marathon_client = shakedown.marathon.create_client()

    masters = []
    if min_index == -1 or max_index == -1:
        # No explicit range: generate master_count randomly-suffixed names.
        masters = [
            "jenkins{}".format(sdk_utils.random_string())
            for _ in range(0, int(master_count))
        ]
    else:
        # max and min indexes are specified
        # NOTE: using min/max will override master count
        masters = [
            "jenkins{}".format(index) for index in range(min_index, max_index)
        ]

    # create service accounts in parallel
    sdk_security.install_enterprise_cli()
    service_account_threads = _spawn_threads(masters,
                                             _create_service_accounts,
                                             security=security_mode)
    thread_failures = _wait_and_get_failures(service_account_threads,
                                             timeout=SERVICE_ACCOUNT_TIMEOUT)

    # launch Jenkins services in batches of batch_size.
    # NOTE(review): `while current + batch_size <= end` skips a final partial
    # batch when (end % batch_size) != 0 — confirm the trailing masters are
    # intentionally left undeployed.
    current = 0
    end = max_index - min_index
    while current + batch_size <= end:
        log.info(
            "Re-authenticating current batch load of jenkins{} - jenkins{} "
            "to prevent auth-timeouts on scale cluster.".format(
                current, current + batch_size))
        dcos_login.login_session()

        batched_masters = masters[current:current + batch_size]
        install_threads = _spawn_threads(batched_masters,
                                         _install_jenkins,
                                         event='deployments',
                                         client=marathon_client,
                                         external_volume=external_volume,
                                         security=security_mode,
                                         daemon=True,
                                         mom=mom)
        thread_failures = _wait_and_get_failures(install_threads,
                                                 timeout=DEPLOY_TIMEOUT)
        thread_names = [x.name for x in thread_failures]

        # the rest of the commands require a running Jenkins instance
        deployed_masters = [
            x for x in batched_masters if x not in thread_names
        ]
        job_threads = _spawn_threads(deployed_masters,
                                     _create_jobs,
                                     jobs=job_count,
                                     single=single_use,
                                     delay=run_delay,
                                     duration=work_duration,
                                     scenario=scenario)
        _wait_on_threads(job_threads, JOB_RUN_TIMEOUT)

        # Dump accumulated timing data after each batch.
        r = json.dumps(TIMINGS)
        print(r)
        current = current + batch_size
def setup_module(module):
    """Module setup: install MoM, wait until it is up, start with a clean MoM."""
    common.ensure_mom()
    common.wait_for_marathon_up('marathon-user')
    common.cluster_info()
    # Wipe anything left over in the MoM instance before the tests run.
    with shakedown.marathon_on_marathon():
        clear_marathon()
def setup_module(module):
    """Module setup: install MoM, wait for its endpoint, start from a clean slate."""
    common.ensure_mom()
    five_minutes = timedelta(minutes=5).total_seconds()
    shakedown.wait_for_service_endpoint('marathon-user', five_minutes)
    common.cluster_info()
    with shakedown.marathon_on_marathon():
        clear_marathon()
def setup_module(module):
    """Module setup: make sure MoM is installed and start with a clean MoM."""
    common.ensure_mom()
    common.cluster_info()
    # Wipe anything left over in the MoM instance before the tests run.
    with shakedown.marathon_on_marathon():
        common.clean_up_marathon()