def verify_shared_executor(pod_name, expected_files=['essential', 'nonessential'], delete_files=True):
    '''verify that both tasks share the same executor:
    - matching ExecutorInfo
    - both 'essential' and 'nonessential' present in shared-volume/ across both tasks
    '''
    tasks = sdk_cmd.svc_cli(
        config.PACKAGE_NAME, config.SERVICE_NAME,
        'pod info {}'.format(pod_name), json=True)
    assert len(tasks) == 2

    # check that the task executors all match
    executor = tasks[0]['info']['executor']
    for task in tasks[1:]:
        assert executor == task['info']['executor']

    # for each task, check shared volume content matches what's expected
    task_names = [task['info']['name'] for task in tasks]
    for task_name in task_names:
        # 1.9 just uses the host filesystem in 'task exec', so use 'task ls' across the board instead
        filenames = sdk_cmd.run_cli('task ls {} shared-volume/'.format(task_name)).strip().split()
        assert set(expected_files) == set(filenames)

    # delete files from volume in preparation for a following task relaunch
    if delete_files:
        if sdk_utils.dcos_version_less_than("1.10"):
            # 1.9 just uses the host filesystem in 'task exec', so figure out the absolute volume path manually
            expected_file_path = sdk_tasks.task_exec(
                task_names[0],
                'find /var/lib/mesos/slave/volumes -iname ' + filenames[0])[1].strip()
            # volume dir is parent of the expected file path.
            volume_dir = os.path.dirname(expected_file_path)
        else:
            # 1.10+ works correctly: path is relative to sandbox
            volume_dir = 'shared-volume/'
        sdk_tasks.task_exec(
            task_names[0],
            'rm ' + ' '.join([os.path.join(volume_dir, name) for name in filenames]))
def configure_package(configure_security):
    try:
        foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
        sdk_install.uninstall(config.PACKAGE_NAME, foldered_name)

        if sdk_utils.dcos_version_less_than("1.9"):
            # Last beta-kafka release (1.1.25-0.10.1.0-beta) excludes 1.8. Skip upgrade tests with 1.8 and just install
            sdk_install.install(
                config.PACKAGE_NAME,
                foldered_name,
                config.DEFAULT_BROKER_COUNT,
                additional_options={"service": {"name": foldered_name}})
        else:
            sdk_upgrade.test_upgrade(
                config.PACKAGE_NAME,
                foldered_name,
                config.DEFAULT_BROKER_COUNT,
                additional_options={
                    "service": {"name": foldered_name},
                    "brokers": {"cpus": 0.5}
                })

        # wait for brokers to finish registering before starting tests
        test_utils.broker_count_check(config.DEFAULT_BROKER_COUNT, service_name=foldered_name)

        yield  # let the test session execute
    finally:
        sdk_install.uninstall(config.PACKAGE_NAME, foldered_name)
def task_exec(task_name: str, cmd: str, return_stderr_in_stdout: bool = False) -> tuple:
    """
    Invokes the given command on the task via `dcos task exec`.
    :param task_name: Name of task to run command on.
    :param cmd: The command to execute.
    :param return_stderr_in_stdout: If True, append stderr to stdout and return a (rc, output) pair.
    :return: a tuple consisting of the task exec's return code, stdout, and stderr
    """
    if cmd.startswith("./") and sdk_utils.dcos_version_less_than("1.10"):
        # On 1.9, task exec runs relative to the host filesystem, so prepend the sandbox path
        full_cmd = os.path.join(get_task_sandbox_path(task_name), cmd)

        if cmd.startswith("./bootstrap"):
            # On 1.9 we also need to set LIBPROCESS_IP for bootstrap
            full_cmd = "bash -c \"LIBPROCESS_IP=0.0.0.0 {}\"".format(full_cmd)
    else:
        full_cmd = cmd

    exec_cmd = "task exec {task_name} {cmd}".format(task_name=task_name, cmd=full_cmd)
    rc, stdout, stderr = run_raw_cli(exec_cmd)

    if return_stderr_in_stdout:
        return rc, stdout + "\n" + stderr

    return rc, stdout, stderr
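# Hedged usage sketch, not part of the original module: how task_exec() above might be called
# from a test. The task name "hello-0-server" is borrowed from other snippets in this collection
# and the command is an arbitrary placeholder; both are assumptions for illustration only.
def _example_task_exec_usage():
    rc, stdout, stderr = task_exec("hello-0-server", "ls -l")
    assert rc == 0, "command failed: stdout={} stderr={}".format(stdout, stderr)
    return stdout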
def configure_package(configure_security):
    try:
        foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
        sdk_install.uninstall(config.PACKAGE_NAME, foldered_name)

        if sdk_utils.dcos_version_less_than("1.9"):
            # HDFS upgrade in 1.8 is not supported.
            sdk_install.install(
                config.PACKAGE_NAME,
                foldered_name,
                config.DEFAULT_TASK_COUNT,
                additional_options={"service": {"name": foldered_name}},
                timeout_seconds=30 * 60,
            )
        else:
            sdk_upgrade.test_upgrade(
                config.PACKAGE_NAME,
                foldered_name,
                config.DEFAULT_TASK_COUNT,
                from_options={"service": {"name": foldered_name}},
                timeout_seconds=30 * 60,
            )

        yield  # let the test session execute
    finally:
        sdk_install.uninstall(config.PACKAGE_NAME, foldered_name)
def configure_package(configure_security):
    try:
        foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
        sdk_install.uninstall(config.PACKAGE_NAME, foldered_name)

        if sdk_utils.dcos_version_less_than("1.9"):
            # HDFS upgrade in 1.8 is not supported.
            sdk_install.install(
                config.PACKAGE_NAME,
                foldered_name,
                config.DEFAULT_TASK_COUNT,
                additional_options={"service": {"name": foldered_name}},
                timeout_seconds=30 * 60)
        else:
            sdk_upgrade.test_upgrade(
                config.PACKAGE_NAME,
                foldered_name,
                config.DEFAULT_TASK_COUNT,
                additional_options={"service": {"name": foldered_name}},
                timeout_seconds=30 * 60)

        yield  # let the test session execute
    finally:
        sdk_install.uninstall(config.PACKAGE_NAME, foldered_name)
def install_kafka(use_v0=False):
    mesos_api_version = "V0" if use_v0 else "V1"
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)

    if sdk_utils.dcos_version_less_than("1.9"):
        # Last beta-kafka release (1.1.25-0.10.1.0-beta) excludes 1.8. Skip upgrade tests with 1.8 and just install
        sdk_install.install(
            config.PACKAGE_NAME,
            foldered_name,
            config.DEFAULT_BROKER_COUNT,
            additional_options={
                "service": {
                    "name": foldered_name,
                    "mesos_api_version": mesos_api_version
                }
            })
    else:
        sdk_upgrade.test_upgrade(
            config.PACKAGE_NAME,
            foldered_name,
            config.DEFAULT_BROKER_COUNT,
            additional_options={
                "service": {
                    "name": foldered_name,
                    "mesos_api_version": mesos_api_version
                },
                "brokers": {"cpus": 0.5}
            })
def _upgrade_or_downgrade(package_name,
                          to_package_version,
                          service_name,
                          running_task_count,
                          additional_options,
                          timeout_seconds,
                          wait_for_deployment):
    initial_config = get_config(package_name, service_name)
    task_ids = sdk_tasks.get_task_ids(service_name, '')

    if sdk_utils.dcos_version_less_than("1.10") or shakedown.ee_version() is None:
        log.info('Using marathon upgrade flow to upgrade {} {}'.format(
            package_name, to_package_version))
        sdk_marathon.destroy_app(service_name)
        sdk_install.install(
            package_name,
            service_name,
            running_task_count,
            additional_options=additional_options,
            package_version=to_package_version,
            timeout_seconds=timeout_seconds,
            wait_for_deployment=wait_for_deployment)
    else:
        log.info('Using CLI upgrade flow to upgrade {} {}'.format(
            package_name, to_package_version))
        if additional_options:
            with tempfile.NamedTemporaryFile() as opts_f:
                opts_f.write(json.dumps(additional_options).encode('utf-8'))
                opts_f.flush()  # ensure json content is available for the CLI to read below
                sdk_cmd.svc_cli(
                    package_name, service_name,
                    'update start --package-version={} --options={}'.format(
                        to_package_version, opts_f.name))
        else:
            sdk_cmd.svc_cli(
                package_name, service_name,
                'update start --package-version={}'.format(to_package_version))
        # we must manually upgrade the package CLI because it's not done automatically in this flow
        # (and why should it? that'd imply the package CLI replacing itself via a call to the main CLI...)
        sdk_cmd.run_cli(
            'package install --yes --cli --package-version={} {}'.format(
                to_package_version, package_name))

    if wait_for_deployment:
        updated_config = get_config(package_name, service_name)

        if updated_config == initial_config:
            log.info('No config change detected. Tasks should not be restarted')
            sdk_tasks.check_tasks_not_updated(service_name, '', task_ids)
        else:
            log.info('Checking that all tasks have restarted')
            sdk_tasks.check_tasks_updated(service_name, '', task_ids)

        # this can take a while, default is 15 minutes. for example with HDFS, we can hit the expected
        # total task count via ONCE tasks, without actually completing deployment
        log.info("Waiting for package={} service={} to finish deployment plan...".format(
            package_name, service_name))
        sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds)
def pytest_runtest_setup(item):
    min_version_mark = item.get_marker('dcos_min_version')
    if min_version_mark:
        min_version = min_version_mark.args[0]
        message = 'Feature only supported in DC/OS {} and up'.format(min_version)
        if 'reason' in min_version_mark.kwargs:
            message += ': {}'.format(min_version_mark.kwargs['reason'])
        if sdk_utils.dcos_version_less_than(min_version):
            pytest.skip(message)
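# Hedged sketch (assumption, not from the original conftest): a test declaring the
# 'dcos_min_version' marker that the hook above consumes. args[0] carries the version string
# and the optional 'reason' kwarg is appended to the skip message; the test body is a placeholder.
@pytest.mark.dcos_min_version("1.10", reason="uses the 'update start' upgrade flow")
def test_example_requires_1_10():
    pass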
def setup_service_account(service_name: str, service_account_secret: str = None) -> dict: """ Setup the service account for TLS. If the account or secret of the specified name already exists, these are deleted. """ if sdk_utils.is_open_dcos(): log.error( "The setup of a service account requires DC/OS EE. service_name=%s", service_name) raise Exception("The setup of a service account requires DC/OS EE") name = service_name secret = name if service_account_secret is None else service_account_secret service_account_info = sdk_security.setup_security( service_name, linux_user="******", service_account=name, service_account_secret=secret) log.info("Adding permissions required for TLS.") if sdk_utils.dcos_version_less_than("1.11"): sdk_cmd.run_cli( "security org groups add_user superusers {name}".format(name=name)) else: acls = [ { "rid": "dcos:secrets:default:/{}/*".format(service_name), "action": "full" }, { "rid": "dcos:secrets:list:default:/{}".format(service_name), "action": "read" }, { "rid": "dcos:adminrouter:ops:ca:rw", "action": "full" }, { "rid": "dcos:adminrouter:ops:ca:ro", "action": "full" }, ] for acl in acls: cmd_list = [ "security", "org", "users", "grant", "--description", "\"Allow provisioning TLS certificates\"", name, acl["rid"], acl["action"] ] sdk_cmd.run_cli(" ".join(cmd_list)) return service_account_info
def setup_service_account(
    service_name: str,
    service_account_secret: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Setup the service account for TLS. If the account or secret of the specified
    name already exists, these are deleted.
    """

    if sdk_utils.is_open_dcos():
        log.error("The setup of a service account requires DC/OS EE. service_name=%s", service_name)
        raise Exception("The setup of a service account requires DC/OS EE")

    secret = service_name if service_account_secret is None else service_account_secret

    service_account = "{}-service-account".format(service_name.replace("/", ""))

    service_account_info = sdk_security.setup_security(
        service_name,
        linux_user="******",
        service_account=service_account,
        service_account_secret=secret,
    )

    log.info("Adding permissions required for TLS.")
    if sdk_utils.dcos_version_less_than("1.11"):
        sdk_cmd.run_cli("security org groups add_user superusers {sa}".format(sa=service_account))
    else:
        acls = [
            {"rid": "dcos:secrets:default:/{}/*".format(service_name.strip("/")), "action": "full"},
            {
                "rid": "dcos:secrets:list:default:/{}".format(service_name.strip("/")),
                "action": "read",
            },
            {"rid": "dcos:adminrouter:ops:ca:rw", "action": "full"},
            {"rid": "dcos:adminrouter:ops:ca:ro", "action": "full"},
        ]

        for acl in acls:
            cmd_list = [
                "security",
                "org",
                "users",
                "grant",
                "--description",
                '"Allow provisioning TLS certificates"',
                service_account,
                acl["rid"],
                acl["action"],
            ]

            sdk_cmd.run_cli(" ".join(cmd_list))

    return service_account_info
def test_health_check(kafka_server_with_health_check):
    if (
        sdk_utils.dcos_version_less_than("1.12")
        and kafka_server_with_health_check["mode"] == "SASL_SSL"
        and kafka_server_with_health_check["method"] == "FUNCTIONAL"
    ):
        log.info("Skipping check of Health-Check logs as DC/OS version is less than 1.12.")
        return

    check_health_check_logs()
def teardown_spark(service_name=SPARK_SERVICE_NAME, zk='spark_mesos_dispatcher'):
    sdk_install.uninstall(
        SPARK_PACKAGE_NAME,
        service_name,
        role=re.escape('*'),
        service_account='spark-service-acct',
        zk=zk)

    if not sdk_utils.dcos_version_less_than('1.10'):
        # On 1.10+, sdk_uninstall doesn't run janitor. However Spark always needs it for ZK cleanup.
        sdk_install.retried_run_janitor(service_name, re.escape('*'), 'spark-service-acct', zk)
def uninstall(package_name, service_name):
    """Uninstalls the specified service from the cluster, and verifies that its resources and
    framework were correctly cleaned up after the uninstall has completed.

    Any agents which are expected to have orphaned resources (e.g. due to being shut down)
    should be passed to ignore_dead_agent() before triggering the uninstall.
    """
    start = time.time()

    log.info("Uninstalling {}".format(service_name))

    # Display current SDK Plan before uninstall, helps with debugging stuck uninstalls
    log.info("Current plan status for {}".format(service_name))
    sdk_cmd.svc_cli(package_name, service_name, "plan status deploy", check=False)

    try:
        _retried_uninstall_package_and_wait(package_name, service_name)
    except Exception:
        log.exception("Got exception when uninstalling {}".format(service_name))
        raise

    cleanup_start = time.time()

    try:
        if sdk_utils.dcos_version_less_than("1.10"):
            # 1.9 and earlier: Run janitor to unreserve resources
            log.info("Janitoring {}".format(service_name))
            _retried_run_janitor(service_name)
    except Exception:
        log.exception("Got exception when cleaning up {}".format(service_name))
        raise

    finish = time.time()

    log.info(
        "Uninstalled {} after pkg({}) + cleanup({}) = total({})".format(
            service_name,
            sdk_utils.pretty_duration(cleanup_start - start),
            sdk_utils.pretty_duration(finish - cleanup_start),
            sdk_utils.pretty_duration(finish - start),
        )
    )

    # Sanity check: Verify that all resources and the framework have been successfully cleaned up,
    # and throw an exception if anything is left over (uninstall bug?)
    _verify_completed_uninstall(service_name)

    # Finally, remove the service from the installed list (used by sdk_diag)
    global _installed_service_names
    try:
        _installed_service_names.remove(service_name)
    except KeyError:
        pass  # Expected when tests preemptively uninstall at start of test
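# Hedged usage sketch (assumption): per the docstring above, agents that were deliberately shut
# down during a test are registered via ignore_dead_agent() before uninstalling so that their
# orphaned resources are tolerated. The ignore_dead_agent() signature, agent host, and package
# and service names below are placeholders, not from the original source.
def _example_uninstall_with_dead_agent():
    ignore_dead_agent("10.0.0.1")  # hypothetical agent host that the test shut down earlier
    uninstall("hello-world", "hello-world")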
def _task_exec(task_id_prefix: str, cmd: str, print_output: bool = True) -> Tuple[int, str, str]:
    if cmd.startswith("./") and sdk_utils.dcos_version_less_than("1.10"):
        # On 1.9 task exec is run relative to the host filesystem, not the container filesystem
        full_cmd = os.path.join(get_task_sandbox_path(task_id_prefix), cmd)

        if cmd.startswith("./bootstrap"):
            # On 1.9 we also need to set LIBPROCESS_IP for bootstrap
            full_cmd = 'bash -c "LIBPROCESS_IP=0.0.0.0 {}"'.format(full_cmd)
    else:
        full_cmd = cmd

    return run_cli("task exec {} {}".format(task_id_prefix, full_cmd), print_output=print_output)
def _task_exec(task_id_prefix: str, cmd: str) -> tuple:
    if cmd.startswith("./") and sdk_utils.dcos_version_less_than("1.10"):
        # On 1.9 task exec is run relative to the host filesystem, not the container filesystem
        full_cmd = os.path.join(get_task_sandbox_path(task_id_prefix), cmd)

        if cmd.startswith("./bootstrap"):
            # On 1.9 we also need to set LIBPROCESS_IP for bootstrap
            full_cmd = "bash -c \"LIBPROCESS_IP=0.0.0.0 {}\"".format(full_cmd)
    else:
        full_cmd = cmd

    return run_raw_cli("task exec {} {}".format(task_id_prefix, full_cmd))
def uninstall(
        package_name,
        service_name,
        role=None,
        service_account=None,
        zk=None):
    start = time.time()

    global _installed_service_names
    try:
        _installed_service_names.remove(service_name)
    except KeyError:
        pass  # allow tests to 'uninstall' up-front

    log.info('Uninstalling {}'.format(service_name))

    try:
        retried_uninstall_package_and_wait(package_name, service_name=service_name)
    except Exception as e:
        log.info('Got exception when uninstalling {}'.format(service_name))
        log.info(traceback.format_exc())
        raise
    finally:
        log.info('Reserved resources post uninstall:')
        sdk_utils.list_reserved_resources()

    cleanup_start = time.time()

    try:
        if sdk_utils.dcos_version_less_than('1.10'):
            log.info('Janitoring {}'.format(service_name))
            retried_run_janitor(service_name, role, service_account, zk)
        else:
            log.info('Waiting for Marathon app to be removed {}'.format(service_name))
            sdk_marathon.retried_wait_for_deployment_and_app_removal(
                sdk_marathon.get_app_id(service_name), timeout=TIMEOUT_SECONDS)
    except Exception as e:
        log.info('Got exception when cleaning up {}'.format(service_name))
        log.info(traceback.format_exc())
        raise
    finally:
        log.info('Reserved resources post cleanup:')
        sdk_utils.list_reserved_resources()

    finish = time.time()

    log.info(
        'Uninstalled {} after pkg({}) + cleanup({}) = total({})'.format(
            service_name,
            shakedown.pretty_duration(cleanup_start - start),
            shakedown.pretty_duration(finish - cleanup_start),
            shakedown.pretty_duration(finish - start)))
def uninstall(
        package_name,
        service_name,
        role=None,
        service_account=None,
        zk=None):
    start = time.time()

    global _installed_service_names
    try:
        _installed_service_names.remove(service_name)
    except KeyError:
        pass  # allow tests to 'uninstall' up-front

    log.info('Uninstalling {}'.format(service_name))

    try:
        retried_uninstall_package_and_wait(package_name, service_name=service_name)
    except Exception:
        log.info('Got exception when uninstalling {}'.format(service_name))
        log.info(traceback.format_exc())
        raise
    finally:
        log.info('Reserved resources post uninstall:')
        sdk_utils.list_reserved_resources()

    cleanup_start = time.time()

    try:
        if sdk_utils.dcos_version_less_than('1.10'):
            log.info('Janitoring {}'.format(service_name))
            retried_run_janitor(service_name, role, service_account, zk)
        else:
            log.info('Waiting for Marathon app to be removed {}'.format(service_name))
            sdk_marathon.retried_wait_for_deployment_and_app_removal(
                sdk_marathon.get_app_id(service_name), timeout=TIMEOUT_SECONDS)
    except Exception:
        log.info('Got exception when cleaning up {}'.format(service_name))
        log.info(traceback.format_exc())
        raise
    finally:
        log.info('Reserved resources post cleanup:')
        sdk_utils.list_reserved_resources()

    finish = time.time()

    log.info(
        'Uninstalled {} after pkg({}) + cleanup({}) = total({})'.format(
            service_name,
            shakedown.pretty_duration(cleanup_start - start),
            shakedown.pretty_duration(finish - cleanup_start),
            shakedown.pretty_duration(finish - start)))
def get_cmd_output_pair():
    """
    In DC/OS prior to version 1.10, task exec does not run the command in the MESOS_SANDBOX
    directory and this causes the check of the file contents to fail. Here we simply rely on
    the existence of the file.
    """
    if sdk_utils.dcos_version_less_than("1.10"):
        cmd = "task ls hello-0-server hello-container-path/toxic-output"
        output = ""
    else:
        cmd = "task exec hello-0-server cat hello-container-path/toxic-output"
        output = "I'm addicted to you / Don't you know that you're toxic?"

    return cmd, output
def uninstall(package_name, service_name):
    '''Uninstalls the specified service from the cluster, and verifies that its resources and
    framework were correctly cleaned up after the uninstall has completed.

    Any agents which are expected to have orphaned resources (e.g. due to being shut down)
    should be passed to ignore_dead_agent() before triggering the uninstall.
    '''
    start = time.time()

    log.info('Uninstalling {}'.format(service_name))

    try:
        _retried_uninstall_package_and_wait(package_name, service_name=service_name)
    except Exception:
        log.exception('Got exception when uninstalling {}'.format(service_name))
        raise

    cleanup_start = time.time()

    try:
        if sdk_utils.dcos_version_less_than('1.10'):
            # 1.9 and earlier: Run janitor to unreserve resources
            log.info('Janitoring {}'.format(service_name))
            _retried_run_janitor(service_name)
        else:
            # 1.10 and later: Wait for uninstall scheduler to finish and be removed by Cosmos
            log.info('Waiting for Marathon app to be removed {}'.format(service_name))
            sdk_marathon.retried_wait_for_deployment_and_app_removal(
                sdk_marathon.get_app_id(service_name), timeout=TIMEOUT_SECONDS)
    except Exception:
        log.exception('Got exception when cleaning up {}'.format(service_name))
        raise

    finish = time.time()

    log.info(
        'Uninstalled {} after pkg({}) + cleanup({}) = total({})'.format(
            service_name,
            shakedown.pretty_duration(cleanup_start - start),
            shakedown.pretty_duration(finish - cleanup_start),
            shakedown.pretty_duration(finish - start)))

    # Sanity check: Verify that all resources and the framework have been successfully cleaned up,
    # and throw an exception if anything is left over (uninstall bug?)
    _verify_completed_uninstall(service_name)

    # Finally, remove the service from the installed list (used by sdk_diag)
    global _installed_service_names
    try:
        _installed_service_names.remove(service_name)
    except KeyError:
        pass  # Expected when tests preemptively uninstall at start of test
def verify_shared_executor(
    pod_name, expected_files=["essential", "nonessential"], delete_files=True
):
    """verify that both tasks share the same executor:
    - matching ExecutorInfo
    - both 'essential' and 'nonessential' present in shared-volume/ across both tasks
    """
    rc, stdout, _ = sdk_cmd.svc_cli(
        config.PACKAGE_NAME, config.SERVICE_NAME, "pod info {}".format(pod_name), print_output=False
    )
    assert rc == 0, "Pod info failed"
    try:
        tasks = json.loads(stdout)
    except Exception:
        log.exception("Failed to parse pod info: {}".format(stdout))
        assert False, "Failed to parse pod info, see above"
    assert len(tasks) == 2, "Expected 2 tasks: {}".format(stdout)

    # check that the task executors all match
    executor = tasks[0]["info"]["executor"]
    for task in tasks[1:]:
        assert executor == task["info"]["executor"]

    # for each task, check shared volume content matches what's expected
    task_names = [task["info"]["name"] for task in tasks]
    for task_name in task_names:
        # 1.9 just uses the host filesystem in 'task exec', so use 'task ls' across the board instead
        filenames = sdk_cmd.run_cli("task ls {} shared-volume/".format(task_name))[1].strip().split()
        assert set(expected_files) == set(filenames)

    # delete files from volume in preparation for a following task relaunch
    if delete_files:
        if sdk_utils.dcos_version_less_than("1.10"):
            # 1.9 just uses the host filesystem in 'task exec', so figure out the absolute volume path manually
            expected_file_path = sdk_cmd.service_task_exec(
                config.SERVICE_NAME,
                task_names[0],
                "find /var/lib/mesos/slave/volumes -iname " + filenames[0],
            )[1].strip()
            # volume dir is parent of the expected file path.
            volume_dir = os.path.dirname(expected_file_path)
        else:
            # 1.10+ works correctly: path is relative to sandbox
            volume_dir = "shared-volume/"
        sdk_cmd.service_task_exec(
            config.SERVICE_NAME,
            task_names[0],
            "rm " + " ".join([os.path.join(volume_dir, name) for name in filenames]),
        )
def add_package_registry_stub() -> Dict:
    # TODO Remove this method, install from bootstrap registry.
    if PACKAGE_REGISTRY_STUB_URL not in os.environ:
        raise Exception('{} is not found in env.'.format(PACKAGE_REGISTRY_STUB_URL))
    stub_url = os.environ[PACKAGE_REGISTRY_STUB_URL]

    with urllib.request.urlopen(stub_url) as url:
        repo = json.loads(url.read().decode())
        min_supported = [
            x for x in repo['packages'] if x['name'] == PACKAGE_REGISTRY_NAME
        ][0]['minDcosReleaseVersion']

    if sdk_utils.dcos_version_less_than(min_supported):
        raise Exception('Min DC/OS {} required for package registry'.format(min_supported))

    return sdk_repository.add_stub_universe_urls([stub_url])
def pytest_runtest_setup(item):
    '''Hook to run before every test.'''
    # Inject header at start of test, following automatic "path/to/test_file.py::test_name":
    # Don't do this when running in teamcity, where it's redundant.
    if not teamcity.is_running_under_teamcity():
        print('''
==========
======= START: {}::{}
=========='''.format(sdk_utils.get_test_suite_name(item), item.name))

    # Check if we're entering a new test suite.
    global testlogs_test_index
    global testlogs_current_test_suite
    test_suite = sdk_utils.get_test_suite_name(item)
    if test_suite != testlogs_current_test_suite:
        # New test suite:
        # 1 Store all the task ids which already exist as of this point.
        testlogs_current_test_suite = test_suite
        global testlogs_ignored_task_ids
        testlogs_ignored_task_ids = testlogs_ignored_task_ids.union(get_task_ids())
        log.info('Entering new test suite {}: {} preexisting tasks will be ignored on test failure.'.format(
            test_suite, len(testlogs_ignored_task_ids)))
        # 2 Reset the test index.
        testlogs_test_index = 0
        # 3 Remove any prior logs for the test suite.
        test_log_dir = sdk_utils.get_test_suite_log_directory(item)
        if os.path.exists(test_log_dir):
            log.info('Deleting existing test suite logs: {}/'.format(test_log_dir))
            shutil.rmtree(test_log_dir)

    # Increment the test index (to 1, if this is a new suite), and pass the value to sdk_utils for use internally.
    testlogs_test_index += 1
    sdk_utils.set_test_index(testlogs_test_index)

    min_version_mark = item.get_marker('dcos_min_version')
    if min_version_mark:
        min_version = min_version_mark.args[0]
        message = 'Feature only supported in DC/OS {} and up'.format(min_version)
        if 'reason' in min_version_mark.kwargs:
            message += ': {}'.format(min_version_mark.kwargs['reason'])
        if sdk_utils.dcos_version_less_than(min_version):
            pytest.skip(message)
def _task_exec(task_id_prefix: str, cmd: str, return_stderr_in_stdout: bool = False) -> tuple:
    if cmd.startswith("./") and sdk_utils.dcos_version_less_than("1.10"):
        # On 1.9 task exec is run relative to the host filesystem, not the container filesystem
        full_cmd = os.path.join(get_task_sandbox_path(task_id_prefix), cmd)

        if cmd.startswith("./bootstrap"):
            # On 1.9 we also need to set LIBPROCESS_IP for bootstrap
            full_cmd = "bash -c \"LIBPROCESS_IP=0.0.0.0 {}\"".format(full_cmd)
    else:
        full_cmd = cmd

    rc, stdout, stderr = run_raw_cli("task exec {} {}".format(task_id_prefix, full_cmd))

    if return_stderr_in_stdout:
        return rc, stdout + "\n" + stderr

    return rc, stdout, stderr
def _escape_placement_for_1_9(options: dict) -> dict:
    # 1.9 requires `\"` to be escaped to `\\\"`
    # when submitting placement constraints
    log.info(options)
    if not sdk_utils.dcos_version_less_than("1.10"):
        log.info("DC/OS version >= 1.10")
        return options

    def escape_section_placement(section: str, options: dict) -> dict:
        if section in options and "placement" in options[section]:
            options[section]["placement"] = options[section]["placement"].replace("\"", "\\\"")
            log.info("Escaping %s", section)

        log.info(options)
        return options

    return escape_section_placement("hello", escape_section_placement("world", options))
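# Hedged illustration (assumption; the placement value below is made up): on DC/OS 1.9 each
# literal '"' inside the placement string gains a preceding backslash so it survives the extra
# layer of quoting applied when the options JSON is passed through the CLI. On 1.10+ the options
# are returned unchanged.
def _example_escape_placement():
    opts = {"hello": {"placement": '[["hostname", "UNIQUE"]]'}}
    escaped = _escape_placement_for_1_9(opts)
    # On 1.9, escaped["hello"]["placement"] now reads '[[\"hostname\", \"UNIQUE\"]]'
    return escaped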
def _upgrade_or_downgrade(package_name,
                          to_package_version,
                          service_name,
                          running_task_count,
                          additional_options,
                          timeout_seconds,
                          wait_for_deployment):
    task_ids = sdk_tasks.get_task_ids(service_name, '')

    if sdk_utils.dcos_version_less_than("1.10") or shakedown.ee_version() is None:
        log.info('Using marathon upgrade flow to upgrade {} {}'.format(
            package_name, to_package_version))
        sdk_marathon.destroy_app(service_name)
        sdk_install.install(
            package_name,
            service_name,
            running_task_count,
            additional_options=additional_options,
            package_version=to_package_version,
            timeout_seconds=timeout_seconds,
            wait_for_deployment=wait_for_deployment)
    else:
        log.info('Using CLI upgrade flow to upgrade {} {}'.format(
            package_name, to_package_version))
        if additional_options:
            with tempfile.NamedTemporaryFile() as opts_f:
                opts_f.write(json.dumps(additional_options).encode('utf-8'))
                opts_f.flush()  # ensure json content is available for the CLI to read below
                sdk_cmd.svc_cli(
                    package_name, service_name,
                    'update start --package-version={} --options={}'.format(
                        to_package_version, opts_f.name))
        else:
            sdk_cmd.svc_cli(
                package_name, service_name,
                'update start --package-version={}'.format(to_package_version))

    if wait_for_deployment:
        log.info('Checking that all tasks have restarted')
        sdk_tasks.check_tasks_updated(service_name, '', task_ids)

        # this can take a while, default is 15 minutes. for example with HDFS, we can hit the expected
        # total task count via FINISHED tasks, without actually completing deployment
        log.info("Waiting for {}/{} to finish deployment plan...".format(
            package_name, service_name))
        sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds)
def wait_for_toxic_sidecar():
    """
    Since the sidecar task fails too quickly, we check for the contents of
    the file generated in hello-container-path/toxic-output instead

    Note that we only check the output of hello-0.

    In DC/OS prior to version 1.10, task exec does not run the command in the MESOS_SANDBOX
    directory and this causes the check of the file contents to fail. Here we simply rely on
    the existence of the file.
    """
    if sdk_utils.dcos_version_less_than("1.10"):
        cmd = "task ls hello-0-server hello-container-path/toxic-output"
        expected_output = ""
    else:
        cmd = "task exec hello-0-server cat hello-container-path/toxic-output"
        expected_output = "I'm addicted to you / Don't you know that you're toxic?"

    output = sdk_cmd.run_cli(cmd).strip()
    logging.info("Checking for toxic output returned: %s", output)

    return output == expected_output
def wait_for_toxic_sidecar():
    """
    Since the sidecar task fails too quickly, we check for the contents of
    the file generated in hello-container-path/toxic-output instead

    Note that we only check the output of hello-0.

    In DC/OS prior to version 1.10, task exec does not run the command in the MESOS_SANDBOX
    directory and this causes the check of the file contents to fail. Here we simply rely on
    the existence of the file.
    """
    if sdk_utils.dcos_version_less_than("1.10"):
        # Note: As of this writing, 'task ls' does 'contains' comparisons of task ids correctly,
        # so we don't need to include a service name prefix here.
        _, output, _ = sdk_cmd.run_cli("task ls hello-0-server hello-container-path/toxic-output")
        expected_output = ""
    else:
        _, output, _ = sdk_cmd.service_task_exec(
            config.SERVICE_NAME, "hello-0-server", "cat hello-container-path/toxic-output"
        )
        expected_output = "I'm addicted to you / Don't you know that you're toxic?"

    return output.strip() == expected_output
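# Hedged usage sketch (assumption): wait_for_toxic_sidecar() returns a bool, so callers
# presumably poll it until it reports success. The retrying-based wrapper below is illustrative;
# the wait/timeout values are placeholders, and 'retrying' is only imported by some snippets in
# this collection.
@retrying.retry(wait_fixed=1000, stop_max_delay=5 * 60 * 1000, retry_on_result=lambda res: not res)
def _example_wait_until_toxic_output_present():
    return wait_for_toxic_sidecar()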
def setup_service_account(service_name: str, service_account_secret: str = None) -> dict:
    """
    Setup the service account for TLS. If the account or secret of the specified
    name already exists, these are deleted.
    """

    if sdk_utils.is_open_dcos():
        log.error("The setup of a service account requires DC/OS EE. service_name=%s", service_name)
        raise Exception("The setup of a service account requires DC/OS EE")

    name = service_name
    secret = name if service_account_secret is None else service_account_secret

    service_account_info = sdk_security.setup_security(service_name,
                                                       service_account=name,
                                                       service_account_secret=secret)

    log.info("Adding permissions required for TLS.")
    if sdk_utils.dcos_version_less_than("1.11"):
        sdk_cmd.run_cli("security org groups add_user superusers {name}".format(name=name))
    else:
        acls = [
            {"rid": "dcos:secrets:default:/{}/*".format(service_name), "action": "full"},
            {"rid": "dcos:secrets:list:default:/{}".format(service_name), "action": "read"},
            {"rid": "dcos:adminrouter:ops:ca:rw", "action": "full"},
            {"rid": "dcos:adminrouter:ops:ca:ro", "action": "full"},
        ]

        for acl in acls:
            cmd_list = ["security", "org", "users", "grant",
                        "--description", "\"Allow provisioning TLS certificates\"",
                        name, acl["rid"], acl["action"]]

            sdk_cmd.run_cli(" ".join(cmd_list))

    return service_account_info
def verify_shared_executor(pod_name, expected_files=['essential', 'nonessential'], delete_files=True):
    '''verify that both tasks share the same executor:
    - matching ExecutorInfo
    - both 'essential' and 'nonessential' present in shared-volume/ across both tasks
    '''
    tasks = sdk_cmd.svc_cli(
        config.PACKAGE_NAME, config.SERVICE_NAME,
        'pod info {}'.format(pod_name), json=True)
    assert len(tasks) == 2

    # check that the task executors all match
    executor = tasks[0]['info']['executor']
    for task in tasks[1:]:
        assert executor == task['info']['executor']

    # for each task, check shared volume content matches what's expected
    task_names = [task['info']['name'] for task in tasks]
    for task_name in task_names:
        # 1.9 just uses the host filesystem in 'task exec', so use 'task ls' across the board instead
        filenames = sdk_cmd.run_cli('task ls {} shared-volume/'.format(task_name)).strip().split()
        assert set(expected_files) == set(filenames)

    # delete files from volume in preparation for a following task relaunch
    if delete_files:
        if sdk_utils.dcos_version_less_than("1.10"):
            # 1.9 just uses the host filesystem in 'task exec', so figure out the absolute volume path manually
            expected_file_path = sdk_cmd.service_task_exec(
                config.SERVICE_NAME,
                task_names[0],
                'find /var/lib/mesos/slave/volumes -iname ' + filenames[0])[1].strip()
            # volume dir is parent of the expected file path.
            volume_dir = os.path.dirname(expected_file_path)
        else:
            # 1.10+ works correctly: path is relative to sandbox
            volume_dir = 'shared-volume/'
        sdk_cmd.service_task_exec(
            config.SERVICE_NAME,
            task_names[0],
            'rm ' + ' '.join([os.path.join(volume_dir, name) for name in filenames]))
import sdk_cmd
import sdk_install
import sdk_jobs
import sdk_plan
import sdk_recovery
import sdk_utils
from security import transport_encryption

from tests import config

pytestmark = [
    sdk_utils.dcos_ee_only,
    pytest.mark.skipif(
        sdk_utils.dcos_version_less_than("1.10"), reason="TLS tests require DC/OS 1.10+"
    ),
]


@pytest.fixture(scope="module")
def dcos_ca_bundle() -> str:
    """
    Retrieves the DC/OS CA bundle and returns its content.
    """
    return transport_encryption.fetch_dcos_ca_bundle_contents().decode("ascii")


@pytest.fixture(scope="module")
def service_account(configure_security: None) -> Iterator[Dict[str, Any]]:
    """
@pytest.mark.recovery
def test_master_killed():
    sdk_cmd.kill_task_with_pattern('mesos-master')
    config.check_running()


@pytest.mark.recovery
def test_zk_killed():
    sdk_cmd.kill_task_with_pattern('zookeeper')
    config.check_running()


@pytest.mark.recovery
@pytest.mark.skipif(sdk_utils.dcos_version_less_than("1.10"),
                    reason="BLOCKED-INFINITY-3203: Skipping recovery tests on 1.9")
def test_config_update_then_kill_task_in_node():
    # kill 1 of 2 world tasks
    world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    config.bump_world_cpus()
    sdk_cmd.kill_task_with_pattern('world', 'world-0-server.{}.mesos'.format(config.SERVICE_NAME))
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', world_ids)
    config.check_running()


@pytest.mark.recovery
@pytest.mark.skipif(sdk_utils.dcos_version_less_than("1.10"),
                    reason="BLOCKED-INFINITY-3203: Skipping recovery tests on 1.9")
def test_config_update_then_kill_all_task_in_node():
    # kill both world tasks
import sdk_tasks
import sdk_utils
from security import kerberos as krb5
from security import transport_encryption

from tests import auth
from tests import config

log = logging.getLogger(__name__)

foldered_name = config.FOLDERED_SERVICE_NAME

pytestmark = [
    sdk_utils.dcos_ee_only,
    pytest.mark.skipif(
        sdk_utils.dcos_version_less_than("1.10"),
        reason="Kerberos tests require DC/OS 1.10 or higher",
    ),
]


@pytest.fixture(scope="module", autouse=True)
def service_account(configure_security):
    """
    Sets up a service account for use with TLS.
    """
    try:
        service_account_info = transport_encryption.setup_service_account(foldered_name)

        yield service_account_info
    finally:
def get_foldered_dns_name(service_name: str) -> str:
    if sdk_utils.dcos_version_less_than("1.10"):
        return service_name
    return sdk_utils.get_foldered_name(service_name).replace("/", "")
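# Hedged illustration (assumption): assuming sdk_utils.get_foldered_name() prefixes the service
# name with a test folder path such as "/test/integration/", the DNS-safe form simply drops the
# slashes on 1.10+, while 1.9 (which predates foldered service names) keeps the name as-is.
def _example_foldered_dns_name():
    # e.g. "hdfs" -> "testintegrationhdfs" on DC/OS 1.10+, "hdfs" on 1.9
    # (given the assumed "/test/integration/" folder prefix)
    return get_foldered_dns_name("hdfs")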
import pytest
import retrying
import shakedown

import sdk_install
import sdk_utils
import sdk_marathon
import sdk_cmd

from tests import config

pytestmark = pytest.mark.skipif(
    sdk_utils.is_strict_mode() and sdk_utils.dcos_version_less_than('1.11'),
    reason="secure hierarchical roles are only supported on 1.11+")

pre_reserved_options = {
    "service": {
        "yaml": "pre-reserved"
    }
}


@pytest.fixture(scope='module', autouse=True)
def configure_package(configure_security):
    try:
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)
        sdk_install.install(
            config.PACKAGE_NAME,
            config.SERVICE_NAME,
            config.DEFAULT_TASK_COUNT,
            additional_options=pre_reserved_options)
    }

    dispatcher_zk = get_zk_dispatcher(service_name)

    try:
        utils.upload_dcos_test_jar()
        utils.require_spark(service_name=service_name, additional_options=options, zk=dispatcher_zk)
        yield
    finally:
        utils.teardown_spark(service_name=service_name, zk=dispatcher_zk)


@pytest.mark.skipif(
    sdk_utils.dcos_version_less_than('1.14'),
    reason="Group role enforcement is available only in DCOS v1.14 and higher")
@pytest.mark.sanity
@pytest.mark.parametrize('group,enforce_group,service_name,role,enforce_role',
                         [("dev", True, "/dev/spark", "spark_role", False),
                          ("dev", True, "/dev/spark", "spark_role", True),
                          ("dev", True, "/dev/spark", "dev", False),
                          ("dev", True, "/dev/spark", "dev", True)])
def test_marathon_group_enforced(create_group, setup_spark, group, enforce_group,
                                 service_name, role, enforce_role):
    log.info(
        "Running test: group='{}', enforce_group='{}', service_name='{}', role='{}', enforce_role='{}'"
        .format(group, enforce_group, service_name, role, enforce_role))

    dispatcher_framework = dcos_utils.get_framework_json(service_name,
def _uninstall(
        package_name,
        service_name,
        role=None,
        service_account=None,
        zk=None):
    start = time.time()

    if sdk_utils.dcos_version_less_than("1.10"):
        log.info('Uninstalling/janitoring {}'.format(service_name))
        try:
            shakedown.uninstall_package_and_wait(
                package_name, service_name=service_name)
        except (dcos.errors.DCOSException, ValueError) as e:
            log.info('Got exception when uninstalling package, ' +
                     'continuing with janitor anyway: {}'.format(e))
            if 'marathon' in str(e):
                log.info('Detected a probable marathon flake. Raising so retry will trigger.')
                raise

        janitor_start = time.time()

        # leading slash removed, other slashes converted to double underscores:
        deslashed_service_name = service_name.lstrip('/').replace('/', '__')
        if role is None:
            role = deslashed_service_name + '-role'
        if service_account is None:
            service_account = service_name + '-principal'
        if zk is None:
            zk = 'dcos-service-' + deslashed_service_name
        janitor_cmd = ('docker run mesosphere/janitor /janitor.py '
                       '-r {role} -p {service_account} -z {zk} --auth_token={auth}')
        shakedown.run_command_on_master(
            janitor_cmd.format(
                role=role,
                service_account=service_account,
                zk=zk,
                auth=sdk_cmd.run_cli('config show core.dcos_acs_token', print_output=False).strip()))

        finish = time.time()

        log.info(
            'Uninstall done after pkg({}) + janitor({}) = total({})'.format(
                shakedown.pretty_duration(janitor_start - start),
                shakedown.pretty_duration(finish - janitor_start),
                shakedown.pretty_duration(finish - start)))
    else:
        log.info('Uninstalling {}'.format(service_name))
        try:
            shakedown.uninstall_package_and_wait(
                package_name, service_name=service_name)

            # service_name may already contain a leading slash:
            marathon_app_id = '/' + service_name.lstrip('/')
            log.info('Waiting for no deployments for {}'.format(marathon_app_id))
            shakedown.deployment_wait(TIMEOUT_SECONDS, marathon_app_id)

            # wait for service to be gone according to marathon
            def marathon_dropped_service():
                client = shakedown.marathon.create_client()
                app_list = client.get_apps()
                app_ids = [app['id'] for app in app_list]
                log.info('Marathon apps: {}'.format(app_ids))
                matching_app_ids = [
                    app_id for app_id in app_ids if app_id == marathon_app_id
                ]
                if len(matching_app_ids) > 1:
                    log.info('Found multiple apps with id {}'.format(marathon_app_id))
                return len(matching_app_ids) == 0

            log.info('Waiting for no {} Marathon app'.format(marathon_app_id))
            shakedown.time_wait(marathon_dropped_service, timeout_seconds=TIMEOUT_SECONDS)
        except (dcos.errors.DCOSException, ValueError) as e:
            log.info('Got exception when uninstalling package: {}'.format(e))
            if 'marathon' in str(e):
                log.info('Detected a probable marathon flake. Raising so retry will trigger.')
                raise
        finally:
            sdk_utils.list_reserved_resources()
def get_foldered_dns_name(service_name):
    if sdk_utils.dcos_version_less_than('1.10'):
        return service_name
    return sdk_utils.get_foldered_name(service_name).replace("/", "")
import sdk_utils
from security import kerberos as krb5
from security import transport_encryption

from tests import auth
from tests import config

log = logging.getLogger(__name__)

pytestmark = [
    sdk_utils.dcos_ee_only,
    pytest.mark.skipif(
        sdk_utils.dcos_version_less_than("1.10"), reason="TLS tests require DC/OS 1.10+"
    ),
]


@pytest.fixture(scope="module", autouse=True)
def service_account(configure_security):
    """
    Sets up a service account for use with TLS.
    """
    try:
        service_account_info = transport_encryption.setup_service_account(config.SERVICE_NAME)

        yield service_account_info
    finally:
        transport_encryption.cleanup_service_account(config.SERVICE_NAME, service_account_info)
import logging

import pytest
import retrying

import sdk_cmd
import sdk_install
import sdk_marathon
import sdk_plan
import sdk_utils
from tests import config

log = logging.getLogger(__name__)

pytestmark = pytest.mark.skipif(
    sdk_utils.is_strict_mode() and sdk_utils.dcos_version_less_than('1.11'),
    reason="secure hierarchical roles are only supported on 1.11+")


@pytest.fixture(scope='module', autouse=True)
def configure_package(configure_security):
    try:
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)
        options = {"service": {"yaml": "pre-reserved-sidecar"}}

        # this yml has 1 hello's + 0 world's:
        sdk_install.install(config.PACKAGE_NAME, config.SERVICE_NAME, 1, additional_options=options)
from security import transport_encryption

from tests import auth
from tests import config
from tests import test_utils

log = logging.getLogger(__name__)

pytestmark = [
    pytest.mark.skip(
        reason="INFINTY-INFINITY-3367: Address issues in Kafka security toggle"
    ),
    pytest.mark.skipif(sdk_utils.is_open_dcos(),
                       reason="Security tests require DC/OS EE"),
    pytest.mark.skipif(sdk_utils.dcos_version_less_than("1.10"),
                       reason="Security tests require DC/OS 1.10+"),
]

MESSAGES = []


@pytest.fixture(scope='module', autouse=True)
def service_account(configure_security):
    """
    Sets up a service account for use with TLS.
    """
    try:
        name = config.SERVICE_NAME
        service_account_info = transport_encryption.setup_service_account(name)