def test_history_kdc_config(hdfs_with_kerberos):
    history_service_with_kdc_config = "spark-history-with-kdc-config"
    try:
        # This deployment will fail if kerberos is not configured properly.
        shakedown.install_package(
            package_name=utils.HISTORY_PACKAGE_NAME,
            options_json={
                "service": {
                    "name": history_service_with_kdc_config,
                    "user": SPARK_HISTORY_USER,
                    "hdfs-config-url": "http://api.{}.marathon.l4lb.thisdcos.directory/v1/endpoints"
                                       .format(HDFS_SERVICE_NAME)
                },
                "security": {
                    "kerberos": {
                        "enabled": True,
                        "kdc": {
                            "hostname": hdfs_with_kerberos.get_host(),
                            "port": int(hdfs_with_kerberos.get_port())
                        },
                        "realm": sdk_auth.REALM,
                        "principal": GENERIC_HDFS_USER_PRINCIPAL,
                        "keytab": KEYTAB_SECRET_PATH
                    }
                }
            },
            wait_for_completion=True,  # wait for it to become healthy
            timeout_sec=240)
    finally:
        sdk_marathon.destroy_app(history_service_with_kdc_config)

def kafka_client(kerberos):
    """
    A pytest fixture to install a Kafka client as a Marathon application.
    This client is capable of both Kerberos and TLS communication.

    On teardown, the client is uninstalled.
    """
    try:
        client_id = "kafka-client"
        client = {
            "id": client_id,
            "mem": 512,
            "container": {
                "type": "MESOS",
                "docker": {
                    "image": "elezar/kafka-client:latest",
                    "forcePullImage": True
                },
                "volumes": [
                    {
                        "containerPath": "/tmp/kafkaconfig/kafka-client.keytab",
                        "secret": "kafka_keytab"
                    }
                ]
            },
            "secrets": {
                "kafka_keytab": {
                    "source": kerberos.get_keytab_path(),
                }
            },
            "networks": [
                {
                    "mode": "host"
                }
            ],
            "env": {
                "JVM_MaxHeapSize": "512",
                "KAFKA_CLIENT_MODE": "test",
                "KAFKA_TOPIC": "securetest",
                "KAFKA_BROKER_LIST": ""
            }
        }

        sdk_marathon.install_app(client)

        # Create a TLS certificate for the TLS tests
        transport_encryption.create_tls_artifacts(
            cn="client",
            marathon_task=client_id)

        yield {
            **client,
            **{
                "tls-id": "client",
            }
        }
    finally:
        sdk_marathon.destroy_app(client_id)

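# A minimal sketch (hypothetical test, not from the source) of how the fixture
# above might be consumed once registered via @pytest.fixture: the yielded dict
# is the Marathon app definition merged with a "tls-id" key naming the TLS
# artifacts created for the client.
def test_kafka_client_fixture_sketch(kafka_client):
    assert kafka_client["id"] == "kafka-client"
    assert kafka_client["tls-id"] == "client"
    assert kafka_client["env"]["KAFKA_TOPIC"] == "securetest"
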
def test_upgrade_downgrade():
    # Ensure both Universe and the test repo exist.
    # In particular, the Framework Test Suite only runs packages from Universe;
    # it doesn't add a test repo like the PR jobs.
    if len(shakedown.get_package_repos()['repositories']) != 2:
        print('No test repo found. Skipping test_upgrade_downgrade')
        return

    test_repo_name, test_repo_url = get_test_repo_info()
    test_version = get_pkg_version()
    print('Found test version: {}'.format(test_version))
    remove_repo(test_repo_name, test_version)
    master_version = get_pkg_version()
    print('Found master version: {}'.format(master_version))

    print('Installing master version')
    install.install(PACKAGE_NAME, DEFAULT_TASK_COUNT, package_version=master_version)

    print('Upgrading to test version')
    marathon.destroy_app(PACKAGE_NAME)
    add_repo(test_repo_name, test_repo_url, prev_version=master_version)
    install.install(PACKAGE_NAME, DEFAULT_TASK_COUNT, package_version=test_version)

    print('Downgrading to master version')
    marathon.destroy_app(PACKAGE_NAME)
    install.install(PACKAGE_NAME, DEFAULT_TASK_COUNT, package_version=master_version)

def test_user_can_write_and_read(kerberos):
    try:
        client_app_def = config.get_kerberized_hdfs_client_app()
        client_app_def["secrets"]["hdfs_keytab"]["source"] = kerberos.get_keytab_path()
        client_app_def["env"]["REALM"] = kerberos.get_realm()
        client_app_def["env"]["KDC_ADDRESS"] = kerberos.get_kdc_address()
        sdk_marathon.install_app(client_app_def)
        client_task_id = client_app_def["id"]

        sdk_auth.kinit(client_task_id, keytab=config.KEYTAB,
                       principal=config.GENERIC_HDFS_USER_PRINCIPAL)

        write_cmd = "/bin/bash -c '{}'".format(
            config.hdfs_write_command(config.TEST_FILE_1_NAME, config.TEST_CONTENT_SMALL))
        sdk_tasks.task_exec(client_task_id, write_cmd)

        read_cmd = "/bin/bash -c '{}'".format(
            config.hdfs_read_command(config.TEST_FILE_1_NAME))
        _, stdout, _ = sdk_tasks.task_exec(client_task_id, read_cmd)
        assert stdout == config.TEST_CONTENT_SMALL
    finally:
        # destroy by app id: client_task_id is unbound if install_app() fails above
        sdk_marathon.destroy_app(client_app_def["id"])

def install(self) -> Dict[str, Any]:
    if sdk_marathon.app_exists(self.app_definition["id"]):
        if self._persist:
            log.info("Found installed KDC app, reusing it")
            return _get_kdc_task(self.app_definition["id"])
        log.info("Found installed KDC app, destroying it first")
        sdk_marathon.destroy_app(self.app_definition["id"])

    # (re-)create a service account for the KDC service
    sdk_security.create_service_account(
        service_account_name=KDC_SERVICE_ACCOUNT,
        service_account_secret=KDC_SERVICE_ACCOUNT_SECRET,
    )
    sdk_security._grant(
        KDC_SERVICE_ACCOUNT,
        "dcos:secrets:default:%252F*",
        "Create any secret in the root path",
        "create",
    )
    sdk_security._grant(
        KDC_SERVICE_ACCOUNT,
        "dcos:secrets:default:%252F*",
        "Update any secret in the root path",
        "update",
    )

    log.info("Installing KDC Marathon app")
    sdk_marathon.install_app(self.app_definition)
    log.info("KDC app installed successfully")

    log.info("Waiting for KDC web API endpoint to become available")
    self.__wait_for_kdc_api()
    log.info("KDC web API is now available")

    return _get_kdc_task(self.app_definition["id"])

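# Hedged usage sketch of the install() method above. The constructor name and
# the persist flag spelling are assumptions; the grounded behavior is that with
# persistence enabled, a second install() finds the running KDC app and returns
# the same task info instead of reinstalling.
kdc = KerberosEnvironment(persist=True)  # hypothetical constructor/flag
kdc_task = kdc.install()                 # installs the app, waits for the web API
assert kdc.install() == kdc_task         # fast path: reuses the running KDC app
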
def test_upgrade():
    test_version = upgrade.get_pkg_version(PACKAGE_NAME)
    utils.out('Found test version: {}'.format(test_version))

    repositories = json.loads(cmd.run_cli('package repo list --json'))['repositories']
    utils.out("Repositories: " + str(repositories))
    if len(repositories) < 2:
        utils.out("There is only one version in the repository. Skipping upgrade test!")
        assert repositories[0]['name'] == 'Universe'
        return

    test_repo_name, test_repo_url = upgrade.get_test_repo_info()
    for repo in repositories:
        if repo['name'] != 'Universe':
            shakedown.remove_package_repo(repo['name'])

    universe_version = upgrade.get_pkg_version(PACKAGE_NAME)
    utils.out('Found Universe version: {}'.format(universe_version))

    utils.out('Installing Universe version: {}'.format(universe_version))
    install.install(PACKAGE_NAME, DEFAULT_BROKER_COUNT)
    utils.out('Installation complete for Universe version: {}'.format(universe_version))

    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)
    broker_ids = tasks.get_task_ids(SERVICE_NAME, 'broker-')

    utils.out('Adding test version to repository with name: {} and url: {}'.format(
        test_repo_name, test_repo_url))
    upgrade.add_repo(test_repo_name, test_repo_url, universe_version, 0, PACKAGE_NAME)

    utils.out('Upgrading to test version: {}'.format(test_version))
    marathon.destroy_app(SERVICE_NAME)

    utils.out('Installing test version: {}'.format(test_version))
    # installation will return with old tasks because they are still running
    install.install(PACKAGE_NAME, DEFAULT_BROKER_COUNT)
    utils.out('Installation complete for test version: {}'.format(test_version))

    # wait till tasks are restarted
    tasks.check_tasks_updated(SERVICE_NAME, '{}-'.format(DEFAULT_POD_TYPE), broker_ids)
    utils.out('All tasks are restarted')

    # all tasks are running
    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)

    address = service_cli('endpoints {}'.format(DEFAULT_TASK_NAME))
    assert len(address) == 3
    assert len(address['dns']) == DEFAULT_BROKER_COUNT
    assert len(address['address']) == DEFAULT_BROKER_COUNT

def update_or_upgrade_or_downgrade(
    package_name: str,
    service_name: str,
    to_version: Optional[str],
    to_options: Dict[str, Any],
    expected_running_tasks: int,
    wait_for_deployment: bool = True,
    timeout_seconds: int = TIMEOUT_SECONDS,
) -> bool:
    initial_config = get_config(package_name, service_name)
    task_ids = sdk_tasks.get_task_ids(service_name, "")
    if (to_version and not is_cli_supports_service_version_upgrade()) or (
        to_options and not is_cli_supports_service_options_update()
    ):
        log.info("Using marathon flow to upgrade %s to version [%s]", service_name, to_version)
        sdk_marathon.destroy_app(service_name)
        sdk_install.install(
            package_name,
            service_name,
            expected_running_tasks,
            additional_options=to_options,
            package_version=to_version,
            timeout_seconds=timeout_seconds,
            wait_for_deployment=wait_for_deployment,
        )
    else:
        _update_service_with_cli(package_name, service_name, to_version, to_options)

    if wait_for_deployment:
        _wait_for_deployment(package_name, service_name, initial_config, task_ids, timeout_seconds)

    return not wait_for_deployment

def _upgrade_or_downgrade(from_package_name,
                          to_package_name,
                          to_package_version,
                          service_name,
                          running_task_count,
                          additional_options,
                          timeout_seconds):
    task_ids = tasks.get_task_ids(service_name, '')
    if (shakedown.dcos_version_less_than("1.10")
            or shakedown.ee_version() is None
            or from_package_name != to_package_name):
        log.info('Using marathon upgrade flow to upgrade {} => {} {}'.format(
            from_package_name, to_package_name, to_package_version))
        marathon.destroy_app(service_name)
        install.install(to_package_name,
                        running_task_count,
                        service_name=service_name,
                        additional_options=additional_options,
                        timeout_seconds=timeout_seconds,
                        package_version=to_package_version)
    else:
        log.info('Using CLI upgrade flow to upgrade {} => {} {}'.format(
            from_package_name, to_package_name, to_package_version))
        if additional_options:
            with tempfile.NamedTemporaryFile() as opts_f:
                opts_f.write(json.dumps(additional_options).encode('utf-8'))
                opts_f.flush()  # ensure json content is available for the CLI
                cmd.run_cli(
                    '{} --name={} update start --package-version={} --options={}'.format(
                        to_package_name, service_name, to_package_version, opts_f.name))
        else:
            cmd.run_cli(
                '{} --name={} update start --package-version={}'.format(
                    to_package_name, service_name, to_package_version))

    log.info('Checking that all tasks have restarted')
    tasks.check_tasks_updated(service_name, '', task_ids)

def test_upgrade_downgrade():
    # Ensure both Universe and the test repo exist.
    if len(package.get_repo_list()) != 2:
        print('No test repo found. Skipping test_upgrade_downgrade')
        return

    test_version = package.get_pkg_version()
    print('Found test version: {}'.format(test_version))
    package.add_repo(MASTER_CUSTOM_NAME, MASTER_CUSTOM_URL, prev_version=test_version)
    master_version = package.get_pkg_version()
    print('Found master version: {}'.format(master_version))

    print('Installing master version')
    install.install(PACKAGE_NAME, DEFAULT_TASK_COUNT, package_version=master_version)
    write_some_data("data-0-node.hdfs.mesos", TEST_FILE_NAME)
    # gives chance for write to succeed and replication to occur
    time.sleep(5)

    print('Upgrading to test version')
    marathon.destroy_app(PACKAGE_NAME)
    install.install(PACKAGE_NAME, DEFAULT_TASK_COUNT, package_version=test_version)
    read_some_data("data-0-node.hdfs.mesos", TEST_FILE_NAME)

    print('Downgrading to master version')
    marathon.destroy_app(PACKAGE_NAME)
    install.install(PACKAGE_NAME, DEFAULT_TASK_COUNT, package_version=master_version)
    read_some_data("data-0-node.hdfs.mesos", TEST_FILE_NAME)

    # clean up
    package.remove_repo(MASTER_CUSTOM_NAME, PACKAGE_NAME, master_version)

def update_or_upgrade_or_downgrade(
    package_name: str,
    service_name: str,
    to_version: Optional[str],
    to_options: Dict[str, Any],
    expected_running_tasks: int,
    wait_for_deployment: bool = True,
    timeout_seconds: int = TIMEOUT_SECONDS,
) -> None:
    initial_config = get_config(package_name, service_name)
    task_ids = sdk_tasks.get_task_ids(service_name, "")
    if (to_version and not is_cli_supports_service_version_upgrade()) or (
            to_options and not is_cli_supports_service_options_update()):
        log.info("Using marathon flow to upgrade %s to version [%s]", service_name, to_version)
        sdk_marathon.destroy_app(service_name)
        sdk_install.install(
            package_name,
            service_name,
            expected_running_tasks,
            additional_options=to_options,
            package_version=to_version,
            timeout_seconds=timeout_seconds,
            wait_for_deployment=wait_for_deployment,
        )
    else:
        _update_service_with_cli(package_name, service_name, to_version, to_options)

    if wait_for_deployment:
        _wait_for_deployment(package_name, service_name, initial_config, task_ids, timeout_seconds)

def setup_history_server(hdfs_with_kerberos, setup_hdfs_client, configure_universe):
    try:
        sdk_auth.kinit(HDFS_CLIENT_ID, keytab="hdfs.keytab",
                       principal=GENERIC_HDFS_USER_PRINCIPAL)
        hdfs_cmd("mkdir /history")
        hdfs_cmd("chmod 1777 /history")

        shakedown.install_package(
            package_name=utils.HISTORY_PACKAGE_NAME,
            options_json={
                "service": {
                    "user": SPARK_HISTORY_USER,
                    "hdfs-config-url": "http://api.{}.marathon.l4lb.thisdcos.directory/v1/endpoints"
                                       .format(HDFS_SERVICE_NAME)
                },
                "security": {
                    "kerberos": {
                        "enabled": True,
                        "krb5conf": utils.HDFS_KRB5_CONF,
                        "principal": GENERIC_HDFS_USER_PRINCIPAL,
                        "keytab": KEYTAB_SECRET_PATH
                    }
                }
            },
            wait_for_completion=True  # wait for it to become healthy
        )
        yield
    finally:
        sdk_marathon.destroy_app(utils.HISTORY_SERVICE_NAME)

def _upgrade_or_downgrade(package_name,
                          to_package_version,
                          service_name,
                          running_task_count,
                          additional_options,
                          timeout_seconds,
                          wait_for_deployment):
    initial_config = get_config(package_name, service_name)
    task_ids = sdk_tasks.get_task_ids(service_name, '')

    if sdk_utils.dcos_version_less_than("1.10") or shakedown.ee_version() is None:
        log.info('Using marathon upgrade flow to upgrade {} {}'.format(
            package_name, to_package_version))
        sdk_marathon.destroy_app(service_name)
        sdk_install.install(package_name,
                            service_name,
                            running_task_count,
                            additional_options=additional_options,
                            package_version=to_package_version,
                            timeout_seconds=timeout_seconds,
                            wait_for_deployment=wait_for_deployment)
    else:
        log.info('Using CLI upgrade flow to upgrade {} {}'.format(
            package_name, to_package_version))
        if additional_options:
            with tempfile.NamedTemporaryFile() as opts_f:
                opts_f.write(json.dumps(additional_options).encode('utf-8'))
                opts_f.flush()  # ensure json content is available for the CLI to read below
                sdk_cmd.svc_cli(
                    package_name, service_name,
                    'update start --package-version={} --options={}'.format(
                        to_package_version, opts_f.name))
        else:
            sdk_cmd.svc_cli(
                package_name, service_name,
                'update start --package-version={}'.format(to_package_version))
        # we must manually upgrade the package CLI because it's not done automatically in this flow
        # (and why should it? that'd imply the package CLI replacing itself via a call to the main CLI...)
        sdk_cmd.run_cli(
            'package install --yes --cli --package-version={} {}'.format(
                to_package_version, package_name))

    if wait_for_deployment:
        updated_config = get_config(package_name, service_name)
        if updated_config == initial_config:
            log.info('No config change detected. Tasks should not be restarted')
            sdk_tasks.check_tasks_not_updated(service_name, '', task_ids)
        else:
            log.info('Checking that all tasks have restarted')
            sdk_tasks.check_tasks_updated(service_name, '', task_ids)

        # this can take a while, default is 15 minutes. for example with HDFS, we can hit the expected
        # total task count via ONCE tasks, without actually completing deployment
        log.info("Waiting for package={} service={} to finish deployment plan...".format(
            package_name, service_name))
        sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds)

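# Standalone sketch of the temp-file pattern used in the CLI flow above: the
# serialized JSON options must be flushed before an external process (the
# package CLI) reads the file by name, or it may see partial or empty content.
import json
import tempfile

with tempfile.NamedTemporaryFile() as opts_f:
    opts_f.write(json.dumps({"service": {"name": "demo"}}).encode('utf-8'))
    opts_f.flush()  # make the bytes visible to other processes
    print(opts_f.name)  # this path is what gets passed via --options=...
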
def hdfs_client():
    try:
        client = config.get_hdfs_client_app(config.SERVICE_NAME)
        sdk_marathon.install_app(client)

        yield client
    finally:
        sdk_marathon.destroy_app(client["id"])

def hdfs_client(kerberos, hdfs_server):
    try:
        client = config.get_hdfs_client_app(hdfs_server["service"]["name"], kerberos)
        sdk_marathon.install_app(client)
        krb5.write_krb5_config_file(client["id"], "/etc/krb5.conf", kerberos)

        yield client
    finally:
        sdk_marathon.destroy_app(client["id"])

def hdfs_client(hdfs_service):
    try:
        client = config.get_hdfs_client_app(hdfs_service["service"]["name"])
        sdk_marathon.install_app(client)

        yield client
    finally:
        sdk_marathon.destroy_app(client["id"])

def upgrade_or_downgrade(package_name, running_task_count):
    task_ids = tasks.get_task_ids(package_name, '')
    marathon.destroy_app(package_name)
    install.install(package_name, running_task_count)
    print('Waiting for upgrade / downgrade deployment to complete')
    spin.time_wait_noisy(lambda: (
        plan.get_deployment_plan(package_name).json()['status'] == 'COMPLETE'))
    print('Checking that all tasks have restarted')
    tasks.check_tasks_updated(package_name, '', task_ids)

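# Hypothetical usage sketch (not from the source) of the helper above: each
# call destroys the scheduler and reinstalls whatever version the current repo
# ordering resolves to, so running it after swapping repos exercises both
# directions. PACKAGE_NAME and DEFAULT_TASK_COUNT are assumed constants, as in
# the surrounding snippets.
def test_upgrade_then_downgrade_sketch():
    upgrade_or_downgrade(PACKAGE_NAME, DEFAULT_TASK_COUNT)  # e.g. upgrade pass
    upgrade_or_downgrade(PACKAGE_NAME, DEFAULT_TASK_COUNT)  # e.g. downgrade pass
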
def kafka_client(kerberos, kafka_server):
    brokers = sdk_cmd.svc_cli(
        kafka_server["package_name"],
        kafka_server["service"]["name"],
        "endpoint broker-tls",
        json=True)["dns"]

    try:
        client_id = "kafka-client"
        client = {
            "id": client_id,
            "mem": 512,
            "user": "******",
            "container": {
                "type": "MESOS",
                "docker": {
                    "image": "elezar/kafka-client:latest",
                    "forcePullImage": True
                },
                "volumes": [
                    {
                        "containerPath": "/tmp/kafkaconfig/kafka-client.keytab",
                        "secret": "kafka_keytab"
                    }
                ]
            },
            "secrets": {
                "kafka_keytab": {
                    "source": kerberos.get_keytab_path(),
                }
            },
            "networks": [
                {
                    "mode": "host"
                }
            ],
            "env": {
                "JVM_MaxHeapSize": "512",
                "KAFKA_CLIENT_MODE": "test",
                "KAFKA_TOPIC": "securetest",
                "KAFKA_BROKER_LIST": ",".join(brokers)
            }
        }

        sdk_marathon.install_app(client)

        transport_encryption.create_tls_artifacts(
            cn="client",
            task=client_id)

        broker_hosts = list(map(lambda x: x.split(':')[0], brokers))
        yield {**client, **{"brokers": broker_hosts}}
    finally:
        sdk_marathon.destroy_app(client_id)

def kafka_client(kerberos, kafka_server):
    brokers = sdk_cmd.svc_cli(
        kafka_server["package_name"],
        kafka_server["service"]["name"],
        "endpoint broker-tls",
        json=True)["dns"]

    try:
        client_id = "kafka-client"
        client = {
            "id": client_id,
            "mem": 512,
            "user": "******",
            "container": {
                "type": "MESOS",
                "docker": {
                    "image": "elezar/kafka-client:latest",
                    "forcePullImage": True
                },
                "volumes": [
                    {
                        "containerPath": "/tmp/kafkaconfig/kafka-client.keytab",
                        "secret": "kafka_keytab"
                    }
                ]
            },
            "secrets": {
                "kafka_keytab": {
                    "source": kerberos.get_keytab_path(),
                }
            },
            "networks": [
                {
                    "mode": "host"
                }
            ],
            "env": {
                "JVM_MaxHeapSize": "512",
                "KAFKA_CLIENT_MODE": "test",
                "KAFKA_TOPIC": "securetest",
                "KAFKA_BROKER_LIST": ",".join(brokers)
            }
        }

        sdk_marathon.install_app(client)

        transport_encryption.create_tls_artifacts(
            cn="client",
            marathon_task=client_id)

        broker_hosts = list(map(lambda x: x.split(':')[0], brokers))
        yield {**client, **{"brokers": broker_hosts}}
    finally:
        sdk_marathon.destroy_app(client_id)

def cleanup(self):
    log.info("Removing the marathon KDC app")
    sdk_marathon.destroy_app(KERBEROS_APP_ID)

    log.info("Deleting temporary working directory")
    self.temp_working_dir.cleanup()

    # TODO: separate secrets handling into another module
    log.info("Deleting keytab secret")
    sdk_security.delete_secret(self.keytab_secret_path)

def kafka_client(kerberos):
    """
    A pytest fixture to install a Kafka client as a Marathon application.
    This client is capable of both Kerberos and TLS communication.

    On teardown, the client is uninstalled.
    """
    try:
        client_id = "kafka-client"
        client = {
            "id": client_id,
            "mem": 512,
            "container": {
                "type": "MESOS",
                "docker": {
                    "image": "elezar/kafka-client:4b9c060",
                    "forcePullImage": True
                },
                "volumes": [{
                    "containerPath": "/tmp/kafkaconfig/kafka-client.keytab",
                    "secret": "kafka_keytab"
                }]
            },
            "secrets": {
                "kafka_keytab": {
                    "source": kerberos.get_keytab_path(),
                }
            },
            "networks": [{
                "mode": "host"
            }],
            "env": {
                "JVM_MaxHeapSize": "512",
                "KAFKA_CLIENT_MODE": "test",
                "KAFKA_TOPIC": "securetest",
                "KAFKA_BROKER_LIST": ""
            }
        }

        sdk_marathon.install_app(client)

        # Create a TLS certificate for the TLS tests
        transport_encryption.create_tls_artifacts(cn="client", marathon_task=client_id)

        yield {
            **client,
            **{
                "tls-id": "client",
            }
        }
    finally:
        sdk_marathon.destroy_app(client_id)

def _upgrade_or_downgrade(package_name,
                          to_package_version,
                          service_name,
                          running_task_count,
                          additional_options,
                          timeout_seconds,
                          wait_for_deployment):
    task_ids = sdk_tasks.get_task_ids(service_name, '')
    if shakedown.dcos_version_less_than("1.10") or shakedown.ee_version() is None:
        log.info('Using marathon upgrade flow to upgrade {} {}'.format(
            package_name, to_package_version))
        sdk_marathon.destroy_app(service_name)
        sdk_install.install(package_name,
                            service_name,
                            running_task_count,
                            additional_options=additional_options,
                            package_version=to_package_version,
                            timeout_seconds=timeout_seconds,
                            wait_for_deployment=wait_for_deployment)
    else:
        log.info('Using CLI upgrade flow to upgrade {} {}'.format(
            package_name, to_package_version))
        if additional_options:
            with tempfile.NamedTemporaryFile() as opts_f:
                opts_f.write(json.dumps(additional_options).encode('utf-8'))
                opts_f.flush()  # ensure json content is available for the CLI to read below
                sdk_cmd.svc_cli(
                    package_name, service_name,
                    'update start --package-version={} --options={}'.format(
                        to_package_version, opts_f.name))
        else:
            sdk_cmd.svc_cli(
                package_name, service_name,
                'update start --package-version={}'.format(to_package_version))

    if wait_for_deployment:
        log.info('Checking that all tasks have restarted')
        sdk_tasks.check_tasks_updated(service_name, '', task_ids)

        # this can take a while, default is 15 minutes. for example with HDFS, we can hit the expected
        # total task count via FINISHED tasks, without actually completing deployment
        log.info("Waiting for {}/{} to finish deployment plan...".format(
            package_name, service_name))
        sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds)

        # given the above wait for plan completion, here we just wait up to 5 minutes
        if shakedown.dcos_version_less_than("1.9"):
            log.info("Skipping `is_suppressed` check for %s/%s as this is only supported starting in version 1.9",
                     package_name, service_name)
        else:
            log.info("Waiting for %s/%s to be suppressed...", package_name, service_name)
            shakedown.wait_for(lambda: sdk_api.is_suppressed(service_name),
                               noisy=True,
                               timeout_seconds=5 * 60)

def _upgrade_or_downgrade(
        package_name,
        to_package_version,
        service_name,
        running_task_count,
        additional_options,
        timeout_seconds,
        wait_for_deployment):
    task_ids = sdk_tasks.get_task_ids(service_name, '')
    if shakedown.dcos_version_less_than("1.10") or shakedown.ee_version() is None:
        log.info('Using marathon upgrade flow to upgrade {} {}'.format(package_name, to_package_version))
        sdk_marathon.destroy_app(service_name)
        sdk_install.install(
            package_name,
            service_name,
            running_task_count,
            additional_options=additional_options,
            package_version=to_package_version,
            timeout_seconds=timeout_seconds,
            wait_for_deployment=wait_for_deployment)
    else:
        log.info('Using CLI upgrade flow to upgrade {} {}'.format(package_name, to_package_version))
        if additional_options:
            with tempfile.NamedTemporaryFile() as opts_f:
                opts_f.write(json.dumps(additional_options).encode('utf-8'))
                opts_f.flush()  # ensure json content is available for the CLI to read below
                sdk_cmd.svc_cli(
                    package_name, service_name,
                    'update start --package-version={} --options={}'.format(to_package_version, opts_f.name))
        else:
            sdk_cmd.svc_cli(
                package_name, service_name,
                'update start --package-version={}'.format(to_package_version))

    if wait_for_deployment:
        log.info('Checking that all tasks have restarted')
        sdk_tasks.check_tasks_updated(service_name, '', task_ids)

        # this can take a while, default is 15 minutes. for example with HDFS, we can hit the expected
        # total task count via FINISHED tasks, without actually completing deployment
        log.info("Waiting for {}/{} to finish deployment plan...".format(
            package_name, service_name))
        sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds)

        # given the above wait for plan completion, here we just wait up to 5 minutes
        if shakedown.dcos_version_less_than("1.9"):
            log.info("Skipping `is_suppressed` check for %s/%s as this is only supported starting in version 1.9",
                     package_name, service_name)
        else:
            log.info("Waiting for %s/%s to be suppressed...", package_name, service_name)
            shakedown.wait_for(
                lambda: sdk_api.is_suppressed(service_name),
                noisy=True,
                timeout_seconds=5 * 60)

def kerberized_hdfs_client(kerberos):
    try:
        client_app_def = config.get_kerberized_hdfs_client_app()
        client_app_def["secrets"]["hdfs_keytab"]["source"] = kerberos.get_keytab_path()
        client_app_def["env"]["REALM"] = kerberos.get_realm()
        client_app_def["env"]["KDC_ADDRESS"] = kerberos.get_kdc_address()
        sdk_marathon.install_app(client_app_def)

        yield client_app_def["id"]
    finally:
        sdk_marathon.destroy_app(client_app_def["id"])

def hdfs_client(kerberos, hdfs_server):
    try:
        client_id = "hdfs-client"
        client = {
            "id": client_id,
            "mem": 1024,
            "user": "******",
            "container": {
                "type": "MESOS",
                "docker": {
                    "image": "nvaziri/hdfs-client:stable",
                    "forcePullImage": True
                },
                "volumes": [{
                    "containerPath": "/{}/hdfs.keytab".format(config.HADOOP_VERSION),
                    "secret": "hdfs_keytab"
                }]
            },
            "secrets": {
                "hdfs_keytab": {
                    "source": kerberos.get_keytab_path()
                }
            },
            "networks": [{
                "mode": "host"
            }],
            "env": {
                "REALM": kerberos.get_realm(),
                "KDC_ADDRESS": kerberos.get_kdc_address(),
                "JAVA_HOME": "/usr/lib/jvm/default-java",
                "KRB5_CONFIG": "/etc/krb5.conf",
                "HDFS_SERVICE_NAME": sdk_hosts._safe_name(config.FOLDERED_SERVICE_NAME),
                "HADOOP_VERSION": config.HADOOP_VERSION
            }
        }

        sdk_marathon.install_app(client)
        krb5.write_krb5_config_file(client_id, "/etc/krb5.conf", kerberos)

        yield client
    finally:
        sdk_marathon.destroy_app(client_id)

def cleanup(self):
    sdk_security.install_enterprise_cli()

    log.info("Removing the marathon KDC app")
    sdk_marathon.destroy_app(self.app_definition["id"])

    if self._temp_working_dir and isinstance(self._temp_working_dir, tempfile.TemporaryDirectory):
        log.info("Deleting temporary working directory")
        self._temp_working_dir.cleanup()

    # TODO: separate secrets handling into another module
    log.info("Deleting keytab secret")
    sdk_security.delete_secret(self.keytab_secret_path)

def install(self) -> dict:
    if sdk_marathon.app_exists(self.app_definition["id"]):
        if self._persist:
            log.info("Found installed KDC app, reusing it")
            return _get_kdc_task(self.app_definition["id"])
        log.info("Found installed KDC app, destroying it first")
        sdk_marathon.destroy_app(self.app_definition["id"])

    log.info("Installing KDC Marathon app")
    sdk_marathon.install_app(self.app_definition)
    log.info("KDC app installed successfully")
    return _get_kdc_task(self.app_definition["id"])

def kafka_client(kerberos, kafka_server):
    brokers = sdk_cmd.svc_cli(kafka_server["package_name"],
                              kafka_server["service"]["name"],
                              "endpoint broker",
                              parse_json=True)[1]["dns"]

    try:
        client_id = "kafka-client"
        client = {
            "id": client_id,
            "mem": 512,
            "container": {
                "type": "MESOS",
                "docker": {
                    "image": "elezar/kafka-client:4b9c060",
                    "forcePullImage": True
                },
                "volumes": [{
                    "containerPath": "/tmp/kafkaconfig/kafka-client.keytab",
                    "secret": "kafka_keytab",
                }],
            },
            "secrets": {
                "kafka_keytab": {
                    "source": kerberos.get_keytab_path()
                }
            },
            "networks": [{
                "mode": "host"
            }],
            "env": {
                "JVM_MaxHeapSize": "512",
                "KAFKA_CLIENT_MODE": "test",
                "KAFKA_TOPIC": "securetest",
                "KAFKA_BROKER_LIST": ",".join(brokers),
            },
        }

        sdk_marathon.install_app(client)

        yield {
            **client,
            **{
                "brokers": list(map(lambda x: x.split(":")[0], brokers))
            }
        }
    finally:
        sdk_marathon.destroy_app(client_id)

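# Sketch (hypothetical test) of consuming the dict yielded by the fixture
# above: "brokers" holds the broker endpoint DNS names with ports stripped.
def test_broker_hosts_sketch(kafka_client):
    assert len(kafka_client["brokers"]) > 0
    assert all(":" not in host for host in kafka_client["brokers"])
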
def hdfs_client(kerberos, hdfs_server):
    try:
        client_id = "hdfs-client"
        client = {
            "id": client_id,
            "mem": 1024,
            "user": "******",
            "container": {
                "type": "MESOS",
                "docker": {
                    "image": "elezar/hdfs-client:dev",
                    "forcePullImage": True
                },
                "volumes": [
                    {
                        "containerPath": "/hadoop-2.6.0-cdh5.9.1/hdfs.keytab",
                        "secret": "hdfs_keytab"
                    }
                ]
            },
            "secrets": {
                "hdfs_keytab": {
                    "source": kerberos.get_keytab_path()
                }
            },
            "networks": [
                {
                    "mode": "host"
                }
            ],
            "env": {
                "REALM": kerberos.get_realm(),
                "KDC_ADDRESS": kerberos.get_kdc_address(),
                "JAVA_HOME": "/usr/lib/jvm/default-java",
                "KRB5_CONFIG": "/etc/krb5.conf",
                "HDFS_SERVICE_NAME": config.SERVICE_NAME,
                "HADOOP_VERSION": config.HADOOP_VERSION
            }
        }

        sdk_marathon.install_app(client)
        krb5.write_krb5_config_file(client_id, "/etc/krb5.conf", kerberos)

        dcos_ca_bundle = transport_encryption.fetch_dcos_ca_bundle(client_id)

        yield {**client, **{"dcos_ca_bundle": dcos_ca_bundle}}
    finally:
        sdk_marathon.destroy_app(client_id)

def upgrade_or_downgrade(package_name,
                         running_task_count,
                         additional_options,
                         package_version=None):
    task_ids = tasks.get_task_ids(package_name, '')
    marathon.destroy_app(package_name)
    install.install(package_name,
                    running_task_count,
                    additional_options=additional_options,
                    package_version=package_version,
                    check_suppression=False)
    sdk_utils.out('Waiting for upgrade / downgrade deployment to complete')
    plan.wait_for_completed_deployment(package_name)
    sdk_utils.out('Checking that all tasks have restarted')
    tasks.check_tasks_updated(package_name, '', task_ids)

def hdfs_client(kerberos, hdfs_server):
    try:
        client_id = "hdfs-client"
        client = {
            "id": client_id,
            "mem": 1024,
            "user": "******",
            "container": {
                "type": "MESOS",
                "docker": {
                    "image": "elezar/hdfs-client:dev",
                    "forcePullImage": True
                },
                "volumes": [
                    {
                        "containerPath": "/hadoop-2.6.0-cdh5.9.1/hdfs.keytab",
                        "secret": "hdfs_keytab"
                    }
                ]
            },
            "secrets": {
                "hdfs_keytab": {
                    "source": kerberos.get_keytab_path()
                }
            },
            "networks": [
                {
                    "mode": "host"
                }
            ],
            "env": {
                "REALM": kerberos.get_realm(),
                "KDC_ADDRESS": kerberos.get_kdc_address(),
                "JAVA_HOME": "/usr/lib/jvm/default-java",
                "KRB5_CONFIG": "/etc/krb5.conf",
                "HDFS_SERVICE_NAME": config.SERVICE_NAME,
            }
        }

        sdk_marathon.install_app(client)
        krb5.write_krb5_config_file(client_id, "/etc/krb5.conf", kerberos)

        dcos_ca_bundle = transport_encryption.fetch_dcos_ca_bundle(client_id)

        yield {**client, **{"dcos_ca_bundle": dcos_ca_bundle}}
    finally:
        sdk_marathon.destroy_app(client_id)

def test_upgrade():
    test_version = upgrade.get_pkg_version(PACKAGE_NAME)
    print('Found test version: {}'.format(test_version))

    repositories = json.loads(cmd.run_cli('package repo list --json'))['repositories']
    print("Repositories: " + str(repositories))
    if len(repositories) < 2:
        print("There is only one version in the repository. Skipping upgrade test!")
        assert repositories[0]['name'] == 'Universe'
        return

    test_repo_name, test_repo_url = upgrade.get_test_repo_info()
    for repo in repositories:
        if repo['name'] != 'Universe':
            shakedown.remove_package_repo(repo['name'])

    universe_version = upgrade.get_pkg_version(PACKAGE_NAME)
    print('Found Universe version: {}'.format(universe_version))

    print('Installing Universe version: {}'.format(universe_version))
    install.install(PACKAGE_NAME, DEFAULT_BROKER_COUNT)
    print('Installation complete for Universe version: {}'.format(universe_version))

    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)
    broker_ids = tasks.get_task_ids(SERVICE_NAME, 'broker-')

    print('Adding test version to repository with name: {} and url: {}'.format(
        test_repo_name, test_repo_url))
    upgrade.add_repo(test_repo_name, test_repo_url, universe_version, 0, PACKAGE_NAME)

    print('Upgrading to test version: {}'.format(test_version))
    marathon.destroy_app(SERVICE_NAME)

    print('Installing test version: {}'.format(test_version))
    # installation will return with old tasks because they are still running
    install.install(PACKAGE_NAME, DEFAULT_BROKER_COUNT)
    print('Installation complete for test version: {}'.format(test_version))

    # wait till tasks are restarted
    tasks.check_tasks_updated(SERVICE_NAME, '{}-'.format(DEFAULT_POD_TYPE), broker_ids)
    print('All tasks are restarted')

    # all tasks are running
    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)

def setup_hdfs_client(hdfs_with_kerberos):
    try:
        curr_dir = os.path.dirname(os.path.realpath(__file__))
        app_def_path = "{}/resources/hdfsclient.json".format(curr_dir)
        with open(app_def_path) as f:
            hdfsclient_app_def = json.load(f)
        hdfsclient_app_def["id"] = HDFS_CLIENT_ID
        hdfsclient_app_def["secrets"]["hdfs_keytab"]["source"] = KEYTAB_SECRET_PATH
        sdk_marathon.install_app(hdfsclient_app_def)

        sdk_auth.kinit(HDFS_CLIENT_ID, keytab="hdfs.keytab",
                       principal=GENERIC_HDFS_USER_PRINCIPAL)
        yield
    finally:
        sdk_marathon.destroy_app(HDFS_CLIENT_ID)

def kafka_client():
    brokers = [
        "kafka-0-broker.{}.autoip.dcos.thisdcos.directory:1030".format(config.SERVICE_NAME),
        "kafka-1-broker.{}.autoip.dcos.thisdcos.directory:1030".format(config.SERVICE_NAME),
        "kafka-2-broker.{}.autoip.dcos.thisdcos.directory:1030".format(config.SERVICE_NAME)
    ]

    try:
        client_id = "kafka-client"
        client = {
            "id": client_id,
            "mem": 512,
            "user": "******",
            "container": {
                "type": "MESOS",
                "docker": {
                    "image": "elezar/kafka-client:latest",
                    "forcePullImage": True
                },
            },
            "networks": [{
                "mode": "host"
            }],
            "env": {
                "JVM_MaxHeapSize": "512",
                "KAFKA_CLIENT_MODE": "test",
                "KAFKA_BROKER_LIST": ",".join(brokers),
                "KAFKA_OPTS": ""
            }
        }

        sdk_marathon.install_app(client)

        yield {
            **client,
            **{
                "brokers": list(map(lambda x: x.split(':')[0], brokers))
            }
        }
    finally:
        sdk_marathon.destroy_app(client_id)

def test_upgrade_downgrade():
    test_repo_name, test_repo_url = get_test_repo_info()
    test_version = get_pkg_version()
    print('Found test version: {}'.format(test_version))

    repositories = json.loads(cmd.run_cli('package repo list --json'))['repositories']
    print("Repositories: " + str(repositories))
    universe_url = "fail"
    for repo in repositories:
        if repo['name'] == 'Universe':
            universe_url = repo['uri']
            break
    assert "fail" != universe_url
    print("Universe URL: " + universe_url)

    shakedown.remove_package_repo('Universe')
    add_repo('Universe', universe_url, test_version, 0)

    universe_version = get_pkg_version()
    print('Found Universe version: {}'.format(universe_version))

    print('Installing Universe version')
    install.install(PACKAGE_NAME, DEFAULT_TASK_COUNT)

    shakedown.remove_package_repo('Universe')
    add_last_repo('Universe', universe_url, universe_version)

    print('Upgrading to test version')
    marathon.destroy_app(PACKAGE_NAME)
    install.install(PACKAGE_NAME, DEFAULT_TASK_COUNT)

    shakedown.remove_package_repo('Universe')
    add_repo('Universe', universe_url, test_version, 0)

    print('Downgrading to master version')
    marathon.destroy_app(PACKAGE_NAME)
    install.install(PACKAGE_NAME, DEFAULT_TASK_COUNT)

    shakedown.remove_package_repo('Universe')
    add_last_repo('Universe', universe_url, universe_version)

def cleanup(self) -> None:
    sdk_security.install_enterprise_cli()

    log.info("Removing the marathon KDC app")
    sdk_marathon.destroy_app(self.app_definition["id"])

    if self._temp_working_dir and isinstance(self._temp_working_dir, tempfile.TemporaryDirectory):
        log.info("Deleting temporary working directory")
        self._temp_working_dir.cleanup()

    sdk_security.delete_service_account(
        service_account_name=KDC_SERVICE_ACCOUNT,
        service_account_secret=KDC_SERVICE_ACCOUNT_SECRET,
    )

    # TODO: separate secrets handling into another module
    log.info("Deleting keytab secret")
    sdk_security.install_enterprise_cli()
    sdk_security.delete_secret(self.get_keytab_path())

def test_history_kdc_config(hdfs_with_kerberos, kerberos_env):
    history_service_with_kdc_config = "spark-history-with-kdc-config"
    try:
        # This deployment will fail if kerberos is not configured properly.
        sdk_install.uninstall(HISTORY_PACKAGE_NAME, history_service_with_kdc_config)
        sdk_install.install(
            HISTORY_PACKAGE_NAME,
            history_service_with_kdc_config,
            0,
            additional_options={
                "service": {
                    "name": history_service_with_kdc_config,
                    "user": utils.SPARK_HISTORY_USER,
                    "log-dir": "hdfs://hdfs{}".format(HDFS_HISTORY_DIR),
                    "hdfs-config-url": "http://api.{}.marathon.l4lb.thisdcos.directory/v1/endpoints"
                                       .format(HDFS_SERVICE_NAME)
                },
                "security": {
                    "kerberos": {
                        "enabled": True,
                        "kdc": {
                            "hostname": kerberos_env.get_host(),
                            "port": int(kerberos_env.get_port())
                        },
                        "realm": sdk_auth.REALM,
                        "principal": GENERIC_HDFS_USER_PRINCIPAL,
                        "keytab": KEYTAB_SECRET_PATH
                    }
                }
            },
            wait_for_deployment=False,  # no deploy plan
            insert_strict_options=False)  # no standard service_account/etc options
    finally:
        sdk_marathon.destroy_app(history_service_with_kdc_config)

def kafka_client():
    brokers = [
        "kafka-0-broker.{}.autoip.dcos.thisdcos.directory:1030".format(config.SERVICE_NAME),
        "kafka-1-broker.{}.autoip.dcos.thisdcos.directory:1030".format(config.SERVICE_NAME),
        "kafka-2-broker.{}.autoip.dcos.thisdcos.directory:1030".format(config.SERVICE_NAME)
    ]

    try:
        client_id = "kafka-client"
        client = {
            "id": client_id,
            "mem": 512,
            "container": {
                "type": "MESOS",
                "docker": {
                    "image": "elezar/kafka-client:latest",
                    "forcePullImage": True
                },
            },
            "networks": [
                {
                    "mode": "host"
                }
            ],
            "env": {
                "JVM_MaxHeapSize": "512",
                "KAFKA_CLIENT_MODE": "test",
                "KAFKA_BROKER_LIST": ",".join(brokers),
                "KAFKA_OPTS": ""
            }
        }

        sdk_marathon.install_app(client)

        broker_hosts = list(map(lambda x: x.split(':')[0], brokers))
        yield {**client, **{"brokers": broker_hosts}}
    finally:
        sdk_marathon.destroy_app(client_id)

def test_marathon_volume_collision():
    # This test validates that a service registered in a sub-role of
    # slave_public will _not_ unreserve Marathon volumes RESERVED
    # in the `slave_public` role.

    # Uninstall HW first
    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    # Install the marathon app
    marathon_app_name = "persistent-test"
    persistent_app = {
        "id": marathon_app_name,
        "mem": 128,
        "user": "******",
        "cmd": "echo 'this is a test' > persistent-volume/test && sleep 10000",
        "container": {
            "type": "MESOS",
            "volumes": [
                {
                    "persistent": {
                        "type": "root",
                        "size": 500,
                        "constraints": []
                    },
                    "mode": "RW",
                    "containerPath": "persistent-volume"
                }
            ]
        }
    }
    try:
        sdk_marathon.install_app(persistent_app)

        # Get its persistent Volume
        host = sdk_marathon.get_scheduler_host(marathon_app_name)
        ok, pv_name = sdk_cmd.agent_ssh(host, "ls /var/lib/mesos/slave/volumes/roles/slave_public")
        assert ok
        pv_name = pv_name.strip()

        @retrying.retry(wait_fixed=1000, stop_max_delay=60*1000)
        def check_content():
            ok, pv_content = sdk_cmd.agent_ssh(
                host, "cat /var/lib/mesos/slave/volumes/roles/slave_public/{}/test".format(pv_name))
            assert pv_content.strip() == "this is a test"

        check_content()

        # Scale down the Marathon app
        app_config = sdk_marathon.get_config(marathon_app_name)
        app_config['instances'] = 0
        sdk_marathon.update_app(marathon_app_name, app_config)

        # Install Hello World
        sdk_install.install(config.PACKAGE_NAME, config.SERVICE_NAME, config.DEFAULT_TASK_COUNT,
                            additional_options=pre_reserved_options)

        # Make sure the persistent volume is still there
        check_content()

        # Uninstall Hello World
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

        # Make sure the persistent volume is still there
        check_content()

        # Scale back up the marathon app
        app_config = sdk_marathon.get_config(marathon_app_name)
        app_config['instances'] = 1
        sdk_marathon.update_app(marathon_app_name, app_config)

        # Make sure the persistent volume is still there
        check_content()
    finally:
        # Reinstall hello world
        sdk_install.install(config.PACKAGE_NAME, config.SERVICE_NAME, config.DEFAULT_TASK_COUNT,
                            additional_options=pre_reserved_options)
        sdk_marathon.destroy_app(marathon_app_name)

def test_marathon_volume_collision():
    # This test validates that a service registered in a sub-role of
    # slave_public will _not_ unreserve Marathon volumes RESERVED
    # in the `slave_public` role.

    # Uninstall HW first
    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    # Install the marathon app
    marathon_app_name = "persistent-test"
    volume_name = "persistent-volume"
    persistent_app = {
        "id": marathon_app_name,
        "mem": 128,
        "user": "******",
        "cmd": "echo 'this is a test' > {}/test && sleep 10000".format(volume_name),
        "container": {
            "type": "MESOS",
            "volumes": [
                {
                    "persistent": {"type": "root", "size": 500, "constraints": []},
                    "mode": "RW",
                    "containerPath": volume_name,
                }
            ],
        },
    }
    try:
        sdk_marathon.install_app(persistent_app)

        # Get its persistent Volume
        host = sdk_marathon.get_scheduler_host(marathon_app_name)
        # Should get e.g.: "/var/lib/mesos/slave/volumes/roles/slave_public/persistent-test#persistent-volume#76e7bb6d-64fa-11e8-abc5-8e679b292d5e"
        rc, pv_path, _ = sdk_cmd.agent_ssh(
            host,
            "ls -d /var/lib/mesos/slave/volumes/roles/slave_public/{}#{}#*".format(
                marathon_app_name, volume_name
            ),
        )
        if rc != 0:
            log.error("Could not get slave_public roles. return-code: '%s'\n", rc)
        assert rc == 0

        pv_path = pv_path.strip()

        @retrying.retry(wait_fixed=1000, stop_max_delay=60 * 1000)
        def check_content():
            rc, pv_content, _ = sdk_cmd.agent_ssh(host, "cat {}/test".format(pv_path))
            assert rc == 0 and pv_content.strip() == "this is a test"

        check_content()

        # Scale down the Marathon app
        app_config = sdk_marathon.get_config(marathon_app_name)
        app_config["instances"] = 0
        sdk_marathon.update_app(app_config)

        # Install Hello World
        sdk_install.install(
            config.PACKAGE_NAME,
            config.SERVICE_NAME,
            config.DEFAULT_TASK_COUNT,
            additional_options=pre_reserved_options,
        )

        # Make sure the persistent volume is still there
        check_content()

        # Uninstall Hello World
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

        # Make sure the persistent volume is still there
        check_content()

        # Scale back up the marathon app
        app_config = sdk_marathon.get_config(marathon_app_name)
        app_config["instances"] = 1
        sdk_marathon.update_app(app_config)

        # Make sure the persistent volume is still there
        check_content()
    finally:
        # Reinstall hello world
        sdk_install.install(
            config.PACKAGE_NAME,
            config.SERVICE_NAME,
            config.DEFAULT_TASK_COUNT,
            additional_options=pre_reserved_options,
        )
        sdk_marathon.destroy_app(marathon_app_name)
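
# Side-note sketch of the @retrying.retry pattern used by check_content() in
# the two tests above: the decorated function is retried on any exception
# (including AssertionError), here every wait_fixed=1000 ms for up to
# stop_max_delay=60*1000 ms. Runnable on its own with the `retrying` package.
import time

import retrying

_start = time.time()

@retrying.retry(wait_fixed=1000, stop_max_delay=60 * 1000)
def eventually_true():
    assert time.time() - _start > 3  # passes after ~3 seconds of retries

eventually_true()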