def test_driver_executor_tls():
    '''
    Put keystore and truststore as secrets in the DC/OS secret store.
    Run a SparkPi job with TLS enabled, referencing those secrets.
    Make sure other secrets still show up.
    '''
    python_script_path = os.path.join(THIS_DIR, 'jobs', 'python', 'pi_with_secret.py')
    python_script_url = utils.upload_file(python_script_path)
    resources_folder = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'resources')
    keystore_file = 'server.jks'
    truststore_file = 'trust.jks'
    keystore_path = os.path.join(resources_folder, '{}.base64'.format(keystore_file))
    truststore_path = os.path.join(resources_folder, '{}.base64'.format(truststore_file))
    keystore_secret = '__dcos_base64__keystore'
    truststore_secret = '__dcos_base64__truststore'
    my_secret = 'mysecret'
    my_secret_content = 'secretcontent'
    shakedown.run_dcos_command(
        'security secrets create /{} --value-file {}'.format(
            keystore_secret, keystore_path))
    shakedown.run_dcos_command(
        'security secrets create /{} --value-file {}'.format(
            truststore_secret, truststore_path))
    shakedown.run_dcos_command('security secrets create /{} --value {}'.format(
        my_secret, my_secret_content))
    password = '******'
    try:
        utils.run_tests(
            app_url=python_script_url,
            app_args="30 {} {}".format(my_secret, my_secret_content),
            expected_output="Pi is roughly 3",
            args=[
                "--keystore-secret-path", keystore_secret,
                "--truststore-secret-path", truststore_secret,
                "--private-key-password", password,
                "--keystore-password", password,
                "--truststore-password", password,
                "--conf", "spark.mesos.driver.secret.names={}".format(my_secret),
                "--conf", "spark.mesos.driver.secret.filenames={}".format(my_secret),
                "--conf", "spark.mesos.driver.secret.envkeys={}".format(my_secret),
            ])
    finally:
        shakedown.run_dcos_command(
            'security secrets delete /{}'.format(keystore_secret))
        shakedown.run_dcos_command(
            'security secrets delete /{}'.format(truststore_secret))
        shakedown.run_dcos_command(
            'security secrets delete /{}'.format(my_secret))
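# A hedged sketch (not part of the original suite) of how the *.base64 fixture
# files referenced above could be produced from the binary JKS files, assuming
# plain base64 encoding as implied by the __dcos_base64__ secret-name prefix:
import base64

def _encode_keystore(jks_path):
    # Reads a binary keystore and writes a <name>.base64 file next to it.
    with open(jks_path, 'rb') as f:
        encoded = base64.b64encode(f.read())
    with open(jks_path + '.base64', 'wb') as out:
        out.write(encoded)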
def _run_terasort_job(terasort_class, app_args, expected_output):
    jar_url = 'https://downloads.mesosphere.io/spark/examples/spark-terasort-1.1-jar-with-dependencies_2.11.jar'
    submit_args = ["--class", terasort_class] + KERBEROS_ARGS
    utils.run_tests(app_url=jar_url,
                    app_args=" ".join(app_args),
                    expected_output=expected_output,
                    args=submit_args)
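# A minimal usage sketch (assumed, not from the original suite): generating
# TeraSort input through the helper above, mirroring the class names, HDFS
# paths, and expected output used by the soak tests later in this file.
def _example_teragen():
    _run_terasort_job('com.github.ehiggs.spark.terasort.TeraGen',
                      ['1g', 'hdfs:///terasort_in'],
                      'Number of records written')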
def _test_spark_docker_image(dist):
    utils.require_spark(additional_options={'service': {'docker-image': dist['image']}})
    example_jar_url = EXAMPLES_JAR_PATH_TEMPLATE.format(dist['scala_version'])

    expected_groups_count = 12000
    num_mappers = 4
    value_size_bytes = 100
    num_reducers = 4

    utils.run_tests(
        app_url=example_jar_url,
        app_args=f"{num_mappers} {expected_groups_count} {value_size_bytes} {num_reducers}",
        expected_output=str(expected_groups_count),
        args=[
            "--class org.apache.spark.examples.GroupByTest",
            "--conf spark.executor.cores=1",
            "--conf spark.cores.max=4",
            "--conf spark.scheduler.minRegisteredResourcesRatio=1",
            "--conf spark.scheduler.maxRegisteredResourcesWaitingTime=3m"
        ])

    utils.teardown_spark()
def _run_terasort():
    jar_url = TERASORT_JAR
    utils.run_tests(app_url=jar_url,
                    app_args="hdfs:///terasort_in hdfs:///terasort_out",
                    expected_output="",
                    app_name=SOAK_SPARK_APP_NAME,
                    args=(["--class", "com.github.ehiggs.spark.terasort.TeraSort"]
                          + COMMON_ARGS))
def _run_teravalidate():
    jar_url = TERASORT_JAR
    utils.run_tests(app_url=jar_url,
                    app_args="hdfs:///terasort_out hdfs:///terasort_validate",
                    expected_output="partitions are properly sorted",
                    app_name=SOAK_SPARK_APP_NAME,
                    args=(["--class", "com.github.ehiggs.spark.terasort.TeraValidate"]
                          + COMMON_ARGS))
def test_sparkPi(service_name=utils.SPARK_SERVICE_NAME):
    utils.run_tests(
        app_url=utils.SPARK_EXAMPLES,
        app_args="100",
        expected_output="Pi is roughly 3",
        service_name=service_name,
        args=["--class org.apache.spark.examples.SparkPi"])
def _test_spark_docker_image(docker_image):
    utils.upload_dcos_test_jar()
    utils.require_spark(additional_options={'service': {'docker-image': docker_image}})

    expected_groups_count = 12000
    num_mappers = 4
    value_size_bytes = 100
    num_reducers = 4
    sleep = 500

    python_script_path = os.path.join(THIS_DIR, 'jobs', 'python', 'shuffle_app.py')
    python_script_url = utils.upload_file(python_script_path)
    utils.run_tests(
        app_url=python_script_url,
        app_args="{} {} {} {} {}".format(num_mappers, expected_groups_count,
                                         value_size_bytes, num_reducers, sleep),
        expected_output="Groups count: {}".format(expected_groups_count),
        args=[
            "--conf spark.executor.cores=1",
            "--conf spark.cores.max=4",
            "--conf spark.scheduler.minRegisteredResourcesRatio=1",
            "--conf spark.scheduler.maxRegisteredResourcesWaitingTime=3m"
        ])

    utils.teardown_spark()
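# A minimal usage sketch (assumed, not from the original suite): a helper like
# the one above is typically driven by a pytest parametrization over the Docker
# images under test. The image tag below is a placeholder, and the pytest
# import is assumed to exist at module level.
import pytest

@pytest.mark.parametrize('docker_image', [
    'mesosphere/spark:example-tag',  # hypothetical image name
])
def test_docker_image(docker_image):
    _test_spark_docker_image(docker_image)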
def test_packages_flag():
    utils.run_tests(
        app_url=utils.dcos_test_jar_url(),
        app_args="20",
        expected_output="210",
        args=["--packages com.google.guava:guava:23.0",
              "--class ProvidedPackages"])
def test_s3_env():
    creds = s3.get_credentials()
    args = [
        "--conf spark.mesos.driverEnv.AWS_ACCESS_KEY_ID={}".format(creds.access_key),
        "--conf spark.mesos.driverEnv.AWS_SECRET_ACCESS_KEY={}".format(creds.secret_key),
        "--class S3Job"
    ]

    linecount_path = os.path.join(THIS_DIR, 'resources', 'linecount.txt')
    s3.upload_file(linecount_path)

    # download/read linecount.txt only
    utils.run_tests(app_url=utils.dcos_test_jar_url(),
                    app_args="--readUrl {} --countOnly".format(
                        s3.s3n_url('linecount.txt')),
                    expected_output="Read 3 lines",
                    args=args)

    # download/read linecount.txt, reupload as linecount-env.txt
    utils.run_tests(app_url=utils.dcos_test_jar_url(),
                    app_args="--readUrl {} --writeUrl {}".format(
                        s3.s3n_url('linecount.txt'),
                        s3.s3n_url('linecount-env.txt')),
                    expected_output="Read 3 lines",
                    args=args)
    assert len(list(s3.list("linecount-env.txt"))) > 0
def test_foldered_spark(setup_spark):
    # `service_name` and `driver_role` are expected to come from the enclosing
    # module/fixture scope (e.g. values established by the `setup_spark` fixture).
    utils.run_tests(
        app_url=utils.SPARK_EXAMPLES,
        app_args="100",
        expected_output="Pi is roughly 3",
        service_name=service_name,
        driver_role=driver_role,
        args=["--class org.apache.spark.examples.SparkPi"])
def test_cni():
    utils.run_tests(app_url=utils.SPARK_EXAMPLES,
                    app_args="",
                    expected_output="Pi is roughly 3",
                    args=[
                        "--conf spark.mesos.network.name=dcos",
                        "--class org.apache.spark.examples.SparkPi"
                    ])
def test_multi_arg_confs(service_name=utils.SPARK_SERVICE_NAME):
    utils.run_tests(
        app_url=utils.dcos_test_jar_url(),
        app_args="",
        expected_output="spark.driver.extraJavaOptions,-XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Dparam3=\"valA valB\"",
        service_name=service_name,
        args=["--conf spark.driver.extraJavaOptions='-XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Dparam3=\\\"valA valB\\\"'",
              "--class MultiConfs"])
def _run_teragen():
    jar_url = TERASORT_JAR
    input_size = os.getenv('TERASORT_INPUT_SIZE', '1g')
    utils.run_tests(app_url=jar_url,
                    app_args="{} hdfs:///terasort_in".format(input_size),
                    expected_output="Number of records written",
                    app_name=SOAK_SPARK_APP_NAME,
                    args=(["--class", "com.github.ehiggs.spark.terasort.TeraGen"]
                          + COMMON_ARGS))
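# A hedged sketch (assumed, not from the original suite) of how the soak-test
# phases above fit together: the HDFS paths chain generate -> sort -> validate,
# so the phases must run in this order.
def test_terasort_suite():
    _run_teragen()
    _run_terasort()
    _run_teravalidate()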
def test_cli_multiple_spaces():
    utils.run_tests(app_url=utils.SPARK_EXAMPLES,
                    app_args="30",
                    expected_output="Pi is roughly 3",
                    args=[
                        "--conf spark.cores.max=2",
                        "--class org.apache.spark.examples.SparkPi"
                    ])
def test_python():
    python_script_path = os.path.join(THIS_DIR, 'jobs', 'python', 'pi_with_include.py')
    python_script_url = utils.upload_file(python_script_path)
    py_file_path = os.path.join(THIS_DIR, 'jobs', 'python', 'PySparkTestInclude.py')
    py_file_url = utils.upload_file(py_file_path)
    utils.run_tests(app_url=python_script_url,
                    app_args="30",
                    expected_output="Pi is roughly 3",
                    args=["--py-files {}".format(py_file_url)])
def test_pipeline(kerberos_flag, stop_count, jar_uri, keytab_secret,
                  spark_service_name, jaas_uri=None):
    stop_count = str(stop_count)
    kerberized = kerberos_flag == "true"
    broker_dns = sdk_cmd.svc_cli(KAFKA_PACKAGE_NAME, KAFKA_SERVICE_NAME,
                                 'endpoints broker', json=True)['dns'][0]
    topic = "top1"

    big_file, big_file_url = "file:///mnt/mesos/sandbox/big.txt", "http://norvig.com/big.txt"

    # arguments to the application
    producer_args = " ".join([broker_dns, big_file, topic, kerberos_flag])

    uris = "spark.mesos.uris={}".format(big_file_url)

    if kerberized and jaas_uri is None:
        _uri = upload_jaas()
        uris += ",{}".format(_uri)
    elif jaas_uri is not None:
        # Only append a JAAS URI when one was actually supplied; the original
        # unconditional else-branch appended the string "None" here.
        uris += ",{}".format(jaas_uri)

    common_args = [
        "--conf", "spark.mesos.containerizer=mesos",
        "--conf", "spark.scheduler.maxRegisteredResourcesWaitingTime=2400s",
        "--conf", "spark.scheduler.minRegisteredResourcesRatio=1.0",
        "--conf", uris
    ]

    kerberos_args = get_kerberized_kafka_spark_conf(spark_service_name, keytab_secret)

    producer_config = ["--conf", "spark.cores.max=2",
                       "--conf", "spark.executor.cores=1",
                       "--class", "KafkaFeeder"] + common_args
    if kerberized:
        producer_config += kerberos_args

    producer_id = utils.submit_job(app_url=jar_uri,
                                   app_args=producer_args,
                                   service_name=spark_service_name,
                                   args=producer_config)

    sdk_tasks.check_running(KAFKA_SERVICE_NAME, 1, timeout_seconds=600)

    consumer_config = ["--conf", "spark.cores.max=2",
                       "--conf", "spark.executor.cores=1",
                       "--class", "KafkaConsumer"] + common_args
    if kerberized:
        consumer_config += kerberos_args

    consumer_args = " ".join([broker_dns, topic, stop_count, kerberos_flag])

    try:
        utils.run_tests(app_url=jar_uri,
                        app_args=consumer_args,
                        expected_output="Read {} words".format(stop_count),
                        service_name=spark_service_name,
                        args=consumer_config)
    finally:
        utils.kill_driver(producer_id, spark_service_name)
def test_jars_flag(service_name=utils.SPARK_SERVICE_NAME):
    uploaded_jar_url = utils.dcos_test_jar_url()
    jar_name = uploaded_jar_url.split("/")[-1]  # dcos-spark-scala-assembly-XX-SNAPSHOT.jar
    utils.run_tests(
        app_url=utils.SPARK_EXAMPLES,  # submit an app that does not include class 'MultiConfs'
        app_args="",
        expected_output="spark.driver.extraClassPath,/mnt/mesos/sandbox/{}".format(jar_name),
        service_name=service_name,
        args=["--jars {}".format(uploaded_jar_url),
              "--class MultiConfs"])
def test_jar(service_name=utils.SPARK_SERVICE_NAME):
    master_url = ("https" if sdk_utils.is_strict_mode() else "http") + "://leader.mesos:5050"
    spark_job_runner_args = '{} dcos \\"*\\" spark:only 2 --auth-token={}'.format(
        master_url,
        shakedown.dcos_acs_token())
    utils.run_tests(app_url=utils.upload_mesos_test_jar(),
                    app_args=spark_job_runner_args,
                    expected_output="All tests passed",
                    service_name=service_name,
                    args=['--class com.typesafe.spark.test.mesos.framework.runners.SparkJobRunner'])
def test_history(kerberized_spark, hdfs_with_kerberos, setup_history_server):
    job_args = [
        "--class", "org.apache.spark.examples.SparkPi",
        "--conf", "spark.eventLog.enabled=true",
        "--conf", "spark.eventLog.dir=hdfs://hdfs{}".format(HDFS_HISTORY_DIR)
    ]
    utils.run_tests(app_url=utils.SPARK_EXAMPLES,
                    app_args="100",
                    expected_output="Pi is roughly 3",
                    service_name="spark",
                    args=(job_args + SPARK_SUBMIT_HDFS_KERBEROS_ARGS))
def test_history():
    job_args = [
        "--class", "org.apache.spark.examples.SparkPi",
        "--conf", "spark.eventLog.enabled=true",
        "--conf", "spark.eventLog.dir=hdfs://hdfs/history"
    ]
    utils.run_tests(app_url=utils.SPARK_EXAMPLES,
                    app_args="100",
                    expected_output="Pi is roughly 3",
                    app_name="/spark",
                    args=(job_args + KERBEROS_ARGS))
def test_value_secret():
    secret_value = "secret-value"
    # `auth_token` is expected to be defined at module scope.
    utils.run_tests(
        app_url=utils.dcos_test_jar_url(),
        app_args=auth_token,
        expected_output=secret_value,
        args=[
            "--conf=spark.mesos.driver.secret.values={}".format(secret_value),
            "--conf=spark.mesos.driver.secret.envkeys=SECRET_ENV_KEY",
            "--class SecretConfs"
        ])
def test_rpc_auth():
    secret_name = "sparkauth"
    sdk_security.delete_secret(secret_name)
    rc, _, _ = sdk_cmd.run_raw_cli("{} --verbose secret /{}".format(
        utils.SPARK_PACKAGE_NAME, secret_name))
    assert rc == 0, "Failed to generate Spark auth secret"

    utils.run_tests(
        app_url=utils.SPARK_EXAMPLES,
        app_args="100",
        expected_output="Pi is roughly 3",
        service_name=utils.SPARK_SERVICE_NAME,
        args=["--executor-auth-secret {}".format(secret_name),
              "--class org.apache.spark.examples.SparkPi"])
def test_jar(app_name=utils.SPARK_APP_NAME):
    master_url = ("https" if utils.is_strict() else "http") + "://leader.mesos:5050"
    spark_job_runner_args = '{} dcos \\"*\\" spark:only 2 --auth-token={}'.format(
        master_url,
        shakedown.dcos_acs_token())
    jar_url = utils.upload_file(os.getenv('TEST_JAR_PATH'))
    utils.run_tests(
        app_url=jar_url,
        app_args=spark_job_runner_args,
        expected_output="All tests passed",
        app_name=app_name,
        args=["--class",
              'com.typesafe.spark.test.mesos.framework.runners.SparkJobRunner'])
def test_env_based_ref_secret():
    secret_path = "/spark/secret-name"
    secret_value = "secret-value"
    dcos_utils.delete_secret(secret_path)
    dcos_utils.create_secret(secret_path, secret_value, False)
    try:
        utils.run_tests(
            app_url=utils.dcos_test_jar_url(),
            app_args=auth_token,
            expected_output=secret_value,
            args=[
                "--conf=spark.mesos.driver.secret.names={}".format(secret_path),
                "--conf=spark.mesos.driver.secret.envkeys=SECRET_ENV_KEY",
                "--class SecretConfs"
            ])
    finally:
        dcos_utils.delete_secret(secret_path)
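# A hedged sketch of the `dcos_utils` helpers used above and in the file-based
# test below, assuming they shell out to the DC/OS security CLI the same way
# other tests in this suite do. The real implementation may differ; the third
# argument selects --value-file (file contents) versus --value (literal value).
def create_secret(path, value, file_based):
    flag = '--value-file' if file_based else '--value'
    shakedown.run_dcos_command(
        'security secrets create {} {} {}'.format(path, flag, value))

def delete_secret(path):
    shakedown.run_dcos_command('security secrets delete {}'.format(path))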
def test_rpc_auth():
    secret_name = "sparkauth"
    rc, stdout, stderr = sdk_cmd.run_raw_cli("{pkg} secret /{secret}".format(
        pkg=utils.SPARK_PACKAGE_NAME, secret=secret_name))
    assert rc == 0, "Failed to generate Spark auth secret, stderr {err} stdout {out}".format(
        err=stderr, out=stdout)

    args = ["--executor-auth-secret", secret_name,
            "--class", "org.apache.spark.examples.SparkPi"]

    utils.run_tests(app_url=utils.SPARK_EXAMPLES,
                    app_args="100",
                    expected_output="Pi is roughly 3",
                    app_name="/spark",
                    args=args)
def test_s3_secrets():
    linecount_path = os.path.join(THIS_DIR, 'resources', 'linecount.txt')
    s3.upload_file(linecount_path)

    creds = s3.get_credentials()

    def make_credential_secret(path, val):
        sdk_security.delete_secret(path)
        rc, stdout, stderr = sdk_cmd.run_raw_cli(
            "security secrets create /{} -v {}".format(path, val))
        assert rc == 0, "Failed to create secret {}, stderr: {}, stdout: {}".format(
            path, stderr, stdout)

    aws_access_key_path = "aws_access_key_id"
    make_credential_secret(aws_access_key_path, creds.access_key)

    aws_secret_key_path = "aws_secret_access_key"
    make_credential_secret(aws_secret_key_path, creds.secret_key)

    args = [
        "--conf spark.mesos.containerizer=mesos",
        "--conf spark.mesos.driver.secret.names=/{key},/{secret}".format(
            key=aws_access_key_path, secret=aws_secret_key_path),
        "--conf spark.mesos.driver.secret.envkeys=AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY",
        "--class S3Job"
    ]
    try:
        # download/read linecount.txt only
        utils.run_tests(app_url=utils.dcos_test_jar_url(),
                        app_args="--readUrl {} --countOnly".format(
                            s3.s3n_url('linecount.txt')),
                        expected_output="Read 3 lines",
                        args=args)

        # download/read linecount.txt, reupload as linecount-secret.txt
        utils.run_tests(app_url=utils.dcos_test_jar_url(),
                        app_args="--readUrl {} --writeUrl {}".format(
                            s3.s3n_url('linecount.txt'),
                            s3.s3n_url('linecount-secret.txt')),
                        expected_output="Read 3 lines",
                        args=args)
        assert len(list(s3.list("linecount-secret.txt"))) > 0
    finally:
        sdk_security.delete_secret(aws_access_key_path)
        sdk_security.delete_secret(aws_secret_key_path)
def test_file_based_ref_secret():
    secret_path = "/spark/secret-name"
    secret_file_name = "secret.file"
    secret_value = "secret-value"
    with open(secret_file_name, 'w') as secret_file:
        secret_file.write(secret_value)
    dcos_utils.delete_secret(secret_path)
    dcos_utils.create_secret(secret_path, secret_file_name, True)
    try:
        utils.run_tests(
            app_url=utils.dcos_test_jar_url(),
            app_args=auth_token,
            expected_output=secret_value,
            args=[
                "--conf=spark.mesos.driver.secret.names={}".format(secret_path),
                "--conf=spark.mesos.driver.secret.filenames={}".format(secret_file_name),
                "--class SecretConfs"
            ])
    finally:
        dcos_utils.delete_secret(secret_path)
        if os.path.exists(secret_file_name):
            os.remove(secret_file_name)
def test_s3():
    def make_credential_secret(envvar, secret_path):
        rc, stdout, stderr = sdk_cmd.run_raw_cli(
            "security secrets create {p} -v {e}".format(p=secret_path,
                                                        e=os.environ[envvar]))
        assert rc == 0, "Failed to create secret {secret} from envvar {envvar}, stderr: {err}, stdout: {out}".format(
            secret=secret_path, envvar=envvar, err=stderr, out=stdout)

    LOGGER.info("Creating AWS secrets")
    aws_access_key_secret_path = "aws_access_key_id"
    aws_secret_access_key_path = "aws_secret_access_key"

    make_credential_secret(envvar="AWS_ACCESS_KEY_ID",
                           secret_path="/{}".format(aws_access_key_secret_path))
    make_credential_secret(envvar="AWS_SECRET_ACCESS_KEY",
                           secret_path="/{}".format(aws_secret_access_key_path))

    linecount_path = os.path.join(THIS_DIR, 'resources', 'linecount.txt')
    s3.upload_file(linecount_path)

    # Credentials via file-based secrets: read linecount.txt, write linecount-out.
    app_args = "--readUrl {} --writeUrl {}".format(s3.s3n_url('linecount.txt'),
                                                   s3.s3n_url("linecount-out"))
    args = [
        "--conf", "spark.mesos.containerizer=mesos",
        "--conf", "spark.mesos.driver.secret.names=/{key},/{secret}".format(
            key=aws_access_key_secret_path, secret=aws_secret_access_key_path),
        "--conf", "spark.mesos.driver.secret.envkeys=AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY",
        "--class", "S3Job"
    ]
    utils.run_tests(app_url=utils._scala_test_jar_url(),
                    app_args=app_args,
                    expected_output="Read 3 lines",
                    args=args)

    assert len(list(s3.list("linecount-out"))) > 0

    # Credentials via driver environment variables: count-only read.
    env_args = [
        "--conf", "spark.mesos.driverEnv.AWS_ACCESS_KEY_ID={}".format(
            os.environ["AWS_ACCESS_KEY_ID"]),
        "--conf", "spark.mesos.driverEnv.AWS_SECRET_ACCESS_KEY={}".format(
            os.environ["AWS_SECRET_ACCESS_KEY"]),
        "--class", "S3Job"
    ]
    app_args = "--readUrl {} --countOnly".format(s3.s3n_url('linecount.txt'))
    utils.run_tests(app_url=utils._scala_test_jar_url(),
                    app_args=app_args,
                    expected_output="Read 3 lines",
                    args=env_args)

    # Same job again with the application flags in the opposite order.
    app_args = "--countOnly --readUrl {}".format(s3.s3n_url('linecount.txt'))
    utils.run_tests(app_url=utils._scala_test_jar_url(),
                    app_args=app_args,
                    expected_output="Read 3 lines",
                    args=env_args)
def test_r():
    r_script_path = os.path.join(THIS_DIR, 'jobs', 'R', 'dataframe.R')
    r_script_url = utils.upload_file(r_script_path)
    utils.run_tests(app_url=r_script_url,
                    app_args='',
                    expected_output="Justin")
def test_pipeline(kerberos_flag, stop_count, jar_uri, keytab_secret,
                  spark_app_name, jaas_uri=None):
    stop_count = str(stop_count)
    kerberized = kerberos_flag == "true"
    broker_dns = _kafka_broker_dns()
    topic = "top1"

    big_file, big_file_url = "file:///mnt/mesos/sandbox/big.txt", "http://norvig.com/big.txt"

    # arguments to the application
    producer_args = " ".join([broker_dns, big_file, topic, kerberos_flag])

    uris = "spark.mesos.uris={}".format(big_file_url)

    if kerberized and jaas_uri is None:
        jaas_path = os.path.join(THIS_DIR, "resources", "spark-kafka-client-jaas.conf")
        s3.upload_file(jaas_path)
        _uri = s3.s3_http_url("spark-kafka-client-jaas.conf")
        uris += ",{}".format(_uri)
    elif jaas_uri is not None:
        # Only append a JAAS URI when one was actually supplied; the original
        # unconditional else-branch appended the string "None" here.
        uris += ",{}".format(jaas_uri)

    common_args = [
        "--conf", "spark.mesos.containerizer=mesos",
        "--conf", "spark.scheduler.maxRegisteredResourcesWaitingTime=2400s",
        "--conf", "spark.scheduler.minRegisteredResourcesRatio=1.0",
        "--conf", uris
    ]

    kerberos_args = [
        "--conf", "spark.mesos.driver.secret.names={}".format(keytab_secret),
        "--conf", "spark.mesos.driver.secret.filenames=kafka-client.keytab",
        "--conf", "spark.mesos.executor.secret.names={}".format(keytab_secret),
        "--conf", "spark.mesos.executor.secret.filenames=kafka-client.keytab",
        "--conf", "spark.mesos.task.labels=DCOS_SPACE:{}".format(utils.SPARK_APP_NAME),
        "--conf", "spark.executorEnv.KRB5_CONFIG_BASE64={}".format(KAFKA_KRB5),
        "--conf", "spark.mesos.driverEnv.KRB5_CONFIG_BASE64={}".format(KAFKA_KRB5),
        "--conf", "spark.driver.extraJavaOptions=-Djava.security.auth.login.config="
                  "/mnt/mesos/sandbox/spark-kafka-client-jaas.conf",
        "--conf", "spark.executor.extraJavaOptions="
                  "-Djava.security.auth.login.config=/mnt/mesos/sandbox/spark-kafka-client-jaas.conf",
    ]

    producer_config = ["--conf", "spark.cores.max=2",
                       "--conf", "spark.executor.cores=2",
                       "--class", "KafkaFeeder"] + common_args
    if kerberized:
        producer_config += kerberos_args

    producer_id = utils.submit_job(app_url=jar_uri,
                                   app_args=producer_args,
                                   app_name=spark_app_name,
                                   args=producer_config)

    shakedown.wait_for(lambda: _producer_launched(),
                       ignore_exceptions=False,
                       timeout_seconds=600)
    shakedown.wait_for(lambda: utils.is_service_ready(KAFKA_SERVICE_NAME, 1),
                       ignore_exceptions=False,
                       timeout_seconds=600)

    consumer_config = ["--conf", "spark.cores.max=4",
                       "--class", "KafkaConsumer"] + common_args
    if kerberized:
        consumer_config += kerberos_args

    consumer_args = " ".join([broker_dns, topic, stop_count, kerberos_flag])

    try:
        utils.run_tests(app_url=jar_uri,
                        app_args=consumer_args,
                        expected_output="Read {} words".format(stop_count),
                        app_name=spark_app_name,
                        args=consumer_config)
    finally:
        # Always clean up the long-running producer driver, even if the
        # consumer assertion fails.
        utils.kill_driver(producer_id, spark_app_name)
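# A minimal usage sketch (assumed, not from the original suite): driving the
# kerberized Kafka pipeline above with a pre-uploaded keytab secret. The jar
# URL and secret name are hypothetical placeholders; the secret-name prefix
# follows the __dcos_base64__ convention used elsewhere in these tests.
def test_kafka_pipeline_kerberized():
    test_pipeline(kerberos_flag="true",
                  stop_count=1000,
                  jar_uri="https://example.com/dcos-spark-kafka-tests.jar",  # hypothetical
                  keytab_secret="__dcos_base64___keytab",                    # hypothetical
                  spark_app_name=utils.SPARK_APP_NAME)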