def test_driver_executor_tls():
    '''
    Put keystore and truststore as secrets in DC/OS secret store.
    Run SparkPi job with TLS enabled, referencing those secrets.
    Make sure other secrets still show up.
    '''
    python_script_path = os.path.join(THIS_DIR, 'jobs', 'python', 'pi_with_secret.py')
    python_script_url = utils.upload_file(python_script_path)
    resources_folder = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'resources')
    keystore_file = 'server.jks'
    truststore_file = 'trust.jks'
    keystore_path = os.path.join(resources_folder, '{}.base64'.format(keystore_file))
    truststore_path = os.path.join(resources_folder, '{}.base64'.format(truststore_file))
    keystore_secret = '__dcos_base64__keystore'
    truststore_secret = '__dcos_base64__truststore'
    my_secret = 'mysecret'
    my_secret_content = 'secretcontent'
    shakedown.run_dcos_command(
        'security secrets create /{} --value-file {}'.format(keystore_secret, keystore_path))
    shakedown.run_dcos_command(
        'security secrets create /{} --value-file {}'.format(truststore_secret, truststore_path))
    shakedown.run_dcos_command(
        'security secrets create /{} --value {}'.format(my_secret, my_secret_content))
    password = '******'
    try:
        utils.run_tests(
            app_url=python_script_url,
            app_args="30 {} {}".format(my_secret, my_secret_content),
            expected_output="Pi is roughly 3",
            args=["--keystore-secret-path", keystore_secret,
                  "--truststore-secret-path", truststore_secret,
                  "--private-key-password", password,
                  "--keystore-password", password,
                  "--truststore-password", password,
                  "--conf", "spark.mesos.driver.secret.names={}".format(my_secret),
                  "--conf", "spark.mesos.driver.secret.filenames={}".format(my_secret),
                  "--conf", "spark.mesos.driver.secret.envkeys={}".format(my_secret)])
    finally:
        shakedown.run_dcos_command('security secrets delete /{}'.format(keystore_secret))
        shakedown.run_dcos_command('security secrets delete /{}'.format(truststore_secret))
        shakedown.run_dcos_command('security secrets delete /{}'.format(my_secret))

def test_cli_multiple_spaces():
    # Verify that the CLI tolerates extra whitespace around submit arguments.
    utils.run_tests(app_url=utils.SPARK_EXAMPLES,
                    app_args="30",
                    expected_output="Pi is roughly 3",
                    args=["--conf ", "spark.cores.max=2",
                          " --class ", "org.apache.spark.examples.SparkPi"])

def test_cni():
    utils.run_tests(app_url=utils.SPARK_EXAMPLES,
                    app_args="",
                    expected_output="Pi is roughly 3",
                    args=["--conf", "spark.mesos.network.name=dcos",
                          "--class", "org.apache.spark.examples.SparkPi"])

def _run_terasort():
    jar_url = TERASORT_JAR
    utils.run_tests(
        app_url=jar_url,
        app_args="hdfs:///terasort_in hdfs:///terasort_out",
        expected_output="",
        app_name=SOAK_SPARK_APP_NAME,
        args=(["--class", "com.github.ehiggs.spark.terasort.TeraSort"] + COMMON_ARGS))

def _run_teravalidate():
    jar_url = TERASORT_JAR
    utils.run_tests(
        app_url=jar_url,
        app_args="hdfs:///terasort_out hdfs:///terasort_validate",
        expected_output="partitions are properly sorted",
        app_name=SOAK_SPARK_APP_NAME,
        args=(["--class", "com.github.ehiggs.spark.terasort.TeraValidate"] + COMMON_ARGS))

def _run_teragen():
    jar_url = TERASORT_JAR
    input_size = os.getenv('TERASORT_INPUT_SIZE', '1g')
    utils.run_tests(
        app_url=jar_url,
        app_args="{} hdfs:///terasort_in".format(input_size),
        expected_output="Number of records written",
        app_name=SOAK_SPARK_APP_NAME,
        args=(["--class", "com.github.ehiggs.spark.terasort.TeraGen"] + COMMON_ARGS))

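# The three helpers above make up one TeraSort soak iteration: generate input,
# sort it, then validate the sorted output. A minimal sketch of how they might
# be chained (this orchestration wrapper is illustrative only and is not the
# suite's actual soak entry point):
def _run_terasort_cycle():
    _run_teragen()       # writes hdfs:///terasort_in
    _run_terasort()      # sorts into hdfs:///terasort_out
    _run_teravalidate()  # checks that hdfs:///terasort_out is properly sorted
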
def test_python():
    python_script_path = os.path.join(THIS_DIR, 'jobs', 'python', 'pi_with_include.py')
    python_script_url = utils.upload_file(python_script_path)
    py_file_path = os.path.join(THIS_DIR, 'jobs', 'python', 'PySparkTestInclude.py')
    py_file_url = utils.upload_file(py_file_path)
    utils.run_tests(app_url=python_script_url,
                    app_args="30",
                    expected_output="Pi is roughly 3",
                    args=["--py-files", py_file_url])

def test_history():
    job_args = ["--class", "org.apache.spark.examples.SparkPi",
                "--conf", "spark.eventLog.enabled=true",
                "--conf", "spark.eventLog.dir=hdfs://hdfs/history"]
    utils.run_tests(app_url=utils.SPARK_EXAMPLES,
                    app_args="100",
                    expected_output="Pi is roughly 3",
                    app_name="/spark",
                    args=(job_args + KERBEROS_ARGS))

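# KERBEROS_ARGS, used by test_history() above and test_terasort_suite() below,
# is defined elsewhere in this module. A minimal sketch of the shape it likely
# takes, assuming a kerberized HDFS whose keytab is stored as a DC/OS secret
# (the principal, realm, and secret path are placeholders, and the variable is
# renamed here to avoid clashing with the real definition):
EXAMPLE_KERBEROS_ARGS = [
    "--kerberos-principal", "hdfs/name-0-node.hdfs.autoip.dcos.thisdcos.directory@LOCAL",
    "--keytab-secret-path", "/__dcos_base64___keytab",
    "--conf", "spark.mesos.driverEnv.SPARK_USER=hdfs",
]
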
def test_jar(app_name=utils.SPARK_APP_NAME):
    master_url = ("https" if utils.is_strict() else "http") + "://leader.mesos:5050"
    spark_job_runner_args = '{} dcos \\"*\\" spark:only 2 --auth-token={}'.format(
        master_url, shakedown.dcos_acs_token())
    jar_url = utils.upload_file(os.getenv('TEST_JAR_PATH'))
    utils.run_tests(
        app_url=jar_url,
        app_args=spark_job_runner_args,
        expected_output="All tests passed",
        app_name=app_name,
        args=["--class", 'com.typesafe.spark.test.mesos.framework.runners.SparkJobRunner'])

def test_rpc_auth():
    secret_name = "sparkauth"

    rc, stdout, stderr = sdk_cmd.run_raw_cli("{pkg} secret /{secret}".format(
        pkg=utils.SPARK_PACKAGE_NAME, secret=secret_name))
    assert rc == 0, "Failed to generate Spark auth secret, stderr {err} stdout {out}".format(
        err=stderr, out=stdout)

    args = ["--executor-auth-secret", secret_name,
            "--class", "org.apache.spark.examples.SparkPi"]

    utils.run_tests(app_url=utils.SPARK_EXAMPLES,
                    app_args="100",
                    expected_output="Pi is roughly 3",
                    app_name="/spark",
                    args=args)

def test_terasort_suite():
    jar_url = 'https://downloads.mesosphere.io/spark/examples/spark-terasort-1.1-jar-with-dependencies_2.11.jar'

    teragen_args = ["--class", "com.github.ehiggs.spark.terasort.TeraGen"] + KERBEROS_ARGS
    utils.run_tests(app_url=jar_url,
                    app_args="1g hdfs:///terasort_in",
                    expected_output="Number of records written",
                    args=teragen_args)

    terasort_args = ["--class", "com.github.ehiggs.spark.terasort.TeraSort"] + KERBEROS_ARGS
    utils.run_tests(app_url=jar_url,
                    app_args="hdfs:///terasort_in hdfs:///terasort_out",
                    expected_output="",
                    args=terasort_args)

    teravalidate_args = ["--class", "com.github.ehiggs.spark.terasort.TeraValidate"] + KERBEROS_ARGS
    utils.run_tests(app_url=jar_url,
                    app_args="hdfs:///terasort_out hdfs:///terasort_validate",
                    expected_output="partitions are properly sorted",
                    args=teravalidate_args)

def test_sparkPi(app_name=utils.SPARK_APP_NAME):
    utils.run_tests(app_url=utils.SPARK_EXAMPLES,
                    app_args="100",
                    expected_output="Pi is roughly 3",
                    app_name=app_name,
                    args=["--class org.apache.spark.examples.SparkPi"])

def test_s3():
    def make_credential_secret(envvar, secret_path):
        rc, stdout, stderr = sdk_cmd.run_raw_cli(
            "security secrets create {p} -v {e}".format(p=secret_path, e=os.environ[envvar]))
        assert rc == 0, "Failed to create secret {secret} from envvar {envvar}, stderr: {err}, stdout: {out}".format(
            secret=secret_path, envvar=envvar, err=stderr, out=stdout)

    LOGGER.info("Creating AWS secrets")
    aws_access_key_secret_path = "aws_access_key_id"
    aws_secret_access_key_path = "aws_secret_access_key"
    make_credential_secret(envvar="AWS_ACCESS_KEY_ID",
                           secret_path="/{}".format(aws_access_key_secret_path))
    make_credential_secret(envvar="AWS_SECRET_ACCESS_KEY",
                           secret_path="/{}".format(aws_secret_access_key_path))

    linecount_path = os.path.join(THIS_DIR, 'resources', 'linecount.txt')
    s3.upload_file(linecount_path)

    # Pass the AWS credentials to the driver as DC/OS secrets exposed as env vars.
    app_args = "--readUrl {} --writeUrl {}".format(s3.s3n_url('linecount.txt'),
                                                   s3.s3n_url("linecount-out"))
    args = ["--conf", "spark.mesos.containerizer=mesos",
            "--conf", "spark.mesos.driver.secret.names=/{key},/{secret}".format(
                key=aws_access_key_secret_path, secret=aws_secret_access_key_path),
            "--conf", "spark.mesos.driver.secret.envkeys=AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY",
            "--class", "S3Job"]
    utils.run_tests(app_url=utils._scala_test_jar_url(),
                    app_args=app_args,
                    expected_output="Read 3 lines",
                    args=args)
    assert len(list(s3.list("linecount-out"))) > 0

    # Pass the AWS credentials directly through the driver environment.
    app_args = "--readUrl {} --countOnly".format(s3.s3n_url('linecount.txt'))
    args = ["--conf", "spark.mesos.driverEnv.AWS_ACCESS_KEY_ID={}".format(
                os.environ["AWS_ACCESS_KEY_ID"]),
            "--conf", "spark.mesos.driverEnv.AWS_SECRET_ACCESS_KEY={}".format(
                os.environ["AWS_SECRET_ACCESS_KEY"]),
            "--class", "S3Job"]
    utils.run_tests(app_url=utils._scala_test_jar_url(),
                    app_args=app_args,
                    expected_output="Read 3 lines",
                    args=args)

    # Same environment-based credentials, with the application arguments reordered.
    app_args = "--countOnly --readUrl {}".format(s3.s3n_url('linecount.txt'))
    args = ["--conf", "spark.mesos.driverEnv.AWS_ACCESS_KEY_ID={}".format(
                os.environ["AWS_ACCESS_KEY_ID"]),
            "--conf", "spark.mesos.driverEnv.AWS_SECRET_ACCESS_KEY={}".format(
                os.environ["AWS_SECRET_ACCESS_KEY"]),
            "--class", "S3Job"]
    utils.run_tests(app_url=utils._scala_test_jar_url(),
                    app_args=app_args,
                    expected_output="Read 3 lines",
                    args=args)

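# test_s3() above creates the two AWS credential secrets but never removes them.
# A minimal cleanup sketch, assuming the same `dcos security` CLI used to create
# them (whether the suite instead relies on cluster teardown is an assumption;
# this helper is illustrative and not called by the tests):
def _delete_aws_credential_secrets():
    for path in ("/aws_access_key_id", "/aws_secret_access_key"):
        sdk_cmd.run_raw_cli("security secrets delete {}".format(path))
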
def test_r():
    r_script_path = os.path.join(THIS_DIR, 'jobs', 'R', 'dataframe.R')
    r_script_url = utils.upload_file(r_script_path)
    utils.run_tests(app_url=r_script_url,
                    app_args='',
                    expected_output="Justin")

def test_pipeline(kerberos_flag, stop_count, jar_uri, keytab_secret, jaas_uri=None):
    stop_count = str(stop_count)
    kerberized = (kerberos_flag == "true")
    broker_dns = _kafka_broker_dns()
    topic = "top1"

    big_file, big_file_url = "file:///mnt/mesos/sandbox/big.txt", "http://norvig.com/big.txt"

    # arguments to the application
    producer_args = " ".join([broker_dns, big_file, topic, kerberos_flag])

    uris = "spark.mesos.uris={}".format(big_file_url)

    if kerberized and jaas_uri is None:
        jaas_path = os.path.join(THIS_DIR, "resources", "spark-kafka-client-jaas.conf")
        s3.upload_file(jaas_path)
        _uri = s3.s3_http_url("spark-kafka-client-jaas.conf")
        uris += ",{}".format(_uri)
    elif jaas_uri is not None:
        # Only append a JAAS URI when one was actually supplied.
        uris += ",{}".format(jaas_uri)

    common_args = [
        "--conf", "spark.mesos.containerizer=mesos",
        "--conf", "spark.scheduler.maxRegisteredResourcesWaitingTime=2400s",
        "--conf", "spark.scheduler.minRegisteredResourcesRatio=1.0",
        "--conf", uris
    ]

    kerberos_args = [
        "--conf", "spark.mesos.driver.secret.names={}".format(keytab_secret),
        "--conf", "spark.mesos.driver.secret.filenames=kafka-client.keytab",
        "--conf", "spark.mesos.executor.secret.names={}".format(keytab_secret),
        "--conf", "spark.mesos.executor.secret.filenames=kafka-client.keytab",
        "--conf", "spark.mesos.task.labels=DCOS_SPACE:{}".format(utils.SPARK_APP_NAME),
        "--conf", "spark.executorEnv.KRB5_CONFIG_BASE64={}".format(KAFKA_KRB5),
        "--conf", "spark.mesos.driverEnv.KRB5_CONFIG_BASE64={}".format(KAFKA_KRB5),
        "--conf", "spark.driver.extraJavaOptions=-Djava.security.auth.login.config="
                  "/mnt/mesos/sandbox/spark-kafka-client-jaas.conf",
        "--conf", "spark.executor.extraJavaOptions="
                  "-Djava.security.auth.login.config=/mnt/mesos/sandbox/spark-kafka-client-jaas.conf",
    ]

    # Launch the KafkaFeeder producer, then wait for it to start and for the
    # Kafka service to be ready.
    producer_config = ["--conf", "spark.cores.max=2",
                       "--conf", "spark.executor.cores=2",
                       "--class", "KafkaFeeder"] + common_args
    if kerberized:
        producer_config += kerberos_args

    producer_id = utils.submit_job(app_url=jar_uri,
                                   app_args=producer_args,
                                   app_name=utils.SPARK_APP_NAME,
                                   args=producer_config)

    shakedown.wait_for(lambda: _producer_launched(),
                       ignore_exceptions=False, timeout_seconds=600)
    shakedown.wait_for(lambda: utils.is_service_ready(KAFKA_SERVICE_NAME, 1),
                       ignore_exceptions=False, timeout_seconds=600)

    # Run the KafkaConsumer until it has read `stop_count` words, then clean up
    # the long-running producer driver.
    consumer_config = ["--conf", "spark.cores.max=4",
                       "--class", "KafkaConsumer"] + common_args
    if kerberized:
        consumer_config += kerberos_args

    consumer_args = " ".join([broker_dns, topic, stop_count, kerberos_flag])

    utils.run_tests(app_url=jar_uri,
                    app_args=consumer_args,
                    expected_output="Read {} words".format(stop_count),
                    app_name=utils.SPARK_APP_NAME,
                    args=consumer_config)

    utils.kill_driver(producer_id, utils.SPARK_APP_NAME)

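# A minimal sketch of how test_pipeline() might be invoked from a pytest case,
# assuming the KafkaFeeder/KafkaConsumer classes ship in the suite's Scala test
# jar and the Kafka client keytab is already stored as a DC/OS secret (the
# secret name below is a placeholder, not the suite's real fixture):
def test_kerberized_kafka_pipeline():
    test_pipeline(kerberos_flag="true",
                  stop_count="1000",
                  jar_uri=utils._scala_test_jar_url(),
                  keytab_secret="__dcos_base64___keytab")
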