def test_s3_env():
    creds = s3.get_credentials()
    args = [
        "--conf spark.mesos.driverEnv.AWS_ACCESS_KEY_ID={}".format(creds.access_key),
        "--conf spark.mesos.driverEnv.AWS_SECRET_ACCESS_KEY={}".format(creds.secret_key)
    ]
    args.append("--class S3Job")

    linecount_path = os.path.join(THIS_DIR, 'resources', 'linecount.txt')
    s3.upload_file(linecount_path)

    # download/read linecount.txt only
    utils.run_tests(app_url=utils.dcos_test_jar_url(),
                    app_args="--readUrl {} --countOnly".format(s3.s3n_url('linecount.txt')),
                    expected_output="Read 3 lines",
                    args=args)

    # download/read linecount.txt, reupload as linecount-env.txt
    utils.run_tests(app_url=utils.dcos_test_jar_url(),
                    app_args="--readUrl {} --writeUrl {}".format(
                        s3.s3n_url('linecount.txt'),
                        s3.s3n_url('linecount-env.txt')),
                    expected_output="Read 3 lines",
                    args=args)
    assert len(list(s3.list("linecount-env.txt"))) > 0
def test_driver_metrics(use_overlay):
    @retrying.retry(wait_fixed=5000,
                    stop_max_delay=600 * 1000,
                    retry_on_result=lambda res: not res)
    def wait_for_metric(task_id, expected_metric_name):
        stdout = sdk_cmd.run_cli("task metrics details {}".format(task_id))
        result = expected_metric_name in stdout
        log.info('Checking for {} in STDOUT:\n{}\nResult: {}'.format(
            expected_metric_name, stdout, result))
        return result

    app_name = "MockTaskRunner"
    submit_args = [
        "--conf spark.cores.max=1",
        "--conf spark.mesos.containerizer=mesos",
        "--class {}".format(app_name)
    ]
    if use_overlay:
        submit_args = submit_args + [
            "--conf spark.mesos.network.name=dcos",
            "--conf spark.mesos.driverEnv.VIRTUAL_NETWORK_ENABLED=true",
            "--conf spark.executorEnv.VIRTUAL_NETWORK_ENABLED=true"
        ]

    expected_metric = "jvm.heap.used"

    driver_id = utils.submit_job(app_url=utils.dcos_test_jar_url(),
                                 app_args="1 300",
                                 args=submit_args)
    wait_for_metric(driver_id, expected_metric)

    sdk_tasks.check_running(app_name, 1, timeout_seconds=600)
    executor_id = shakedown.get_service_task_ids(app_name)[0]
    wait_for_metric(executor_id, expected_metric)
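# The polling helpers in this module (wait_for_metric above, wait_job_present
# below) all follow the same retry-until-truthy pattern from the 'retrying'
# package: retry while the wrapped function returns a falsy value, waiting
# 'wait_fixed' ms between attempts and giving up after 'stop_max_delay' ms.
# A minimal standalone sketch (the names here are illustrative, not part of
# this suite):
#
#     @retrying.retry(wait_fixed=5000,             # poll every 5 seconds
#                     stop_max_delay=600 * 1000,   # give up after 10 minutes
#                     retry_on_result=lambda res: not res)
#     def wait_until_ready():
#         return check_condition()  # retried until this returns truthy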
def test_packages_flag():
    utils.run_tests(
        app_url=utils.dcos_test_jar_url(),
        app_args="20",
        expected_output="210",
        args=["--packages com.google.guava:guava:23.0",
              "--class ProvidedPackages"])
def test_spark_and_kafka():
    kerberos_flag = "true" if KERBERIZED_KAFKA else "false"  # flag for using kerberized kafka given to app
    stop_count = "48"  # some reasonable number
    test_pipeline(kerberos_flag=kerberos_flag,
                  jar_uri=utils.dcos_test_jar_url(),
                  keytab_secret="__dcos_base64___keytab",
                  stop_count=stop_count,
                  spark_service_name=utils.SPARK_SERVICE_NAME)
def test_multi_arg_confs(service_name=utils.SPARK_SERVICE_NAME):
    # The inner quotes are double-escaped so they survive both CLI parsing at
    # submission time and land verbatim in the driver's extraJavaOptions.
    utils.run_tests(
        app_url=utils.dcos_test_jar_url(),
        app_args="",
        expected_output="spark.driver.extraJavaOptions,-XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Dparam3=\"valA valB\"",
        service_name=service_name,
        args=["--conf spark.driver.extraJavaOptions='-XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Dparam3=\\\"valA valB\\\"'",
              "--class MultiConfs"])
def test_s3_secrets():
    linecount_path = os.path.join(THIS_DIR, 'resources', 'linecount.txt')
    s3.upload_file(linecount_path)

    creds = s3.get_credentials()

    def make_credential_secret(path, val):
        sdk_security.delete_secret(path)
        rc, stdout, stderr = sdk_cmd.run_raw_cli(
            "security secrets create /{} -v {}".format(path, val))
        assert rc == 0, "Failed to create secret {}, stderr: {}, stdout: {}".format(
            path, stderr, stdout)

    aws_access_key_path = "aws_access_key_id"
    make_credential_secret(aws_access_key_path, creds.access_key)

    aws_secret_key_path = "aws_secret_access_key"
    make_credential_secret(aws_secret_key_path, creds.secret_key)

    args = [
        "--conf spark.mesos.containerizer=mesos",
        "--conf spark.mesos.driver.secret.names=/{key},/{secret}".format(
            key=aws_access_key_path, secret=aws_secret_key_path),
        "--conf spark.mesos.driver.secret.envkeys=AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY",
        "--class S3Job"
    ]
    try:
        # download/read linecount.txt only
        utils.run_tests(app_url=utils.dcos_test_jar_url(),
                        app_args="--readUrl {} --countOnly".format(s3.s3n_url('linecount.txt')),
                        expected_output="Read 3 lines",
                        args=args)

        # download/read linecount.txt, reupload as linecount-secret.txt
        utils.run_tests(app_url=utils.dcos_test_jar_url(),
                        app_args="--readUrl {} --writeUrl {}".format(
                            s3.s3n_url('linecount.txt'),
                            s3.s3n_url('linecount-secret.txt')),
                        expected_output="Read 3 lines",
                        args=args)
        assert len(list(s3.list("linecount-secret.txt"))) > 0
    finally:
        sdk_security.delete_secret(aws_access_key_path)
        sdk_security.delete_secret(aws_secret_key_path)
def test_jars_flag(service_name=utils.SPARK_SERVICE_NAME):
    uploaded_jar_url = utils.dcos_test_jar_url()
    jar_name = uploaded_jar_url.split("/")[-1]  # dcos-spark-scala-assembly-XX-SNAPSHOT.jar
    utils.run_tests(
        app_url=utils.SPARK_EXAMPLES,  # submit an app that does not include class 'MultiConfs'
        app_args="",
        expected_output="spark.driver.extraClassPath,/mnt/mesos/sandbox/{}".format(jar_name),
        service_name=service_name,
        args=["--jars {}".format(uploaded_jar_url),
              "--class MultiConfs"])
def test_value_secret():
    secret_value = "secret-value"
    utils.run_tests(
        app_url=utils.dcos_test_jar_url(),
        app_args=auth_token,
        expected_output=secret_value,
        args=[
            "--conf=spark.mesos.driver.secret.values={}".format(secret_value),
            "--conf=spark.mesos.driver.secret.envkeys=SECRET_ENV_KEY",
            "--class SecretConfs"
        ])
def test_supervise_conflict_frameworkid():
    job_service_name = "MockTaskRunner"

    @retrying.retry(wait_fixed=1000,
                    stop_max_delay=600 * 1000,
                    retry_on_result=lambda res: not res)
    def wait_job_present(present):
        svc = shakedown.get_service(job_service_name)
        if present:
            return svc is not None
        else:
            return svc is None

    job_args = [
        "--supervise",
        "--class", "MockTaskRunner",
        "--conf", "spark.cores.max=1",
        "--conf", "spark.executor.cores=1"
    ]

    driver_id = None
    try:
        driver_id = utils.submit_job(app_url=utils.dcos_test_jar_url(),
                                     app_args="1 1800",
                                     service_name=utils.SPARK_SERVICE_NAME,
                                     args=job_args)
        log.info("Started supervised driver {}".format(driver_id))

        wait_job_present(True)
        log.info("Job has registered")
        sdk_tasks.check_running(job_service_name, 1)
        log.info("Job has running executors")

        service_info = shakedown.get_service(job_service_name).dict()
        driver_regex = "spark.mesos.driver.frameworkId={}".format(service_info['id'])
        sdk_cmd.kill_task_with_pattern(driver_regex, service_info['hostname'])

        wait_job_present(False)
        wait_job_present(True)
        log.info("Job has re-registered")
        sdk_tasks.check_running(job_service_name, 1)
        log.info("Job has re-started")

        restarted_service_info = shakedown.get_service(job_service_name).dict()
        assert service_info['id'] != restarted_service_info['id'], \
            "Job has restarted with same framework Id"
    finally:
        if driver_id:
            kill_info = utils.kill_driver(driver_id, utils.SPARK_SERVICE_NAME)
            log.info("{}".format(kill_info))
            assert json.loads(kill_info)["success"], "Failed to kill spark job"
            wait_job_present(False)
def _launch_test_task(app_name):
    log.info('Submitting a Spark application with 1 executor')
    driver_task_id = utils.submit_job(app_url=utils.dcos_test_jar_url(),
                                      app_args="1 5",
                                      args=["--conf spark.cores.max=1",
                                            "--conf spark.executor.cores=1",
                                            "--conf spark.mesos.containerizer=mesos",
                                            "--conf spark.mesos.rejectOfferDuration=1s",
                                            f"--conf spark.mesos.executor.docker.image={utils.SPARK_DOCKER_IMAGE}",
                                            f"--class {app_name}"])
    sdk_tasks.check_running(app_name, 1, timeout_seconds=300)
    return driver_task_id
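# Example usage of the helper above (a sketch; the test body and cleanup call
# are illustrative, not part of the original suite):
#
#     def test_reject_offer_duration_example():
#         driver_task_id = _launch_test_task("MockTaskRunner")
#         # ... assertions on offer/decline behavior would go here ...
#         utils.kill_driver(driver_task_id, service_name=utils.SPARK_SERVICE_NAME)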
def _submit_job_and_get_tasks(extra_args=[]):
    # driver_cpus, executor_cpus, and app_name come from the surrounding scope.
    submit_args = [
        "--conf spark.driver.cores={}".format(driver_cpus),
        "--conf spark.cores.max={}".format(executor_cpus),
        "--conf spark.executor.cores={}".format(executor_cpus),
        "--class {}".format(app_name)
    ] + extra_args

    driver_task_id = utils.submit_job(app_url=utils.dcos_test_jar_url(),
                                      app_args="1 600",
                                      args=submit_args)

    sdk_tasks.check_running(app_name, 1, timeout_seconds=300)
    driver_task = shakedown.get_task(driver_task_id, completed=False)
    executor_task = shakedown.get_service_tasks(app_name)[0]

    return (driver_task_id, driver_task, executor_task)
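# Example usage of the helper above (a sketch assuming the surrounding-scope
# driver_cpus/executor_cpus values; the resource-field assertions are
# illustrative, not part of the original suite):
#
#     def test_cpu_allocation_example():
#         driver_id, driver, executor = _submit_job_and_get_tasks()
#         assert driver["resources"]["cpus"] == driver_cpus
#         assert executor["resources"]["cpus"] == executor_cpus
#         utils.kill_driver(driver_id, service_name=utils.SPARK_SERVICE_NAME)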
def test_env_based_ref_secret():
    secret_path = "/spark/secret-name"
    secret_value = "secret-value"
    dcos_utils.delete_secret(secret_path)
    dcos_utils.create_secret(secret_path, secret_value, False)
    try:
        utils.run_tests(
            app_url=utils.dcos_test_jar_url(),
            app_args=auth_token,
            expected_output=secret_value,
            args=[
                "--conf=spark.mesos.driver.secret.names={}".format(secret_path),
                "--conf=spark.mesos.driver.secret.envkeys=SECRET_ENV_KEY",
                "--class SecretConfs"
            ])
    finally:
        dcos_utils.delete_secret(secret_path)
def _verify_submission_rejected(service_name, driver_role=None):
    app_name = "MockTaskRunner"
    submit_args = ["--conf spark.cores.max=1",
                   "--class {}".format(app_name)]

    submission_id = None
    error = None
    try:
        submission_id = utils.submit_job(service_name=service_name,
                                         app_url=utils.dcos_test_jar_url(),
                                         driver_role=driver_role,
                                         app_args="1 300",
                                         args=submit_args)
    except Exception as err:
        error = err
    finally:
        if submission_id:
            utils.kill_driver(submission_id, service_name=service_name)

    assert error is not None
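# Example usage of the helper above (a sketch; the role name is an
# illustrative assumption, not part of the original suite):
#
#     def test_disallowed_driver_role_example():
#         _verify_submission_rejected(service_name=utils.SPARK_SERVICE_NAME,
#                                     driver_role="unregistered-role")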
def _submit_shuffle_job(sleep=0, extra_args=[], use_cli=True):
    num_unique_keys = SHUFFLE_JOB_EXPECTED_GROUPS_COUNT
    num_mappers = 4
    value_size_bytes = 100
    num_reducers = 4

    # Usage: ShuffleApp [numMappers] [numPairs] [valueSize] [numReducers] [sleepBeforeShutdown]
    return utils.submit_job(
        app_url=utils.dcos_test_jar_url(),
        use_cli=use_cli,
        app_args="{} {} {} {} {}".format(num_mappers, num_unique_keys,
                                         value_size_bytes, num_reducers, sleep),
        args=["--conf spark.executor.cores=1",
              "--conf spark.cores.max={}".format(SHUFFLE_JOB_NUM_EXECUTORS),
              "--conf spark.scheduler.minRegisteredResourcesRatio=1",
              "--conf spark.scheduler.maxRegisteredResourcesWaitingTime=3m",
              "--class ShuffleApp"] + extra_args)
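# Example usage of the helper above (a sketch; the service name and checks are
# illustrative assumptions):
#
#     driver_id = _submit_shuffle_job(sleep=300)
#     sdk_tasks.check_running("ShuffleApp", SHUFFLE_JOB_NUM_EXECUTORS,
#                             timeout_seconds=300)
#     utils.kill_driver(driver_id, service_name=utils.SPARK_SERVICE_NAME)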
def _submit_job_and_verify_role(service_name, expected_role, driver_role=None):
    app_name = "MockTaskRunner"
    submit_args = ["--conf spark.cores.max=1",
                   "--class {}".format(app_name)]

    submission_id = utils.submit_job(service_name=service_name,
                                     app_url=utils.dcos_test_jar_url(),
                                     app_args="1 300",
                                     driver_role=driver_role,
                                     args=submit_args)
    try:
        sdk_tasks.check_running(app_name, 1, timeout_seconds=300)
        driver_framework = dcos_utils.get_framework_json(app_name, completed=False)
        log.info("Driver framework:\n{}".format(driver_framework))
        assert expected_role == driver_framework["role"], \
            "Expected role '{}' but got '{}'".format(expected_role, driver_framework["role"])
    except Exception:
        log.info(f"Cleaning up. Attempting to kill driver: {submission_id}")
        utils.kill_driver(submission_id, service_name=service_name)
        raise  # re-raise after cleanup so a failed verification still fails the test
def _submit_job_and_verify_users(user, use_ucr_for_spark_submit, extra_args=[]):
    app_name = "MockTaskRunner"
    submit_args = ["--conf spark.cores.max=1",
                   "--class {}".format(app_name)] + extra_args

    driver_task_id = utils.submit_job(service_name=SERVICE_NAME,
                                      app_url=utils.dcos_test_jar_url(),
                                      app_args="1 300",
                                      args=submit_args)
    try:
        sdk_tasks.check_running(app_name, 1, timeout_seconds=300)
        driver_task = shakedown.get_task(driver_task_id, completed=False)
        executor_tasks = shakedown.get_service_tasks(app_name)

        for task in [driver_task] + executor_tasks:
            log.info(f"Checking task '{task['id']}'")
            _check_task_user(task, user, use_ucr_for_spark_submit)
    finally:
        log.info(f"Cleaning up. Attempting to kill driver: {driver_task_id}")
        utils.kill_driver(driver_task_id, service_name=SERVICE_NAME)
def test_file_based_ref_secret():
    secret_path = "/spark/secret-name"
    secret_file_name = "secret.file"
    secret_value = "secret-value"

    with open(secret_file_name, 'w') as secret_file:
        secret_file.write(secret_value)

    dcos_utils.delete_secret(secret_path)
    dcos_utils.create_secret(secret_path, secret_file_name, True)
    try:
        utils.run_tests(
            app_url=utils.dcos_test_jar_url(),
            app_args=auth_token,
            expected_output=secret_value,
            args=[
                "--conf=spark.mesos.driver.secret.names={}".format(secret_path),
                "--conf=spark.mesos.driver.secret.filenames={}".format(secret_file_name),
                "--class SecretConfs"
            ])
    finally:
        dcos_utils.delete_secret(secret_path)
        if os.path.exists(secret_file_name):
            os.remove(secret_file_name)