def test_s3_env():
    creds = s3.get_credentials()
    args = [
        "--conf spark.mesos.driverEnv.AWS_ACCESS_KEY_ID={}".format(creds.access_key),
        "--conf spark.mesos.driverEnv.AWS_SECRET_ACCESS_KEY={}".format(creds.secret_key),
        "--class S3Job"
    ]

    linecount_path = os.path.join(THIS_DIR, 'resources', 'linecount.txt')
    s3.upload_file(linecount_path)

    # download/read linecount.txt only
    utils.run_tests(app_url=utils.dcos_test_jar_url(),
                    app_args="--readUrl {} --countOnly".format(s3.s3n_url('linecount.txt')),
                    expected_output="Read 3 lines",
                    args=args)

    # download/read linecount.txt, reupload as linecount-env.txt
    utils.run_tests(app_url=utils.dcos_test_jar_url(),
                    app_args="--readUrl {} --writeUrl {}".format(
                        s3.s3n_url('linecount.txt'),
                        s3.s3n_url('linecount-env.txt')),
                    expected_output="Read 3 lines",
                    args=args)
    assert len(list(s3.list("linecount-env.txt"))) > 0
def upload_file(file_path):
    """Uploads file_path to the configured S3 bucket and returns its HTTP URL."""
    LOGGER.info("Uploading {} to s3://{}/{}".format(
        file_path, os.environ['S3_BUCKET'], os.environ['S3_PREFIX']))
    spark_s3.upload_file(file_path)
    basename = os.path.basename(file_path)
    return spark_s3.http_url(basename)
def test_s3_secrets():
    linecount_path = os.path.join(THIS_DIR, 'resources', 'linecount.txt')
    s3.upload_file(linecount_path)

    creds = s3.get_credentials()

    def make_credential_secret(path, val):
        sdk_security.delete_secret(path)
        rc, stdout, stderr = sdk_cmd.run_raw_cli(
            "security secrets create /{} -v {}".format(path, val))
        assert rc == 0, "Failed to create secret {}, stderr: {}, stdout: {}".format(
            path, stderr, stdout)

    aws_access_key_path = "aws_access_key_id"
    make_credential_secret(aws_access_key_path, creds.access_key)

    aws_secret_key_path = "aws_secret_access_key"
    make_credential_secret(aws_secret_key_path, creds.secret_key)

    args = [
        "--conf spark.mesos.containerizer=mesos",
        "--conf spark.mesos.driver.secret.names=/{key},/{secret}".format(
            key=aws_access_key_path, secret=aws_secret_key_path),
        "--conf spark.mesos.driver.secret.envkeys=AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY",
        "--class S3Job"
    ]
    try:
        # download/read linecount.txt only
        utils.run_tests(app_url=utils.dcos_test_jar_url(),
                        app_args="--readUrl {} --countOnly".format(s3.s3n_url('linecount.txt')),
                        expected_output="Read 3 lines",
                        args=args)

        # download/read linecount.txt, reupload as linecount-secret.txt
        utils.run_tests(app_url=utils.dcos_test_jar_url(),
                        app_args="--readUrl {} --writeUrl {}".format(
                            s3.s3n_url('linecount.txt'),
                            s3.s3n_url('linecount-secret.txt')),
                        expected_output="Read 3 lines",
                        args=args)
        assert len(list(s3.list("linecount-secret.txt"))) > 0
    finally:
        sdk_security.delete_secret(aws_access_key_path)
        sdk_security.delete_secret(aws_secret_key_path)
def test_pipeline(kerberos_flag, stop_count, jar_uri, keytab_secret, spark_app_name, jaas_uri=None):
    stop_count = str(stop_count)
    kerberized = kerberos_flag == "true"
    broker_dns = _kafka_broker_dns()
    topic = "top1"

    big_file = "file:///mnt/mesos/sandbox/big.txt"
    big_file_url = "http://norvig.com/big.txt"

    # arguments to the application
    producer_args = " ".join([broker_dns, big_file, topic, kerberos_flag])

    uris = "spark.mesos.uris={}".format(big_file_url)

    if kerberized and jaas_uri is None:
        jaas_path = os.path.join(THIS_DIR, "resources", "spark-kafka-client-jaas.conf")
        s3.upload_file(jaas_path)
        _uri = s3.s3_http_url("spark-kafka-client-jaas.conf")
        uris += ",{}".format(_uri)
    elif jaas_uri is not None:
        uris += ",{}".format(jaas_uri)

    common_args = [
        "--conf", "spark.mesos.containerizer=mesos",
        "--conf", "spark.scheduler.maxRegisteredResourcesWaitingTime=2400s",
        "--conf", "spark.scheduler.minRegisteredResourcesRatio=1.0",
        "--conf", uris
    ]

    kerberos_args = [
        "--conf", "spark.mesos.driver.secret.names={}".format(keytab_secret),
        "--conf", "spark.mesos.driver.secret.filenames=kafka-client.keytab",
        "--conf", "spark.mesos.executor.secret.names={}".format(keytab_secret),
        "--conf", "spark.mesos.executor.secret.filenames=kafka-client.keytab",
        "--conf", "spark.mesos.task.labels=DCOS_SPACE:{}".format(utils.SPARK_APP_NAME),
        "--conf", "spark.executorEnv.KRB5_CONFIG_BASE64={}".format(KAFKA_KRB5),
        "--conf", "spark.mesos.driverEnv.KRB5_CONFIG_BASE64={}".format(KAFKA_KRB5),
        "--conf", "spark.driver.extraJavaOptions=-Djava.security.auth.login.config="
                  "/mnt/mesos/sandbox/spark-kafka-client-jaas.conf",
        "--conf", "spark.executor.extraJavaOptions="
                  "-Djava.security.auth.login.config=/mnt/mesos/sandbox/spark-kafka-client-jaas.conf",
    ]

    producer_config = [
        "--conf", "spark.cores.max=2",
        "--conf", "spark.executor.cores=2",
        "--class", "KafkaFeeder"
    ] + common_args

    if kerberized:
        producer_config += kerberos_args

    producer_id = utils.submit_job(app_url=jar_uri,
                                   app_args=producer_args,
                                   app_name=spark_app_name,
                                   args=producer_config)

    shakedown.wait_for(lambda: _producer_launched(),
                       ignore_exceptions=False,
                       timeout_seconds=600)
    shakedown.wait_for(lambda: utils.is_service_ready(KAFKA_SERVICE_NAME, 1),
                       ignore_exceptions=False,
                       timeout_seconds=600)

    consumer_config = [
        "--conf", "spark.cores.max=4",
        "--class", "KafkaConsumer"
    ] + common_args

    if kerberized:
        consumer_config += kerberos_args

    consumer_args = " ".join([broker_dns, topic, stop_count, kerberos_flag])

    utils.run_tests(app_url=jar_uri,
                    app_args=consumer_args,
                    expected_output="Read {} words".format(stop_count),
                    app_name=spark_app_name,
                    args=consumer_config)

    utils.kill_driver(producer_id, spark_app_name)
def upload_file(file_path):
    """Uploads file_path to the test S3 bucket and returns its HTTP URL."""
    spark_s3.upload_file(file_path)
    return spark_s3.http_url(os.path.basename(file_path))
def upload_jaas():
    """Uploads the Kafka client JAAS config to S3 and returns its HTTP URL."""
    jaas_path = os.path.join(THIS_DIR, "..", "resources", "spark-kafka-client-jaas.conf")
    s3.upload_file(jaas_path)
    return s3.http_url("spark-kafka-client-jaas.conf")
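# Hypothetical usage sketch (not part of the original tests): upload_jaas() is meant to
# feed test_pipeline()'s jaas_uri parameter, so the JAAS config is fetched into the
# sandbox via spark.mesos.uris instead of being re-uploaded inside test_pipeline().
# The stop count and keytab secret name below are illustrative assumptions.
def _example_kerberized_pipeline():
    test_pipeline(kerberos_flag="true",
                  stop_count=1000,
                  jar_uri=utils.dcos_test_jar_url(),
                  keytab_secret="kafka-keytab",  # assumed DC/OS secret holding the client keytab
                  spark_app_name=utils.SPARK_APP_NAME,
                  jaas_uri=upload_jaas())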
def test_s3():
    def make_credential_secret(envvar, secret_path):
        rc, stdout, stderr = sdk_cmd.run_raw_cli(
            "security secrets create {p} -v {e}".format(p=secret_path, e=os.environ[envvar]))
        assert rc == 0, "Failed to create secret {secret} from envvar {envvar}, stderr: {err}, stdout: {out}".format(
            secret=secret_path, envvar=envvar, err=stderr, out=stdout)

    LOGGER.info("Creating AWS secrets")
    aws_access_key_secret_path = "aws_access_key_id"
    aws_secret_access_key_path = "aws_secret_access_key"
    make_credential_secret(envvar="AWS_ACCESS_KEY_ID",
                           secret_path="/{}".format(aws_access_key_secret_path))
    make_credential_secret(envvar="AWS_SECRET_ACCESS_KEY",
                           secret_path="/{}".format(aws_secret_access_key_path))

    linecount_path = os.path.join(THIS_DIR, 'resources', 'linecount.txt')
    s3.upload_file(linecount_path)

    # read linecount.txt and write the result back, with credentials from DC/OS secrets
    app_args = "--readUrl {} --writeUrl {}".format(s3.s3n_url('linecount.txt'),
                                                   s3.s3n_url("linecount-out"))
    args = [
        "--conf", "spark.mesos.containerizer=mesos",
        "--conf", "spark.mesos.driver.secret.names=/{key},/{secret}".format(
            key=aws_access_key_secret_path, secret=aws_secret_access_key_path),
        "--conf", "spark.mesos.driver.secret.envkeys=AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY",
        "--class", "S3Job"
    ]
    utils.run_tests(app_url=utils._scala_test_jar_url(),
                    app_args=app_args,
                    expected_output="Read 3 lines",
                    args=args)
    assert len(list(s3.list("linecount-out"))) > 0

    # count-only run, with credentials passed through the driver environment
    app_args = "--readUrl {} --countOnly".format(s3.s3n_url('linecount.txt'))
    args = [
        "--conf", "spark.mesos.driverEnv.AWS_ACCESS_KEY_ID={}".format(
            os.environ["AWS_ACCESS_KEY_ID"]),
        "--conf", "spark.mesos.driverEnv.AWS_SECRET_ACCESS_KEY={}".format(
            os.environ["AWS_SECRET_ACCESS_KEY"]),
        "--class", "S3Job"
    ]
    utils.run_tests(app_url=utils._scala_test_jar_url(),
                    app_args=app_args,
                    expected_output="Read 3 lines",
                    args=args)

    # same configuration again, with the application flags in reverse order
    app_args = "--countOnly --readUrl {}".format(s3.s3n_url('linecount.txt'))
    utils.run_tests(app_url=utils._scala_test_jar_url(),
                    app_args=app_args,
                    expected_output="Read 3 lines",
                    args=args)
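# A possible consolidation (a sketch, not in the original module): test_s3_secrets and
# test_s3 each define their own make_credential_secret; a shared delete-then-create
# helper along these lines would remove the duplication, assuming the same
# sdk_security / sdk_cmd helpers used above.
def _make_credential_secret(path, value):
    sdk_security.delete_secret(path)  # drop any stale secret so the create is repeatable
    rc, stdout, stderr = sdk_cmd.run_raw_cli(
        "security secrets create /{} -v {}".format(path, value))
    assert rc == 0, "Failed to create secret /{}, stderr: {}, stdout: {}".format(
        path, stderr, stdout)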