def test_driver_executor_tls():
    '''
    Put keystore and truststore as secrets in the DC/OS secret store.
    Run a SparkPi job with TLS enabled, referencing those secrets.
    Make sure other secrets still show up.
    '''
    python_script_path = os.path.join(THIS_DIR, 'jobs', 'python', 'pi_with_secret.py')
    python_script_url = utils.upload_file(python_script_path)
    resources_folder = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), 'resources')
    keystore_file = 'server.jks'
    truststore_file = 'trust.jks'
    keystore_path = os.path.join(resources_folder, '{}.base64'.format(keystore_file))
    truststore_path = os.path.join(resources_folder, '{}.base64'.format(truststore_file))
    keystore_secret = '__dcos_base64__keystore'
    truststore_secret = '__dcos_base64__truststore'
    my_secret = 'mysecret'
    my_secret_content = 'secretcontent'
    shakedown.run_dcos_command(
        'security secrets create /{} --value-file {}'.format(
            keystore_secret, keystore_path))
    shakedown.run_dcos_command(
        'security secrets create /{} --value-file {}'.format(
            truststore_secret, truststore_path))
    shakedown.run_dcos_command('security secrets create /{} --value {}'.format(
        my_secret, my_secret_content))
    password = '******'
    try:
        utils.run_tests(
            app_url=python_script_url,
            app_args="30 {} {}".format(my_secret, my_secret_content),
            expected_output="Pi is roughly 3",
            args=[
                "--keystore-secret-path", keystore_secret,
                "--truststore-secret-path", truststore_secret,
                "--private-key-password", password,
                "--keystore-password", password,
                "--truststore-password", password,
                "--conf", "spark.mesos.driver.secret.names={}".format(my_secret),
                "--conf", "spark.mesos.driver.secret.filenames={}".format(my_secret),
                "--conf", "spark.mesos.driver.secret.envkeys={}".format(my_secret),
            ])
    finally:
        shakedown.run_dcos_command(
            'security secrets delete /{}'.format(keystore_secret))
        shakedown.run_dcos_command(
            'security secrets delete /{}'.format(truststore_secret))
        shakedown.run_dcos_command(
            'security secrets delete /{}'.format(my_secret))
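# A hedged sketch (not part of the original suite) of how the *.base64 fixture
# files referenced above could be produced from the binary JKS files, assuming
# plain base64 encoding as implied by the __dcos_base64__ secret-name prefix:
import base64

def _encode_keystore(jks_path):
    # Reads a binary keystore and writes a <name>.base64 file next to it.
    with open(jks_path, 'rb') as f:
        encoded = base64.b64encode(f.read())
    with open(jks_path + '.base64', 'wb') as out:
        out.write(encoded)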
def _run_terasort_job(terasort_class, app_args, expected_output):
    jar_url = 'https://downloads.mesosphere.io/spark/examples/spark-terasort-1.1-jar-with-dependencies_2.11.jar'
    submit_args = ["--class", terasort_class] + KERBEROS_ARGS
    utils.run_tests(app_url=jar_url,
                    app_args=" ".join(app_args),
                    expected_output=expected_output,
                    args=submit_args)
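# A minimal usage sketch (assumed, not from the original suite): generating
# TeraSort input through the helper above, mirroring the class names, HDFS
# paths, and expected output used by the soak tests later in this file.
def _example_teragen():
    _run_terasort_job('com.github.ehiggs.spark.terasort.TeraGen',
                      ['1g', 'hdfs:///terasort_in'],
                      'Number of records written')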
def _test_spark_docker_image(dist):
    utils.require_spark(additional_options={'service': {'docker-image': dist['image']}})
    example_jar_url = EXAMPLES_JAR_PATH_TEMPLATE.format(dist['scala_version'])

    expected_groups_count = 12000
    num_mappers = 4
    value_size_bytes = 100
    num_reducers = 4

    utils.run_tests(
        app_url=example_jar_url,
        app_args=f"{num_mappers} {expected_groups_count} {value_size_bytes} {num_reducers}",
        expected_output=str(expected_groups_count),
        args=[
            "--class org.apache.spark.examples.GroupByTest",
            "--conf spark.executor.cores=1",
            "--conf spark.cores.max=4",
            "--conf spark.scheduler.minRegisteredResourcesRatio=1",
            "--conf spark.scheduler.maxRegisteredResourcesWaitingTime=3m"
        ])

    utils.teardown_spark()
def _run_terasort():
    jar_url = TERASORT_JAR
    utils.run_tests(app_url=jar_url,
                    app_args="hdfs:///terasort_in hdfs:///terasort_out",
                    expected_output="",
                    app_name=SOAK_SPARK_APP_NAME,
                    args=(["--class", "com.github.ehiggs.spark.terasort.TeraSort"]
                          + COMMON_ARGS))
def _run_teravalidate():
    jar_url = TERASORT_JAR
    utils.run_tests(app_url=jar_url,
                    app_args="hdfs:///terasort_out hdfs:///terasort_validate",
                    expected_output="partitions are properly sorted",
                    app_name=SOAK_SPARK_APP_NAME,
                    args=(["--class", "com.github.ehiggs.spark.terasort.TeraValidate"]
                          + COMMON_ARGS))
def test_sparkPi(service_name=utils.SPARK_SERVICE_NAME):
    utils.run_tests(
        app_url=utils.SPARK_EXAMPLES,
        app_args="100",
        expected_output="Pi is roughly 3",
        service_name=service_name,
        args=["--class org.apache.spark.examples.SparkPi"])
def _test_spark_docker_image(docker_image):
    utils.upload_dcos_test_jar()
    utils.require_spark(additional_options={'service': {'docker-image': docker_image}})

    expected_groups_count = 12000
    num_mappers = 4
    value_size_bytes = 100
    num_reducers = 4
    sleep = 500

    python_script_path = os.path.join(THIS_DIR, 'jobs', 'python', 'shuffle_app.py')
    python_script_url = utils.upload_file(python_script_path)
    utils.run_tests(
        app_url=python_script_url,
        app_args="{} {} {} {} {}".format(num_mappers, expected_groups_count,
                                         value_size_bytes, num_reducers, sleep),
        expected_output="Groups count: {}".format(expected_groups_count),
        args=[
            "--conf spark.executor.cores=1",
            "--conf spark.cores.max=4",
            "--conf spark.scheduler.minRegisteredResourcesRatio=1",
            "--conf spark.scheduler.maxRegisteredResourcesWaitingTime=3m"
        ])

    utils.teardown_spark()
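# A minimal usage sketch (assumed, not from the original suite): a helper like
# the one above is typically driven by a pytest parametrization over the Docker
# images under test. The image tag below is a placeholder, and the pytest
# import is assumed to exist at module level.
import pytest

@pytest.mark.parametrize('docker_image', [
    'mesosphere/spark:example-tag',  # hypothetical image name
])
def test_docker_image(docker_image):
    _test_spark_docker_image(docker_image)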
def test_packages_flag():
    utils.run_tests(
        app_url=utils.dcos_test_jar_url(),
        app_args="20",
        expected_output="210",
        args=["--packages com.google.guava:guava:23.0",
              "--class ProvidedPackages"])
def test_s3_env():
    creds = s3.get_credentials()
    args = [
        "--conf spark.mesos.driverEnv.AWS_ACCESS_KEY_ID={}".format(creds.access_key),
        "--conf spark.mesos.driverEnv.AWS_SECRET_ACCESS_KEY={}".format(creds.secret_key),
        "--class S3Job"
    ]

    linecount_path = os.path.join(THIS_DIR, 'resources', 'linecount.txt')
    s3.upload_file(linecount_path)

    # download/read linecount.txt only
    utils.run_tests(app_url=utils.dcos_test_jar_url(),
                    app_args="--readUrl {} --countOnly".format(
                        s3.s3n_url('linecount.txt')),
                    expected_output="Read 3 lines",
                    args=args)

    # download/read linecount.txt, reupload as linecount-env.txt
    utils.run_tests(app_url=utils.dcos_test_jar_url(),
                    app_args="--readUrl {} --writeUrl {}".format(
                        s3.s3n_url('linecount.txt'),
                        s3.s3n_url('linecount-env.txt')),
                    expected_output="Read 3 lines",
                    args=args)
    assert len(list(s3.list("linecount-env.txt"))) > 0
def test_foldered_spark(setup_spark):
    # `service_name` and `driver_role` are expected to come from the enclosing
    # module/fixture scope (e.g. values established by the `setup_spark` fixture).
    utils.run_tests(
        app_url=utils.SPARK_EXAMPLES,
        app_args="100",
        expected_output="Pi is roughly 3",
        service_name=service_name,
        driver_role=driver_role,
        args=["--class org.apache.spark.examples.SparkPi"])
def test_cni():
    utils.run_tests(app_url=utils.SPARK_EXAMPLES,
                    app_args="",
                    expected_output="Pi is roughly 3",
                    args=[
                        "--conf spark.mesos.network.name=dcos",
                        "--class org.apache.spark.examples.SparkPi"
                    ])
def test_multi_arg_confs(service_name=utils.SPARK_SERVICE_NAME):
    utils.run_tests(
        app_url=utils.dcos_test_jar_url(),
        app_args="",
        expected_output="spark.driver.extraJavaOptions,-XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Dparam3=\"valA valB\"",
        service_name=service_name,
        args=["--conf spark.driver.extraJavaOptions='-XX:+PrintGCDetails -XX:+PrintGCTimeStamps -Dparam3=\\\"valA valB\\\"'",
              "--class MultiConfs"])
def _run_teragen():
    jar_url = TERASORT_JAR
    input_size = os.getenv('TERASORT_INPUT_SIZE', '1g')
    utils.run_tests(app_url=jar_url,
                    app_args="{} hdfs:///terasort_in".format(input_size),
                    expected_output="Number of records written",
                    app_name=SOAK_SPARK_APP_NAME,
                    args=(["--class", "com.github.ehiggs.spark.terasort.TeraGen"]
                          + COMMON_ARGS))
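# A hedged sketch (assumed, not from the original suite) of how the soak-test
# phases above fit together: the HDFS paths chain generate -> sort -> validate,
# so the phases must run in this order.
def test_terasort_suite():
    _run_teragen()
    _run_terasort()
    _run_teravalidate()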
def test_cli_multiple_spaces():
    utils.run_tests(app_url=utils.SPARK_EXAMPLES,
                    app_args="30",
                    expected_output="Pi is roughly 3",
                    args=[
                        "--conf spark.cores.max=2",
                        "--class org.apache.spark.examples.SparkPi"
                    ])
def test_python():
    python_script_path = os.path.join(THIS_DIR, 'jobs', 'python', 'pi_with_include.py')
    python_script_url = utils.upload_file(python_script_path)
    py_file_path = os.path.join(THIS_DIR, 'jobs', 'python', 'PySparkTestInclude.py')
    py_file_url = utils.upload_file(py_file_path)
    utils.run_tests(app_url=python_script_url,
                    app_args="30",
                    expected_output="Pi is roughly 3",
                    args=["--py-files {}".format(py_file_url)])
def test_pipeline(kerberos_flag, stop_count, jar_uri, keytab_secret,
                  spark_service_name, jaas_uri=None):
    stop_count = str(stop_count)
    kerberized = kerberos_flag == "true"
    broker_dns = sdk_cmd.svc_cli(KAFKA_PACKAGE_NAME, KAFKA_SERVICE_NAME,
                                 'endpoints broker', json=True)['dns'][0]
    topic = "top1"

    big_file, big_file_url = "file:///mnt/mesos/sandbox/big.txt", "http://norvig.com/big.txt"

    # arguments to the application
    producer_args = " ".join([broker_dns, big_file, topic, kerberos_flag])

    uris = "spark.mesos.uris={}".format(big_file_url)

    if kerberized and jaas_uri is None:
        _uri = upload_jaas()
        uris += ",{}".format(_uri)
    elif jaas_uri is not None:
        # Only append a JAAS URI when one was actually supplied; the original
        # unconditional else-branch appended the string "None" here.
        uris += ",{}".format(jaas_uri)

    common_args = [
        "--conf", "spark.mesos.containerizer=mesos",
        "--conf", "spark.scheduler.maxRegisteredResourcesWaitingTime=2400s",
        "--conf", "spark.scheduler.minRegisteredResourcesRatio=1.0",
        "--conf", uris
    ]

    kerberos_args = get_kerberized_kafka_spark_conf(spark_service_name, keytab_secret)

    producer_config = ["--conf", "spark.cores.max=2",
                       "--conf", "spark.executor.cores=1",
                       "--class", "KafkaFeeder"] + common_args
    if kerberized:
        producer_config += kerberos_args

    producer_id = utils.submit_job(app_url=jar_uri,
                                   app_args=producer_args,
                                   service_name=spark_service_name,
                                   args=producer_config)

    sdk_tasks.check_running(KAFKA_SERVICE_NAME, 1, timeout_seconds=600)

    consumer_config = ["--conf", "spark.cores.max=2",
                       "--conf", "spark.executor.cores=1",
                       "--class", "KafkaConsumer"] + common_args
    if kerberized:
        consumer_config += kerberos_args

    consumer_args = " ".join([broker_dns, topic, stop_count, kerberos_flag])

    try:
        utils.run_tests(app_url=jar_uri,
                        app_args=consumer_args,
                        expected_output="Read {} words".format(stop_count),
                        service_name=spark_service_name,
                        args=consumer_config)
    finally:
        utils.kill_driver(producer_id, spark_service_name)
def test_jars_flag(service_name=utils.SPARK_SERVICE_NAME):
    uploaded_jar_url = utils.dcos_test_jar_url()
    jar_name = uploaded_jar_url.split("/")[-1]  # dcos-spark-scala-assembly-XX-SNAPSHOT.jar
    utils.run_tests(
        app_url=utils.SPARK_EXAMPLES,  # submit an app that does not include class 'MultiConfs'
        app_args="",
        expected_output="spark.driver.extraClassPath,/mnt/mesos/sandbox/{}".format(jar_name),
        service_name=service_name,
        args=["--jars {}".format(uploaded_jar_url),
              "--class MultiConfs"])
def test_jar(service_name=utils.SPARK_SERVICE_NAME):
    master_url = ("https" if sdk_utils.is_strict_mode() else "http") + "://leader.mesos:5050"
    spark_job_runner_args = '{} dcos \\"*\\" spark:only 2 --auth-token={}'.format(
        master_url,
        shakedown.dcos_acs_token())
    utils.run_tests(app_url=utils.upload_mesos_test_jar(),
                    app_args=spark_job_runner_args,
                    expected_output="All tests passed",
                    service_name=service_name,
                    args=['--class com.typesafe.spark.test.mesos.framework.runners.SparkJobRunner'])
def test_history(kerberized_spark, hdfs_with_kerberos, setup_history_server):
    job_args = [
        "--class", "org.apache.spark.examples.SparkPi",
        "--conf", "spark.eventLog.enabled=true",
        "--conf", "spark.eventLog.dir=hdfs://hdfs{}".format(HDFS_HISTORY_DIR)
    ]
    utils.run_tests(app_url=utils.SPARK_EXAMPLES,
                    app_args="100",
                    expected_output="Pi is roughly 3",
                    service_name="spark",
                    args=(job_args + SPARK_SUBMIT_HDFS_KERBEROS_ARGS))
def test_history():
    job_args = [
        "--class", "org.apache.spark.examples.SparkPi",
        "--conf", "spark.eventLog.enabled=true",
        "--conf", "spark.eventLog.dir=hdfs://hdfs/history"
    ]
    utils.run_tests(app_url=utils.SPARK_EXAMPLES,
                    app_args="100",
                    expected_output="Pi is roughly 3",
                    app_name="/spark",
                    args=(job_args + KERBEROS_ARGS))
def test_value_secret():
    secret_value = "secret-value"
    # `auth_token` is expected to be defined at module scope.
    utils.run_tests(
        app_url=utils.dcos_test_jar_url(),
        app_args=auth_token,
        expected_output=secret_value,
        args=[
            "--conf=spark.mesos.driver.secret.values={}".format(secret_value),
            "--conf=spark.mesos.driver.secret.envkeys=SECRET_ENV_KEY",
            "--class SecretConfs"
        ])
def test_rpc_auth():
    secret_name = "sparkauth"
    sdk_security.delete_secret(secret_name)
    rc, _, _ = sdk_cmd.run_raw_cli("{} --verbose secret /{}".format(
        utils.SPARK_PACKAGE_NAME, secret_name))
    assert rc == 0, "Failed to generate Spark auth secret"

    utils.run_tests(
        app_url=utils.SPARK_EXAMPLES,
        app_args="100",
        expected_output="Pi is roughly 3",
        service_name=utils.SPARK_SERVICE_NAME,
        args=["--executor-auth-secret {}".format(secret_name),
              "--class org.apache.spark.examples.SparkPi"])
def test_jar(app_name=utils.SPARK_APP_NAME):
    master_url = ("https" if utils.is_strict() else "http") + "://leader.mesos:5050"
    spark_job_runner_args = '{} dcos \\"*\\" spark:only 2 --auth-token={}'.format(
        master_url,
        shakedown.dcos_acs_token())
    jar_url = utils.upload_file(os.getenv('TEST_JAR_PATH'))
    utils.run_tests(
        app_url=jar_url,
        app_args=spark_job_runner_args,
        expected_output="All tests passed",
        app_name=app_name,
        args=["--class",
              'com.typesafe.spark.test.mesos.framework.runners.SparkJobRunner'])
def test_env_based_ref_secret():
    secret_path = "/spark/secret-name"
    secret_value = "secret-value"
    dcos_utils.delete_secret(secret_path)
    dcos_utils.create_secret(secret_path, secret_value, False)
    try:
        utils.run_tests(
            app_url=utils.dcos_test_jar_url(),
            app_args=auth_token,
            expected_output=secret_value,
            args=[
                "--conf=spark.mesos.driver.secret.names={}".format(secret_path),
                "--conf=spark.mesos.driver.secret.envkeys=SECRET_ENV_KEY",
                "--class SecretConfs"
            ])
    finally:
        dcos_utils.delete_secret(secret_path)
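# A hedged sketch of the `dcos_utils` helpers used above and in the file-based
# test below, assuming they shell out to the DC/OS security CLI the same way
# other tests in this suite do. The real implementation may differ; the third
# argument selects --value-file (file contents) versus --value (literal value).
def create_secret(path, value, file_based):
    flag = '--value-file' if file_based else '--value'
    shakedown.run_dcos_command(
        'security secrets create {} {} {}'.format(path, flag, value))

def delete_secret(path):
    shakedown.run_dcos_command('security secrets delete {}'.format(path))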
def test_rpc_auth():
    secret_name = "sparkauth"
    rc, stdout, stderr = sdk_cmd.run_raw_cli("{pkg} secret /{secret}".format(
        pkg=utils.SPARK_PACKAGE_NAME, secret=secret_name))
    assert rc == 0, "Failed to generate Spark auth secret, stderr {err} stdout {out}".format(
        err=stderr, out=stdout)

    args = ["--executor-auth-secret", secret_name,
            "--class", "org.apache.spark.examples.SparkPi"]

    utils.run_tests(app_url=utils.SPARK_EXAMPLES,
                    app_args="100",
                    expected_output="Pi is roughly 3",
                    app_name="/spark",
                    args=args)
def test_s3_secrets():
    linecount_path = os.path.join(THIS_DIR, 'resources', 'linecount.txt')
    s3.upload_file(linecount_path)

    creds = s3.get_credentials()

    def make_credential_secret(path, val):
        sdk_security.delete_secret(path)
        rc, stdout, stderr = sdk_cmd.run_raw_cli(
            "security secrets create /{} -v {}".format(path, val))
        assert rc == 0, "Failed to create secret {}, stderr: {}, stdout: {}".format(
            path, stderr, stdout)

    aws_access_key_path = "aws_access_key_id"
    make_credential_secret(aws_access_key_path, creds.access_key)

    aws_secret_key_path = "aws_secret_access_key"
    make_credential_secret(aws_secret_key_path, creds.secret_key)

    args = [
        "--conf spark.mesos.containerizer=mesos",
        "--conf spark.mesos.driver.secret.names=/{key},/{secret}".format(
            key=aws_access_key_path, secret=aws_secret_key_path),
        "--conf spark.mesos.driver.secret.envkeys=AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY",
        "--class S3Job"
    ]
    try:
        # download/read linecount.txt only
        utils.run_tests(app_url=utils.dcos_test_jar_url(),
                        app_args="--readUrl {} --countOnly".format(
                            s3.s3n_url('linecount.txt')),
                        expected_output="Read 3 lines",
                        args=args)

        # download/read linecount.txt, reupload as linecount-secret.txt
        utils.run_tests(app_url=utils.dcos_test_jar_url(),
                        app_args="--readUrl {} --writeUrl {}".format(
                            s3.s3n_url('linecount.txt'),
                            s3.s3n_url('linecount-secret.txt')),
                        expected_output="Read 3 lines",
                        args=args)
        assert len(list(s3.list("linecount-secret.txt"))) > 0
    finally:
        sdk_security.delete_secret(aws_access_key_path)
        sdk_security.delete_secret(aws_secret_key_path)
def test_file_based_ref_secret():
    secret_path = "/spark/secret-name"
    secret_file_name = "secret.file"
    secret_value = "secret-value"
    with open(secret_file_name, 'w') as secret_file:
        secret_file.write(secret_value)
    dcos_utils.delete_secret(secret_path)
    dcos_utils.create_secret(secret_path, secret_file_name, True)
    try:
        utils.run_tests(
            app_url=utils.dcos_test_jar_url(),
            app_args=auth_token,
            expected_output=secret_value,
            args=[
                "--conf=spark.mesos.driver.secret.names={}".format(secret_path),
                "--conf=spark.mesos.driver.secret.filenames={}".format(secret_file_name),
                "--class SecretConfs"
            ])
    finally:
        dcos_utils.delete_secret(secret_path)
        if os.path.exists(secret_file_name):
            os.remove(secret_file_name)
def test_s3():
    def make_credential_secret(envvar, secret_path):
        rc, stdout, stderr = sdk_cmd.run_raw_cli(
            "security secrets create {p} -v {e}".format(p=secret_path,
                                                        e=os.environ[envvar]))
        assert rc == 0, "Failed to create secret {secret} from envvar {envvar}, stderr: {err}, stdout: {out}".format(
            secret=secret_path, envvar=envvar, err=stderr, out=stdout)

    LOGGER.info("Creating AWS secrets")
    aws_access_key_secret_path = "aws_access_key_id"
    aws_secret_access_key_path = "aws_secret_access_key"

    make_credential_secret(envvar="AWS_ACCESS_KEY_ID",
                           secret_path="/{}".format(aws_access_key_secret_path))
    make_credential_secret(envvar="AWS_SECRET_ACCESS_KEY",
                           secret_path="/{}".format(aws_secret_access_key_path))

    linecount_path = os.path.join(THIS_DIR, 'resources', 'linecount.txt')
    s3.upload_file(linecount_path)

    # Credentials via file-based secrets: read linecount.txt, write linecount-out.
    app_args = "--readUrl {} --writeUrl {}".format(s3.s3n_url('linecount.txt'),
                                                   s3.s3n_url("linecount-out"))
    args = [
        "--conf", "spark.mesos.containerizer=mesos",
        "--conf", "spark.mesos.driver.secret.names=/{key},/{secret}".format(
            key=aws_access_key_secret_path, secret=aws_secret_access_key_path),
        "--conf", "spark.mesos.driver.secret.envkeys=AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY",
        "--class", "S3Job"
    ]
    utils.run_tests(app_url=utils._scala_test_jar_url(),
                    app_args=app_args,
                    expected_output="Read 3 lines",
                    args=args)

    assert len(list(s3.list("linecount-out"))) > 0

    # Credentials via driver environment variables: count-only read.
    env_args = [
        "--conf", "spark.mesos.driverEnv.AWS_ACCESS_KEY_ID={}".format(
            os.environ["AWS_ACCESS_KEY_ID"]),
        "--conf", "spark.mesos.driverEnv.AWS_SECRET_ACCESS_KEY={}".format(
            os.environ["AWS_SECRET_ACCESS_KEY"]),
        "--class", "S3Job"
    ]
    app_args = "--readUrl {} --countOnly".format(s3.s3n_url('linecount.txt'))
    utils.run_tests(app_url=utils._scala_test_jar_url(),
                    app_args=app_args,
                    expected_output="Read 3 lines",
                    args=env_args)

    # Same job again with the application flags in the opposite order.
    app_args = "--countOnly --readUrl {}".format(s3.s3n_url('linecount.txt'))
    utils.run_tests(app_url=utils._scala_test_jar_url(),
                    app_args=app_args,
                    expected_output="Read 3 lines",
                    args=env_args)
def test_r():
    r_script_path = os.path.join(THIS_DIR, 'jobs', 'R', 'dataframe.R')
    r_script_url = utils.upload_file(r_script_path)
    utils.run_tests(app_url=r_script_url,
                    app_args='',
                    expected_output="Justin")
def test_pipeline(kerberos_flag, stop_count, jar_uri, keytab_secret,
                  spark_app_name, jaas_uri=None):
    stop_count = str(stop_count)
    kerberized = kerberos_flag == "true"
    broker_dns = _kafka_broker_dns()
    topic = "top1"

    big_file, big_file_url = "file:///mnt/mesos/sandbox/big.txt", "http://norvig.com/big.txt"

    # arguments to the application
    producer_args = " ".join([broker_dns, big_file, topic, kerberos_flag])

    uris = "spark.mesos.uris={}".format(big_file_url)

    if kerberized and jaas_uri is None:
        jaas_path = os.path.join(THIS_DIR, "resources", "spark-kafka-client-jaas.conf")
        s3.upload_file(jaas_path)
        _uri = s3.s3_http_url("spark-kafka-client-jaas.conf")
        uris += ",{}".format(_uri)
    elif jaas_uri is not None:
        # Only append a JAAS URI when one was actually supplied; the original
        # unconditional else-branch appended the string "None" here.
        uris += ",{}".format(jaas_uri)

    common_args = [
        "--conf", "spark.mesos.containerizer=mesos",
        "--conf", "spark.scheduler.maxRegisteredResourcesWaitingTime=2400s",
        "--conf", "spark.scheduler.minRegisteredResourcesRatio=1.0",
        "--conf", uris
    ]

    kerberos_args = [
        "--conf", "spark.mesos.driver.secret.names={}".format(keytab_secret),
        "--conf", "spark.mesos.driver.secret.filenames=kafka-client.keytab",
        "--conf", "spark.mesos.executor.secret.names={}".format(keytab_secret),
        "--conf", "spark.mesos.executor.secret.filenames=kafka-client.keytab",
        "--conf", "spark.mesos.task.labels=DCOS_SPACE:{}".format(utils.SPARK_APP_NAME),
        "--conf", "spark.executorEnv.KRB5_CONFIG_BASE64={}".format(KAFKA_KRB5),
        "--conf", "spark.mesos.driverEnv.KRB5_CONFIG_BASE64={}".format(KAFKA_KRB5),
        "--conf", "spark.driver.extraJavaOptions=-Djava.security.auth.login.config="
                  "/mnt/mesos/sandbox/spark-kafka-client-jaas.conf",
        "--conf", "spark.executor.extraJavaOptions="
                  "-Djava.security.auth.login.config=/mnt/mesos/sandbox/spark-kafka-client-jaas.conf",
    ]

    producer_config = ["--conf", "spark.cores.max=2",
                       "--conf", "spark.executor.cores=2",
                       "--class", "KafkaFeeder"] + common_args
    if kerberized:
        producer_config += kerberos_args

    producer_id = utils.submit_job(app_url=jar_uri,
                                   app_args=producer_args,
                                   app_name=spark_app_name,
                                   args=producer_config)

    shakedown.wait_for(lambda: _producer_launched(),
                       ignore_exceptions=False,
                       timeout_seconds=600)
    shakedown.wait_for(lambda: utils.is_service_ready(KAFKA_SERVICE_NAME, 1),
                       ignore_exceptions=False,
                       timeout_seconds=600)

    consumer_config = ["--conf", "spark.cores.max=4",
                       "--class", "KafkaConsumer"] + common_args
    if kerberized:
        consumer_config += kerberos_args

    consumer_args = " ".join([broker_dns, topic, stop_count, kerberos_flag])

    try:
        utils.run_tests(app_url=jar_uri,
                        app_args=consumer_args,
                        expected_output="Read {} words".format(stop_count),
                        app_name=spark_app_name,
                        args=consumer_config)
    finally:
        # Always clean up the long-running producer driver, even if the
        # consumer assertion fails.
        utils.kill_driver(producer_id, spark_app_name)
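# A minimal usage sketch (assumed, not from the original suite): driving the
# kerberized Kafka pipeline above with a pre-uploaded keytab secret. The jar
# URL and secret name are hypothetical placeholders; the secret-name prefix
# follows the __dcos_base64__ convention used elsewhere in these tests.
def test_kafka_pipeline_kerberized():
    test_pipeline(kerberos_flag="true",
                  stop_count=1000,
                  jar_uri="https://example.com/dcos-spark-kafka-tests.jar",  # hypothetical
                  keytab_secret="__dcos_base64___keytab",                    # hypothetical
                  spark_app_name=utils.SPARK_APP_NAME)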