Code Example #1
def setup_spark(hdfs_with_kerberos, setup_history_server,
                configure_security_spark, configure_universe):
    try:
        utils.require_spark(use_hdfs=True, use_history=True)
        yield
    finally:
        utils.teardown_spark()
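Example #1 is a module-scoped setup fixture in the yield/teardown style used throughout these tests. Below is a minimal sketch of how such a fixture is typically registered and consumed under pytest; the @pytest.fixture decorator (stripped by the example extractor), the EXAMPLES_JAR_URL constant, and the SparkPi invocation are illustrative assumptions, while utils.require_spark, utils.run_tests, and utils.teardown_spark follow the usage shown in the surrounding examples.

import pytest

# Hypothetical jar URL; the real tests build this from a template (see Example #2).
EXAMPLES_JAR_URL = "https://downloads.example.com/spark-examples.jar"


@pytest.fixture(scope="module", autouse=True)
def spark_with_hdfs(hdfs_with_kerberos, setup_history_server,
                    configure_security_spark, configure_universe):
    # Same pattern as Example #1: install the dispatcher on setup, always tear it down.
    try:
        utils.require_spark(use_hdfs=True, use_history=True)
        yield
    finally:
        utils.teardown_spark()


def test_spark_pi(spark_with_hdfs):
    # Submit a job through the dispatcher installed by the fixture.
    utils.run_tests(
        app_url=EXAMPLES_JAR_URL,
        app_args="100",
        expected_output="Pi is roughly 3",
        args=["--class org.apache.spark.examples.SparkPi"])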
Code Example #2
def _test_spark_docker_image(dist):
    utils.require_spark(
        additional_options={'service': {
            'docker-image': dist['image']
        }})
    example_jar_url = EXAMPLES_JAR_PATH_TEMPLATE.format(dist['scala_version'])

    expected_groups_count = 12000
    num_mappers = 4
    value_size_bytes = 100
    num_reducers = 4

    utils.run_tests(
        app_url=example_jar_url,
        app_args=
        f"{num_mappers} {expected_groups_count} {value_size_bytes} {num_reducers}",
        expected_output=str(expected_groups_count),
        args=[
            "--class org.apache.spark.examples.GroupByTest",
            "--conf spark.executor.cores=1", "--conf spark.cores.max=4",
            "--conf spark.scheduler.minRegisteredResourcesRatio=1",
            "--conf spark.scheduler.maxRegisteredResourcesWaitingTime=3m"
        ])

    utils.teardown_spark()
Code Example #3
def _test_spark_docker_image(docker_image):
    utils.upload_dcos_test_jar()
    utils.require_spark(
        additional_options={'service': {
            'docker-image': docker_image
        }})

    expected_groups_count = 12000
    num_mappers = 4
    value_size_bytes = 100
    num_reducers = 4
    sleep = 500

    python_script_path = os.path.join(THIS_DIR, 'jobs', 'python',
                                      'shuffle_app.py')
    python_script_url = utils.upload_file(python_script_path)
    utils.run_tests(
        app_url=python_script_url,
        app_args="{} {} {} {} {}".format(num_mappers, expected_groups_count,
                                         value_size_bytes, num_reducers,
                                         sleep),
        expected_output="Groups count: {}".format(expected_groups_count),
        args=[
            "--conf spark.executor.cores=1", "--conf spark.cores.max=4",
            "--conf spark.scheduler.minRegisteredResourcesRatio=1",
            "--conf spark.scheduler.maxRegisteredResourcesWaitingTime=3m"
        ])

    utils.teardown_spark()
Code Example #4
def setup_spark(configure_security_spark, configure_universe):
    try:
        utils.upload_dcos_test_jar()
        utils.require_spark()
        yield
    finally:
        utils.teardown_spark()
Code Example #5
def setup_spark(configure_security_spark, configure_universe):
    try:
        utils.upload_dcos_test_jar()
        utils.require_spark()

        # We need to pick two nodes with the maximum unused CPU to guarantee that Driver and Executor
        # are not running on the same host.
        available_cpus = []
        cluster_agents = sdk_cmd.cluster_request('GET', '/mesos/slaves').json()
        for agent in cluster_agents["slaves"]:
            available_cpus.append(
                int(float(agent["resources"]["cpus"])) -
                int(float(agent["used_resources"]["cpus"])))

        available_cpus.sort(reverse=True)
        assert len(available_cpus) >= 3, \
            "Expected 3 or more nodes in the cluster to accommodate Dispatcher, " \
            "Driver, and Executor each on a separate node"

        global driver_cpus
        driver_cpus = available_cpus[0]

        global executor_cpus
        executor_cpus = available_cpus[1]

        log.info(
            f"{driver_cpus} cores will be used for driver, {executor_cpus} cores will be used for executor"
        )
        yield
    finally:
        utils.teardown_spark()
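Example #5 stores the two largest unused-CPU counts in module globals. The following is a hedged sketch of how a test might consume them when submitting a job: the jar URL helper, class name, and expected output are hypothetical, the --conf keys are standard Spark properties, and utils.run_tests follows the signature from Example #2.

def test_driver_executor_on_separate_nodes(setup_spark):
    # Hypothetical test body: size the driver and the single executor to fill
    # whole agents, so Mesos cannot co-locate them on one node.
    utils.run_tests(
        app_url=utils.dcos_test_jar_url(),  # assumption: helper returning the uploaded test jar URL
        app_args="",
        expected_output="Job completed successfully",  # hypothetical output marker
        args=[
            "--class MonteCarloPi",  # hypothetical test class from the dcos test jar
            "--conf spark.driver.cores={}".format(driver_cpus),
            "--conf spark.executor.cores={}".format(executor_cpus),
            "--conf spark.cores.max={}".format(executor_cpus),
        ])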
Code Example #6
File: test_hdfs.py, Project: zencircle/spark-build
def setup_spark(hdfs_with_kerberos, setup_history_server,
                configure_security_spark, configure_universe):
    try:
        additional_options = {
            "hdfs": {
                "config-url":
                "http://api.{}.marathon.l4lb.thisdcos.directory/v1/endpoints".
                format(HDFS_SERVICE_NAME)
            },
            "security": {
                "kerberos": {
                    "enabled": True,
                    "realm": sdk_auth.REALM,
                    "kdc": {
                        "hostname": hdfs_with_kerberos.get_host(),
                        "port": int(hdfs_with_kerberos.get_port())
                    }
                }
            },
            "service": {
                "spark-history-server-url":
                shakedown.dcos_url_path(
                    "/service/{}".format(HISTORY_SERVICE_NAME))
            }
        }
        utils.require_spark(additional_options=additional_options)
        yield
    finally:
        utils.teardown_spark()
Code Example #7
def test_foldered_spark():
    service_name = utils.FOLDERED_SPARK_SERVICE_NAME
    zk = 'spark_mesos_dispatcher__path_to_spark'
    utils.require_spark(service_name=service_name, zk=zk)
    test_sparkPi(service_name=service_name)
    utils.teardown_spark(service_name=service_name, zk=zk)
    # reinstall CLI so that it's available for the following tests:
    sdk_cmd.run_cli('package install --cli {} --yes'.format(utils.SPARK_PACKAGE_NAME))
Code Example #8
def setup_spark(configure_universe, configure_role_permissions):
    try:
        utils.require_spark()
        zk = 'spark_mesos_dispatcher__path_to_spark'
        # service_name is assumed to be defined at module level
        # (e.g. utils.FOLDERED_SPARK_SERVICE_NAME, as in Example #7).
        utils.require_spark(service_name=service_name, zk=zk)
        yield
    finally:
        utils.teardown_spark(service_name=service_name, zk=zk)
Code Example #9
def setup_spark(configure_security_spark, configure_universe):
    try:
        utils.require_spark()
        zk = 'spark_mesos_dispatcher__path_to_spark'
        # service_name: module-level name, as noted in Example #8.
        utils.require_spark(service_name=service_name, zk=zk)
        yield
    finally:
        utils.teardown_spark(service_name=service_name, zk=zk)
Code Example #10
File: test_spark.py, Project: kndarp/spark-build
def setup_spark(configure_security, configure_universe):
    try:
        utils.upload_dcos_test_jar()
        utils.require_spark()
        sdk_cmd.run_cli('package install --cli dcos-enterprise-cli --yes')
        yield
    finally:
        utils.teardown_spark()
Code Example #11
def setup_spark(configure_security, configure_universe):
    try:
        utils.require_spark()
        utils.upload_file(os.environ["SCALA_TEST_JAR_PATH"])
        shakedown.run_dcos_command(
            'package install --cli dcos-enterprise-cli --yes')
        yield
    finally:
        utils.teardown_spark()
Code Example #12
File: test_gpu.py, Project: stuartpa/spark-build
def setup_spark(configure_security_spark, configure_universe):
    try:
        spark_utils.require_spark(
            user="******",  # Run as root on centos
            use_bootstrap_ip=True)  # Needed on GPU nodes
        spark_utils.upload_file(os.environ["SCALA_TEST_JAR_PATH"])
        yield
    finally:
        spark_utils.teardown_spark()
Code Example #13
def setup_spark(kerberized_kafka, configure_security_spark,
                configure_universe):
    try:
        # need to do this here also in case this test is run first
        # and the jar hasn't been updated
        utils.upload_file(os.environ["SCALA_TEST_JAR_PATH"])
        utils.require_spark()
        yield
    finally:
        utils.teardown_spark()
Code Example #14
def setup_spark(configure_user_permissions, configure_universe, use_ucr_containerizer, user):
    options = {
        "service": {
            "name": SERVICE_NAME,
            "user": user,
            "UCR_containerizer": use_ucr_containerizer
        }
    }

    try:
        utils.upload_dcos_test_jar()
        utils.require_spark(service_name=SERVICE_NAME, additional_options=options, zk=DISPATCHER_ZK)
        yield
    finally:
        utils.teardown_spark(service_name=SERVICE_NAME, zk=DISPATCHER_ZK)
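In Example #14 the user and use_ucr_containerizer arguments are themselves fixtures supplied elsewhere in the test module. A minimal sketch of how they could be provided via parametrized pytest fixtures follows; the parameter values are illustrative assumptions, not taken from the original project.

import pytest


@pytest.fixture(params=["root", "nobody"])
def user(request):
    # Service user the dispatcher (and submitted jobs) should run as.
    return request.param


@pytest.fixture(params=[True, False])
def use_ucr_containerizer(request):
    # Toggle between the UCR and Docker containerizers for the dispatcher.
    return request.param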
Code Example #15
File: test_soak.py, Project: zencircle/spark-build
def setup_module(module):
    if not shakedown.package_installed('spark', SOAK_SPARK_SERVICE_NAME):
        additional_options = {
            "hdfs": {
                "config-url": "http://api.hdfs.marathon.l4lb.thisdcos.directory/v1/endpoints"
            },
            "security": {
                "kerberos": {
                    "enabled": True,
                    "realm": "LOCAL",
                    "kdc": {
                        "hostname": "kdc.marathon.autoip.dcos.thisdcos.directory",
                        "port": 2500
                    }
                }
            }
        }
        utils.require_spark(service_name=SOAK_SPARK_SERVICE_NAME, additional_options=additional_options)
Code Example #16
def test_dispatcher_placement(configure_universe):
    constraint = [
        "hostname", "CLUSTER",
        sdk_agents.get_private_agents().pop()["hostname"]
    ]
    service_name = "spark"
    log.info("Running test: service_name='{}', constraints=[[{}]]".format(
        service_name, ','.join(constraint)))

    options = {"service": {"name": service_name, "constraints": [constraint]}}
    try:
        utils.require_spark(service_name=service_name,
                            additional_options=options)

        dispatcher_host = sdk_marathon.get_scheduler_host(service_name)
        log.info("Dispatcher Host: {}".format(dispatcher_host))
        assert constraint[2] == dispatcher_host
    finally:
        utils.teardown_spark(service_name=service_name)
Code Example #17
def setup_spark(configure_universe, configure_role_permissions, role,
                enforce_role):
    log.info(
        "Installing Spark: service_name='{}', role='{}', enforce_role='{}'".
        format(SERVICE_NAME, role, enforce_role))
    options = {
        "service": {
            "name": SERVICE_NAME,
            "role": role,
            "enforce_role": enforce_role
        }
    }

    try:
        utils.upload_dcos_test_jar()
        utils.require_spark(service_name=SERVICE_NAME,
                            additional_options=options,
                            zk=DISPATCHER_ZK)
        yield
    finally:
        utils.teardown_spark(service_name=SERVICE_NAME, zk=DISPATCHER_ZK)
Code Example #18
def test_unique_vips():

    @retrying.retry(wait_exponential_multiplier=1000, stop_max_attempt_number=7) # ~2 minutes
    def verify_ip_is_reachable(ip):
        ok, _ = sdk_cmd.master_ssh("curl -v {}".format(ip))
        assert ok

    spark1_service_name = "test/groupa/spark"
    spark2_service_name = "test/groupb/spark"
    try:
        utils.require_spark(spark1_service_name)
        utils.require_spark(spark2_service_name)

        dispatcher1_ui_ip = sdk_hosts.vip_host("marathon", "dispatcher.{}".format(spark1_service_name), 4040)
        dispatcher2_ui_ip = sdk_hosts.vip_host("marathon", "dispatcher.{}".format(spark2_service_name), 4040)

        verify_ip_is_reachable(dispatcher1_ui_ip)
        verify_ip_is_reachable(dispatcher2_ui_ip)
    finally:
        utils.teardown_spark(service_name=spark1_service_name)
        utils.teardown_spark(service_name=spark2_service_name)
Code Example #19
def test_unique_vips():
    spark1_service_name = "test/groupa/spark"
    spark2_service_name = "test/groupb/spark"
    try:
        utils.require_spark(spark1_service_name)
        utils.require_spark(spark2_service_name)

        dispatcher1_ui = sdk_hosts.vip_host(
            "marathon", "dispatcher.{}".format(spark1_service_name), 4040)
        dispatcher2_ui = sdk_hosts.vip_host(
            "marathon", "dispatcher.{}".format(spark2_service_name), 4040)

        # verify dispatcher-ui is reachable at VIP
        ok, _ = sdk_cmd.master_ssh("curl {}".format(dispatcher1_ui))
        assert ok

        ok, _ = sdk_cmd.master_ssh("curl {}".format(dispatcher2_ui))
        assert ok
    finally:
        sdk_install.uninstall(utils.SPARK_PACKAGE_NAME, spark1_service_name)
        sdk_install.uninstall(utils.SPARK_PACKAGE_NAME, spark2_service_name)
Code Example #20
File: fixture_hdfs.py, Project: stuartpa/spark-build
def kerberized_spark(setup_history_server, hdfs_with_kerberos,
                     kerberos_options, configure_security_spark,
                     configure_universe):
    try:
        additional_options = {
            "hdfs": {
                "config-url":
                "http://api.{}.marathon.l4lb.thisdcos.directory/v1/endpoints".
                format(HDFS_SERVICE_NAME)
            },
            "security": kerberos_options,
            "service": {
                "spark-history-server-url":
                shakedown.dcos_url_path(
                    "/service/{}".format(HISTORY_SERVICE_NAME))
            }
        }

        utils.require_spark(additional_options=additional_options)
        yield
    finally:
        utils.teardown_spark()
Code Example #21
def test_task_stdout():
    service_name = utils.FOLDERED_SPARK_SERVICE_NAME

    try:
        task_id = service_name.lstrip("/").replace("/", "_")
        utils.require_spark(service_name=service_name)

        task = sdk_cmd._get_task_info(task_id)
        if not task:
            raise Exception("Failed to get '{}' task".format(task_id))

        task_sandbox_path = sdk_cmd.get_task_sandbox_path(task_id)
        if not task_sandbox_path:
            raise Exception("Failed to get '{}' sandbox path".format(task_id))
        agent_id = task["slave_id"]

        task_sandbox = sdk_cmd.cluster_request(
            "GET", "/slave/{}/files/browse?path={}".format(agent_id, task_sandbox_path)
        ).json()
        stdout_file = [f for f in task_sandbox if f["path"].endswith("/stdout")][0]
        assert stdout_file["size"] > 0, "stdout file should have content"
    finally:
        sdk_install.uninstall(utils.SPARK_PACKAGE_NAME, service_name)
Code Example #22
File: test_quota.py, Project: mediapills/spark-build
def setup_spark(configure_universe, configure_role_permissions, service_name,
                role, enforce_role):
    log.info(
        "Installing Spark: service_name='{}', role='{}', enforce_role='{}'".
        format(service_name, role, enforce_role))
    options = {
        "service": {
            "name": service_name,
            "role": role,
            "enforce_role": enforce_role
        }
    }

    dispatcher_zk = get_zk_dispatcher(service_name)

    try:
        utils.upload_dcos_test_jar()
        utils.require_spark(service_name=service_name,
                            additional_options=options,
                            zk=dispatcher_zk)
        yield
    finally:
        utils.teardown_spark(service_name=service_name, zk=dispatcher_zk)
Code Example #23
def setup_module(module):
    sdk_cmd.run_raw_cli("package install {} --yes --cli".format(
        utils.SPARK_PACKAGE_NAME))
    if not shakedown.package_installed('spark', SOAK_SPARK_SERVICE_NAME):
        additional_options = {
            "hdfs": {
                "config-url":
                "http://api.{}.marathon.l4lb.thisdcos.directory/v1/endpoints".
                format(SOAK_HDFS_SERVICE_NAME)
            },
            "security": {
                "kerberos": {
                    "enabled": True,
                    "realm": "LOCAL",
                    "kdc": {
                        "hostname":
                        "kdc.marathon.autoip.dcos.thisdcos.directory",
                        "port": 2500
                    }
                }
            }
        }
        utils.require_spark(service_name=SOAK_SPARK_SERVICE_NAME,
                            additional_options=additional_options)
Code Example #24
def setup_module(module):
    utils.require_spark(service_name=SOAK_SPARK_APP_NAME, use_hdfs=True)
Code Example #25
def setup_module(module):
    utils.upload_dcos_test_jar()
    utils.require_spark()
Code Example #26
def test_marathon_group():
    app_id = utils.FOLDERED_SPARK_APP_NAME
    utils.require_spark(service_name=app_id, marathon_group=app_id)
    test_sparkPi(app_name=app_id)
    LOGGER.info("Uninstalling app_id={}".format(app_id))
Code Example #27
def setup_module(module):
    utils.require_spark()