Example #1
def setup_spark(configure_security_spark, configure_universe):
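    # Upload the DC/OS test JAR and make sure the Spark service is installed before
    # the dependent tests run; tear Spark back down once they finish.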
    try:
        utils.upload_dcos_test_jar()
        utils.require_spark()
        yield
    finally:
        utils.teardown_spark()
Example #2
def setup_spark(configure_security_spark, configure_universe):
    try:
        utils.upload_dcos_test_jar()
        utils.require_spark()

        # Pick the two agents with the most unused CPUs so that the Driver and the Executor
        # are guaranteed to land on different hosts (the Dispatcher occupies a third node).
        available_cpus = []
        cluster_agents = sdk_cmd.cluster_request('GET', '/mesos/slaves').json()
        for agent in cluster_agents["slaves"]:
            available_cpus.append(
                int(float(agent["resources"]["cpus"])) -
                int(float(agent["used_resources"]["cpus"])))

        available_cpus.sort(reverse=True)
        assert len(available_cpus) >= 3, \
            "Expected 3 or more nodes in the cluster to accommodate Dispatcher, " \
            "Driver, and Executor each on a separate node"

        global driver_cpus
        driver_cpus = available_cpus[0]

        global executor_cpus
        executor_cpus = available_cpus[1]

        log.info(
            f"{driver_cpus} cores will be used for driver, {executor_cpus} cores will be used for executor"
        )
        yield
    finally:
        utils.teardown_spark()
Example #3
def _test_spark_docker_image(docker_image):
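    # Install Spark with the given Docker image, run the shuffle_app.py job against it,
    # verify the expected groups count in the job output, then tear Spark down.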
    utils.upload_dcos_test_jar()
    utils.require_spark(
        additional_options={'service': {
            'docker-image': docker_image
        }})

    expected_groups_count = 12000
    num_mappers = 4
    value_size_bytes = 100
    num_reducers = 4
    sleep = 500

    python_script_path = os.path.join(THIS_DIR, 'jobs', 'python',
                                      'shuffle_app.py')
    python_script_url = utils.upload_file(python_script_path)
    utils.run_tests(
        app_url=python_script_url,
        app_args="{} {} {} {} {}".format(num_mappers, expected_groups_count,
                                         value_size_bytes, num_reducers,
                                         sleep),
        expected_output="Groups count: {}".format(expected_groups_count),
        args=[
            "--conf spark.executor.cores=1", "--conf spark.cores.max=4",
            "--conf spark.scheduler.minRegisteredResourcesRatio=1",
            "--conf spark.scheduler.maxRegisteredResourcesWaitingTime=3m"
        ])

    utils.teardown_spark()
Example #4
def setup_spark(configure_security, configure_universe):
    try:
        utils.upload_dcos_test_jar()
        utils.require_spark()
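        # Also install the Enterprise DC/OS CLI subcommands (e.g. `dcos security`)
        # needed by the tests that use this fixture.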
        sdk_cmd.run_cli('package install --cli dcos-enterprise-cli --yes')
        yield
    finally:
        utils.teardown_spark()
Example #5
def setup_spark(configure_user_permissions, configure_universe, use_ucr_containerizer, user):
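    # Run the dispatcher as the given service user, with the UCR containerizer
    # toggled by use_ucr_containerizer.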
    options = {
        "service": {
            "name": SERVICE_NAME,
            "user": user,
            "UCR_containerizer": use_ucr_containerizer
        }
    }

    try:
        utils.upload_dcos_test_jar()
        utils.require_spark(service_name=SERVICE_NAME, additional_options=options, zk=DISPATCHER_ZK)
        yield
    finally:
        utils.teardown_spark(service_name=SERVICE_NAME, zk=DISPATCHER_ZK)
Example #6
def setup_spark(configure_universe, configure_role_permissions, role,
                enforce_role):
    log.info(
        "Installing Spark: service_name='{}', role='{}', enforce_role='{}'".
        format(SERVICE_NAME, role, enforce_role))
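    # Install the dispatcher under the given Mesos role, with role enforcement for
    # submissions toggled by enforce_role.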
    options = {
        "service": {
            "name": SERVICE_NAME,
            "role": role,
            "enforce_role": enforce_role
        }
    }

    try:
        utils.upload_dcos_test_jar()
        utils.require_spark(service_name=SERVICE_NAME,
                            additional_options=options,
                            zk=DISPATCHER_ZK)
        yield
    finally:
        utils.teardown_spark(service_name=SERVICE_NAME, zk=DISPATCHER_ZK)
Example #7
def setup_spark(configure_universe, configure_role_permissions, service_name,
                role, enforce_role):
    log.info(
        "Installing Spark: service_name='{}', role='{}', enforce_role='{}'".
        format(service_name, role, enforce_role))
    options = {
        "service": {
            "name": service_name,
            "role": role,
            "enforce_role": enforce_role
        }
    }

    # Resolve the ZooKeeper node used by this dispatcher, derived from its service name.
    dispatcher_zk = get_zk_dispatcher(service_name)

    try:
        utils.upload_dcos_test_jar()
        utils.require_spark(service_name=service_name,
                            additional_options=options,
                            zk=dispatcher_zk)
        yield
    finally:
        utils.teardown_spark(service_name=service_name, zk=dispatcher_zk)
Example #8
def upload_test_jars(configure_security_spark, configure_universe):
    utils.upload_dcos_test_jar()
Example #9
def test_structured_streaming_recovery(kerberized_spark, kerberized_kafka):
    kafka_brokers = ','.join(
        sdk_cmd.svc_cli(KAFKA_PACKAGE_NAME,
                        KAFKA_SERVICE_NAME,
                        'endpoints broker',
                        json=True)['dns'])
    LOGGER.info("Kafka brokers: {}".format(kafka_brokers))

    _uri = upload_jaas()
    uris = "spark.mesos.uris={}".format(_uri)

    jar_uri = utils.upload_dcos_test_jar()

    kafka_kerberos_args = get_kerberized_kafka_spark_conf(
        utils.SPARK_SERVICE_NAME)
    LOGGER.info("Spark Kerberos configuration for Kafka:\n{}".format(
        '\n'.join(kafka_kerberos_args)))

    common_args = [
        "--conf", "spark.mesos.containerizer=mesos", "--conf",
        "spark.scheduler.maxRegisteredResourcesWaitingTime=2400s", "--conf",
        "spark.scheduler.minRegisteredResourcesRatio=1.0", "--conf", uris
    ] + kafka_kerberos_args

    # configuring streaming job and HDFS folders
    setup_hdfs_paths()

    # running kafka producer
    message_set_a = ["abc"] * 100
    feed_sample_data(jar_uri, kafka_brokers, KAFKA_TEST_TOPIC, common_args,
                     message_set_a)

    spark_submit_args = [
        "--supervise", "--class", "StructuredStreamingWithCheckpointing",
        "--conf", "spark.cores.max=2", "--conf", "spark.executor.cores=1",
        "--conf", "spark.sql.shuffle.partitions=2", "--conf",
        "spark.executor.memory=2g"
    ] + common_args

    application_args = "{} {} {} {}".format(kafka_brokers, KAFKA_TEST_TOPIC,
                                            HDFS_CHECKPOINT_DIR,
                                            SPARK_SECURITY_PROTOCOL)

    # The job is submitted with --supervise (see spark_submit_args above), so the
    # driver will be relaunched after it is killed later in this test.
    driver_task_id = utils.submit_job(app_url=jar_uri,
                                      app_args=application_args,
                                      service_name=utils.SPARK_SERVICE_NAME,
                                      args=(SPARK_SUBMIT_HDFS_KERBEROS_ARGS +
                                            spark_submit_args))

    # Wait until executor is running
    LOGGER.info("Starting supervised driver {}".format(driver_task_id))
    sdk_tasks.check_running(SPARK_APPLICATION_NAME,
                            expected_task_count=1,
                            timeout_seconds=600)

    # validating Structured Streaming topic consumption
    expected_output_a = "{}|  {}".format(message_set_a[0], len(message_set_a))
    LOGGER.info(
        "Validating Structured Streaming topic consumption, waiting for output {}"
        .format(expected_output_a))
    utils.wait_for_running_job_output(driver_task_id, expected_output_a)

    # killing the driver
    service_info = shakedown.get_service(SPARK_APPLICATION_NAME).dict()
    driver_regex = "spark.mesos.driver.frameworkId={}".format(
        service_info['id'])
    sdk_cmd.kill_task_with_pattern(agent_host=service_info['hostname'],
                                   pattern=driver_regex)

    # sending more data to Kafka
    message_set_b = ["def"] * 100
    # common_args already includes the Kerberos configuration for Kafka
    feed_sample_data(jar_uri, kafka_brokers, KAFKA_TEST_TOPIC, common_args,
                     message_set_b)

    # checkpointing validation
    sdk_tasks.check_running(SPARK_APPLICATION_NAME,
                            expected_task_count=1,
                            timeout_seconds=600)
    LOGGER.info("Streaming job has re-started")

    # validating Structured Streaming resumed topic consumption
    expected_output_b = "{}|  {}".format(message_set_b[0], len(message_set_b))
    LOGGER.info(
        "Validating that consumption resumed from checkpoint, waiting for output '{}' and '{}'"
        .format(expected_output_a, expected_output_b))

    utils.wait_for_running_job_output(driver_task_id, expected_output_a)
    utils.wait_for_running_job_output(driver_task_id, expected_output_b)
Example #10
def setup_module(module):
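    # pytest module-level setup: upload the test JAR and install Spark once before
    # any test in this module runs.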
    utils.upload_dcos_test_jar()
    utils.require_spark()