def setup_spark(configure_security_spark, configure_universe):
    try:
        utils.upload_dcos_test_jar()
        utils.require_spark()
        yield
    finally:
        utils.teardown_spark()
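# A minimal, hypothetical sketch (not from the original suite) of how a test might
# consume the setup_spark generator above, assuming it is registered as a pytest
# fixture. The jar URL helper, class name, and arguments below are placeholders.
def test_sparkpi_example(setup_spark):
    utils.run_tests(
        app_url=utils.dcos_test_jar_url(),  # assumed helper returning the uploaded jar's URL
        app_args="100",
        expected_output="Pi is roughly 3",
        args=["--class org.apache.spark.examples.SparkPi"])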
def setup_spark(configure_security_spark, configure_universe):
    try:
        utils.upload_dcos_test_jar()
        utils.require_spark()

        # We need to pick two nodes with the maximum unused CPU to guarantee that Driver and Executor
        # are not running on the same host.
        available_cpus = []
        cluster_agents = sdk_cmd.cluster_request('GET', '/mesos/slaves').json()

        for agent in cluster_agents["slaves"]:
            available_cpus.append(
                int(float(agent["resources"]["cpus"])) - int(float(agent["used_resources"]["cpus"])))

        available_cpus.sort(reverse=True)

        assert len(available_cpus) >= 3, \
            "Expected 3 or more nodes in the cluster to accommodate Dispatcher, " \
            "Driver, and Executor each on a separate node"

        global driver_cpus
        driver_cpus = available_cpus[0]

        global executor_cpus
        executor_cpus = available_cpus[1]

        log.info(
            f"{driver_cpus} cores will be used for driver, {executor_cpus} cores will be used for executor"
        )

        yield
    finally:
        utils.teardown_spark()
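# Hypothetical sketch (not part of the original suite) of how the driver_cpus and
# executor_cpus globals computed above could be consumed: sizing the driver and a
# single executor to the spare capacity of the two most idle agents prevents Mesos
# from co-locating them on one host. Helper name and conf choices are assumptions.
def _submit_job_on_distinct_nodes(jar_uri, app_args):
    return utils.submit_job(
        app_url=jar_uri,
        app_args=app_args,
        args=[
            "--conf spark.driver.cores={}".format(driver_cpus),
            "--conf spark.executor.cores={}".format(executor_cpus),
            # In coarse-grained Mesos mode spark.cores.max caps total executor cores,
            # so this yields exactly one executor of executor_cpus cores.
            "--conf spark.cores.max={}".format(executor_cpus),
        ])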
def _test_spark_docker_image(docker_image):
    utils.upload_dcos_test_jar()
    utils.require_spark(additional_options={'service': {'docker-image': docker_image}})

    expected_groups_count = 12000
    num_mappers = 4
    value_size_bytes = 100
    num_reducers = 4
    sleep = 500

    python_script_path = os.path.join(THIS_DIR, 'jobs', 'python', 'shuffle_app.py')
    python_script_url = utils.upload_file(python_script_path)
    utils.run_tests(
        app_url=python_script_url,
        app_args="{} {} {} {} {}".format(num_mappers, expected_groups_count,
                                         value_size_bytes, num_reducers, sleep),
        expected_output="Groups count: {}".format(expected_groups_count),
        args=[
            "--conf spark.executor.cores=1",
            "--conf spark.cores.max=4",
            "--conf spark.scheduler.minRegisteredResourcesRatio=1",
            "--conf spark.scheduler.maxRegisteredResourcesWaitingTime=3m"
        ])

    utils.teardown_spark()
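# Hypothetical wrapper (not part of the source) illustrating how _test_spark_docker_image
# could be driven for each image under test via pytest parametrization; the image tag
# below is a placeholder.
import pytest

@pytest.mark.parametrize("docker_image", ["mesosphere/spark:example-tag"])
def test_docker_image(docker_image):
    _test_spark_docker_image(docker_image)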
def setup_spark(configure_security, configure_universe):
    try:
        utils.upload_dcos_test_jar()
        utils.require_spark()
        # The Enterprise CLI provides the 'dcos security' subcommands.
        sdk_cmd.run_cli('package install --cli dcos-enterprise-cli --yes')
        yield
    finally:
        utils.teardown_spark()
def setup_spark(configure_user_permissions, configure_universe, use_ucr_containerizer, user):
    options = {
        "service": {
            "name": SERVICE_NAME,
            "user": user,
            "UCR_containerizer": use_ucr_containerizer
        }
    }

    try:
        utils.upload_dcos_test_jar()
        utils.require_spark(service_name=SERVICE_NAME,
                            additional_options=options,
                            zk=DISPATCHER_ZK)
        yield
    finally:
        utils.teardown_spark(service_name=SERVICE_NAME, zk=DISPATCHER_ZK)
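# Hypothetical sketch (not from the source) of how the use_ucr_containerizer and user
# arguments requested by the fixture above could be provided: as parametrized fixtures
# of their own, so the dependent tests run once per combination. Values are placeholders.
import pytest

@pytest.fixture(params=[True, False])
def use_ucr_containerizer(request):
    return request.param

@pytest.fixture(params=["root", "nobody"])
def user(request):
    return request.param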
def setup_spark(configure_universe, configure_role_permissions, role, enforce_role):
    log.info("Installing Spark: service_name='{}', role='{}', enforce_role='{}'".format(
        SERVICE_NAME, role, enforce_role))

    options = {
        "service": {
            "name": SERVICE_NAME,
            "role": role,
            "enforce_role": enforce_role
        }
    }

    try:
        utils.upload_dcos_test_jar()
        utils.require_spark(service_name=SERVICE_NAME,
                            additional_options=options,
                            zk=DISPATCHER_ZK)
        yield
    finally:
        utils.teardown_spark(service_name=SERVICE_NAME, zk=DISPATCHER_ZK)
def setup_spark(configure_universe, configure_role_permissions, service_name, role, enforce_role):
    log.info("Installing Spark: service_name='{}', role='{}', enforce_role='{}'".format(
        service_name, role, enforce_role))

    options = {
        "service": {
            "name": service_name,
            "role": role,
            "enforce_role": enforce_role
        }
    }

    dispatcher_zk = get_zk_dispatcher(service_name)

    try:
        utils.upload_dcos_test_jar()
        utils.require_spark(service_name=service_name,
                            additional_options=options,
                            zk=dispatcher_zk)
        yield
    finally:
        utils.teardown_spark(service_name=service_name, zk=dispatcher_zk)
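# get_zk_dispatcher is referenced above but not shown here; a minimal sketch under the
# assumption that the Dispatcher persists its state in a ZooKeeper node derived from
# the service name (the real naming convention in the suite may differ).
def get_zk_dispatcher(service_name):
    return "spark_mesos_dispatcher__{}".format(service_name.lstrip("/").replace("/", "__"))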
def upload_test_jars(configure_security_spark, configure_universe):
    utils.upload_dcos_test_jar()
def test_structured_streaming_recovery(kerberized_spark, kerberized_kafka):
    kafka_brokers = ','.join(
        sdk_cmd.svc_cli(KAFKA_PACKAGE_NAME, KAFKA_SERVICE_NAME, 'endpoints broker', json=True)['dns'])
    LOGGER.info("Kafka brokers: {}".format(kafka_brokers))

    _uri = upload_jaas()
    uris = "spark.mesos.uris={}".format(_uri)

    jar_uri = utils.upload_dcos_test_jar()

    kafka_kerberos_args = get_kerberized_kafka_spark_conf(utils.SPARK_SERVICE_NAME)
    LOGGER.info("Spark Kerberos configuration for Kafka:\n{}".format('\n'.join(kafka_kerberos_args)))

    common_args = [
        "--conf", "spark.mesos.containerizer=mesos",
        "--conf", "spark.scheduler.maxRegisteredResourcesWaitingTime=2400s",
        "--conf", "spark.scheduler.minRegisteredResourcesRatio=1.0",
        "--conf", uris
    ] + kafka_kerberos_args

    # configuring streaming job and HDFS folders
    setup_hdfs_paths()

    # running kafka producer
    message_set_a = ["abc"] * 100
    feed_sample_data(jar_uri, kafka_brokers, KAFKA_TEST_TOPIC, common_args, message_set_a)

    spark_submit_args = [
        "--supervise",
        "--class", "StructuredStreamingWithCheckpointing",
        "--conf", "spark.cores.max=2",
        "--conf", "spark.executor.cores=1",
        "--conf", "spark.sql.shuffle.partitions=2",
        "--conf", "spark.executor.memory=2g"
    ] + common_args

    application_args = "{} {} {} {}".format(kafka_brokers, KAFKA_TEST_TOPIC,
                                            HDFS_CHECKPOINT_DIR, SPARK_SECURITY_PROTOCOL)

    driver_task_id = utils.submit_job(app_url=jar_uri,
                                      app_args=application_args,
                                      service_name=utils.SPARK_SERVICE_NAME,
                                      args=(SPARK_SUBMIT_HDFS_KERBEROS_ARGS + spark_submit_args))

    # Wait until executor is running
    LOGGER.info("Starting supervised driver {}".format(driver_task_id))
    sdk_tasks.check_running(SPARK_APPLICATION_NAME, expected_task_count=1, timeout_seconds=600)

    # validating Structured Streaming topic consumption
    expected_output_a = "{}| {}".format(message_set_a[0], len(message_set_a))
    LOGGER.info("Validating Structured Streaming topic consumption, waiting for output {}"
                .format(expected_output_a))
    utils.wait_for_running_job_output(driver_task_id, expected_output_a)

    # killing the driver
    service_info = shakedown.get_service(SPARK_APPLICATION_NAME).dict()
    driver_regex = "spark.mesos.driver.frameworkId={}".format(service_info['id'])
    sdk_cmd.kill_task_with_pattern(agent_host=service_info['hostname'], pattern=driver_regex)

    # sending more data to Kafka
    message_set_b = ["def"] * 100
    feed_sample_data(jar_uri, kafka_brokers, KAFKA_TEST_TOPIC,
                     common_args + kafka_kerberos_args, message_set_b)

    # checkpointing validation
    sdk_tasks.check_running(SPARK_APPLICATION_NAME, expected_task_count=1, timeout_seconds=600)
    LOGGER.info("Streaming job has re-started")

    # validating Structured Streaming resumed topic consumption
    expected_output_b = "{}| {}".format(message_set_b[0], len(message_set_b))
    LOGGER.info("Validating that consumption resumed from checkpoint, waiting for output '{}' and '{}'"
                .format(expected_output_a, expected_output_b))

    utils.wait_for_running_job_output(driver_task_id, expected_output_a)
    utils.wait_for_running_job_output(driver_task_id, expected_output_b)
def setup_module(module):
    utils.upload_dcos_test_jar()
    utils.require_spark()