def setup_spark(hdfs_with_kerberos, setup_history_server, configure_security_spark, configure_universe):
    try:
        utils.require_spark(use_hdfs=True, use_history=True)
        yield
    finally:
        utils.teardown_spark()
def _test_spark_docker_image(dist):
    utils.require_spark(additional_options={'service': {'docker-image': dist['image']}})
    example_jar_url = EXAMPLES_JAR_PATH_TEMPLATE.format(dist['scala_version'])

    expected_groups_count = 12000
    num_mappers = 4
    value_size_bytes = 100
    num_reducers = 4

    utils.run_tests(
        app_url=example_jar_url,
        app_args=f"{num_mappers} {expected_groups_count} {value_size_bytes} {num_reducers}",
        expected_output=str(expected_groups_count),
        args=[
            "--class org.apache.spark.examples.GroupByTest",
            "--conf spark.executor.cores=1",
            "--conf spark.cores.max=4",
            "--conf spark.scheduler.minRegisteredResourcesRatio=1",
            "--conf spark.scheduler.maxRegisteredResourcesWaitingTime=3m"
        ])

    utils.teardown_spark()
def _test_spark_docker_image(docker_image):
    utils.upload_dcos_test_jar()
    utils.require_spark(additional_options={'service': {'docker-image': docker_image}})

    expected_groups_count = 12000
    num_mappers = 4
    value_size_bytes = 100
    num_reducers = 4
    sleep = 500

    python_script_path = os.path.join(THIS_DIR, 'jobs', 'python', 'shuffle_app.py')
    python_script_url = utils.upload_file(python_script_path)

    utils.run_tests(
        app_url=python_script_url,
        app_args="{} {} {} {} {}".format(num_mappers, expected_groups_count, value_size_bytes, num_reducers, sleep),
        expected_output="Groups count: {}".format(expected_groups_count),
        args=[
            "--conf spark.executor.cores=1",
            "--conf spark.cores.max=4",
            "--conf spark.scheduler.minRegisteredResourcesRatio=1",
            "--conf spark.scheduler.maxRegisteredResourcesWaitingTime=3m"
        ])

    utils.teardown_spark()
def setup_spark(configure_security_spark, configure_universe):
    try:
        utils.upload_dcos_test_jar()
        utils.require_spark()
        yield
    finally:
        utils.teardown_spark()
def setup_spark(configure_security_spark, configure_universe):
    try:
        utils.upload_dcos_test_jar()
        utils.require_spark()

        # We need to pick two nodes with the maximum unused CPU to guarantee that Driver and Executor
        # are not running on the same host.
        available_cpus = []
        cluster_agents = sdk_cmd.cluster_request('GET', '/mesos/slaves').json()
        for agent in cluster_agents["slaves"]:
            available_cpus.append(
                int(float(agent["resources"]["cpus"])) - int(float(agent["used_resources"]["cpus"])))

        available_cpus.sort(reverse=True)
        assert len(available_cpus) >= 3, \
            "Expected 3 or more nodes in the cluster to accommodate Dispatcher, " \
            "Driver, and Executor each on a separate node"

        global driver_cpus
        driver_cpus = available_cpus[0]

        global executor_cpus
        executor_cpus = available_cpus[1]

        log.info(
            f"{driver_cpus} cores will be used for driver, {executor_cpus} cores will be used for executor")

        yield
    finally:
        utils.teardown_spark()
def setup_spark(hdfs_with_kerberos, setup_history_server, configure_security_spark, configure_universe):
    try:
        additional_options = {
            "hdfs": {
                "config-url":
                    "http://api.{}.marathon.l4lb.thisdcos.directory/v1/endpoints".format(HDFS_SERVICE_NAME)
            },
            "security": {
                "kerberos": {
                    "enabled": True,
                    "realm": sdk_auth.REALM,
                    "kdc": {
                        "hostname": hdfs_with_kerberos.get_host(),
                        "port": int(hdfs_with_kerberos.get_port())
                    }
                }
            },
            "service": {
                "spark-history-server-url":
                    shakedown.dcos_url_path("/service/{}".format(HISTORY_SERVICE_NAME))
            }
        }

        utils.require_spark(additional_options=additional_options)
        yield
    finally:
        utils.teardown_spark()
def test_foldered_spark():
    service_name = utils.FOLDERED_SPARK_SERVICE_NAME
    zk = 'spark_mesos_dispatcher__path_to_spark'
    utils.require_spark(service_name=service_name, zk=zk)
    test_sparkPi(service_name=service_name)
    utils.teardown_spark(service_name=service_name, zk=zk)

    # reinstall CLI so that it's available for the following tests:
    sdk_cmd.run_cli('package install --cli {} --yes'.format(utils.SPARK_PACKAGE_NAME))
def setup_spark(configure_universe, configure_role_permissions):
    try:
        # NOTE: service_name is assumed to be defined at module scope (e.g. the foldered Spark service name).
        zk = 'spark_mesos_dispatcher__path_to_spark'
        utils.require_spark(service_name=service_name, zk=zk)
        yield
    finally:
        utils.teardown_spark(service_name=service_name, zk=zk)
def setup_spark(configure_security_spark, configure_universe):
    try:
        # NOTE: service_name is assumed to be defined at module scope (e.g. the foldered Spark service name).
        zk = 'spark_mesos_dispatcher__path_to_spark'
        utils.require_spark(service_name=service_name, zk=zk)
        yield
    finally:
        utils.teardown_spark(service_name=service_name, zk=zk)
def setup_spark(configure_security, configure_universe):
    try:
        utils.upload_dcos_test_jar()
        utils.require_spark()
        sdk_cmd.run_cli('package install --cli dcos-enterprise-cli --yes')
        yield
    finally:
        utils.teardown_spark()
def setup_spark(configure_security, configure_universe):
    try:
        utils.require_spark()
        utils.upload_file(os.environ["SCALA_TEST_JAR_PATH"])
        shakedown.run_dcos_command('package install --cli dcos-enterprise-cli --yes')
        yield
    finally:
        utils.teardown_spark()
def setup_spark(configure_security_spark, configure_universe):
    try:
        spark_utils.require_spark(
            user="******",          # Run as root on centos
            use_bootstrap_ip=True)  # Needed on GPU nodes
        spark_utils.upload_file(os.environ["SCALA_TEST_JAR_PATH"])
        yield
    finally:
        spark_utils.teardown_spark()
def setup_spark(kerberized_kafka, configure_security_spark, configure_universe):
    try:
        # need to do this here also in case this test is run first
        # and the jar hasn't been updated
        utils.upload_file(os.environ["SCALA_TEST_JAR_PATH"])
        utils.require_spark()
        yield
    finally:
        utils.teardown_spark()
def setup_spark(configure_user_permissions, configure_universe, use_ucr_containerizer, user):
    options = {
        "service": {
            "name": SERVICE_NAME,
            "user": user,
            "UCR_containerizer": use_ucr_containerizer
        }
    }

    try:
        utils.upload_dcos_test_jar()
        utils.require_spark(service_name=SERVICE_NAME,
                            additional_options=options,
                            zk=DISPATCHER_ZK)
        yield
    finally:
        utils.teardown_spark(service_name=SERVICE_NAME, zk=DISPATCHER_ZK)
def setup_module(module): if not shakedown.package_installed('spark', SOAK_SPARK_SERVICE_NAME): additional_options = { "hdfs": { "config-url": "http://api.hdfs.marathon.l4lb.thisdcos.directory/v1/endpoints" }, "security": { "kerberos": { "enabled": True, "realm": "LOCAL", "kdc": { "hostname": "kdc.marathon.autoip.dcos.thisdcos.directory", "port": 2500 } } } } utils.require_spark(service_name=SOAK_SPARK_SERVICE_NAME, additional_options=additional_options)
def test_dispatcher_placement(configure_universe):
    constraint = [
        "hostname",
        "CLUSTER",
        sdk_agents.get_private_agents().pop()["hostname"]
    ]
    service_name = "spark"

    log.info("Running test: service_name='{}', constraints=[[{}]]".format(
        service_name, ','.join(constraint)))

    options = {"service": {"name": service_name, "constraints": [constraint]}}

    try:
        utils.require_spark(service_name=service_name, additional_options=options)
        dispatcher_host = sdk_marathon.get_scheduler_host(service_name)

        log.info("Dispatcher Host: {}".format(dispatcher_host))
        assert constraint[2] == dispatcher_host
    finally:
        utils.teardown_spark(service_name=service_name)
def setup_spark(configure_universe, configure_role_permissions, role, enforce_role):
    log.info("Installing Spark: service_name='{}', role='{}', enforce_role='{}'".format(
        SERVICE_NAME, role, enforce_role))

    options = {
        "service": {
            "name": SERVICE_NAME,
            "role": role,
            "enforce_role": enforce_role
        }
    }

    try:
        utils.upload_dcos_test_jar()
        utils.require_spark(service_name=SERVICE_NAME,
                            additional_options=options,
                            zk=DISPATCHER_ZK)
        yield
    finally:
        utils.teardown_spark(service_name=SERVICE_NAME, zk=DISPATCHER_ZK)
def test_unique_vips():
    @retrying.retry(wait_exponential_multiplier=1000, stop_max_attempt_number=7)  # ~2 minutes
    def verify_ip_is_reachable(ip):
        ok, _ = sdk_cmd.master_ssh("curl -v {}".format(ip))
        assert ok

    spark1_service_name = "test/groupa/spark"
    spark2_service_name = "test/groupb/spark"

    try:
        utils.require_spark(spark1_service_name)
        utils.require_spark(spark2_service_name)

        dispatcher1_ui_ip = sdk_hosts.vip_host("marathon", "dispatcher.{}".format(spark1_service_name), 4040)
        dispatcher2_ui_ip = sdk_hosts.vip_host("marathon", "dispatcher.{}".format(spark2_service_name), 4040)

        verify_ip_is_reachable(dispatcher1_ui_ip)
        verify_ip_is_reachable(dispatcher2_ui_ip)
    finally:
        utils.teardown_spark(service_name=spark1_service_name)
        utils.teardown_spark(service_name=spark2_service_name)
def test_unique_vips():
    spark1_service_name = "test/groupa/spark"
    spark2_service_name = "test/groupb/spark"

    try:
        utils.require_spark(spark1_service_name)
        utils.require_spark(spark2_service_name)

        dispatcher1_ui = sdk_hosts.vip_host(
            "marathon", "dispatcher.{}".format(spark1_service_name), 4040)
        dispatcher2_ui = sdk_hosts.vip_host(
            "marathon", "dispatcher.{}".format(spark2_service_name), 4040)

        # verify dispatcher-ui is reachable at VIP
        ok, _ = sdk_cmd.master_ssh("curl {}".format(dispatcher1_ui))
        assert ok
        ok, _ = sdk_cmd.master_ssh("curl {}".format(dispatcher2_ui))
        assert ok
    finally:
        sdk_install.uninstall(utils.SPARK_PACKAGE_NAME, spark1_service_name)
        sdk_install.uninstall(utils.SPARK_PACKAGE_NAME, spark2_service_name)
def kerberized_spark(setup_history_server, hdfs_with_kerberos, kerberos_options,
                     configure_security_spark, configure_universe):
    try:
        additional_options = {
            "hdfs": {
                "config-url":
                    "http://api.{}.marathon.l4lb.thisdcos.directory/v1/endpoints".format(HDFS_SERVICE_NAME)
            },
            "security": kerberos_options,
            "service": {
                "spark-history-server-url":
                    shakedown.dcos_url_path("/service/{}".format(HISTORY_SERVICE_NAME))
            }
        }

        utils.require_spark(additional_options=additional_options)
        yield
    finally:
        utils.teardown_spark()
def test_task_stdout():
    service_name = utils.FOLDERED_SPARK_SERVICE_NAME
    try:
        task_id = service_name.lstrip("/").replace("/", "_")
        utils.require_spark(service_name=service_name)

        task = sdk_cmd._get_task_info(task_id)
        if not task:
            raise Exception("Failed to get '{}' task".format(task_id))

        task_sandbox_path = sdk_cmd.get_task_sandbox_path(task_id)
        if not task_sandbox_path:
            raise Exception("Failed to get '{}' sandbox path".format(task_id))
        agent_id = task["slave_id"]

        task_sandbox = sdk_cmd.cluster_request(
            "GET", "/slave/{}/files/browse?path={}".format(agent_id, task_sandbox_path)
        ).json()
        stdout_file = [f for f in task_sandbox if f["path"].endswith("/stdout")][0]
        assert stdout_file["size"] > 0, "stdout file should have content"
    finally:
        sdk_install.uninstall(utils.SPARK_PACKAGE_NAME, service_name)
def setup_spark(configure_universe, configure_role_permissions, service_name, role, enforce_role):
    log.info("Installing Spark: service_name='{}', role='{}', enforce_role='{}'".format(
        service_name, role, enforce_role))

    options = {
        "service": {
            "name": service_name,
            "role": role,
            "enforce_role": enforce_role
        }
    }

    dispatcher_zk = get_zk_dispatcher(service_name)

    try:
        utils.upload_dcos_test_jar()
        utils.require_spark(service_name=service_name,
                            additional_options=options,
                            zk=dispatcher_zk)
        yield
    finally:
        utils.teardown_spark(service_name=service_name, zk=dispatcher_zk)
def setup_module(module):
    sdk_cmd.run_raw_cli("package install {} --yes --cli".format(utils.SPARK_PACKAGE_NAME))

    if not shakedown.package_installed('spark', SOAK_SPARK_SERVICE_NAME):
        additional_options = {
            "hdfs": {
                "config-url":
                    "http://api.{}.marathon.l4lb.thisdcos.directory/v1/endpoints".format(SOAK_HDFS_SERVICE_NAME)
            },
            "security": {
                "kerberos": {
                    "enabled": True,
                    "realm": "LOCAL",
                    "kdc": {
                        "hostname": "kdc.marathon.autoip.dcos.thisdcos.directory",
                        "port": 2500
                    }
                }
            }
        }
        utils.require_spark(service_name=SOAK_SPARK_SERVICE_NAME,
                            additional_options=additional_options)
def setup_module(module):
    utils.require_spark(service_name=SOAK_SPARK_APP_NAME, use_hdfs=True)
def setup_module(module):
    utils.upload_dcos_test_jar()
    utils.require_spark()
def test_marathon_group():
    app_id = utils.FOLDERED_SPARK_APP_NAME
    utils.require_spark(service_name=app_id, marathon_group=app_id)
    test_sparkPi(app_name=app_id)
    LOGGER.info("Uninstalling app_id={}".format(app_id))
def setup_module(module):
    utils.require_spark()