def test_cni_labels():
    """Run SparkPi on the 'dcos' CNI network with network labels and verify
    the name/labels show up in both the driver's and the executors' task info.
    """
    submit_conf = [
        "--conf spark.mesos.network.name=dcos",
        "--conf spark.mesos.network.labels=key1:val1,key2:val2",
        "--conf spark.cores.max={}".format(CNI_TEST_NUM_EXECUTORS),
        "--class org.apache.spark.examples.SparkPi"
    ]
    # app_args "3000" keeps the job alive long enough to examine the
    # driver's & executors' task infos.
    driver_id = utils.submit_job(app_url=utils.SPARK_EXAMPLES,
                                 app_args="3000",
                                 args=submit_conf)

    # Wait until executors are running.
    sdk_tasks.check_running(SPARK_PI_FW_NAME, CNI_TEST_NUM_EXECUTORS,
                            timeout_seconds=600)

    # Network name / labels must appear in the driver's task info.
    _check_task_network_info(shakedown.get_task(driver_id, completed=False))

    # ... and in an executor's task info as well.
    _check_task_network_info(shakedown.get_service_tasks(SPARK_PI_FW_NAME)[0])

    # Finally, confirm the job actually produced its expected output.
    utils.check_job_output(driver_id, "Pi is roughly 3")
def check_task_network(task_name, expected_network_name="dcos"):
    """Tests whether a task (and it's parent pod) is on a given network.

    NOTE(review): this function is re-defined (same name) twice later in this
    file; the last definition wins at import time — the duplicates should
    probably be consolidated.

    Args:
        task_name: task id/name to look up via shakedown.
        expected_network_name: network the task must be attached to, or
            None to assert the task has NO named network.
    """
    _task = shakedown.get_task(task_id=task_name, completed=False)
    assert _task is not None, "Unable to find task named {}".format(task_name)
    # shakedown may hand back a single task or a one-element sequence;
    # use isinstance instead of `type(x) == list` comparisons.
    if isinstance(_task, (list, tuple)):
        assert len(_task) == 1, "Found too many tasks matching {}, got {}"\
            .format(task_name, _task)
        _task = _task[0]
    # Removed leftover debug print() calls of the raw task/status dicts.
    for status in _task["statuses"]:
        if status["state"] == "TASK_RUNNING":
            for network_info in status["container_status"]["network_infos"]:
                if expected_network_name is not None:
                    assert "name" in network_info, \
                        "Didn't find network name in NetworkInfo for task {task} with " \
                        "status:{status}".format(task=task_name, status=status)
                    assert network_info["name"] == expected_network_name, \
                        "Expected network name:{expected} found:{observed}" \
                        .format(expected=expected_network_name,
                                observed=network_info["name"])
                else:
                    assert "name" not in network_info, \
                        "Task {task} has network name when it shouldn't has status:{status}" \
                        .format(task=task_name, status=status)
def check_task_network(task_name, expected_network_name="dcos"):
    """Tests whether a task (and it's parent pod) is on a given network """
    found = shakedown.get_task(task_id=task_name, completed=False)
    assert found is not None, "Unable to find task named {}".format(task_name)
    if type(found) == list or type(found) == tuple:
        assert len(found) == 1, "Found too many tasks matching {}, got {}"\
            .format(task_name, found)
        found = found[0]
    # Only TASK_RUNNING statuses carry the network info we care about.
    running = [s for s in found["statuses"] if s["state"] == "TASK_RUNNING"]
    for status in running:
        for info in status["container_status"]["network_infos"]:
            if expected_network_name is None:
                # Caller expects the task to have no named network at all.
                assert "name" not in info, \
                    "Task {task} has network name when it shouldn't has status:{status}" \
                    .format(task=task_name, status=status)
            else:
                assert "name" in info, \
                    "Didn't find network name in NetworkInfo for task {task} with " \
                    "status:{status}".format(task=task_name, status=status)
                assert info["name"] == expected_network_name, \
                    "Expected network name:{expected} found:{observed}" \
                    .format(expected=expected_network_name,
                            observed=info["name"])
def _wait_for_task_status(task_id, expected_state):
    """Return True iff the task identified by *task_id* is in *expected_state*."""
    # Non-running states only show up when shakedown is asked for completed tasks.
    want_completed = expected_state != "TASK_RUNNING"
    found = shakedown.get_task(task_id, completed=want_completed)
    assert found is not None
    log.info(
        f"Checking task state for '{task_id}', expected: {expected_state}, actual: {found['state']}"
    )
    return found["state"] == expected_state
def _submit_job_and_get_tasks(extra_args=None):
    """Submit the dcos test jar and wait for its driver and one executor.

    Fixed: `extra_args=[]` was a mutable default argument; use None instead.

    Args:
        extra_args: optional extra spark-submit arguments (list of strings).

    Returns:
        (driver_task_id, driver_task, executor_task) tuple.
    """
    extra_args = [] if extra_args is None else extra_args
    # driver_cpus / executor_cpus / app_name are presumably module-level
    # settings defined elsewhere in this file — confirm when refactoring.
    submit_args = [
        "--conf spark.driver.cores={}".format(driver_cpus),
        "--conf spark.cores.max={}".format(executor_cpus),
        "--conf spark.executor.cores={}".format(executor_cpus),
        "--class {}".format(app_name)
    ] + extra_args

    driver_task_id = utils.submit_job(app_url=utils.dcos_test_jar_url(),
                                      app_args="1 600",
                                      args=submit_args)

    sdk_tasks.check_running(app_name, 1, timeout_seconds=300)
    driver_task = shakedown.get_task(driver_task_id, completed=False)
    executor_task = shakedown.get_service_tasks(app_name)[0]

    return (driver_task_id, driver_task, executor_task)
def check_task_network(task_name, on_overlay, expected_network_name="dcos"):
    """Check a running task's CNI attachment.

    When *on_overlay* is truthy the task must be on *expected_network_name*;
    otherwise it must not have a named network at all.
    """
    found = shakedown.get_task(task_id=task_name, completed=False)
    for status in found["statuses"]:
        if status["state"] != "TASK_RUNNING":
            continue
        for info in status["container_status"]["network_infos"]:
            if not on_overlay:
                assert "name" not in info, \
                    "Task {task} has network name when it shouldn't has status:{status}"\
                    .format(task=task_name, status=status)
                continue
            assert "name" in info, \
                "Didn't find network name in NetworkInfo for task {task} with "\
                "status:{status}".format(task=task_name, status=status)
            assert info["name"] == expected_network_name, \
                "Expected network name:{expected} found:{observed}"\
                .format(expected=expected_network_name, observed=info["name"])
def test_shuffle_job(submit_args=None,
                     use_ucr_for_spark_submit=True,
                     use_cli_for_spark_submit=True,
                     check_network_labels=False):
    """Run the shuffle job and verify its network setup and output.

    Fixed: `submit_args=[]` was a mutable default argument; use None instead.

    Args:
        submit_args: optional extra spark-submit arguments.
        use_ucr_for_spark_submit: when False, force the Docker containerizer.
        use_cli_for_spark_submit: passed through to the submit helper.
        check_network_labels: also verify network labels (UCR only).
    """
    submit_args = [] if submit_args is None else submit_args
    if not use_ucr_for_spark_submit:
        # Docker containerizer needs an explicit user for the executor image.
        submit_args = submit_args + [
            "--conf spark.mesos.containerizer=docker",
            "--conf spark.mesos.executor.docker.parameters=user=99",
        ]

    driver_task_id = _submit_shuffle_job(use_cli=use_cli_for_spark_submit,
                                         sleep=300,
                                         extra_args=submit_args)

    sdk_tasks.check_running(SHUFFLE_JOB_FW_NAME, SHUFFLE_JOB_NUM_EXECUTORS,
                            timeout_seconds=600)
    driver_task = shakedown.get_task(driver_task_id, completed=False)
    _check_task_network(driver_task, is_ucr=use_ucr_for_spark_submit)

    # Network labels are only propagated with UCR.
    if check_network_labels and use_ucr_for_spark_submit:
        _check_task_network_labels(driver_task)

    executor_tasks = shakedown.get_service_tasks(SHUFFLE_JOB_FW_NAME)
    for task in executor_tasks:
        _check_task_network(task, is_ucr=use_ucr_for_spark_submit)
        if check_network_labels and use_ucr_for_spark_submit:
            _check_task_network_labels(task)

    try:
        utils.wait_for_running_job_output(
            driver_task_id,
            "Groups count: {}".format(SHUFFLE_JOB_EXPECTED_GROUPS_COUNT))
    finally:
        # The driver sleeps; always clean it up even if the check fails.
        log.info("Cleaning up. Attempting to kill driver: {}".format(
            driver_task_id))
        utils.kill_driver(driver_task_id,
                          service_name=CNI_DISPATCHER_SERVICE_NAME)
def _submit_job_and_verify_users(user, use_ucr_for_spark_submit, extra_args=None):
    """Submit MockTaskRunner and verify the user of the driver and executors.

    Fixed: `extra_args=[]` was a mutable default argument; use None instead.

    Args:
        user: expected task user, checked via _check_task_user.
        use_ucr_for_spark_submit: passed through to _check_task_user.
        extra_args: optional extra spark-submit arguments.
    """
    extra_args = [] if extra_args is None else extra_args
    app_name = "MockTaskRunner"

    submit_args = ["--conf spark.cores.max=1",
                   "--class {}".format(app_name)] + extra_args

    driver_task_id = utils.submit_job(service_name=SERVICE_NAME,
                                      app_url=utils.dcos_test_jar_url(),
                                      app_args="1 300",
                                      args=submit_args)

    try:
        sdk_tasks.check_running(app_name, 1, timeout_seconds=300)
        driver_task = shakedown.get_task(driver_task_id, completed=False)
        executor_tasks = shakedown.get_service_tasks(app_name)

        for task in [driver_task] + executor_tasks:
            log.info(f"Checking task '{task['id']}'")
            _check_task_user(task, user, use_ucr_for_spark_submit)
    finally:
        # The driver sleeps; always clean it up even if verification fails.
        log.info(f"Cleaning up. Attempting to kill driver: {driver_task_id}")
        utils.kill_driver(driver_task_id, service_name=SERVICE_NAME)