def run_tests(app_url, app_args, expected_output, args=None):
    # Avoid a mutable default argument; fall back to an empty list.
    if args is None:
        args = []
    task_id = _submit_job(app_url, app_args, args)
    LOGGER.info('Waiting for task id={} to complete'.format(task_id))
    shakedown.wait_for_task_completion(task_id)
    log = _task_log(task_id)
    LOGGER.info("task log: {}".format(log))
    assert expected_output in log

def _run_tests(app_url, app_args, expected_output, args=None, config=None):
    # Avoid mutable default arguments; fall back to empty containers.
    args = args or {}
    config = config or {}
    task_id = _submit_job(app_url, app_args, args, config)
    print('Waiting for task id={} to complete'.format(task_id))
    shakedown.wait_for_task_completion(task_id)
    log = _task_log(task_id)
    print(log)
    assert expected_output in log

def check_job_output(task_id, expected_output):
    LOGGER.info('Waiting for task id={} to complete'.format(task_id))
    shakedown.wait_for_task_completion(task_id)
    stdout = _task_log(task_id)
    if expected_output not in stdout:
        stderr = _task_log(task_id, "stderr")
        LOGGER.error("task stdout: {}".format(stdout))
        LOGGER.error("task stderr: {}".format(stderr))
        raise Exception("{} not found in stdout".format(expected_output))

def check_job_output(task_id, expected_output):
    log.info('Waiting for task id={} to complete'.format(task_id))
    shakedown.wait_for_task_completion(task_id, timeout_sec=JOB_WAIT_TIMEOUT_SECONDS)
    stdout = _task_log(task_id)
    if expected_output not in stdout:
        stderr = _task_log(task_id, "stderr")
        log.error("task stdout: {}".format(stdout))
        log.error("task stderr: {}".format(stderr))
        raise Exception("{} not found in stdout".format(expected_output))

def test_offers_suppressed_with_lost_task():
    app_name = "MockTaskRunner"
    driver_task_id = _launch_test_task(app_name)
    executor_task = shakedown.get_service_tasks(app_name)[0]
    executor_ip = sdk_networks.get_task_host(executor_task)
    utils.restart_task_agent_and_verify_state(executor_ip, executor_task, "TASK_LOST")
    shakedown.wait_for_task_completion(driver_task_id, timeout_sec=utils.JOB_WAIT_TIMEOUT_SECONDS)
    _check_logged_offers(driver_task_id, 1)

def run_tests(app_path, app_args, expected_output, app_class=None, py_file_path=None):
    app_resource_url = upload_file(app_path)
    py_file_url = upload_file(py_file_path) if py_file_path is not None else None
    task_id = submit_job(app_resource_url, app_args, app_class, py_file_url)
    print('Waiting for task id={} to complete'.format(task_id))
    shakedown.wait_for_task_completion(task_id)
    log = task_log(task_id)
    print(log)
    assert expected_output in log

def test_executor_gpus_exceeds_available_gpus():
    """
    Checks: if executor.gpus exceeds the available gpus, the job never runs.
    """
    num_executors = 2
    executor_gpus = 2
    driver_task_id = _submit_gpu_app(num_executors=num_executors,
                                     executor_gpus=executor_gpus,
                                     gpus_max=num_executors * executor_gpus)
    try:
        log.info("Waiting for job to complete.")
        shakedown.wait_for_task_completion(driver_task_id, timeout_sec=240)
    except TimeoutExpired:
        log.info("Job failed to complete, as expected.")
        spark_utils.kill_driver(driver_task_id, spark_utils.SPARK_APP_NAME)
        return
    pytest.fail("Did not expect this job to complete.")

def test_gpus_max():
    """
    Checks that gpus.max is respected.
    """
    gpus_max = 1
    app_name = "{}-{}".format(GPU_PI_APP_NAME, str(uuid.uuid4()))
    driver_task_id = _submit_gpu_app(num_executors=1,
                                     executor_gpus=None,
                                     gpus_max=gpus_max,
                                     app_name=app_name)
    log.info("Waiting for job to complete.")
    shakedown.wait_for_task_completion(driver_task_id)

    # Check total executor gpus <= gpus.max
    service = shakedown.get_service(service_name=app_name, completed=True)
    executor_tasks = service['completed_tasks']
    gpus = [task['resources']['gpus'] for task in executor_tasks]
    log.info("Task gpus: {}".format(str(gpus)))
    total_gpus = sum(gpus)
    log.info("Total gpus allocated: {}".format(str(total_gpus)))
    # We expect total gpus == gpus.max because gpus are allocated greedily.
    assert total_gpus == gpus_max

def test_offers_suppressed():
    driver_task_id = _launch_test_task("MockTaskRunner")
    shakedown.wait_for_task_completion(driver_task_id, timeout_sec=utils.JOB_WAIT_TIMEOUT_SECONDS)
    _check_logged_offers(driver_task_id, 0)
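
Every example above follows the same submit / wait / inspect-log shape around shakedown.wait_for_task_completion. The sketch below distills that pattern; it is a minimal sketch, not any one project's implementation, and _submit_job, _task_log, and JOB_WAIT_TIMEOUT_SECONDS are hypothetical stand-ins for the snippet-local helpers and constants used above.

import logging

import shakedown

log = logging.getLogger(__name__)
JOB_WAIT_TIMEOUT_SECONDS = 600  # assumed timeout; tune for the cluster


def run_and_check(app_url, app_args, expected_output):
    # _submit_job and _task_log are hypothetical helpers standing in for the
    # snippet-local versions above (job submission and task log retrieval).
    task_id = _submit_job(app_url, app_args)
    log.info('Waiting for task id={} to complete'.format(task_id))
    # Block until the Mesos task reaches a terminal state, then check stdout.
    shakedown.wait_for_task_completion(task_id, timeout_sec=JOB_WAIT_TIMEOUT_SECONDS)
    stdout = _task_log(task_id)
    if expected_output not in stdout:
        log.error("task stdout: {}".format(stdout))
        raise Exception("{} not found in stdout".format(expected_output))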