def test_run_batch_no_appid(dag, mocker):
    """Check that a batch verified in Spark fails when Livy gives no appId.

    Two batch-status payloads are imitated in turn: one where ``appId`` is
    ``None`` and one where the ``appId`` key is absent altogether. Every
    ``execute`` call is expected to raise ``AirflowException`` and to add one
    more ``spill_batch_logs`` call.
    """
    batch_op = LivyBatchOperator(
        task_id="test_run_batch_no_appid",
        dag=dag,
        verify_in="spark",
        spill_logs=False,
    )
    logs_spy = mocker.spy(batch_op, "spill_batch_logs")

    # Case 1: the "appId" key is present, but its value is null.
    null_app_id = MockedResponse(200, json_body={"state": "success", "appId": None})
    mock_livy_batch_responses(mocker, mock_get=[null_app_id])
    with raises(AirflowException) as ae:
        batch_op.execute({})
    print(
        f"\n\nImitated null Spark appId, then checked status via Spark REST API, "
        f"got the expected exception:\n<{ae.value}>"
    )
    # The operator was built with spill_logs=False, yet exactly one spill call
    # is expected once the error has been raised.
    logs_spy.assert_called_once()

    # Case 2: the payload has no "appId" key at all.
    missing_app_id = MockedResponse(
        200, json_body={"state": "success", "noAppId": "here"}
    )
    mock_livy_batch_responses(mocker, mock_get=[missing_app_id])
    with raises(AirflowException) as ae:
        batch_op.execute({})
    print(
        f"\n\nImitated no key for Spark appId, then checked status via Spark REST API, "
        f"got the expected exception:\n<{ae.value}>"
    )
    # One additional spill call for the second failure.
    assert logs_spy.call_count == 2
def test_run_batch_verify_in_spark_failed(dag, mocker):
    """Check that a FAILED job in the Spark jobs listing fails the task.

    The mocked Spark response lists one SUCCEEDED and one FAILED job;
    ``execute`` is expected to raise ``AirflowException`` and to call
    ``spill_batch_logs`` exactly once.
    """
    batch_op = LivyBatchOperator(
        task_id="test_run_batch_verify_in_spark_failed",
        dag=dag,
        verify_in="spark",
        spill_logs=False,
    )
    logs_spy = mocker.spy(batch_op, "spill_batch_logs")

    jobs_with_failure = [
        {"jobId": 1, "status": "SUCCEEDED"},
        {"jobId": 2, "status": "FAILED"},
    ]
    mock_livy_batch_responses(
        mocker,
        mock_spark=[MockedResponse(200, json_body=jobs_with_failure)],
    )

    with raises(AirflowException) as ae:
        batch_op.execute({})
    print(
        f"\n\nImitated failed Spark job, then checked status via Spark REST API, "
        f"got the expected exception:\n<{ae.value}>"
    )
    # The operator was built with spill_logs=False, yet exactly one spill call
    # is expected once the error has been raised.
    logs_spy.assert_called_once()
def test_run_session_error_during_status_probing(dag, mocker, code):
    """Check that an error response while probing session state fails the task.

    ``code`` is the HTTP status used for the mocked session-state response
    (presumably injected by a parametrize decorator outside this block --
    TODO confirm). The operator is executed twice, first with
    ``spill_logs=True`` and then with ``spill_logs=False``; both runs must
    raise ``AirflowException`` and each must add one ``spill_session_logs``
    call.
    """
    session_op = LivySessionOperator(
        task_id="test_run_session_error_during_status_probing",
        dag=dag,
        statements=[],
        spill_logs=True,
    )
    logs_spy = mocker.spy(session_op, "spill_session_logs")

    failing_probe = MockedResponse(code, body=f"Response from server:{code}")
    mock_livy_session_responses(mocker, mock_get_session=[failing_probe])

    # First run: spill_logs=True.
    with raises(AirflowException) as ae:
        session_op.execute({})
    print(
        f"\n\nImitated {code} response from server during session creation probing , "
        f"got the expected exception:\n<{ae.value}>"
    )
    logs_spy.assert_called_once()

    # Second run: spill_logs switched off, a spill is still expected on error.
    session_op.spill_logs = False
    with raises(AirflowException):
        session_op.execute({})
    assert logs_spy.call_count == 2
def _mock_spark_response(response_list: Iterable[MockedResponse] = None):
    """Register mocked GET responses for the Spark jobs URL.

    The URL is ``{URI}/api/v1/applications/{APP_ID}/jobs``.

    :param response_list: responses to register, in order. When ``None``, a
        single 200 response listing two SUCCEEDED jobs is registered.
    """
    if response_list is None:
        all_jobs_succeeded = [
            {"jobId": 1, "status": "SUCCEEDED"},
            {"jobId": 2, "status": "SUCCEEDED"},
        ]
        response_list = [MockedResponse(200, json_body=all_jobs_succeeded)]

    jobs_url = f"{URI}/api/v1/applications/{APP_ID}/jobs"
    for mocked in response_list:
        responses.add(
            responses.GET,
            jobs_url,
            status=mocked.status,
            body=mocked.body,
            json=mocked.json_body,
        )
def _mock_create_response(response_list: Iterable[MockedResponse] = None):
    """Register mocked POST responses for ``{URI}/batches``.

    :param response_list: responses to register, in order. When ``None``, a
        single 201 response carrying ``{"id": BATCH_ID}`` is registered.
    """
    if response_list is None:
        created = MockedResponse(201, json_body={"id": BATCH_ID})
        response_list = [created]

    batches_url = f"{URI}/batches"
    for mocked in response_list:
        responses.add(
            responses.POST,
            batches_url,
            status=mocked.status,
            body=mocked.body,
            json=mocked.json_body,
        )
def _mock_post_statement_response(response_list: Iterable[MockedResponse] = None):
    """Register mocked POST responses for the session statements URL.

    The URL is ``{URI}/sessions/{SESSION_ID}/statements``.

    :param response_list: responses to register, in order. When ``None``, a
        single 200 response carrying ``{"id": STATEMENT_ID}`` is registered.
    """
    if response_list is None:
        accepted = MockedResponse(200, json_body={"id": STATEMENT_ID})
        response_list = [accepted]

    statements_url = f"{URI}/sessions/{SESSION_ID}/statements"
    for mocked in response_list:
        responses.add(
            responses.POST,
            statements_url,
            status=mocked.status,
            body=mocked.body,
            json=mocked.json_body,
        )
def _mock_get_session_response(response_list: Iterable[MockedResponse] = None):
    """Register mocked GET responses for the session state URL.

    The URL is ``{URI}/sessions/{SESSION_ID}/state``.

    :param response_list: responses to register, in order. When ``None``, a
        single 200 response carrying ``{"state": "idle"}`` is registered.
    """
    if response_list is None:
        idle = MockedResponse(200, json_body={"state": "idle"})
        response_list = [idle]

    state_url = f"{URI}/sessions/{SESSION_ID}/state"
    for mocked in response_list:
        responses.add(
            responses.GET,
            state_url,
            status=mocked.status,
            body=mocked.body,
            json=mocked.json_body,
        )
def _mock_yarn_response(response_list: Iterable[MockedResponse] = None):
    """Register mocked GET responses for the YARN application URL.

    The URL is ``{URI}/ws/v1/cluster/apps/{APP_ID}``.

    :param response_list: responses to register, in order. When ``None``, a
        single 200 response with ``finalStatus`` set to ``SUCCEEDED`` is
        registered.
    """
    if response_list is None:
        succeeded_app = {"app": {"finalStatus": "SUCCEEDED"}}
        response_list = [MockedResponse(200, json_body=succeeded_app)]

    app_url = f"{URI}/ws/v1/cluster/apps/{APP_ID}"
    for mocked in response_list:
        responses.add(
            responses.GET,
            app_url,
            status=mocked.status,
            body=mocked.body,
            json=mocked.json_body,
        )
def _mock_get_response(response_list: Iterable[MockedResponse] = None):
    """Register mocked GET responses for ``{URI}/batches/{BATCH_ID}``.

    :param response_list: responses to register, in order. When ``None``, a
        single 200 response with state ``success`` and ``appId`` set to
        ``APP_ID`` is registered.
    """
    if response_list is None:
        finished_batch = {"state": "success", "appId": APP_ID}
        response_list = [MockedResponse(200, json_body=finished_batch)]

    batch_url = f"{URI}/batches/{BATCH_ID}"
    for mocked in response_list:
        responses.add(
            responses.GET,
            batch_url,
            status=mocked.status,
            body=mocked.body,
            json=mocked.json_body,
        )
def test_run_session_error_when_submitting_statement(dag, mocker, code):
    """Check that a failed statement submission fails the session task.

    ``code`` is the HTTP status used for the mocked error response
    (presumably injected by a parametrize decorator outside this block --
    TODO confirm).

    Three responses are queued for the statement-submission URL:

    1. 200 with a statement id,
    2. ``code`` with a plain-text body,
    3. 200 with a JSON body that has no ``"id"`` key.

    First run (``spill_logs=True``): the call-count assertions expect two
    ``submit_statement`` calls, i.e. the failure happens on the second
    statement. Second run (``spill_logs=False``): exactly one more
    ``submit_statement`` call is expected, i.e. the failure happens on the
    first statement. Both runs must raise ``AirflowException`` and each must
    add one ``spill_session_logs`` call.
    """
    st1 = LivySessionOperator.Statement(kind="spark", code="x = 1;")
    st2 = LivySessionOperator.Statement(kind="pyspark", code="print 'hi';")
    op = LivySessionOperator(
        statements=[st1, st2],
        spill_logs=True,
        task_id="test_run_session_error_when_submitting_statement",
        dag=dag,
    )
    spill_logs_spy = mocker.spy(op, "spill_session_logs")
    submit_statement_spy = mocker.spy(op, "submit_statement")
    mock_livy_session_responses(
        mocker,
        mock_post_statement=[
            MockedResponse(200, json_body={"id": STATEMENT_ID}),
            MockedResponse(code, body=f"Response from server:{code}"),
            MockedResponse(200, json_body={"no id here": "haha"}),
        ],
    )

    with raises(AirflowException) as ae:
        op.execute({})
    print(
        f"\n\nImitated {code} response from server during second statement submission, "
        f"got the expected exception:\n<{ae.value}>"
    )
    # spill_logs=True, and Operator had the session_id by the time error occurred.
    spill_logs_spy.assert_called_once()
    assert submit_statement_spy.call_count == 2

    op.spill_logs = False
    # Re-bind `ae` here: without it the print below would show the exception
    # captured during the FIRST run instead of the one raised by this run.
    with raises(AirflowException) as ae:
        op.execute({})
    # NOTE(review): assuming queued responses are served in registration
    # order, this run receives the third response (200 without "id"), so the
    # message describes that payload rather than a `code` response.
    print(
        "\n\nImitated response with no statement id during first statement submission, "
        f"got the expected exception:\n<{ae.value}>"
    )
    # spill_logs=False, but error occurred and Operator had the session_id.
    assert spill_logs_spy.call_count == 2
    assert submit_statement_spy.call_count == 3
def _mock_get_statement_response(response_list: Iterable[MockedResponse] = None):
    """Register mocked GET responses for a single statement's URL.

    The URL is ``{URI}/sessions/{SESSION_ID}/statements/{STATEMENT_ID}``.

    :param response_list: responses to register, in order. When ``None``, a
        single 200 response with state ``available`` and output status ``ok``
        is registered.
    """
    if response_list is None:
        finished_statement = {"state": "available", "output": {"status": "ok"}}
        response_list = [MockedResponse(200, json_body=finished_statement)]

    statement_url = f"{URI}/sessions/{SESSION_ID}/statements/{STATEMENT_ID}"
    for mocked in response_list:
        responses.add(
            responses.GET,
            statement_url,
            status=mocked.status,
            body=mocked.body,
            json=mocked.json_body,
        )
def test_run_batch_verify_in_spark_garbled(dag, mocker):
    """Check that an unexpected Spark REST API payload fails the task.

    The mocked Spark response is a JSON object (``{"unparseable": "obj"}``)
    rather than a list of jobs; ``execute`` is expected to raise
    ``AirflowException`` and to call ``spill_batch_logs`` exactly once.
    """
    batch_op = LivyBatchOperator(
        task_id="test_run_batch_verify_in_spark_garbled",
        dag=dag,
        verify_in="spark",
        spill_logs=False,
    )
    logs_spy = mocker.spy(batch_op, "spill_batch_logs")

    garbled = MockedResponse(200, json_body={"unparseable": "obj"})
    mock_livy_batch_responses(mocker, mock_spark=[garbled])

    with raises(AirflowException) as ae:
        batch_op.execute({})
    print(
        f"\n\nImitated garbled output from Spark REST API, "
        f"got the expected exception:\n<{ae.value}>"
    )
    logs_spy.assert_called_once()
def test_run_batch_verify_in_yarn_garbled_response(dag, mocker):
    """Check that a non-JSON YARN REST API response fails the task.

    The mocked YARN response is an HTML document instead of JSON; ``execute``
    is expected to raise ``AirflowException`` and to call
    ``spill_batch_logs`` exactly once.
    """
    op = LivyBatchOperator(
        verify_in="yarn",
        spill_logs=False,
        # Fixed copy-paste leftover ("test_run_batch_verify_in_spark"): the
        # task_id now matches the test name, like every sibling test.
        task_id="test_run_batch_verify_in_yarn_garbled_response",
        dag=dag,
    )
    spill_logs_spy = mocker.spy(op, "spill_batch_logs")
    mock_livy_batch_responses(
        mocker,
        mock_yarn=[
            MockedResponse(200, body="<!DOCTYPE html><html>notjson</html>")
        ],
    )
    with raises(AirflowException) as ae:
        op.execute({})
    print(
        f"\n\nImitated garbled output from YARN REST API, "
        f"got the expected exception:\n<{ae.value}>"
    )
    spill_logs_spy.assert_called_once()
def test_run_batch_verify_in_yarn_failed(dag, mocker):
    """Check that a non-successful YARN final status fails the task.

    The mocked YARN response reports ``finalStatus`` as ``NOTGOOD``;
    ``execute`` is expected to raise ``AirflowException`` and to call
    ``spill_batch_logs`` exactly once.
    """
    batch_op = LivyBatchOperator(
        task_id="test_run_batch_verify_in_yarn_failed",
        dag=dag,
        verify_in="yarn",
        spill_logs=False,
    )
    logs_spy = mocker.spy(batch_op, "spill_batch_logs")

    bad_final_status = MockedResponse(
        200, json_body={"app": {"finalStatus": "NOTGOOD"}}
    )
    mock_livy_batch_responses(mocker, mock_yarn=[bad_final_status])

    with raises(AirflowException) as ae:
        batch_op.execute({})
    print(
        f"\n\nImitated failed status from YARN REST API, "
        f"got the expected exception:\n<{ae.value}>"
    )
    logs_spy.assert_called_once()