Example #1
0
def test_run_batch_no_appid(dag, mocker):
    """Check that a batch without a usable Spark appId makes the task fail.

    Two Livy batch replies are imitated in turn: one whose ``appId`` is
    ``None`` and one that has no ``appId`` key at all.  For each of them
    ``execute`` has to raise ``AirflowException`` and ``spill_batch_logs``
    has to be called exactly once more.
    """
    operator = LivyBatchOperator(
        dag=dag,
        task_id="test_run_batch_no_appid",
        verify_in="spark",
        spill_logs=False,
    )
    log_spill_spy = mocker.spy(operator, "spill_batch_logs")

    # (label used in the printed message, JSON body returned for the batch)
    scenarios = (
        ("null Spark appId", {"state": "success", "appId": None}),
        ("no key for Spark appId", {"state": "success", "noAppId": "here"}),
    )
    for attempt, (label, batch_state) in enumerate(scenarios, start=1):
        mock_livy_batch_responses(
            mocker,
            mock_get=[MockedResponse(200, json_body=batch_state)],
        )
        with raises(AirflowException) as caught:
            operator.execute({})
        print(
            f"\n\nImitated {label}, then checked status via Spark REST API, "
            f"got the expected exception:\n<{caught.value}>"
        )
        # The operator was created with spill_logs=False, yet the logs are
        # expected to be spilled once per failed attempt.
        assert log_spill_spy.call_count == attempt
def test_run_batch_verify_in_spark_failed(dag, mocker):
    """Check that a FAILED job reported by the Spark REST API fails the task.

    The mocked Spark reply lists two jobs, the second one with status
    ``FAILED``; ``execute`` has to raise ``AirflowException`` and the batch
    logs have to be spilled once.
    """
    operator = LivyBatchOperator(
        dag=dag,
        task_id="test_run_batch_verify_in_spark_failed",
        verify_in="spark",
        spill_logs=False,
    )
    log_spill_spy = mocker.spy(operator, "spill_batch_logs")

    job_statuses = [
        {"jobId": 1, "status": "SUCCEEDED"},
        {"jobId": 2, "status": "FAILED"},
    ]
    spark_reply = MockedResponse(200, json_body=job_statuses)
    mock_livy_batch_responses(mocker, mock_spark=[spark_reply])

    with raises(AirflowException) as caught:
        operator.execute({})
    report = (
        f"\n\nImitated failed Spark job, then checked status via Spark REST API, "
        f"got the expected exception:\n<{caught.value}>"
    )
    print(report)

    # The operator was created with spill_logs=False, yet the logs are
    # expected to be spilled because the run ended with an error.
    assert log_spill_spy.call_count == 1
Example #3
0
def test_run_session_error_during_status_probing(dag, mocker, code):
    """Check that an error reply while probing the session state fails the task.

    :param code: HTTP status used for the mocked session-state reply
        (presumably supplied by a parametrization that is not visible here).

    The operator is executed twice, first with ``spill_logs=True`` and then
    with ``spill_logs=False``; both runs have to raise ``AirflowException``
    and each has to trigger one ``spill_session_logs`` call.
    """
    operator = LivySessionOperator(
        dag=dag,
        task_id="test_run_session_error_during_status_probing",
        statements=[],
        spill_logs=True,
    )
    log_spill_spy = mocker.spy(operator, "spill_session_logs")

    failing_probe = MockedResponse(code, body=f"Response from server:{code}")
    mock_livy_session_responses(mocker, mock_get_session=[failing_probe])

    # First run: spill_logs=True, so the logs are spilled on failure.
    with raises(AirflowException) as caught:
        operator.execute({})
    report = (
        f"\n\nImitated {code} response from server during session creation probing , "
        f"got the expected exception:\n<{caught.value}>"
    )
    print(report)
    assert log_spill_spy.call_count == 1

    # Second run: spill_logs=False, but the failure still spills the logs.
    operator.spill_logs = False
    with raises(AirflowException):
        operator.execute({})
    assert log_spill_spy.call_count == 2
Example #4
0
def _mock_spark_response(response_list: Iterable[MockedResponse] = None):
    """Register mocked replies for the Spark REST API jobs endpoint.

    :param response_list: replies to register, in iteration order; when
        ``None`` a single 200 reply listing two ``SUCCEEDED`` jobs is used.
    """
    mocked_replies = response_list
    if mocked_replies is None:
        succeeded_jobs = [
            {"jobId": 1, "status": "SUCCEEDED"},
            {"jobId": 2, "status": "SUCCEEDED"},
        ]
        mocked_replies = [MockedResponse(200, json_body=succeeded_jobs)]

    jobs_url = f"{URI}/api/v1/applications/{APP_ID}/jobs"
    for reply in mocked_replies:
        responses.add(
            responses.GET,
            jobs_url,
            status=reply.status,
            body=reply.body,
            json=reply.json_body,
        )
Example #5
0
def _mock_create_response(response_list: Iterable[MockedResponse] = None):
    """Register mocked replies for ``POST {URI}/batches``.

    :param response_list: replies to register, in iteration order; when
        ``None`` a single 201 reply carrying ``{"id": BATCH_ID}`` is used.
    """
    if response_list is None:
        mocked_replies = [MockedResponse(201, json_body={"id": BATCH_ID})]
    else:
        mocked_replies = response_list

    batches_url = f"{URI}/batches"
    for reply in mocked_replies:
        responses.add(
            responses.POST,
            batches_url,
            status=reply.status,
            body=reply.body,
            json=reply.json_body,
        )
def _mock_post_statement_response(response_list: Iterable[MockedResponse] = None):
    """Register mocked replies for posting a statement to the session.

    :param response_list: replies to register, in iteration order; when
        ``None`` a single 200 reply carrying ``{"id": STATEMENT_ID}`` is used.
    """
    statements_url = f"{URI}/sessions/{SESSION_ID}/statements"
    queued = response_list
    if queued is None:
        queued = [MockedResponse(200, json_body={"id": STATEMENT_ID})]

    for mocked in queued:
        responses.add(
            responses.POST,
            statements_url,
            status=mocked.status,
            body=mocked.body,
            json=mocked.json_body,
        )
def _mock_get_session_response(response_list: Iterable[MockedResponse] = None):
    """Register mocked replies for the session state endpoint.

    :param response_list: replies to register, in iteration order; when
        ``None`` a single 200 reply with ``{"state": "idle"}`` is used.
    """
    if response_list is None:
        idle_reply = MockedResponse(200, json_body={"state": "idle"})
        response_list = [idle_reply]

    state_url = f"{URI}/sessions/{SESSION_ID}/state"
    for session_reply in response_list:
        responses.add(
            responses.GET,
            state_url,
            status=session_reply.status,
            body=session_reply.body,
            json=session_reply.json_body,
        )
Example #8
0
def _mock_yarn_response(response_list: Iterable[MockedResponse] = None):
    """Register mocked replies for the YARN REST API application endpoint.

    :param response_list: replies to register, in iteration order; when
        ``None`` a single 200 reply whose ``app.finalStatus`` is
        ``SUCCEEDED`` is used.
    """
    mocked_replies = response_list
    if mocked_replies is None:
        succeeded_app = {"app": {"finalStatus": "SUCCEEDED"}}
        mocked_replies = [MockedResponse(200, json_body=succeeded_app)]

    app_url = f"{URI}/ws/v1/cluster/apps/{APP_ID}"
    for reply in mocked_replies:
        responses.add(
            responses.GET,
            app_url,
            status=reply.status,
            body=reply.body,
            json=reply.json_body,
        )
Example #9
0
def _mock_get_response(response_list: Iterable[MockedResponse] = None):
    """Register mocked replies for ``GET {URI}/batches/{BATCH_ID}``.

    :param response_list: replies to register, in iteration order; when
        ``None`` a single 200 reply with state ``success`` and
        ``appId`` set to ``APP_ID`` is used.
    """
    if response_list is None:
        finished_batch = {"state": "success", "appId": APP_ID}
        queued = [MockedResponse(200, json_body=finished_batch)]
    else:
        queued = response_list

    batch_url = f"{URI}/batches/{BATCH_ID}"
    for mocked in queued:
        responses.add(
            responses.GET,
            batch_url,
            status=mocked.status,
            body=mocked.body,
            json=mocked.json_body,
        )
Example #10
0
def test_run_session_error_when_submitting_statement(dag, mocker, code):
    """Check that a failed statement submission fails the task.

    :param code: HTTP status used for the mocked error reply
        (presumably supplied by a parametrization that is not visible here).

    Three replies are queued for the statement-submission endpoint: a valid
    one, an error reply with status ``code``, and a 200 reply without an
    ``id``.  The operator is executed twice; both runs have to raise
    ``AirflowException`` and each has to spill the session logs once.
    """
    st1 = LivySessionOperator.Statement(kind="spark", code="x = 1;")
    st2 = LivySessionOperator.Statement(kind="pyspark", code="print 'hi';")
    op = LivySessionOperator(
        statements=[st1, st2],
        spill_logs=True,
        task_id="test_run_session_error_when_submitting_statement",
        dag=dag,
    )
    spill_logs_spy = mocker.spy(op, "spill_session_logs")
    submit_statement_spy = mocker.spy(op, "submit_statement")
    mock_livy_session_responses(
        mocker,
        mock_post_statement=[
            MockedResponse(200, json_body={"id": STATEMENT_ID}),
            MockedResponse(code, body=f"Response from server:{code}"),
            MockedResponse(200, json_body={"no id here": "haha"}),
        ],
    )
    with raises(AirflowException) as ae:
        op.execute({})
    print(
        f"\n\nImitated {code} response from server during second statement submission, "
        f"got the expected exception:\n<{ae.value}>"
    )
    # spill_logs=True, and the run failed on the second submission.
    spill_logs_spy.assert_called_once()
    assert submit_statement_spy.call_count == 2

    op.spill_logs = False
    # Bind the exception of THIS run; previously the message below printed
    # the stale exception captured during the first run.
    with raises(AirflowException) as ae:
        op.execute({})
    # NOTE(review): this run presumably receives the remaining mocked reply
    # (the one without an "id"); confirm against mock_livy_session_responses.
    print(
        f"\n\nImitated {code} response from server during first statement submission, "
        f"got the expected exception:\n<{ae.value}>"
    )
    # spill_logs=False, but the run failed, so the logs are spilled anyway.
    assert spill_logs_spy.call_count == 2
    assert submit_statement_spy.call_count == 3
def _mock_get_statement_response(response_list: Iterable[MockedResponse] = None):
    """Register mocked replies for the statement status endpoint.

    :param response_list: replies to register, in iteration order; when
        ``None`` a single 200 reply with state ``available`` and output
        status ``ok`` is used.
    """
    mocked_replies = response_list
    if mocked_replies is None:
        finished_statement = {"state": "available", "output": {"status": "ok"}}
        mocked_replies = [MockedResponse(200, json_body=finished_statement)]

    statement_url = f"{URI}/sessions/{SESSION_ID}/statements/{STATEMENT_ID}"
    for reply in mocked_replies:
        responses.add(
            responses.GET,
            statement_url,
            status=reply.status,
            body=reply.body,
            json=reply.json_body,
        )
def test_run_batch_verify_in_spark_garbled(dag, mocker):
    """Check that an unexpected Spark REST API payload fails the task.

    The mocked Spark reply is a JSON object instead of the list of jobs the
    default mock returns; ``execute`` has to raise ``AirflowException`` and
    the batch logs have to be spilled once.
    """
    operator = LivyBatchOperator(
        dag=dag,
        task_id="test_run_batch_verify_in_spark_garbled",
        verify_in="spark",
        spill_logs=False,
    )
    log_spill_spy = mocker.spy(operator, "spill_batch_logs")

    garbled_reply = MockedResponse(200, json_body={"unparseable": "obj"})
    mock_livy_batch_responses(mocker, mock_spark=[garbled_reply])

    with raises(AirflowException) as caught:
        operator.execute({})
    report = (
        f"\n\nImitated garbled output from Spark REST API, "
        f"got the expected exception:\n<{caught.value}>"
    )
    print(report)
    assert log_spill_spy.call_count == 1
def test_run_batch_verify_in_yarn_garbled_response(dag, mocker):
    """Check that a non-JSON reply from the YARN REST API fails the task.

    The mocked YARN reply is an HTML document rather than JSON; ``execute``
    has to raise ``AirflowException`` and the batch logs have to be spilled
    once.
    """
    op = LivyBatchOperator(
        verify_in="yarn",
        spill_logs=False,
        # Use this test's own name as task_id, like the other tests do; the
        # previous value "test_run_batch_verify_in_spark" was a copy-paste
        # leftover that could clash with another task in the same DAG.
        task_id="test_run_batch_verify_in_yarn_garbled_response",
        dag=dag,
    )
    spill_logs_spy = mocker.spy(op, "spill_batch_logs")
    mock_livy_batch_responses(
        mocker,
        mock_yarn=[
            MockedResponse(200, body="<!DOCTYPE html><html>notjson</html>")
        ],
    )
    with raises(AirflowException) as ae:
        op.execute({})
    print(
        f"\n\nImitated garbled output from YARN REST API, "
        f"got the expected exception:\n<{ae.value}>"
    )
    # spill_logs=False, but the run failed, so the logs are spilled anyway.
    spill_logs_spy.assert_called_once()
Example #14
0
def test_run_batch_verify_in_yarn_failed(dag, mocker):
    """Check that a non-successful final status from YARN fails the task.

    The mocked YARN reply reports ``finalStatus`` ``NOTGOOD``; ``execute``
    has to raise ``AirflowException`` and the batch logs have to be spilled
    once.
    """
    operator = LivyBatchOperator(
        dag=dag,
        task_id="test_run_batch_verify_in_yarn_failed",
        verify_in="yarn",
        spill_logs=False,
    )
    log_spill_spy = mocker.spy(operator, "spill_batch_logs")

    failed_app = {"app": {"finalStatus": "NOTGOOD"}}
    mock_livy_batch_responses(
        mocker,
        mock_yarn=[MockedResponse(200, json_body=failed_app)],
    )

    with raises(AirflowException) as caught:
        operator.execute({})
    report = (
        f"\n\nImitated failed status from YARN REST API, "
        f"got the expected exception:\n<{caught.value}>"
    )
    print(report)
    assert log_spill_spy.call_count == 1