コード例 #1
0
def test_run_batch_verify_in_spark_failed(dag, mocker):
    """Expect ``AirflowException`` when the mocked Spark response has a FAILED job.

    The operator is created with ``verify_in="spark"`` and the mocked Spark
    response lists one SUCCEEDED and one FAILED job.
    """
    spark_jobs = [
        {"jobId": 1, "status": "SUCCEEDED"},
        {"jobId": 2, "status": "FAILED"},
    ]
    operator = LivyBatchOperator(
        task_id="test_run_batch_verify_in_spark_failed",
        dag=dag,
        verify_in="spark",
        spill_logs=False,
    )
    spill_spy = mocker.spy(operator, "spill_batch_logs")
    mock_livy_batch_responses(
        mocker,
        mock_spark=[MockedResponse(200, json_body=spark_jobs)],
    )

    with raises(AirflowException) as exc_info:
        operator.execute({})

    message = (
        f"\n\nImitated failed Spark job, then checked status via Spark REST API, "
        f"got the expected exception:\n<{exc_info.value}>"
    )
    print(message)
    # spill_logs is False here, yet the logs are expected to be spilled exactly
    # once because the run ended with an error.
    spill_spy.assert_called_once()
コード例 #2
0
def test_submit_batch_malformed_json(dag, mocker):
    """Expect ``AirflowBadRequest`` when the submit response body is broken JSON."""
    operator = LivyBatchOperator(dag=dag, task_id="test_submit_batch_malformed_json")
    # The payload is deliberately truncated so that it cannot be parsed.
    mocker.patch.object(
        HttpHook,
        "get_conn",
        return_value=mock_http_calls(201, content=b'{"id":{}'),
    )

    with raises(AirflowBadRequest) as exc_info:
        operator.submit_batch()

    message = (
        f"\n\nImitated malformed JSON response when submitting a batch, "
        f"got the expected exception:\n<{exc_info.value}>"
    )
    print(message)
コード例 #3
0
def test_submit_batch_string_id(dag, mocker):
    """Expect ``AirflowException`` when the returned batch ``id`` is a string."""
    operator = LivyBatchOperator(dag=dag, task_id="test_submit_batch_string_id")
    mocker.patch.object(
        HttpHook,
        "get_conn",
        return_value=mock_http_calls(
            201, content=b'{"id":"unexpectedly, a string!"}'
        ),
    )

    with raises(AirflowException) as exc_info:
        operator.submit_batch()

    message = (
        f"\n\nImitated server returning a string for a batch ID, "
        f"got the expected exception:\n<{exc_info.value}>"
    )
    print(message)
コード例 #4
0
def test_run_batch_logs_greater_than_page_size(dag, mocker):
    """Check that 321 mocked log lines result in four ``fetch_log_page`` calls."""
    operator = LivyBatchOperator(
        task_id="test_run_batch_logs_greater_than_page_size",
        dag=dag,
        spill_logs=True,
    )
    page_fetch_spy = mocker.spy(operator, "fetch_log_page")
    mock_livy_batch_responses(mocker, log_lines=321)

    operator.execute({})

    # NOTE(review): four fetches for 321 lines presumably reflects a page size
    # of 100 - confirm against the operator.
    expected_fetches = 4
    assert page_fetch_spy.call_count == expected_fetches
コード例 #5
0
def test_run_batch_logs_one_page_size(dag, mocker):
    """Check that 100 mocked log lines are fetched with a single page request."""
    operator = LivyBatchOperator(
        task_id="test_run_batch_logs_one_page_size",
        dag=dag,
        spill_logs=True,
    )
    page_fetch_spy = mocker.spy(operator, "fetch_log_page")
    mock_livy_batch_responses(mocker, log_lines=100)

    operator.execute({})

    page_fetch_spy.assert_called_once()
コード例 #6
0
def test_submit_batch_bad_response_codes(dag, mocker, code):
    """Expect ``AirflowException`` when batch submission answers with ``code``.

    ``code`` is not defined in this function; it is presumably supplied by
    test parametrization declared outside this view.
    """
    operator = LivyBatchOperator(
        dag=dag,
        task_id=f"test_submit_batch_bad_response_codes_{code}",
    )
    failing_response = mock_http_calls(
        code, content=b"Error content", reason="Good reason"
    )
    mocker.patch.object(HttpHook, "get_conn", return_value=failing_response)

    with raises(AirflowException) as exc_info:
        operator.submit_batch()

    message = (
        f"\n\nImitated the {code} error response when submitting a batch, "
        f"got the expected exception:\n<{exc_info.value}>"
    )
    print(message)
コード例 #7
0
def test_run_batch_verify_in_spark(dag, mocker):
    """Check that ``verify_in="spark"`` calls ``check_spark_app_status`` once."""
    operator = LivyBatchOperator(
        task_id="test_run_batch_verify_in_spark",
        dag=dag,
        verify_in="spark",
        spill_logs=False,
    )
    status_check_spy = mocker.spy(operator, "check_spark_app_status")
    mock_livy_batch_responses(mocker)

    operator.execute({})

    status_check_spy.assert_called_once()
コード例 #8
0
def test_run_batch_logs_malformed_json(dag, mocker):
    """Expect ``AirflowException`` when the log response is not valid JSON.

    The log response is overridden with a string that is not parseable JSON
    while ``spill_logs`` is enabled.
    """
    op = LivyBatchOperator(
        spill_logs=True,
        # The task id mirrors the test name; it previously reused the id of
        # test_run_batch_logs_greater_than_page_size (copy-paste leftover).
        task_id="test_run_batch_logs_malformed_json",
        dag=dag,
    )
    mock_livy_batch_responses(mocker,
                              log_override_response='{"invalid":json]}')
    with raises(AirflowException) as ae:
        op.execute({})
    print(f"\n\nImitated malformed response when calling /logs , "
          f"got the expected exception:\n<{ae.value}>")
コード例 #9
0
def test_run_batch_logs_missing_attrs_in_json(dag, mocker):
    """Expect ``AirflowException`` when the log response is ``{"id": 1, "from": 2}``.

    The printed message describes this override as a response with missing
    attributes.
    """
    operator = LivyBatchOperator(
        task_id="test_run_batch_logs_missing_attrs_in_json",
        dag=dag,
        spill_logs=True,
    )
    incomplete_payload = '{"id": 1, "from": 2}'
    mock_livy_batch_responses(mocker, log_override_response=incomplete_payload)

    with raises(AirflowException) as exc_info:
        operator.execute({})

    message = (
        f"\n\nImitated missing attributes when calling /logs , "
        f"got the expected exception:\n<{exc_info.value}>"
    )
    print(message)
コード例 #10
0
def test_jinja(dag):
    """Check that ``name`` and ``arguments`` are rendered as Jinja templates."""
    template_context = {"run_id": "hello:world", "custom_param": "custom value"}
    operator = LivyBatchOperator(
        task_id="test_jinja",
        dag=dag,
        name="test_jinja_{{ run_id }}",
        arguments=[
            "{{ run_id|replace(':', '-') }}",
            "prefix {{ custom_param }} postfix",
        ],
    )

    operator.render_template_fields(template_context)

    assert operator.name == "test_jinja_hello:world"
    # The first argument applies a filter, the second one embeds a variable.
    assert operator.arguments[0] == "hello-world"
    assert operator.arguments[1] == "prefix custom value postfix"
コード例 #11
0
def test_run_batch_verify_in_spark_garbled(dag, mocker):
    """Expect ``AirflowException`` when the mocked Spark response is unusable.

    The mocked Spark response is a JSON object (``{"unparseable": "obj"}``)
    instead of the list of jobs used by the other Spark verification tests.
    """
    operator = LivyBatchOperator(
        task_id="test_run_batch_verify_in_spark_garbled",
        dag=dag,
        verify_in="spark",
        spill_logs=False,
    )
    spill_spy = mocker.spy(operator, "spill_batch_logs")
    garbled_response = MockedResponse(200, json_body={"unparseable": "obj"})
    mock_livy_batch_responses(mocker, mock_spark=[garbled_response])

    with raises(AirflowException) as exc_info:
        operator.execute({})

    message = (
        f"\n\nImitated garbled output from Spark REST API, "
        f"got the expected exception:\n<{exc_info.value}>"
    )
    print(message)
    # Logs are expected to be spilled once even though spill_logs is False.
    spill_spy.assert_called_once()
コード例 #12
0
def test_run_batch_error_before_batch_created(dag, mocker):
    """Expect a connection error and no log spilling when nothing is mocked.

    Only ``BaseHook._get_connections_from_db`` is patched (to return a
    connection to ``HOST:123``); no HTTP responses are mocked.
    """
    operator = LivyBatchOperator(
        task_id="test_run_batch_error_before_batch_created",
        dag=dag,
        spill_logs=True,
    )
    spill_spy = mocker.spy(operator, "spill_batch_logs")
    fake_connections = [Connection(host="HOST", port=123)]
    mocker.patch.object(
        BaseHook, "_get_connections_from_db", return_value=fake_connections
    )

    with raises(requests.exceptions.ConnectionError) as exc_info:
        operator.execute({})

    message = (
        f"\n\nNo response from server was mocked, "
        f"got the expected exception:\n<{exc_info.value}>"
    )
    print(message)
    # spill_logs is True, but the operator has no batch_id yet, so there is
    # nothing to spill.
    spill_spy.assert_not_called()
コード例 #13
0
def test_invalid_verification(dag):
    """Expect ``AirflowException`` when ``verify_in`` is not a supported value.

    The exception is expected from the operator constructor itself; no
    ``execute`` call is made.
    """
    with raises(AirflowException) as ae:
        LivyBatchOperator(
            task_id="test_invalid_verification", verify_in="invalid", dag=dag,
        )
    # Diagnostic output only; the misspelling "veification" was corrected.
    print(
        f"\n\nCreated a batch operator with invalid verification method, "
        f"got the expected exception:\n<{ae.value}>"
    )
コード例 #14
0
def test_run_batch_verify_in_yarn_failed(dag, mocker):
    """Expect ``AirflowException`` when the mocked YARN ``finalStatus`` is NOTGOOD."""
    operator = LivyBatchOperator(
        task_id="test_run_batch_verify_in_yarn_failed",
        dag=dag,
        verify_in="yarn",
        spill_logs=False,
    )
    spill_spy = mocker.spy(operator, "spill_batch_logs")
    yarn_payload = {"app": {"finalStatus": "NOTGOOD"}}
    mock_livy_batch_responses(
        mocker,
        mock_yarn=[MockedResponse(200, json_body=yarn_payload)],
    )

    with raises(AirflowException) as exc_info:
        operator.execute({})

    message = (
        f"\n\nImitated failed status from YARN REST API, "
        f"got the expected exception:\n<{exc_info.value}>"
    )
    print(message)
    # Logs are expected to be spilled once even though spill_logs is False.
    spill_spy.assert_called_once()
コード例 #15
0
def test_run_batch_verify_in_yarn_garbled_response(dag, mocker):
    """Expect ``AirflowException`` when the mocked YARN response is HTML.

    The operator is created with ``verify_in="yarn"`` and the mocked YARN
    response body is an HTML document instead of JSON.
    """
    op = LivyBatchOperator(
        verify_in="yarn",
        spill_logs=False,
        # The task id mirrors the test name; it previously reused the id of
        # test_run_batch_verify_in_spark (copy-paste leftover).
        task_id="test_run_batch_verify_in_yarn_garbled_response",
        dag=dag,
    )
    spill_logs_spy = mocker.spy(op, "spill_batch_logs")
    mock_livy_batch_responses(
        mocker,
        mock_yarn=[
            MockedResponse(200, body="<!DOCTYPE html><html>notjson</html>")
        ],
    )
    with raises(AirflowException) as ae:
        op.execute({})
    print(f"\n\nImitated garbled output from YARN REST API, "
          f"got the expected exception:\n<{ae.value}>")
    # Logs are expected to be spilled once even though spill_logs is False.
    spill_logs_spy.assert_called_once()
コード例 #16
0
def test_run_batch_no_appid(dag, mocker):
    """Expect ``AirflowException`` when the batch status carries no usable appId.

    Two variants are exercised on the same operator: ``appId`` set to
    ``None`` and the ``appId`` key missing from the response altogether.
    """
    operator = LivyBatchOperator(
        task_id="test_run_batch_no_appid",
        dag=dag,
        verify_in="spark",
        spill_logs=False,
    )
    spill_spy = mocker.spy(operator, "spill_batch_logs")

    # Variant 1: the appId key is present but null.
    null_app_id = MockedResponse(200, json_body={"state": "success", "appId": None})
    mock_livy_batch_responses(mocker, mock_get=[null_app_id])
    with raises(AirflowException) as exc_info:
        operator.execute({})
    message = (
        f"\n\nImitated null Spark appId, then checked status via Spark REST API, "
        f"got the expected exception:\n<{exc_info.value}>"
    )
    print(message)
    # spill_logs is False, yet the logs are expected to be spilled once after
    # the error.
    spill_spy.assert_called_once()

    # Variant 2: the appId key is absent.
    missing_app_id = MockedResponse(
        200, json_body={"state": "success", "noAppId": "here"}
    )
    mock_livy_batch_responses(mocker, mock_get=[missing_app_id])
    with raises(AirflowException) as exc_info:
        operator.execute({})
    message = (
        f"\n\nImitated no key for Spark appId, then checked status via Spark REST API, "
        f"got the expected exception:\n<{exc_info.value}>"
    )
    print(message)
    # One spill per failed run.
    assert spill_spy.call_count == 2
コード例 #17
0
def test_run_batch_successfully(dag, mocker):
    """Check that a successful run spills logs only when ``spill_logs`` is True.

    The operator is executed twice: first with ``spill_logs=False``, then
    with the attribute switched to ``True``.
    """
    operator = LivyBatchOperator(
        task_id="test_run_batch_successfully",
        dag=dag,
        spill_logs=False,
    )
    spill_spy = mocker.spy(operator, "spill_batch_logs")
    submit_spy = mocker.spy(operator, "submit_batch")
    mock_livy_batch_responses(mocker)

    # First run: logging disabled and no error, so no logs are expected.
    operator.execute({})
    submit_spy.assert_called_once()
    spill_spy.assert_not_called()

    # Second run: logging enabled, so the logs are expected exactly once.
    operator.spill_logs = True
    operator.execute({})
    spill_spy.assert_called_once()
コード例 #18
0
def test_run_batch_error_during_status_probing(dag, mocker, code):
    """Expect ``AirflowException`` and log spilling when status probing fails.

    The mocked GET response carries status ``code``. The operator is executed
    twice, with ``spill_logs`` set to ``True`` and then ``False``; the logs
    are expected to be spilled after each failed run.

    ``code`` is not defined in this function; it is presumably supplied by
    test parametrization declared outside this view.
    """
    op = LivyBatchOperator(
        spill_logs=True,
        # Suffix the id with the code so every parametrized case gets its own
        # task id, matching test_submit_batch_bad_response_codes.
        task_id=f"test_run_batch_error_during_status_probing_{code}",
        dag=dag,
    )
    spill_logs_spy = mocker.spy(op, "spill_batch_logs")
    mock_livy_batch_responses(
        mocker,
        mock_get=[MockedResponse(code, body=f"Response from server:{code}")])
    with raises(AirflowException) as ae:
        op.execute({})
    print(
        f"\n\nImitated {code} response from server during batch status probing , "
        f"got the expected exception:\n<{ae.value}>")
    # spill_logs=True, and the operator had the batch_id by the time the error
    # occurred.
    spill_logs_spy.assert_called_once()
    op.spill_logs = False
    with raises(AirflowException):
        op.execute({})
    # spill_logs=False, but an error occurred and the operator had the batch_id.
    assert spill_logs_spy.call_count == 2
コード例 #19
0
def test_submit_batch_params(dag, mocker):
    """Check the JSON payload passed to ``HttpHook.run`` by ``submit_batch``.

    Every operator parameter is set, ``HttpHook.run`` is patched to return a
    response with body ``{"id": 1}``, and the JSON found in the recorded call
    is compared with the expected payload. The operator-only settings
    (timeouts, connection ids, ``verify_in``) are checked on the operator
    attributes instead.
    """
    http_conn_id_yarn = "http_conn_id_yarn"
    http_conn_id_spark = "http_conn_id_spark"
    http_conn_id_livy = "http_conn_id_livy"
    timeout_minutes = 4
    poll_period_sec = 5
    verify_in = "yarn"
    op = LivyBatchOperator(
        file="file",
        proxy_user="******",
        class_name="class_name",
        arguments=["arg1", "arg2"],
        jars=["jar1", "jar2"],
        py_files=["py_file1", "py_file2"],
        files=["file1", "file2"],
        driver_memory="driver_memory",
        driver_cores=1,
        executor_memory="executor_memory",
        executor_cores=2,
        num_executors=3,
        archives=["archive1", "archive2"],
        queue="queue",
        name="name",
        conf={"key1": "val1", "key2": 2},
        timeout_minutes=timeout_minutes,
        poll_period_sec=poll_period_sec,
        verify_in=verify_in,
        http_conn_id_livy=http_conn_id_livy,
        http_conn_id_spark=http_conn_id_spark,
        http_conn_id_yarn=http_conn_id_yarn,
        task_id="test_submit_batch_params",
        dag=dag,
    )
    mock_response = Response()
    mock_response._content = b'{"id": 1}'
    patched_hook = mocker.patch.object(HttpHook, "run", return_value=mock_response)

    op.submit_batch()

    assert op.timeout_minutes == timeout_minutes
    assert op.poll_period_sec == poll_period_sec
    assert op.verify_in == verify_in
    assert op.http_conn_id_livy == http_conn_id_livy
    assert op.http_conn_id_spark == http_conn_id_spark
    assert op.http_conn_id_yarn == http_conn_id_yarn
    # NOTE(review): "args" is listed in reverse order relative to `arguments`;
    # the comparison below uses ignore_order=True, so the order of list items
    # is presumably not verified by this test - confirm that is intended.
    expected_json = json.loads(
        """{
      "proxyUser": "******",
      "file": "file",
      "className": "class_name",
      "args": [
        "arg2",
        "arg1"
      ],
      "pyFiles": [
        "py_file1",
        "py_file2"
      ],
      "jars": [
        "jar1",
        "jar2"
      ],
      "files": [
        "file1",
        "file2"
      ],
      "driverMemory": "driver_memory",
      "driverCores": 1,
      "executorMemory": "executor_memory",
      "executorCores": 2,
      "numExecutors": 3,
      "archives": [
        "archive1",
        "archive2"
      ],
      "name": "name",
      "queue": "queue",
      "conf": {
        "key1": "val1",
        "key2": 2
      }
    }"""
    )
    # Fail with a clear message if the hook was never invoked, then read the
    # recorded call through the public mock API: call_args unpacks into
    # (positional args, keyword args).
    patched_hook.assert_called()
    actual_args, actual_kwargs = patched_hook.call_args
    actual_json = find_json_in_args(actual_args, actual_kwargs)
    if actual_json is None:
        raise AssertionError(
            f"Can not find JSON in HttpHook args.\n"
            f"Args:\n{actual_args}\n"
            f"KWArgs (JSON should be under 'data' key):\n{actual_kwargs}"
        )
    diff = DeepDiff(actual_json, expected_json, ignore_order=True)
    if diff:
        # NOTE(review): json.dumps may not be able to serialize every DeepDiff
        # result - confirm against the DeepDiff version in use.
        print(f"\nDifference:\n{json.dumps(diff, indent=2)}")
    assert not diff
コード例 #20
0
def test_submit_batch_get_id(dag, mocker):
    """Check that ``submit_batch`` stores the ``id`` from the response as ``batch_id``."""
    operator = LivyBatchOperator(dag=dag, task_id="test_submit_batch_get_id")
    mocker.patch.object(
        HttpHook,
        "get_conn",
        return_value=mock_http_calls(201, content=b'{"id": 123}'),
    )

    operator.submit_batch()

    expected_batch_id = 123
    assert operator.batch_id == expected_batch_id