Code Example #1
def test_suite_edit_on_existing_suite_one_datasource_with_datasource_arg_and_batch_kwargs(
    mock_webbrowser,
    mock_subprocess,
    caplog,
    monkeypatch,
    titanic_data_context,
):
    """
    Given:
    - the suite foo exists
    - the a datasource bar exists
    - and the users runs this
    great_expectations suite edit foo --datasource bar --batch-kwargs '{"path": "data/10k.csv"}'

    Then:
    - The user gets a working notebook
    - NOT open Data Docs
    - open jupyter
    """
    project_dir = titanic_data_context.root_directory
    context = DataContext(project_dir)
    context.create_expectation_suite("foo")

    runner = CliRunner(mix_stderr=False)
    batch_kwargs = {
        "path": os.path.join(project_dir, "../", "data", "Titanic.csv")
    }
    monkeypatch.chdir(os.path.dirname(context.root_directory))
    result = runner.invoke(
        cli,
        [
            "--v3-api",
            "suite",
            "edit",
            "foo",
            "--batch-kwargs",
            json.dumps(batch_kwargs),
            "--datasource",
            "mydatasource",
        ],
        catch_exceptions=False,
    )
    stdout = result.output
    assert stdout == ""
    assert result.exit_code == 0

    expected_notebook_path = os.path.join(project_dir, "uncommitted",
                                          "edit_foo.ipynb")
    assert os.path.isfile(expected_notebook_path)
    expected_suite_path = os.path.join(project_dir, "expectations", "foo.json")
    assert os.path.isfile(expected_suite_path)

    assert mock_webbrowser.call_count == 0
    assert mock_subprocess.call_count == 1

    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
        allowed_deprecation_message=VALIDATION_OPERATORS_DEPRECATION_MESSAGE,
    )
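
In this and several later examples, the mock_webbrowser and mock_subprocess arguments are injected by mock.patch decorators that the snippets omit. Below is a minimal sketch of the assumed setup; the exact patch targets are assumptions, since they are not shown in the source.

from unittest import mock

# Decorators apply bottom-up: the patch nearest the function signature
# (webbrowser.open) is injected as the first mock argument.
@mock.patch("subprocess.call", return_value=True, side_effect=None)
@mock.patch("webbrowser.open", return_value=True, side_effect=None)
def test_example(mock_webbrowser, mock_subprocess):
    # Neither a browser nor a subprocess is actually launched, so tests
    # can assert on call counts instead of real side effects.
    assert mock_webbrowser.call_count == 0
    assert mock_subprocess.call_count == 0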
Code Example #2
def test_suite_edit_with_non_existent_datasource_shows_helpful_error_message(
    mock_webbrowser, mock_subprocess, caplog, empty_data_context
):
    """
    The command should:
    - exit with a clear error message
    - NOT open Data Docs
    - NOT open jupyter
    """
    project_dir = empty_data_context.root_directory
    context = DataContext(project_dir)
    context.create_expectation_suite("foo")
    assert context.list_expectation_suites()[0].expectation_suite_name == "foo"

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        f"suite edit foo -d {project_dir} --datasource not_real",
        catch_exceptions=False,
    )
    assert result.exit_code == 1
    assert (
        "Unable to load datasource `not_real` -- no configuration found or invalid configuration."
        in result.output
    )

    assert mock_webbrowser.call_count == 0
    assert mock_subprocess.call_count == 0

    assert_no_logging_messages_or_tracebacks(caplog, result)
Code Example #3
def test_suite_list_with_multiple_suites(caplog, monkeypatch,
                                         empty_data_context):
    project_dir = empty_data_context.root_directory
    context = DataContext(project_dir)
    context.create_expectation_suite("a.warning")
    context.create_expectation_suite("b.warning")
    context.create_expectation_suite("c.warning")

    config_file_path = os.path.join(project_dir, "great_expectations.yml")
    assert os.path.exists(config_file_path)

    runner = CliRunner(mix_stderr=False)

    monkeypatch.chdir(os.path.dirname(context.root_directory))
    result = runner.invoke(
        cli,
        f"--v3-api suite list",
        catch_exceptions=False,
    )
    output = result.output
    assert result.exit_code == 0
    assert "3 Expectation Suites found:" in output
    assert "a.warning" in output
    assert "b.warning" in output
    assert "c.warning" in output

    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
    )
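
Note that the v3-API invocations here and in Code Example #1 locate the project by changing the working directory with monkeypatch.chdir(...), whereas the older variants below pass the project directory explicitly via -d {project_dir}.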
Code Example #4
def test_suite_edit_with_batch_kwargs_unable_to_load_a_batch_raises_helpful_error(
    mock_webbrowser, mock_subprocess, caplog, empty_data_context
):
    """
    The command should:
    - exit with a clear error message
    - NOT open Data Docs
    - NOT open jupyter
    """
    project_dir = empty_data_context.root_directory

    context = DataContext(project_dir)
    context.create_expectation_suite("foo")
    context.add_datasource("source", class_name="PandasDatasource")

    runner = CliRunner(mix_stderr=False)
    batch_kwargs = '{"table": "fake", "datasource": "source"}'
    result = runner.invoke(
        cli,
        ["suite", "edit", "foo", "-d", project_dir, "--batch-kwargs", batch_kwargs],
        catch_exceptions=False,
    )
    stdout = result.output
    assert result.exit_code == 1
    assert "To continue editing this suite" not in stdout
    assert "Please check that your batch_kwargs are able to load a batch." in stdout

    assert mock_webbrowser.call_count == 0
    assert mock_subprocess.call_count == 0

    assert_no_logging_messages_or_tracebacks(caplog, result)
Code Example #5
def test_suite_list_with_multiple_suites(
        caplog, empty_data_context
):
    project_dir = empty_data_context.root_directory
    context = DataContext(project_dir)
    context.create_expectation_suite("a.warning")
    context.create_expectation_suite("b.warning")
    context.create_expectation_suite("c.warning")


    runner = CliRunner(mix_stderr=False)

    result = runner.invoke(
    cli,
        "suite list -d {}".format(project_dir),
        catch_exceptions=False,
    )
    output = result.output
    print(output)
    assert result.exit_code == 0
    assert "3 expectation suites available:" in output
    assert "\ta.warning" in output
    assert "\tb.warning" in output
    assert "\tc.warning" in output


    assert_no_logging_messages_or_tracebacks(caplog, result)
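
Note the difference in expected output between this older variant and the v3 version in Code Example #3: here the CLI is expected to print "3 expectation suites available:" with tab-indented names, while the v3 command prints "3 Expectation Suites found:" with plain names.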
Code Example #6
def test_suite_edit_with_invalid_json_batch_kwargs_raises_helpful_error(
    mock_webbrowser, mock_subprocess, caplog, empty_data_context
):
    """
    The command should:
    - exit with a clear error message
    - NOT open Data Docs
    - NOT open jupyter
    """
    project_dir = empty_data_context.root_directory
    context = DataContext(project_dir)
    context.create_expectation_suite("foo")

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["suite", "edit", "foo", "-d", project_dir, "--batch-kwargs", "'{foobar}'"],
        catch_exceptions=False,
    )
    stdout = result.output
    assert result.exit_code == 1
    assert "Please check that your batch_kwargs are valid JSON." in stdout

    assert mock_webbrowser.call_count == 0
    assert mock_subprocess.call_count == 0

    assert_no_logging_messages_or_tracebacks(caplog, result)
Code Example #7
def test_validation_operator_list_with_two_operators(caplog, empty_data_context):
    project_dir = empty_data_context.root_directory
    context = DataContext(project_dir)
    context.create_expectation_suite("a.warning")
    runner = CliRunner(mix_stderr=False)

    result = runner.invoke(
        cli, "validation-operator list -d {}".format(project_dir), catch_exceptions=False,
    )
    assert result.exit_code == 0
    assert "2 validation operators" in result.output
Code Example #8
def test_suite_edit_on_existing_suite_one_datasource_with_batch_kwargs_without_datasource_raises_helpful_error(
    mock_webbrowser,
    mock_subprocess,
    caplog,
    monkeypatch,
    titanic_data_context,
):
    """
    Given:
    - the suite foo exists
    - the a datasource exists
    - and the users runs this
    great_expectations suite edit foo --batch-kwargs '{"path": "data/10k.csv"}'

    Then:
    - The user should see a nice error and the program halts before notebook
    compilation.
    - NOT open Data Docs
    - NOT open jupyter
    '"""
    project_dir = titanic_data_context.root_directory
    context = DataContext(project_dir)
    context.create_expectation_suite("foo")

    runner = CliRunner(mix_stderr=False)
    batch_kwargs = {"path": "../data/Titanic.csv"}
    monkeypatch.chdir(os.path.dirname(context.root_directory))
    result = runner.invoke(
        cli,
        [
            "--v3-api",
            "suite",
            "edit",
            "foo",
            "--batch-kwargs",
            json.dumps(batch_kwargs),
        ],
        catch_exceptions=False,
    )
    stdout = result.output
    assert result.exit_code == 1
    assert "Please check that your batch_kwargs are able to load a batch." in stdout
    assert "Unable to load datasource `None`" in stdout

    assert mock_webbrowser.call_count == 0
    assert mock_subprocess.call_count == 0

    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
        allowed_deprecation_message=VALIDATION_OPERATORS_DEPRECATION_MESSAGE,
    )
Code Example #9
def test_suite_list_with_one_suite(caplog, empty_data_context):
    project_dir = empty_data_context.root_directory
    context = DataContext(project_dir)
    context.create_expectation_suite("a.warning")
    runner = CliRunner(mix_stderr=False)

    result = runner.invoke(
        cli, "suite list -d {}".format(project_dir), catch_exceptions=False,
    )
    assert result.exit_code == 0
    assert "1 Expectation Suite found" in result.output
    assert "a.warning" in result.output
    assert_no_logging_messages_or_tracebacks(caplog, result)
Code Example #10
def test_suite_edit_on_existing_suite_one_datasource_with_datasource_arg_and_batch_kwargs(
    caplog,
    titanic_data_context,
):
    """
    Given:
    - the suite foo exists
    - the a datasource bar exists
    - and the users runs this
    great_expectations suite edit foo --datasource bar --batch-kwargs '{"path": "data/10k.csv"}'

    Then:
    - The user gets a working notebook
    """
    project_dir = titanic_data_context.root_directory
    context = DataContext(project_dir)
    context.create_expectation_suite("foo")

    runner = CliRunner(mix_stderr=False)
    batch_kwargs = {
        "path": os.path.join(project_dir, "../", "data", "Titanic.csv")
    }
    result = runner.invoke(
        cli,
        [
            "suite",
            "edit",
            "foo",
            "-d",
            project_dir,
            "--batch-kwargs",
            json.dumps(batch_kwargs),
            "--datasource",
            "mydatasource",
            "--no-jupyter",
        ],
        catch_exceptions=False,
    )
    stdout = result.output
    assert result.exit_code == 0
    assert "To continue editing this suite, run" in stdout

    expected_notebook_path = os.path.join(project_dir, "uncommitted",
                                          "foo.ipynb")
    assert os.path.isfile(expected_notebook_path)
    expected_suite_path = os.path.join(project_dir, "expectations", "foo.json")
    assert os.path.isfile(expected_suite_path)
    assert_no_logging_messages_or_tracebacks(caplog, result)
Code Example #11
def test_suite_delete_with_one_suite(
    mock_emit, caplog, empty_data_context_stats_enabled
):
    project_dir = empty_data_context_stats_enabled.root_directory
    context = DataContext(project_dir)
    suite = context.create_expectation_suite("a.warning")
    context.save_expectation_suite(suite)
    mock_emit.reset_mock()

    suite_dir = os.path.join(project_dir, "expectations", "a")
    suite_path = os.path.join(suite_dir, "warning.json")
    assert os.path.isfile(suite_path)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli, "suite delete a.warning -d {}".format(project_dir), catch_exceptions=False,
    )
    assert result.exit_code == 0
    assert "Deleted the expectation suite named: a.warning" in result.output

    # assert not os.path.isdir(suite_dir)
    assert not os.path.isfile(suite_path)

    assert mock_emit.call_count == 2
    assert mock_emit.call_args_list == [
        mock.call(
            {"event_payload": {}, "event": "data_context.__init__", "success": True}
        ),
        mock.call({"event": "cli.suite.delete", "event_payload": {}, "success": True}),
    ]

    assert_no_logging_messages_or_tracebacks(caplog, result)
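
The mock_emit argument is presumably injected by a patch on the usage-statistics emitter; a sketch of the assumed decorator follows. The patch target is an assumption based on where the handler usually lives, since the decorator is not shown in the snippet.

from unittest import mock

# Assumed patch target; not shown in the source snippet.
@mock.patch(
    "great_expectations.core.usage_statistics.usage_statistics.UsageStatisticsHandler.emit"
)
def test_suite_delete_with_one_suite(
    mock_emit, caplog, empty_data_context_stats_enabled
):
    ...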
Code Example #12
def create_empty_suite(context: DataContext, expectation_suite_name: str,
                       batch_kwargs) -> None:
    """Create a new, empty Expectation Suite, record the originating
    batch_kwargs as a citation, and persist the suite to the store."""
    suite = context.create_expectation_suite(expectation_suite_name,
                                             overwrite_existing=False)
    suite.add_citation(comment="New suite added via CLI",
                       batch_kwargs=batch_kwargs)
    context.save_expectation_suite(suite, expectation_suite_name)
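
A minimal usage sketch for this helper; the project path, suite name, and batch_kwargs values below are illustrative, not taken from the source.

context = DataContext("/path/to/project/great_expectations")  # hypothetical path
create_empty_suite(
    context,
    expectation_suite_name="my_suite",          # illustrative name
    batch_kwargs={"path": "data/Titanic.csv"},  # illustrative batch_kwargs
)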
Code Example #13
def test_suite_edit_with_non_existent_datasource_shows_helpful_error_message(
        caplog, empty_data_context):
    project_dir = empty_data_context.root_directory
    context = DataContext(project_dir)
    context.create_expectation_suite("foo")
    assert context.list_expectation_suites()[0].expectation_suite_name == "foo"

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        "suite edit foo -d {} --datasource not_real".format(project_dir),
        catch_exceptions=False,
    )
    assert result.exit_code == 1
    assert (
        "Unable to load datasource `not_real` -- no configuration found or invalid configuration."
        in result.output
    )
    assert_no_logging_messages_or_tracebacks(caplog, result)
Code Example #14
def test_suite_edit_with_invalid_json_batch_kwargs_raises_helpful_error(
    caplog, empty_data_context
):
    project_dir = empty_data_context.root_directory
    context = DataContext(project_dir)
    context.create_expectation_suite("foo")

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["suite", "edit", "foo", "-d", project_dir, "--batch-kwargs", "'{foobar}'"],
        catch_exceptions=False,
    )
    stdout = result.output
    assert result.exit_code == 1
    assert "Please check that your batch_kwargs are valid JSON." in stdout
    # assert "Expecting value" in stdout # The exact message in the exception varies in Py 2, Py 3
    assert_no_logging_messages_or_tracebacks(caplog, result)
Code Example #15
def test_suite_edit_on_existing_suite_one_datasource_with_batch_kwargs_without_datasource_raises_helpful_error(
    caplog,
    titanic_data_context,
):
    """
    Given:
    - the suite foo exists
    - the a datasource exists
    - and the users runs this
    great_expectations suite edit foo --batch-kwargs '{"path": "data/10k.csv"}'

    Then:
    - The user should see a nice error and the program halts before notebook compilation.
    '"""
    project_dir = titanic_data_context.root_directory
    context = DataContext(project_dir)
    context.create_expectation_suite("foo")

    runner = CliRunner(mix_stderr=False)
    batch_kwargs = {"path": "../data/Titanic.csv"}
    result = runner.invoke(
        cli,
        [
            "suite",
            "edit",
            "foo",
            "-d",
            project_dir,
            "--batch-kwargs",
            json.dumps(batch_kwargs),
        ],
        catch_exceptions=False,
    )
    stdout = result.output
    assert result.exit_code == 1
    assert "Please check that your batch_kwargs are able to load a batch." in stdout
    assert "Unable to load datasource `None`" in stdout
    assert_no_logging_messages_or_tracebacks(caplog, result)
Code Example #16
def create_empty_suite(context: DataContext, expectation_suite_name: str,
                       batch_kwargs) -> None:
    cli_message("""
Great Expectations will create a new Expectation Suite '{0:s}' and store it here:

  {1:s}
""".format(
        expectation_suite_name,
        context.stores[
            context.expectations_store_name].store_backend.get_url_for_key(
                ExpectationSuiteIdentifier(
                    expectation_suite_name=expectation_suite_name).to_tuple()),
    ))
    suite = context.create_expectation_suite(expectation_suite_name)
    suite.add_citation(comment="New suite added via CLI",
                       batch_kwargs=batch_kwargs)
    context.save_expectation_suite(suite, expectation_suite_name)
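
This is a variant of the helper in Code Example #12: it additionally prints the store URL where the suite will be written, but omits overwrite_existing=False, relying on create_expectation_suite's default behavior for existing suites.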
Code Example #17
def test_validation_operator_list_with_one_operator(caplog,
                                                    empty_data_context):
    project_dir = empty_data_context.root_directory
    context = DataContext(project_dir)
    context.create_expectation_suite("a.warning")
Code Example #18
def run_onboarding_data_assistant_result_jupyter_notebook_with_new_cell(
    context: DataContext,
    new_cell: str,
    implicit: bool,
):
    """
    To set this test up we:
    - create a suite
    - write code (as a string) for creating an OnboardingDataAssistantResult
    - add a new cell to the notebook that was passed to this method
    - write both cells to ipynb file

    We then:
    - load the notebook back from disk
    - execute the notebook (Note: this will raise various errors like
      CellExecutionError if any cell in the notebook fails)
    """
    root_dir: str = context.root_directory

    expectation_suite_name: str = "test_suite"
    context.create_expectation_suite(
        expectation_suite_name=expectation_suite_name, overwrite_existing=True
    )

    notebook_path: str = os.path.join(root_dir, "run_onboarding_data_assistant.ipynb")

    notebook_code_initialization: str = """
    from typing import Optional, Union

    import uuid

    import great_expectations as ge
    from great_expectations.data_context import BaseDataContext
    from great_expectations.validator.validator import Validator
    from great_expectations.rule_based_profiler.data_assistant import (
        DataAssistant,
        OnboardingDataAssistant,
    )
    from great_expectations.rule_based_profiler.types.data_assistant_result import DataAssistantResult
    from great_expectations.rule_based_profiler.helpers.util import get_validator_with_expectation_suite
    import great_expectations.exceptions as ge_exceptions

    context = ge.get_context()

    batch_request: dict = {
        "datasource_name": "taxi_pandas",
        "data_connector_name": "monthly",
        "data_asset_name": "my_reports",
    }

    """

    explicit_instantiation_code: str = """
    validator: Validator = get_validator_with_expectation_suite(
        batch_request=batch_request,
        data_context=context,
        expectation_suite_name=None,
        expectation_suite=None,
        component_name="onboarding_data_assistant",
        persist=False,
    )

    data_assistant: DataAssistant = OnboardingDataAssistant(
        name="test_onboarding_data_assistant",
        validator=validator,
    )

    data_assistant_result: DataAssistantResult = data_assistant.run()
    """

    implicit_invocation_code: str = """
    data_assistant_result: DataAssistantResult = context.assistants.onboarding.run(batch_request=batch_request)
    """

    notebook_code: str
    if implicit:
        notebook_code = notebook_code_initialization + implicit_invocation_code
    else:
        notebook_code = notebook_code_initialization + explicit_instantiation_code

    nb = nbformat.v4.new_notebook()
    nb["cells"] = []
    nb["cells"].append(nbformat.v4.new_code_cell(notebook_code))
    nb["cells"].append(nbformat.v4.new_code_cell(new_cell))

    # Write notebook to path and load it as NotebookNode
    with open(notebook_path, "w") as f:
        nbformat.write(nb, f)

    nb: nbformat.notebooknode.NotebookNode = load_notebook_from_path(
        notebook_path=notebook_path
    )

    # Run notebook
    ep: nbconvert.preprocessors.ExecutePreprocessor = (
        nbconvert.preprocessors.ExecutePreprocessor(timeout=180, kernel_name="python3")
    )
    ep.preprocess(nb, {"metadata": {"path": root_dir}})
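
A sketch of how this helper might be invoked; the context setup and the new_cell contents are assumptions. The generated notebook code requires a DataContext wired with a "taxi_pandas" datasource exposing a "monthly" data connector.

# Hypothetical invocation; plot_metrics() is assumed to be a valid call
# on the DataAssistantResult produced inside the notebook.
run_onboarding_data_assistant_result_jupyter_notebook_with_new_cell(
    context=context,  # a DataContext fixture with the taxi_pandas datasource
    new_cell="data_assistant_result.plot_metrics()",
    implicit=True,
)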