def test_suite_edit_on_exsiting_suite_one_datasources_with_datasource_arg_and_batch_kwargs(
    mock_webbrowser,
    mock_subprocess,
    caplog,
    monkeypatch,
    titanic_data_context,
):
    """
    Given:
    - the suite foo exists
    - the datasource mydatasource exists
    - and the user runs this
    great_expectations suite edit foo --datasource mydatasource --batch-kwargs '{"path": "data/Titanic.csv"}'

    Then:
    - The user gets a working notebook
    - NOT open Data Docs
    - open jupyter
    """
    project_dir = titanic_data_context.root_directory
    context = DataContext(project_dir)
    context.create_expectation_suite("foo")

    runner = CliRunner(mix_stderr=False)
    batch_kwargs = {"path": os.path.join(project_dir, "../", "data", "Titanic.csv")}
    monkeypatch.chdir(os.path.dirname(context.root_directory))
    result = runner.invoke(
        cli,
        [
            "--v3-api",
            "suite",
            "edit",
            "foo",
            "--batch-kwargs",
            json.dumps(batch_kwargs),
            "--datasource",
            "mydatasource",
        ],
        catch_exceptions=False,
    )
    stdout = result.output
    # Check the exit code first so a failure surfaces the command's output
    # instead of an unhelpful "'' != <traceback text>" diff.
    assert result.exit_code == 0
    assert stdout == ""

    expected_notebook_path = os.path.join(project_dir, "uncommitted", "edit_foo.ipynb")
    assert os.path.isfile(expected_notebook_path)

    expected_suite_path = os.path.join(project_dir, "expectations", "foo.json")
    assert os.path.isfile(expected_suite_path)

    assert mock_webbrowser.call_count == 0
    assert mock_subprocess.call_count == 1

    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
        allowed_deprecation_message=VALIDATION_OPERATORS_DEPRECATION_MESSAGE,
    )
def test_suite_edit_with_non_existent_datasource_shows_helpful_error_message(
    mock_webbrowser, mock_subprocess, caplog, empty_data_context
):
    """
    Editing a suite against a datasource that does not exist should:
    - exit with a clear error message
    - NOT open Data Docs
    - NOT open jupyter
    """
    ge_dir = empty_data_context.root_directory
    context = DataContext(ge_dir)
    context.create_expectation_suite("foo")
    assert context.list_expectation_suites()[0].expectation_suite_name == "foo"

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        "suite edit foo -d {} --datasource not_real".format(ge_dir),
        catch_exceptions=False,
    )

    assert result.exit_code == 1
    assert (
        "Unable to load datasource `not_real` -- no configuration found or invalid configuration."
        in result.output
    )
    assert mock_webbrowser.call_count == 0
    assert mock_subprocess.call_count == 0
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_suite_list_with_multiple_suites(caplog, monkeypatch, empty_data_context):
    """`suite list` should enumerate all three suites in the project by name."""
    project_dir = empty_data_context.root_directory
    context = DataContext(project_dir)
    context.create_expectation_suite("a.warning")
    context.create_expectation_suite("b.warning")
    context.create_expectation_suite("c.warning")

    config_file_path = os.path.join(project_dir, "great_expectations.yml")
    assert os.path.exists(config_file_path)

    runner = CliRunner(mix_stderr=False)
    monkeypatch.chdir(os.path.dirname(context.root_directory))
    result = runner.invoke(
        cli,
        "--v3-api suite list",  # was an f-string with no placeholders (ruff F541)
        catch_exceptions=False,
    )
    output = result.output
    assert result.exit_code == 0

    assert "3 Expectation Suites found:" in output
    assert "a.warning" in output
    assert "b.warning" in output
    assert "c.warning" in output

    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
    )
def test_suite_edit_with_batch_kwargs_unable_to_load_a_batch_raises_helpful_error(
    mock_webbrowser, mock_subprocess, caplog, empty_data_context
):
    """
    When the supplied batch_kwargs cannot load a batch, `suite edit` should:
    - exit with a clear error message
    - NOT open Data Docs
    - NOT open jupyter
    """
    ge_dir = empty_data_context.root_directory
    context = DataContext(ge_dir)
    context.create_expectation_suite("foo")
    context.add_datasource("source", class_name="PandasDatasource")

    # Valid JSON, but references a table the pandas datasource cannot load.
    bad_batch_kwargs = '{"table": "fake", "datasource": "source"}'
    command = [
        "suite",
        "edit",
        "foo",
        "-d",
        ge_dir,
        "--batch-kwargs",
        bad_batch_kwargs,
    ]
    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(cli, command, catch_exceptions=False)
    output = result.output

    assert result.exit_code == 1
    assert "To continue editing this suite" not in output
    assert "Please check that your batch_kwargs are able to load a batch." in output
    assert mock_webbrowser.call_count == 0
    assert mock_subprocess.call_count == 0
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_suite_list_with_multiple_suites(
    caplog, empty_data_context
):
    """`suite list` should enumerate all three suites in the project by name."""
    # NOTE(review): this test has the same name as the v3-api variant elsewhere
    # in this change set; if both end up in one module the later definition
    # silently shadows the earlier one -- confirm they live in separate files.
    project_dir = empty_data_context.root_directory
    context = DataContext(project_dir)
    context.create_expectation_suite("a.warning")
    context.create_expectation_suite("b.warning")
    context.create_expectation_suite("c.warning")

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        "suite list -d {}".format(project_dir),
        catch_exceptions=False,
    )
    output = result.output
    # Removed leftover debug print(output).
    assert result.exit_code == 0
    assert "3 expectation suites available:" in output
    assert "\ta.warning" in output
    assert "\tb.warning" in output
    assert "\tc.warning" in output
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_suite_edit_with_invalid_json_batch_kwargs_raises_helpful_error(
    mock_webbrowser, mock_subprocess, caplog, empty_data_context
):
    """
    Passing malformed JSON via --batch-kwargs should:
    - exit with a clear error message
    - NOT open Data Docs
    - NOT open jupyter
    """
    ge_dir = empty_data_context.root_directory
    context = DataContext(ge_dir)
    context.create_expectation_suite("foo")

    runner = CliRunner(mix_stderr=False)
    command = ["suite", "edit", "foo", "-d", ge_dir, "--batch-kwargs", "'{foobar}'"]
    result = runner.invoke(cli, command, catch_exceptions=False)
    output = result.output

    assert result.exit_code == 1
    assert "Please check that your batch_kwargs are valid JSON." in output
    assert mock_webbrowser.call_count == 0
    assert mock_subprocess.call_count == 0
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_validation_operator_list_with_two_operators(caplog, empty_data_context):
    """`validation-operator list` reports the operators configured on the context.

    The empty_data_context fixture presumably ships with two default validation
    operators -- the assertion below depends on that fixture configuration.
    """
    ge_dir = empty_data_context.root_directory
    context = DataContext(ge_dir)
    context.create_expectation_suite("a.warning")

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        "validation-operator list -d {}".format(ge_dir),
        catch_exceptions=False,
    )

    assert result.exit_code == 0
    assert "2 validation operators" in result.output
def test_suite_edit_on_exsiting_suite_one_datasources_with_batch_kwargs_without_datasource_raises_helpful_error(
    mock_webbrowser,
    mock_subprocess,
    caplog,
    monkeypatch,
    titanic_data_context,
):
    """
    Given:
    - the suite foo exists
    - a datasource exists
    - and the user runs
      great_expectations suite edit foo --batch-kwargs '{"path": "data/10k.csv"}'

    Then:
    - The user should see a nice error and the program halts before notebook
      compilation.
    - NOT open Data Docs
    - NOT open jupyter
    """
    project_dir = titanic_data_context.root_directory
    context = DataContext(project_dir)
    context.create_expectation_suite("foo")

    monkeypatch.chdir(os.path.dirname(context.root_directory))
    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        [
            "--v3-api",
            "suite",
            "edit",
            "foo",
            "--batch-kwargs",
            json.dumps({"path": "../data/Titanic.csv"}),
        ],
        catch_exceptions=False,
    )
    output = result.output

    assert result.exit_code == 1
    assert "Please check that your batch_kwargs are able to load a batch." in output
    assert "Unable to load datasource `None`" in output
    assert mock_webbrowser.call_count == 0
    assert mock_subprocess.call_count == 0

    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
        allowed_deprecation_message=VALIDATION_OPERATORS_DEPRECATION_MESSAGE,
    )
def test_suite_list_with_one_suite(caplog, empty_data_context):
    """`suite list` should report the single existing suite by name."""
    ge_dir = empty_data_context.root_directory
    context = DataContext(ge_dir)
    context.create_expectation_suite("a.warning")

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        "suite list -d {}".format(ge_dir),
        catch_exceptions=False,
    )

    assert result.exit_code == 0
    assert "1 Expectation Suite found" in result.output
    assert "a.warning" in result.output
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_suite_edit_on_exsiting_suite_one_datasources_with_datasource_arg_and_batch_kwargs(
    caplog,
    titanic_data_context,
):
    """
    Given:
    - the suite foo exists
    - the datasource mydatasource exists
    - and the user runs this
    great_expectations suite edit foo --datasource mydatasource --batch-kwargs '{"path": "data/Titanic.csv"}'

    Then:
    - The user gets a working notebook
    """
    project_dir = titanic_data_context.root_directory
    context = DataContext(project_dir)
    context.create_expectation_suite("foo")

    runner = CliRunner(mix_stderr=False)
    batch_kwargs = {"path": os.path.join(project_dir, "../", "data", "Titanic.csv")}
    result = runner.invoke(
        cli,
        [
            "suite",
            "edit",
            "foo",
            "-d",
            project_dir,
            "--batch-kwargs",
            json.dumps(batch_kwargs),
            "--datasource",
            "mydatasource",
            "--no-jupyter",
        ],
        catch_exceptions=False,
    )
    stdout = result.output
    assert result.exit_code == 0
    assert "To continue editing this suite, run" in stdout

    expected_notebook_path = os.path.join(project_dir, "uncommitted", "foo.ipynb")
    assert os.path.isfile(expected_notebook_path)
    expected_suite_path = os.path.join(project_dir, "expectations", "foo.json")
    assert os.path.isfile(expected_suite_path)
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_suite_delete_with_one_suite(
    mock_emit, caplog, empty_data_context_stats_enabled
):
    """Deleting the only suite removes its json file and emits the expected
    analytics events (context init + cli.suite.delete)."""
    ge_dir = empty_data_context_stats_enabled.root_directory
    context = DataContext(ge_dir)
    context.save_expectation_suite(context.create_expectation_suite("a.warning"))
    mock_emit.reset_mock()

    suite_dir = os.path.join(ge_dir, "expectations", "a")
    suite_path = os.path.join(suite_dir, "warning.json")
    assert os.path.isfile(suite_path)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        "suite delete a.warning -d {}".format(ge_dir),
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    assert "Deleted the expectation suite named: a.warning" in result.output

    # Only the json file is asserted gone; the containing directory may remain.
    # assert not os.path.isdir(suite_dir)
    assert not os.path.isfile(suite_path)

    assert mock_emit.call_count == 2
    assert mock_emit.call_args_list == [
        mock.call(
            {"event_payload": {}, "event": "data_context.__init__", "success": True}
        ),
        mock.call({"event": "cli.suite.delete", "event_payload": {}, "success": True}),
    ]
    assert_no_logging_messages_or_tracebacks(caplog, result)
def create_empty_suite(
    context: DataContext, expectation_suite_name: str, batch_kwargs
) -> None:
    """Create a new (non-overwriting) empty suite, cite its origin, and save it.

    Args:
        context: the DataContext to create the suite in.
        expectation_suite_name: name for the new suite.
        batch_kwargs: recorded on the suite as a citation for provenance.
    """
    new_suite = context.create_expectation_suite(
        expectation_suite_name, overwrite_existing=False
    )
    new_suite.add_citation(
        comment="New suite added via CLI",
        batch_kwargs=batch_kwargs,
    )
    context.save_expectation_suite(new_suite, expectation_suite_name)
def test_suite_edit_with_non_existent_datasource_shows_helpful_error_message(
    caplog, empty_data_context
):
    """Editing with an unknown --datasource should fail with a clear message."""
    ge_dir = empty_data_context.root_directory
    context = DataContext(ge_dir)
    context.create_expectation_suite("foo")
    assert context.list_expectation_suites()[0].expectation_suite_name == "foo"

    runner = CliRunner(mix_stderr=False)
    command = "suite edit foo -d {} --datasource not_real".format(ge_dir)
    result = runner.invoke(cli, command, catch_exceptions=False)

    assert result.exit_code == 1
    assert (
        "Unable to load datasource `not_real` -- no configuration found or invalid configuration."
        in result.output
    )
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_suite_edit_with_invalid_json_batch_kwargs_raises_helpful_error(
    caplog, empty_data_context
):
    """Malformed JSON passed via --batch-kwargs should produce a clear error."""
    project_dir = empty_data_context.root_directory
    context = DataContext(project_dir)
    context.create_expectation_suite("foo")

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["suite", "edit", "foo", "-d", project_dir, "--batch-kwargs", "'{foobar}'"],
        catch_exceptions=False,
    )
    stdout = result.output
    assert result.exit_code == 1
    # Only assert our own stable message; the underlying json.JSONDecodeError
    # text is an implementation detail that varies across Python versions
    # (removed a commented-out Py2-era assertion on it).
    assert "Please check that your batch_kwargs are valid JSON." in stdout
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_suite_edit_on_exsiting_suite_one_datasources_with_batch_kwargs_without_datasource_raises_helpful_error(
    caplog,
    titanic_data_context,
):
    """
    Given:
    - the suite foo exists
    - a datasource exists
    - and the user runs
      great_expectations suite edit foo --batch-kwargs '{"path": "data/10k.csv"}'

    Then:
    - The user should see a nice error and the program halts before notebook
      compilation.
    """
    ge_dir = titanic_data_context.root_directory
    context = DataContext(ge_dir)
    context.create_expectation_suite("foo")

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        [
            "suite",
            "edit",
            "foo",
            "-d",
            ge_dir,
            "--batch-kwargs",
            json.dumps({"path": "../data/Titanic.csv"}),
        ],
        catch_exceptions=False,
    )
    output = result.output

    assert result.exit_code == 1
    assert "Please check that your batch_kwargs are able to load a batch." in output
    assert "Unable to load datasource `None`" in output
    assert_no_logging_messages_or_tracebacks(caplog, result)
def create_empty_suite(
    context: DataContext, expectation_suite_name: str, batch_kwargs
) -> None:
    """Announce, create, cite, and persist a new empty expectation suite.

    Args:
        context: the DataContext to create the suite in.
        expectation_suite_name: name for the new suite.
        batch_kwargs: recorded on the suite as a citation for provenance.
    """
    # Hoist the store URL lookup out of the message so the user-facing text
    # is not buried under a deeply nested .format() argument expression.
    suite_url = context.stores[
        context.expectations_store_name
    ].store_backend.get_url_for_key(
        ExpectationSuiteIdentifier(
            expectation_suite_name=expectation_suite_name
        ).to_tuple()
    )
    cli_message(
        f"""
Great Expectations will create a new Expectation Suite '{expectation_suite_name:s}' and store it here:

  {suite_url:s}"""
    )
    # overwrite_existing=False is the library default; stated explicitly for
    # consistency with the sibling create_empty_suite helper.
    suite = context.create_expectation_suite(
        expectation_suite_name, overwrite_existing=False
    )
    suite.add_citation(comment="New suite added via CLI", batch_kwargs=batch_kwargs)
    context.save_expectation_suite(suite, expectation_suite_name)
def test_validation_operator_list_with_one_operator(caplog, empty_data_context):
    # NOTE(review): this test sets up a context and one suite but never invokes
    # the CLI or asserts anything -- it looks truncated/incomplete. Compare with
    # test_validation_operator_list_with_two_operators. TODO: confirm intent.
    project_dir = empty_data_context.root_directory
    context = DataContext(project_dir)
    context.create_expectation_suite("a.warning")
def run_onboarding_data_assistant_result_jupyter_notebook_with_new_cell(
    context: DataContext,
    new_cell: str,
    implicit: bool,
):
    """
    To set this test up we:
    - create a suite
    - write code (as a string) for creating an OnboardingDataAssistantResult
    - add a new cell to the notebook that was passed to this method
    - write both cells to ipynb file

    We then:
    - load the notebook back from disk
    - execute the notebook (Note: this will raise various errors like
      CellExecutionError if any cell in the notebook fails)

    Args:
        context: DataContext whose root directory hosts the generated notebook.
        new_cell: source code appended to the notebook as a second cell.
        implicit: when True, invoke the assistant via context.assistants;
            otherwise instantiate OnboardingDataAssistant explicitly.
    """
    root_dir: str = context.root_directory

    expectation_suite_name: str = "test_suite"
    context.create_expectation_suite(
        expectation_suite_name=expectation_suite_name, overwrite_existing=True
    )

    # Plain string literal: the original used an f-string with no placeholders
    # (ruff F541).
    notebook_path: str = os.path.join(root_dir, "run_onboarding_data_assistant.ipynb")

    notebook_code_initialization: str = """
from typing import Optional, Union

import uuid

import great_expectations as ge
from great_expectations.data_context import BaseDataContext
from great_expectations.validator.validator import Validator
from great_expectations.rule_based_profiler.data_assistant import (
    DataAssistant,
    OnboardingDataAssistant,
)
from great_expectations.rule_based_profiler.types.data_assistant_result import DataAssistantResult
from great_expectations.rule_based_profiler.helpers.util import get_validator_with_expectation_suite
import great_expectations.exceptions as ge_exceptions

context = ge.get_context()

batch_request: dict = {
    "datasource_name": "taxi_pandas",
    "data_connector_name": "monthly",
    "data_asset_name": "my_reports",
}
"""

    explicit_instantiation_code: str = """
validator: Validator = get_validator_with_expectation_suite(
    batch_request=batch_request,
    data_context=context,
    expectation_suite_name=None,
    expectation_suite=None,
    component_name="onboarding_data_assistant",
    persist=False,
)

data_assistant: DataAssistant = OnboardingDataAssistant(
    name="test_onboarding_data_assistant",
    validator=validator,
)
data_assistant_result: DataAssistantResult = data_assistant.run()
"""

    implicit_invocation_code: str = """
data_assistant_result: DataAssistantResult = context.assistants.onboarding.run(batch_request=batch_request)
"""

    notebook_code: str
    if implicit:
        notebook_code = notebook_code_initialization + implicit_invocation_code
    else:
        notebook_code = notebook_code_initialization + explicit_instantiation_code

    nb = nbformat.v4.new_notebook()
    nb["cells"] = []
    nb["cells"].append(nbformat.v4.new_code_cell(notebook_code))
    nb["cells"].append(nbformat.v4.new_code_cell(new_cell))

    # Write notebook to path, then load it back as a NotebookNode.  A distinct
    # name avoids rebinding (and re-annotating) `nb` mid-function.
    with open(notebook_path, "w") as f:
        nbformat.write(nb, f)

    loaded_nb: nbformat.notebooknode.NotebookNode = load_notebook_from_path(
        notebook_path=notebook_path
    )

    # Run notebook; CellExecutionError propagates if any cell fails.
    ep: nbconvert.preprocessors.ExecutePreprocessor = (
        nbconvert.preprocessors.ExecutePreprocessor(timeout=180, kernel_name="python3")
    )
    ep.preprocess(loaded_nb, {"metadata": {"path": root_dir}})