def test_init_on_existing_project_with_datasource_with_existing_suite_offer_to_build_docs_answer_yes(
    mock_webbrowser,
    caplog,
    initialized_sqlite_project,
):
    """Re-running ``init`` on a complete project offers to build Data Docs;
    accepting the defaults builds them and opens the local site index once."""
    project_dir = initialized_sqlite_project

    cli_runner = CliRunner(mix_stderr=False)
    with pytest.warns(
        UserWarning, match="Warning. An existing `great_expectations.yml` was found"
    ):
        result = cli_runner.invoke(
            cli,
            ["init", "-d", project_dir],
            input="\n\n",
            catch_exceptions=False,
        )
    output = result.stdout

    assert result.exit_code == 0

    # The browser should have been opened exactly once, at the docs index page.
    assert mock_webbrowser.call_count == 1
    expected_index = (
        "{}/great_expectations/uncommitted/data_docs/local_site/index.html".format(
            project_dir
        )
    )
    assert expected_index in mock_webbrowser.call_args[0][0]

    assert "Error: invalid input" not in output
    assert "Always know what to expect from your data" in output
    assert "This looks like an existing project that" in output
    assert "appears complete" in output
    assert "Would you like to build & view this project's Data Docs" in output

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_project_upgrade_already_up_to_date(v10_project_directory, caplog):
    """``project upgrade`` on a project already at config_version 2 should
    report that no further upgrade is necessary."""
    # Overwrite the project config with the v2 fixture yml so the project is
    # already current before the upgrade command runs.
    v2_fixture = file_relative_path(
        __file__,
        "../../../test_fixtures/upgrade_helper/great_expectations_v2.yml",
    )
    shutil.copy(
        v2_fixture,
        os.path.join(v10_project_directory, "great_expectations.yml"),
    )

    cli_runner = CliRunner(mix_stderr=False)
    result = cli_runner.invoke(
        cli,
        ["project", "upgrade", "-d", v10_project_directory],
        input="\n",
        catch_exceptions=False,
    )
    output = result.stdout

    assert "Checking project..." in output
    assert "Your project is up-to-date - no further upgrade is necessary." in output
    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
        allowed_deprecation_message=VALIDATION_OPERATORS_DEPRECATION_MESSAGE,
    )
def test_cli_datasorce_new(caplog, empty_data_context, filesystem_csv_2):
    """``datasource new`` interactively adds a Pandas datasource and persists
    it in great_expectations.yml.

    NOTE(review): the "datasorce" typo is in the long-standing test name;
    renaming it would change the test id, so it is kept as-is.
    """
    project_root_dir = empty_data_context.root_directory
    context = DataContext(project_root_dir)
    assert context.list_datasources() == []

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["datasource", "new", "-d", project_root_dir],
        input="1\n1\n%s\nmynewsource\n" % str(filesystem_csv_2),
        catch_exceptions=False,
    )
    stdout = result.stdout

    assert "What data would you like Great Expectations to connect to?" in stdout
    assert "What are you processing your files with?" in stdout
    assert "Give your new Datasource a short name." in stdout
    assert "A new datasource 'mynewsource' was added to your project." in stdout

    assert result.exit_code == 0

    config_path = os.path.join(project_root_dir, DataContext.GE_YML)
    # Fix: use a context manager so the config file handle is closed instead of
    # leaked (original passed a bare open() into yaml.load).
    with open(config_path) as config_file:
        config = yaml.load(config_file)
    datasources = config["datasources"]
    assert "mynewsource" in datasources.keys()
    data_source_class = datasources["mynewsource"]["data_asset_type"]["class_name"]
    assert data_source_class == "PandasDataset"

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_checkpoint_list_with_no_checkpoints_with_ge_config_v2(
    mock_emit, caplog, empty_data_context_stats_enabled
):
    """``checkpoint list`` on a project with no checkpoints should say so and
    emit exactly the expected usage-statistics events."""
    context = empty_data_context_stats_enabled
    root_dir = context.root_directory

    cli_runner = CliRunner(mix_stderr=False)
    result = cli_runner.invoke(
        cli,
        f"checkpoint list -d {root_dir}",
        catch_exceptions=False,
    )
    output = result.stdout

    assert result.exit_code == 0
    assert "No checkpoints found." in output
    assert "Use the command `great_expectations checkpoint new` to create one" in output

    # One event for DataContext construction, one for the CLI command itself.
    assert mock_emit.call_count == 2
    assert mock_emit.call_args_list == [
        mock.call(
            {"event_payload": {}, "event": "data_context.__init__", "success": True}
        ),
        mock.call(
            {
                "event": "cli.checkpoint.list",
                "event_payload": {"api_version": "v2"},
                "success": True,
            }
        ),
    ]
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_validation_operator_run_noninteractive_validation_config_file_does_not_exist(
    caplog,
    data_context_parameterized_expectation_suite_no_checkpoint_store,
    filesystem_csv_2,
):
    """
    Non-interactive mode: pointing --validation_config_file at a path that
    does not exist must fail with a clear error message.
    """
    project_context = data_context_parameterized_expectation_suite_no_checkpoint_store
    root_dir = project_context.root_directory
    os.mkdir(os.path.join(root_dir, "uncommitted"))
    # This path is never created, so the CLI cannot load it.
    missing_config_path = os.path.join(
        root_dir, "uncommitted", "validation_config_1.json"
    )

    cli_runner = CliRunner(mix_stderr=False)
    result = cli_runner.invoke(
        cli,
        [
            "validation-operator",
            "run",
            "-d",
            root_dir,
            "--validation_config_file",
            missing_config_path,
        ],
        catch_exceptions=False,
    )
    output = result.stdout
    assert "Failed to process the --validation_config_file argument" in output
    assert result.exit_code == 1
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_validation_operator_run_interactive_golden_path(
    caplog, data_context_simple_expectation_suite, filesystem_csv_2
):
    """
    Interactive golden path: an existing suite name, an existing validation
    operator name, and an existing file supplied at the prompt. Validation
    runs (and fails on the sample data), so the exit code is 1.
    """
    root_dir = data_context_simple_expectation_suite.root_directory
    os.mkdir(os.path.join(root_dir, "uncommitted"))

    cli_runner = CliRunner(mix_stderr=False)
    data_file = os.path.join(filesystem_csv_2, "f1.csv")
    result = cli_runner.invoke(
        cli,
        [
            "validation-operator",
            "run",
            "-d",
            root_dir,
            "--name",
            "default",
            "--suite",
            "default",
        ],
        input=f"{data_file}\n",
        catch_exceptions=False,
    )
    output = result.stdout
    assert "Validation failed" in output
    assert result.exit_code == 1
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_cli_datasource_profile_answering_no(
    empty_data_context, titanic_sqlite_db, caplog
):
    """
    ``datasource profile`` without extra arguments must prompt the user with a
    confirm (y/n) before profiling, and must respect the user answering "no".
    """
    project_root_dir = empty_data_context.root_directory
    context = DataContext(project_root_dir)
    datasource_name = "wow_a_datasource"
    context = _add_datasource_and_credentials_to_context(
        context, datasource_name, titanic_sqlite_db
    )

    cli_runner = CliRunner(mix_stderr=False)
    result = cli_runner.invoke(
        cli,
        [
            "datasource",
            "profile",
            datasource_name,
            "-d",
            project_root_dir,
            "--no-view",
        ],
        input="n\n",
        catch_exceptions=False,
    )
    output = result.output

    assert result.exit_code == 0
    assert "Profiling 'wow_a_datasource'" in output
    assert "Skipping profiling for now." in output
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_cli_datasource_profile_on_empty_database(
    empty_data_context, empty_sqlite_db, caplog
):
    """
    Profiling a datasource backed by an empty database (no tables) must exit
    with an error: no batch kwargs generator can list any data assets.
    """
    project_root_dir = empty_data_context.root_directory
    context = DataContext(project_root_dir)
    datasource_name = "wow_a_datasource"
    context = _add_datasource_and_credentials_to_context(
        context, datasource_name, empty_sqlite_db
    )

    cli_runner = CliRunner(mix_stderr=False)
    result = cli_runner.invoke(
        cli,
        [
            "datasource",
            "profile",
            datasource_name,
            "-d",
            project_root_dir,
            "--no-view",
        ],
        input="n\n",
        catch_exceptions=False,
    )
    output = result.output

    assert result.exit_code == 1
    assert "Profiling 'wow_a_datasource'" in output
    assert "No batch kwargs generators can list available data assets" in output
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_validation_operator_list_with_one_validation_operator(
    caplog, filesystem_csv_data_context_with_validation_operators
):
    """``validation-operator list`` should print the single configured
    operator (name, class, and action list) with colorized output."""
    project_dir = filesystem_csv_data_context_with_validation_operators.root_directory
    runner = CliRunner(mix_stderr=False)

    # NOTE(review): this literal contains raw ANSI escape sequences (e.g.
    # ESC[33m / ESC[0m) captured from the CLI's colorized output — the exact
    # bytes and line breaks must match the command output verbatim.
    expected_result = """[33mHeads up! This feature is Experimental. It may change. Please give us your feedback![0m[0m
1 Validation Operator found:[0m
[0m
 - [36mname:[0m action_list_operator[0m
   [36mclass_name:[0m ActionListValidationOperator[0m
   [36maction_list:[0m store_validation_result (StoreValidationResultAction) => store_evaluation_params (StoreEvaluationParametersAction) => update_data_docs (UpdateDataDocsAction)[0m"""

    result = runner.invoke(
        cli,
        "validation-operator list -d {}".format(project_dir),
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    # _capture_ansi_codes_to_file(result)
    assert result.output.strip() == expected_result

    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
        allowed_deprecation_message=VALIDATION_OPERATORS_DEPRECATION_MESSAGE,
    )
def test_validation_operator_run_interactive_pass_non_existing_operator_name(
    caplog,
    data_context_parameterized_expectation_suite_no_checkpoint_store,
    filesystem_csv_2,
):
    """
    Interactive mode: an existing suite but a non-existent validation operator
    name must produce an error and a non-zero exit code.
    """
    project_context = data_context_parameterized_expectation_suite_no_checkpoint_store
    root_dir = project_context.root_directory
    os.mkdir(os.path.join(root_dir, "uncommitted"))

    cli_runner = CliRunner(mix_stderr=False)
    data_file = os.path.join(filesystem_csv_2, "f1.csv")
    result = cli_runner.invoke(
        cli,
        [
            "validation-operator",
            "run",
            "-d",
            root_dir,
            "--name",
            "this_val_op_does_not_exist",
            "--suite",
            "my_dag_node.default",
        ],
        input=f"{data_file}\n",
        catch_exceptions=False,
    )
    output = result.stdout
    assert "Could not find a validation operator" in output
    assert result.exit_code == 1
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_cli_init_on_complete_existing_project_all_uncommitted_dirs_exist(
    mock_webbrowser,
    caplog,
    tmp_path_factory,
):
    """
    This test walks through the onboarding experience.

    The user just checked an existing project out of source control and does
    not yet have an uncommitted directory.
    """
    root_dir = str(tmp_path_factory.mktemp("hiya"))
    os.makedirs(os.path.join(root_dir, "data"))
    data_folder_path = os.path.join(root_dir, "data")
    data_path = os.path.join(root_dir, "data", "Titanic.csv")
    fixture_path = file_relative_path(
        __file__, os.path.join("..", "..", "test_sets", "Titanic.csv")
    )
    shutil.copy(fixture_path, data_path)

    # Create a new project from scratch that we will use for the test in the next step
    runner = CliRunner(mix_stderr=False)
    # Fix: `catch_exceptions=False` was previously (mis)placed inside the
    # .format(...) call, where str.format silently ignored it — so exceptions
    # were being caught by the runner instead of propagating.
    result = runner.invoke(
        cli,
        ["init", "-d", root_dir],
        input="\n\n1\n1\n{}\n\n\n\n2\n{}\n\n\n\n".format(data_folder_path, data_path),
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 1
    assert (
        "{}/great_expectations/uncommitted/data_docs/local_site/validations/Titanic/warning/".format(
            root_dir
        )
        in mock_webbrowser.call_args[0][0]
    )

    # Now the test begins - rerun the init on an existing project
    runner = CliRunner(mix_stderr=False)
    with pytest.warns(
        UserWarning, match="Warning. An existing `great_expectations.yml` was found"
    ):
        result = runner.invoke(
            cli, ["init", "-d", root_dir], input="n\n", catch_exceptions=False
        )
    stdout = result.stdout

    # The webbrowser should not have been opened a second time.
    assert mock_webbrowser.call_count == 1
    assert result.exit_code == 0
    assert "This looks like an existing project that" in stdout
    assert "appears complete" in stdout
    assert "ready to roll" in stdout
    assert "Would you like to build & view this project's Data Docs" in stdout

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_checkpoint_run_on_non_existent_validation_operator_with_ge_config_v2(
    mock_emit, caplog, titanic_data_context_stats_enabled_config_version_2
):
    """Running a checkpoint whose validation_operator_name does not exist in
    the project must fail with a helpful message and emit failure events."""
    context = titanic_data_context_stats_enabled_config_version_2
    root_dir = context.root_directory
    csv_path = os.path.join(root_dir, "..", "data", "Titanic.csv")

    suite = context.create_expectation_suite("iceberg")
    context.save_expectation_suite(suite)
    assert context.list_expectation_suite_names() == ["iceberg"]
    mock_emit.reset_mock()

    # Write a checkpoint that references a validation operator ("foo") that is
    # not configured anywhere in the project.
    checkpoint_file_path = os.path.join(
        context.root_directory,
        DataContextConfigDefaults.CHECKPOINTS_BASE_DIRECTORY.value,
        "bad_operator.yml",
    )
    bad_checkpoint = {
        "validation_operator_name": "foo",
        "batches": [
            {
                "batch_kwargs": {
                    "path": csv_path,
                    "datasource": "mydatasource",
                    "reader_method": "read_csv",
                },
                "expectation_suite_names": ["iceberg"],
            },
        ],
    }
    _write_checkpoint_dict_to_file(bad_checkpoint, checkpoint_file_path)

    cli_runner = CliRunner(mix_stderr=False)
    result = cli_runner.invoke(
        cli,
        f"checkpoint run bad_operator -d {root_dir}",
        catch_exceptions=False,
    )
    output = result.stdout

    assert result.exit_code == 1
    assert (
        f"No validation operator `foo` was found in your project. Please verify this in your great_expectations.yml"
        in output
    )

    usage_emits = mock_emit.call_args_list
    assert mock_emit.call_count == 3
    # Context init succeeds; the checkpoint run and CLI command both fail.
    assert usage_emits[0][0][0]["success"] is True
    assert usage_emits[1][0][0]["success"] is False
    assert usage_emits[2][0][0]["success"] is False

    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
        allowed_deprecation_message=LEGACY_CONFIG_DEFAULT_CHECKPOINT_STORE_MESSAGE,
    )
def initialized_sqlite_project(
    mock_webbrowser, caplog, tmp_path_factory, titanic_sqlite_db_file, sa
):
    """This is an initialized project through the CLI."""
    project_dir = str(tmp_path_factory.mktemp("my_rad_project"))

    engine = sa.create_engine(
        "sqlite:///{}".format(titanic_sqlite_db_file), pool_recycle=3600
    )
    inspector = sa.inspect(engine)

    # Discover the default schema and its first table so the init prompts can
    # be answered programmatically.
    default_schema = inspector.get_schema_names()[0]
    default_table = [
        table_name
        for table_name in inspector.get_table_names(schema=default_schema)
    ][0]

    cli_runner = CliRunner(mix_stderr=False)
    result = cli_runner.invoke(
        cli,
        ["init", "-d", project_dir],
        input="\n\n2\n6\ntitanic\n{url}\n\n\n1\n{schema}\n{table}\nwarning\n\n\n\n".format(
            url=engine.url, schema=default_schema, table=default_table
        ),
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 1
    assert (
        "{}/great_expectations/uncommitted/data_docs/local_site/validations/warning/".format(
            project_dir
        )
        in mock_webbrowser.call_args[0][0]
    )
    assert_no_logging_messages_or_tracebacks(caplog, result)

    # The initialized context must contain exactly the one SQL datasource.
    context = DataContext(os.path.join(project_dir, DataContext.GE_DIR))
    assert isinstance(context, DataContext)
    assert len(context.list_datasources()) == 1
    assert context.list_datasources() == [
        {
            "class_name": "SqlAlchemyDatasource",
            "name": "titanic",
            "module_name": "great_expectations.datasource",
            "credentials": {"url": str(engine.url)},
            "data_asset_type": {
                "class_name": "SqlAlchemyDataset",
                "module_name": "great_expectations.dataset",
            },
        }
    ]
    return project_dir
def test_cli_init_on_new_project_with_broken_excel_file_without_trying_again(
    caplog, tmp_path_factory
):
    """``init`` pointed at a corrupt Excel file should report the load error,
    offer a retry, and save setup progress when the user declines."""
    project_dir = str(tmp_path_factory.mktemp("test_cli_init_diff"))
    os.makedirs(os.path.join(project_dir, "data"))
    data_folder_path = os.path.join(project_dir, "data")
    data_path = os.path.join(project_dir, "data", "broken_excel_file.xls")
    fixture_path = file_relative_path(__file__, "../../test_sets/broken_excel_file.xls")
    shutil.copy(fixture_path, data_path)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["init", "-d", project_dir],
        input=f"\n\n1\n1\n{data_folder_path}\n\n\n\n2\n{data_path}\nn\n",
        catch_exceptions=False,
    )
    stdout = result.output

    assert len(stdout) < 6000, "CLI output is unreasonably long."

    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert "What are you processing your files with" in stdout
    assert (
        "Enter the path of a data file (relative or absolute, s3a:// and gs:// paths are ok too)"
        in stdout
    )
    assert "Cannot load file." in stdout
    assert (
        "- Please check the file and try again or select a different data file."
        in stdout
    )
    # The exact error wording depends on the pandas/xlrd version in use.
    assert (
        "- Error: Excel file format cannot be determined, you must specify an engine manually."
        in stdout
    ) or (
        "Error: Unsupported format, or corrupt file: Expected BOF record; found b'PRODUCTI'"
        in stdout
    )
    assert "Try again? [Y/n]:" in stdout
    assert (
        "We have saved your setup progress. When you are ready, run great_expectations init to continue."
        in stdout
    )

    assert os.path.isdir(os.path.join(project_dir, "great_expectations"))
    config_path = os.path.join(project_dir, "great_expectations/great_expectations.yml")
    assert os.path.isfile(config_path)

    # Fix: use a context manager so the config file handle is closed instead of
    # leaked (original passed a bare open() into yaml.load).
    with open(config_path) as config_file:
        config = yaml.load(config_file)
    data_source_class = config["datasources"]["data__dir"]["data_asset_type"][
        "class_name"
    ]
    assert data_source_class == "PandasDataset"

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_checkpoint_script_happy_path_executable_failed_validation_with_ge_config_v2(
    caplog, titanic_data_context_v2_with_checkpoint_suite_and_stats_enabled
):
    """
    We call the "checkpoint script" command on a project with a checkpoint.

    The command should:
    - create the script (note output is tested in other tests)

    When run the script should:
    - execute
    - return a 1 status code
    - print a failure message
    """
    context = titanic_data_context_v2_with_checkpoint_suite_and_stats_enabled
    root_dir = context.root_directory

    # Mangle the csv so the generated script's validation run fails.
    csv_path = os.path.join(context.root_directory, "..", "data", "Titanic.csv")
    with open(csv_path, "w") as f:
        f.write("foo,bar\n1,2\n")

    cli_runner = CliRunner(mix_stderr=False)
    result = cli_runner.invoke(
        cli,
        f"checkpoint script my_checkpoint -d {root_dir}",
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
        allowed_deprecation_message=LEGACY_CONFIG_DEFAULT_CHECKPOINT_STORE_MESSAGE,
    )

    script_path = os.path.abspath(
        os.path.join(root_dir, context.GE_UNCOMMITTED_DIR, "run_my_checkpoint.py")
    )
    assert os.path.isfile(script_path)

    # In travis on osx, python may not execute from the build dir
    cmdstring = f"python {script_path}"
    if os.environ.get("TRAVIS_OS_NAME") == "osx":
        build_dir = os.environ.get("TRAVIS_BUILD_DIR")
        print(os.listdir(build_dir))
        cmdstring = f"python3 {script_path}"
    print("about to run: " + cmdstring)
    print(os.curdir)
    print(os.listdir(os.curdir))
    print(os.listdir(os.path.abspath(os.path.join(root_dir, ".."))))

    status, output = subprocess.getstatusoutput(cmdstring)
    print(f"\n\nScript exited with code: {status} and output:\n{output}")
    assert status == 1
    assert "Validation failed!" in output
def test_checkpoint_new_specify_datasource_with_ge_config_v2(
    mock_emit,
    caplog,
    titanic_data_context_stats_enabled_config_version_2,
    titanic_expectation_suite,
):
    """``checkpoint new`` with an explicit --datasource should create the
    checkpoint yml and emit the expected usage events."""
    context = titanic_data_context_stats_enabled_config_version_2
    root_dir = context.root_directory
    assert context.list_checkpoints() == []
    context.save_expectation_suite(titanic_expectation_suite)
    assert context.list_expectation_suite_names() == ["Titanic.warning"]
    mock_emit.reset_mock()

    cli_runner = CliRunner(mix_stderr=False)
    result = cli_runner.invoke(
        cli,
        f"checkpoint new passengers Titanic.warning -d {root_dir} --datasource mydatasource",
        input="1\n1\n",
        catch_exceptions=False,
    )
    output = result.stdout

    assert result.exit_code == 0
    assert "A checkpoint named `passengers` was added to your project" in output

    assert mock_emit.call_count == 2
    assert mock_emit.call_args_list == [
        mock.call(
            {"event_payload": {}, "event": "data_context.__init__", "success": True}
        ),
        mock.call(
            {
                "event": "cli.checkpoint.new",
                "event_payload": {"api_version": "v2"},
                "success": True,
            }
        ),
    ]

    expected_checkpoint = os.path.join(
        root_dir,
        DataContextConfigDefaults.CHECKPOINTS_BASE_DIRECTORY.value,
        "passengers.yml",
    )
    assert os.path.isfile(expected_checkpoint)

    # Newup a context for additional assertions
    context = DataContext(root_dir)
    assert context.list_checkpoints() == ["passengers"]

    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
        allowed_deprecation_message=LEGACY_CONFIG_DEFAULT_CHECKPOINT_STORE_MESSAGE,
    )
def test_validation_operator_run_noninteractive_validation_config_file_does_is_misconfigured(
    caplog,
    data_context_parameterized_expectation_suite_no_checkpoint_store,
    filesystem_csv_2,
):
    """
    Non-interactive mode: --validation_config_file pointing at a misconfigured
    config (a batch without the expectation_suite_names attribute) must fail
    with a descriptive error.
    """
    project_context = data_context_parameterized_expectation_suite_no_checkpoint_store
    root_dir = project_context.root_directory
    os.mkdir(os.path.join(root_dir, "uncommitted"))

    data_file = os.path.join(filesystem_csv_2, "f1.csv")
    # The batch deliberately misses "expectation_suite_names".
    broken_config = {
        "validation_operator_name": "default",
        "batches": [
            {
                "batch_kwargs": {
                    "path": data_file,
                    "datasource": "mydatasource",
                    "reader_method": "read_csv",
                },
                "wrong_attribute_expectation_suite_names": ["my_dag_node.default1"],
            }
        ],
    }
    config_file_path = os.path.join(
        root_dir, "uncommitted", "validation_config_1.json"
    )
    with open(config_file_path, "w") as config_file:
        json.dump(broken_config, config_file)

    cli_runner = CliRunner(mix_stderr=False)
    result = cli_runner.invoke(
        cli,
        [
            "validation-operator",
            "run",
            "-d",
            root_dir,
            "--validation_config_file",
            config_file_path,
        ],
        catch_exceptions=False,
    )
    output = result.stdout
    assert (
        "is misconfigured: Each batch must have a list of expectation suite names"
        in output
    )
    assert result.exit_code == 1
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_project_check_on_missing_ge_dir_guides_user_to_fix(caplog, tmp_path_factory):
    """``project check-config`` run in a directory with no great_expectations
    folder must fail and tell the user what is missing."""
    project_dir = str(tmp_path_factory.mktemp("empty_dir"))

    cli_runner = CliRunner(mix_stderr=False)
    result = cli_runner.invoke(
        cli,
        ["project", "check-config", "-d", project_dir],
        catch_exceptions=False,
    )
    output = result.output

    assert "Checking your config files for validity" in output
    assert "Unfortunately, your config appears to be invalid" in output
    assert "Error: No great_expectations directory was found here!" in output
    assert result.exit_code == 1
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_project_check_on_valid_project_says_so(caplog, titanic_data_context):
    """``project check-config`` on a valid project must report success."""
    project_dir = titanic_data_context.root_directory

    cli_runner = CliRunner(mix_stderr=False)
    result = cli_runner.invoke(
        cli,
        ["project", "check-config", "-d", project_dir],
        catch_exceptions=False,
    )

    assert "Checking your config files for validity" in result.output
    assert "Your config file appears valid" in result.output
    assert result.exit_code == 0
    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
        allowed_deprecation_message=VALIDATION_OPERATORS_DEPRECATION_MESSAGE,
    )
def test_project_check_on_project_with_missing_config_file_guides_user(
    caplog, titanic_data_context
):
    """``project check-config`` must fail clearly when the yml config file has
    been removed from an otherwise valid project."""
    project_dir = titanic_data_context.root_directory
    # Remove the config file.
    os.remove(os.path.join(project_dir, "great_expectations.yml"))

    cli_runner = CliRunner(mix_stderr=False)
    result = cli_runner.invoke(
        cli,
        ["project", "check-config", "-d", project_dir],
        catch_exceptions=False,
    )

    assert result.exit_code == 1
    assert "Checking your config files for validity" in result.output
    assert "Unfortunately, your config appears to be invalid" in result.output
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_checkpoint_script_happy_path_generates_script_with_ge_config_v2(
    mock_emit, caplog, titanic_data_context_v2_with_checkpoint_suite_and_stats_enabled
):
    """``checkpoint script`` should generate the runner script, tell the user
    where it is, and emit the expected usage events."""
    context = titanic_data_context_v2_with_checkpoint_suite_and_stats_enabled
    root_dir = context.root_directory
    mock_emit.reset_mock()

    cli_runner = CliRunner(mix_stderr=False)
    result = cli_runner.invoke(
        cli,
        f"checkpoint script my_checkpoint -d {root_dir}",
        catch_exceptions=False,
    )
    output = result.stdout

    assert result.exit_code == 0
    assert (
        "A python script was created that runs the checkpoint named: `my_checkpoint`"
        in output
    )
    assert (
        "The script is located in `great_expectations/uncommitted/run_my_checkpoint.py`"
        in output
    )
    assert (
        "The script can be run with `python great_expectations/uncommitted/run_my_checkpoint.py`"
        in output
    )

    assert mock_emit.call_count == 2
    assert mock_emit.call_args_list == [
        mock.call(
            {"event_payload": {}, "event": "data_context.__init__", "success": True}
        ),
        mock.call(
            {
                "event": "cli.checkpoint.script",
                "event_payload": {"api_version": "v2"},
                "success": True,
            }
        ),
    ]

    generated_script = os.path.join(
        root_dir, context.GE_UNCOMMITTED_DIR, "run_my_checkpoint.py"
    )
    assert os.path.isfile(generated_script)

    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
        allowed_deprecation_message=LEGACY_CONFIG_DEFAULT_CHECKPOINT_STORE_MESSAGE,
    )
def test_checkpoint_script_raises_error_if_python_file_exists_with_ge_config_v2(
    mock_emit, caplog, titanic_data_context_v2_with_checkpoint_suite_and_stats_enabled
):
    """``checkpoint script`` must refuse to overwrite an existing script and
    leave the original file contents untouched."""
    context = titanic_data_context_v2_with_checkpoint_suite_and_stats_enabled
    root_dir = context.root_directory
    assert context.list_checkpoints() == ["my_checkpoint"]

    # Pre-create the script the command would otherwise generate.
    script_path = os.path.join(
        root_dir, context.GE_UNCOMMITTED_DIR, "run_my_checkpoint.py"
    )
    with open(script_path, "w") as f:
        f.write("script here")
    assert os.path.isfile(script_path)
    mock_emit.reset_mock()

    cli_runner = CliRunner(mix_stderr=False)
    result = cli_runner.invoke(
        cli,
        f"checkpoint script my_checkpoint -d {root_dir}",
        catch_exceptions=False,
    )
    output = result.stdout

    assert (
        "Warning! A script named run_my_checkpoint.py already exists and this command will not overwrite it."
        in output
    )
    assert result.exit_code == 1

    assert mock_emit.call_count == 2
    assert mock_emit.call_args_list == [
        mock.call(
            {"event_payload": {}, "event": "data_context.__init__", "success": True}
        ),
        mock.call(
            {
                "event": "cli.checkpoint.script",
                "event_payload": {"api_version": "v2"},
                "success": False,
            }
        ),
    ]

    # assert the script has original contents
    with open(script_path) as f:
        assert f.read() == "script here"

    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
        allowed_deprecation_message=LEGACY_CONFIG_DEFAULT_CHECKPOINT_STORE_MESSAGE,
    )
def test_checkpoint_run_happy_path_with_failed_validation_with_ge_config_v2(
    mock_emit, caplog, titanic_data_context_v2_with_checkpoint_suite_and_stats_enabled
):
    """``checkpoint run`` against mangled data should report the validation
    failure, exit 1, and emit the expected usage events."""
    context = titanic_data_context_v2_with_checkpoint_suite_and_stats_enabled
    root_dir = context.root_directory

    # Mangle the csv so the checkpoint's validation fails.
    csv_path = os.path.join(context.root_directory, "..", "data", "Titanic.csv")
    with open(csv_path, "w") as f:
        f.write("foo,bar\n1,2\n")
    mock_emit.reset_mock()

    cli_runner = CliRunner(mix_stderr=False)
    result = cli_runner.invoke(
        cli,
        f"checkpoint run my_checkpoint -d {root_dir}",
        catch_exceptions=False,
    )
    output = result.stdout
    print(output)

    assert result.exit_code == 1
    assert "Validation failed!" in output

    assert mock_emit.call_count == 5
    usage_emits = mock_emit.call_args_list
    assert usage_emits[0] == mock.call(
        {"event_payload": {}, "event": "data_context.__init__", "success": True}
    )
    assert usage_emits[1][0][0]["event"] == "data_asset.validate"
    assert usage_emits[1][0][0]["success"] is True
    assert usage_emits[2][0][0]["event"] == "data_context.build_data_docs"
    assert usage_emits[2][0][0]["success"] is True
    # NOTE(review): usage_emits[3] is deliberately not asserted here — only
    # the final CLI event is pinned exactly.
    assert usage_emits[4] == mock.call(
        {
            "event": "cli.checkpoint.run",
            "event_payload": {"api_version": "v2"},
            "success": True,
        }
    )

    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
        allowed_deprecation_message=LEGACY_CONFIG_DEFAULT_CHECKPOINT_STORE_MESSAGE,
    )
def test_cli_datasource_profile_with_invalid_data_asset_arg_answering_no(
    caplog, empty_data_context, filesystem_csv_2
):
    """Profiling with an unknown --data-assets value must warn about the
    missing asset, ask how to proceed, and skip when the user declines."""
    empty_data_context.add_datasource(
        "my_datasource",
        module_name="great_expectations.datasource",
        class_name="PandasDatasource",
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": str(filesystem_csv_2),
            }
        },
    )
    project_root_dir = empty_data_context.root_directory

    cli_runner = CliRunner(mix_stderr=False)
    result = cli_runner.invoke(
        cli,
        [
            "datasource",
            "profile",
            "my_datasource",
            "--data-assets",
            "bad-bad-asset",
            "-d",
            project_root_dir,
            "--no-view",
        ],
        input="2\n",
        catch_exceptions=False,
    )
    output = result.stdout

    assert (
        "Some of the data assets you specified were not found: bad-bad-asset"
        in output
    )
    assert "Choose how to proceed" in output
    assert "Skipping profiling for now." in output

    # No expectation suites should have been created.
    context = DataContext(project_root_dir)
    assert len(context.list_datasources()) == 1
    expectations_store = context.stores["expectations_store"]
    suites = expectations_store.list_keys()
    assert len(suites) == 0

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_checkpoint_new_raises_error_if_checkpoints_directory_is_missing_with_ge_config_v2(
    mock_emit,
    caplog,
    titanic_data_context_stats_enabled_config_version_2,
    titanic_expectation_suite,
):
    """``checkpoint new`` must fail with a clear message when the checkpoints
    directory has been deleted from a legacy (v2) project."""
    context = titanic_data_context_stats_enabled_config_version_2
    root_dir = context.root_directory

    # Remove the checkpoints directory entirely.
    checkpoints_dir = os.path.join(
        root_dir, DataContextConfigDefaults.CHECKPOINTS_BASE_DIRECTORY.value
    )
    shutil.rmtree(checkpoints_dir)
    assert not os.path.isdir(checkpoints_dir)

    context.save_expectation_suite(titanic_expectation_suite)
    assert context.list_expectation_suite_names() == ["Titanic.warning"]
    mock_emit.reset_mock()

    cli_runner = CliRunner(mix_stderr=False)
    result = cli_runner.invoke(
        cli,
        f"checkpoint new passengers Titanic.warning -d {root_dir}",
        input="1\n1\n",
        catch_exceptions=False,
    )
    output = result.stdout

    assert result.exit_code == 1
    assert (
        'Attempted to access the "checkpoint_store_name" field with a legacy config version (2.0) and no `checkpoints` directory.'
        in output
    )

    assert mock_emit.call_count == 2
    assert mock_emit.call_args_list == [
        mock.call(
            {"event_payload": {}, "event": "data_context.__init__", "success": True}
        ),
        mock.call(
            {
                "event": "cli.checkpoint.new",
                "event_payload": {"api_version": "v2"},
                "success": False,
            }
        ),
    ]
    assert_no_logging_messages_or_tracebacks(
        my_caplog=caplog,
        click_result=result,
    )
def test_init_on_existing_project_with_datasource_with_no_suite_create_one(
    mock_browser,
    caplog,
    initialized_project,
):
    """Re-running ``init`` on a project whose suites/docs/validations were
    wiped should walk the user through creating a new example suite."""
    project_dir = initialized_project
    ge_dir = os.path.join(project_dir, DataContext.GE_DIR)
    uncommitted_dir = os.path.join(ge_dir, "uncommitted")
    data_folder_path = os.path.join(project_dir, "data")
    data_path = os.path.join(project_dir, "data", "Titanic.csv")

    # Mangle the setup to remove all traces of any suite.
    for subdir in (
        os.path.join(ge_dir, "expectations"),
        os.path.join(uncommitted_dir, "data_docs"),
        os.path.join(uncommitted_dir, "validations"),
    ):
        _delete_and_recreate_dir(subdir)

    context = DataContext(ge_dir)
    assert context.list_expectation_suites() == []

    cli_runner = CliRunner(mix_stderr=False)
    with pytest.warns(
        UserWarning, match="Warning. An existing `great_expectations.yml` was found"
    ):
        result = cli_runner.invoke(
            cli,
            ["init", "-d", project_dir],
            input=f"\n2\n{data_path}\nsink_me\n\n\n",
            catch_exceptions=False,
        )
    output = result.stdout

    assert result.exit_code == 0
    assert mock_browser.call_count == 1

    assert "Error: invalid input" not in output
    assert "Always know what to expect from your data" in output
    assert (
        "Enter the path of a data file (relative or absolute, s3a:// and gs:// paths are ok too)"
        in output
    )
    assert "Generating example Expectation Suite..." in output
    assert "The following Data Docs sites will be built" in output
    assert "Great Expectations is now set up" in output

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_cli_datasource_list(empty_data_context, empty_sqlite_db, caplog):
    """Test an empty project and after adding a single datasource."""
    project_root_dir = empty_data_context.root_directory
    context = DataContext(project_root_dir)

    # First: no datasources configured yet.
    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli, ["datasource", "list", "-d", project_root_dir], catch_exceptions=False
    )

    stdout = result.stdout.strip()
    assert "No Datasources found" in stdout
    assert context.list_datasources() == []

    # Then: add one SQLAlchemy datasource and list again.
    datasource_name = "wow_a_datasource"
    _add_datasource_and_credentials_to_context(
        context, datasource_name, empty_sqlite_db
    )

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli, ["datasource", "list", "-d", project_root_dir], catch_exceptions=False
    )
    url = str(empty_sqlite_db.engine.url)

    # NOTE(review): this literal contains raw ANSI escape sequences (e.g.
    # ESC[36m / ESC[0m) captured from the CLI's colorized output — the exact
    # bytes and line breaks must match the command output verbatim.
    expected_output = """\
1 Datasource found:[0m
[0m
 - [36mname:[0m wow_a_datasource[0m
   [36mmodule_name:[0m great_expectations.datasource[0m
   [36mclass_name:[0m SqlAlchemyDatasource[0m
   [36mbatch_kwargs_generators:[0m[0m
     [36mdefault:[0m[0m
       [36mclass_name:[0m TableBatchKwargsGenerator[0m
   [36mcredentials:[0m[0m
     [36murl:[0m {}[0m
   [36mdata_asset_type:[0m[0m
     [36mclass_name:[0m SqlAlchemyDataset[0m
     [36mmodule_name:[0m None[0m
""".format(
        url
    ).strip()
    stdout = result.stdout.strip()

    assert stdout == expected_output
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_validation_operator_run_noninteractive_golden_path(
    caplog, data_context_simple_expectation_suite, filesystem_csv_2
):
    """
    Non-interactive golden path: --validation_config_file points at a valid
    validation config file. The run validates (and fails on the sample data).
    """
    root_dir = data_context_simple_expectation_suite.root_directory
    os.mkdir(os.path.join(root_dir, "uncommitted"))

    data_file = os.path.join(filesystem_csv_2, "f1.csv")
    validation_config = {
        "validation_operator_name": "default",
        "batches": [
            {
                "batch_kwargs": {
                    "path": data_file,
                    "datasource": "mydatasource",
                    "reader_method": "read_csv",
                },
                "expectation_suite_names": ["default"],
            }
        ],
    }
    config_file_path = os.path.join(
        root_dir, "uncommitted", "validation_config_1.json"
    )
    with open(config_file_path, "w") as config_file:
        json.dump(validation_config, config_file)

    cli_runner = CliRunner(mix_stderr=False)
    result = cli_runner.invoke(
        cli,
        [
            "validation-operator",
            "run",
            "-d",
            root_dir,
            "--validation_config_file",
            config_file_path,
        ],
        catch_exceptions=False,
    )
    output = result.stdout
    assert "Validation failed" in output
    assert result.exit_code == 1
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_store_list_with_four_stores(caplog, empty_data_context):
    """``store list`` on a default project should print all configured stores.

    NOTE(review): the test name says "four" but the default project now has
    five stores (the profiler_store was added later); the name was not updated.
    """
    project_dir = empty_data_context.root_directory
    runner = CliRunner(mix_stderr=False)

    # NOTE(review): this literal contains raw ANSI escape sequences (e.g.
    # ESC[36m / ESC[0m) captured from the CLI's colorized output — the exact
    # bytes and line breaks must match the command output verbatim.
    expected_result = """\
5 Stores found:[0m
[0m
 - [36mname:[0m expectations_store[0m
   [36mclass_name:[0m ExpectationsStore[0m
   [36mstore_backend:[0m[0m
     [36mclass_name:[0m TupleFilesystemStoreBackend[0m
     [36mbase_directory:[0m expectations/[0m
[0m
 - [36mname:[0m validations_store[0m
   [36mclass_name:[0m ValidationsStore[0m
   [36mstore_backend:[0m[0m
     [36mclass_name:[0m TupleFilesystemStoreBackend[0m
     [36mbase_directory:[0m uncommitted/validations/[0m
[0m
 - [36mname:[0m evaluation_parameter_store[0m
   [36mclass_name:[0m EvaluationParameterStore[0m
[0m
 - [36mname:[0m checkpoint_store[0m
   [36mclass_name:[0m CheckpointStore[0m
   [36mstore_backend:[0m[0m
     [36mclass_name:[0m TupleFilesystemStoreBackend[0m
     [36mbase_directory:[0m checkpoints/[0m
     [36msuppress_store_backend_id:[0m True[0m
[0m
 - [36mname:[0m profiler_store[0m
   [36mclass_name:[0m ProfilerStore[0m
   [36mstore_backend:[0m[0m
     [36mclass_name:[0m TupleFilesystemStoreBackend[0m
     [36mbase_directory:[0m profilers/[0m
     [36msuppress_store_backend_id:[0m True[0m"""
    result = runner.invoke(
        cli,
        f"store list -d {project_dir}",
        catch_exceptions=False,
    )
    print(result.output)
    assert result.exit_code == 0
    assert result.output.strip() == expected_result

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_store_list_with_zero_stores(caplog, empty_data_context):
    """``store list`` on a project whose config has an empty ``stores`` block
    must fail config validation with a yml error message."""
    project_dir = empty_data_context.root_directory

    # Strip all stores out of the saved project config.
    context = DataContext(project_dir)
    context._project_config.stores = {}
    context._save_project_config()

    cli_runner = CliRunner(mix_stderr=False)
    result = cli_runner.invoke(
        cli,
        "store list -d {}".format(project_dir),
        catch_exceptions=False,
    )

    assert result.exit_code == 1
    assert (
        "Your configuration file is not a valid yml file likely due to a yml syntax error"
        in result.output.strip()
    )
    assert_no_logging_messages_or_tracebacks(caplog, result)