def test_suite_edit_multiple_datasources_with_generator_with_no_additional_args_with_suite_without_citations(
    caplog, site_builder_data_context_with_html_store_titanic_random,
):
    """
    Here we verify that the "suite edit" command helps the user to specify the batch
    kwargs when it is called without the optional arguments that specify the batch.

    First, we call the "suite new" command to create the expectation suite our
    test will edit - this step is just a setup.

    We call the "suite edit" command without any optional arguments. This means
    that the command will help us specify the batch kwargs interactively.

    The data context has two datasources - we choose one of them. It has a
    generator configured. We choose to use the generator and select a generator
    asset from the list.
    """
    root_dir = site_builder_data_context_with_html_store_titanic_random.root_directory
    os.chdir(root_dir)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["suite", "new", "-d", root_dir, "--suite", "foo_suite", "--no-view"],
        input="2\n1\n1\n\n",
        catch_exceptions=False,
    )
    stdout = result.stdout
    assert result.exit_code == 0
    assert "A new Expectation suite 'foo_suite' was added to your project" in stdout

    # remove the citations from the suite
    context = DataContext(root_dir)
    suite = context.get_expectation_suite("foo_suite")
    suite.meta.pop("citations")
    context.save_expectation_suite(suite)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["suite", "edit", "foo_suite", "-d", root_dir, "--no-jupyter"],
        input="2\n1\n1\n\n",
        catch_exceptions=False,
    )

    assert result.exit_code == 0
    stdout = result.stdout
    assert "Select a datasource" in stdout
    assert "Which data would you like to use" in stdout
    assert "To continue editing this suite, run" in stdout

    expected_notebook_path = os.path.join(root_dir, "uncommitted", "foo_suite.ipynb")
    assert os.path.isfile(expected_notebook_path)

    expected_suite_path = os.path.join(root_dir, "expectations", "foo_suite.json")
    assert os.path.isfile(expected_suite_path)

    assert_no_logging_messages_or_tracebacks(caplog, result)
def load_expectation_suite(context: DataContext, suite_name: str) -> ExpectationSuite:
    """
    Load an expectation suite from a given context.

    Handles a suite name with or without `.json`
    """
    if suite_name.endswith(".json"):
        suite_name = suite_name[:-5]
    try:
        suite = context.get_expectation_suite(suite_name)
        return suite
    except ge_exceptions.DataContextError as e:
        cli_message(
            f"<red>Could not find a suite named `{suite_name}`.</red> Please check "
            "the name by running `great_expectations suite list` and try again."
        )
        logger.info(e)
        # TODO this should try to send a usage statistic failure
        sys.exit(1)
def load_expectation_suite(
    # TODO consolidate all the myriad CLI tests into this
    context: DataContext,
    suite_name: str,
    usage_event: str,
) -> ExpectationSuite:
    """
    Load an expectation suite from a given context.

    Handles a suite name with or without `.json`

    :param usage_event:
    """
    if suite_name.endswith(".json"):
        suite_name = suite_name[:-5]

    try:
        suite = context.get_expectation_suite(suite_name)
        return suite
    except ge_exceptions.DataContextError as e:
        exit_with_failure_message_and_stats(
            context,
            usage_event,
            f"<red>Could not find a suite named `{suite_name}`.</red> Please check "
            "the name by running `great_expectations suite list` and try again.",
        )
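# Usage sketch for the newer signature above. The context path and the
# usage_event string are hypothetical, invented for illustration only; a real
# caller would pass the event name of the CLI command that triggered the load.
context = DataContext("/path/to/great_expectations")
suite = load_expectation_suite(context, "my_suite.json", usage_event="cli.suite.edit")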
def test_suite_new_empty_with_no_jupyter(
    mock_webbroser,
    mock_subprocess,
    caplog,
    data_context_parameterized_expectation_suite,
    filesystem_csv_2,
):
    """
    Running "suite new --no-jupyter" should:
    - make an empty suite
    - NOT open jupyter
    - NOT open data docs
    """
    os.mkdir(
        os.path.join(
            data_context_parameterized_expectation_suite.root_directory, "uncommitted"
        )
    )
    root_dir = data_context_parameterized_expectation_suite.root_directory
    runner = CliRunner(mix_stderr=False)
    csv = os.path.join(filesystem_csv_2, "f1.csv")
    # TODO this test must be updated to remove the --empty flag in the next major release
    result = runner.invoke(
        cli,
        ["suite", "new", "-d", root_dir, "--empty", "--suite", "foo", "--no-jupyter"],
        input=f"{csv}\n",
        catch_exceptions=False,
    )
    stdout = result.stdout

    assert result.exit_code == 0
    assert "Enter the path" in stdout
    assert "Name the new expectation suite" not in stdout
    assert (
        "Great Expectations will choose a couple of columns and generate expectations"
        not in stdout
    )
    assert "Generating example Expectation Suite..." not in stdout
    assert "The following Data Docs sites were built" not in stdout
    assert (
        "Great Expectations will create a new Expectation Suite 'foo' and store it here"
        in stdout
    )
    assert "open a notebook for you now" not in stdout

    expected_suite_path = os.path.join(root_dir, "expectations", "foo.json")
    assert os.path.isfile(expected_suite_path)
    expected_notebook = os.path.join(root_dir, "uncommitted", "edit_foo.ipynb")
    assert os.path.isfile(expected_notebook)

    context = DataContext(root_dir)
    assert "foo" in context.list_expectation_suite_names()
    suite = context.get_expectation_suite("foo")
    assert suite.expectations == []
    citations = suite.get_citations()
    citations[0].pop("citation_date")
    assert citations[0] == {
        "batch_kwargs": {
            "data_asset_name": "f1",
            "datasource": "mydatasource",
            "path": csv,
            "reader_method": "read_csv",
        },
        "batch_markers": None,
        "batch_parameters": None,
        "comment": "New suite added via CLI",
    }

    assert mock_subprocess.call_count == 0
    assert mock_webbroser.call_count == 0

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_suite_edit_one_datasources_no_generator_with_no_additional_args_and_no_citations(
    mock_webbrowser, mock_subprocess, caplog, empty_data_context, filesystem_csv_2
):
    """
    Here we verify that the "suite edit" command helps the user to specify the batch
    kwargs when it is called without the optional arguments that specify the batch.

    First, we call the "suite new" command to create the expectation suite our
    test will edit - this step is just a setup.

    We call the "suite edit" command without any optional arguments. This means
    that the command will help us specify the batch kwargs interactively.

    The data context has one datasource. The datasource has no generators
    configured. The command prompts us to enter the file path.
    """
    empty_data_context.add_datasource(
        "my_datasource",
        module_name="great_expectations.datasource",
        class_name="PandasDatasource",
    )
    not_so_empty_data_context = empty_data_context

    project_root_dir = not_so_empty_data_context.root_directory

    root_dir = project_root_dir
    os.chdir(root_dir)
    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["suite", "demo", "-d", root_dir],
        input="{0:s}\nmy_new_suite\n\n".format(
            os.path.join(filesystem_csv_2, "f1.csv")
        ),
        catch_exceptions=False,
    )
    stdout = result.stdout
    assert mock_webbrowser.call_count == 1
    assert mock_subprocess.call_count == 0
    mock_subprocess.reset_mock()
    mock_webbrowser.reset_mock()
    assert result.exit_code == 0
    assert (
        "Great Expectations will store these expectations in a new Expectation Suite 'my_new_suite' here:"
        in stdout
    )

    # remove the citations from the suite
    context = DataContext(project_root_dir)
    suite = context.get_expectation_suite("my_new_suite")
    suite.meta.pop("citations")
    context.save_expectation_suite(suite)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["suite", "edit", "my_new_suite", "-d", root_dir],
        input="{0:s}\n\n".format(os.path.join(filesystem_csv_2, "f1.csv")),
        catch_exceptions=False,
    )

    assert result.exit_code == 0
    stdout = result.stdout
    assert "Select a datasource" not in stdout
    assert "Which data would you like to use" not in stdout
    assert "Enter the path" in stdout

    expected_notebook_path = os.path.join(
        root_dir, "uncommitted", "edit_my_new_suite.ipynb"
    )
    assert os.path.isfile(expected_notebook_path)

    expected_suite_path = os.path.join(root_dir, "expectations", "my_new_suite.json")
    assert os.path.isfile(expected_suite_path)

    assert mock_webbrowser.call_count == 0
    assert mock_subprocess.call_count == 1

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_notebook_execution_with_pandas_backend(titanic_data_context_no_data_docs):
    """
    This tests that the notebook is written to disk and executes without error.

    To set this test up we:
    - create a scaffold notebook
    - verify that no validations have happened

    We then:
    - execute that notebook (note this will raise various errors, like
      CellExecutionError, if any cell in the notebook fails)
    - create a new context from disk
    - verify that a validation has been run with our expectation suite
    """
    # Since we'll run the notebook, we use a context with no data docs to avoid
    # the renderer's default behavior of building and opening docs, which is not
    # part of this test.
    context = titanic_data_context_no_data_docs
    root_dir = context.root_directory
    uncommitted_dir = os.path.join(root_dir, "uncommitted")
    suite_name = "my_suite"
    suite = context.create_expectation_suite(suite_name)

    csv_path = os.path.join(root_dir, "..", "data", "Titanic.csv")
    batch_kwargs = {"datasource": "mydatasource", "path": csv_path}

    # Sanity check test setup
    assert context.list_expectation_suite_names() == [suite_name]
    assert context.list_datasources() == [
        {
            "module_name": "great_expectations.datasource",
            "class_name": "PandasDatasource",
            "data_asset_type": {
                "module_name": "great_expectations.dataset",
                "class_name": "PandasDataset",
            },
            "batch_kwargs_generators": {
                "mygenerator": {
                    "class_name": "SubdirReaderBatchKwargsGenerator",
                    "base_directory": "../data",
                }
            },
            "name": "mydatasource",
        }
    ]
    assert context.get_validation_result(suite_name) == {}
    notebook_path = os.path.join(uncommitted_dir, f"{suite_name}.ipynb")
    assert not os.path.isfile(notebook_path)

    # Create notebook
    renderer = SuiteScaffoldNotebookRenderer(
        titanic_data_context_no_data_docs, suite, batch_kwargs
    )
    renderer.render_to_disk(notebook_path)
    assert os.path.isfile(notebook_path)

    with open(notebook_path) as f:
        nb = nbformat.read(f, as_version=4)

    # Run notebook
    ep = ExecutePreprocessor(timeout=600, kernel_name="python3")
    ep.preprocess(nb, {"metadata": {"path": uncommitted_dir}})

    # Useful to inspect executed notebook
    output_notebook = os.path.join(uncommitted_dir, "output.ipynb")
    with open(output_notebook, "w") as f:
        nbformat.write(nb, f)

    # Assertions about output
    context = DataContext(root_dir)
    obs_validation_result = context.get_validation_result(suite_name)
    assert obs_validation_result.statistics == {
        "evaluated_expectations": 3,
        "successful_expectations": 3,
        "unsuccessful_expectations": 0,
        "success_percent": 100,
    }
    suite = context.get_expectation_suite(suite_name)
    assert suite.expectations
saves validation results to your results store and then updates Data Docs.

This makes viewing validation results easy for you and your team.

Usage:
- Run this file: `python {0}`.
- This can be run manually or via a scheduler such as cron.
- If your pipeline runner supports python snippets you can paste this into
  your pipeline.
"""
import sys

from great_expectations import DataContext

# checkpoint configuration
context = DataContext("{1}")
suite = context.get_expectation_suite("{2}")
# You can modify your BatchKwargs to select different data
batch_kwargs = {3}

# checkpoint validation process
batch = context.get_batch(batch_kwargs, suite)
results = context.run_validation_operator("action_list_operator", [batch])

if not results["success"]:
    print("Validation Failed!")
    sys.exit(1)

print("Validation Succeeded!")
sys.exit(0)
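# A hypothetical rendering of the template above. `template` stands for the
# string holding the script body; the filename, context root, suite name, and
# batch kwargs are all invented placeholder values for {0}-{3}.
script = template.format(
    "validate_orders.py",                            # {0}: how the user runs the script
    "/projects/orders/great_expectations",           # {1}: DataContext root directory
    "orders.warning",                                # {2}: expectation suite name
    {"datasource": "orders_db", "table": "orders"},  # {3}: BatchKwargs
)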
import sys

from great_expectations import DataContext

# checkpoint configuration
context = DataContext("/home/demilsonfayika/staging/results")
suite = context.get_expectation_suite("staging.validation")
# You can modify your BatchKwargs to select different data
batch_kwargs = {
    "table": "staging",
    "schema": "public",
    "datasource": "stagingtable",
}

# checkpoint validation process
batch = context.get_batch(batch_kwargs, suite)
results = context.run_validation_operator("action_list_operator", [batch])

# Note: this script always exits 0 and reports pass/fail as JSON on stdout,
# so the caller must parse the printed result rather than the exit code.
if not results["success"]:
    print('{"result":"fail"}')
    sys.exit(0)

print('{"result":"pass"}')
sys.exit(0)
def test_suite_new_empty_suite_creates_empty_suite(
    mock_webbroser, mock_subprocess, caplog, data_context, filesystem_csv_2
):
    """
    Running "suite new --empty" should:
    - make an empty suite
    - open jupyter
    - NOT open data docs
    """
    project_root_dir = data_context.root_directory
    os.mkdir(os.path.join(project_root_dir, "uncommitted"))
    root_dir = project_root_dir
    os.chdir(root_dir)
    runner = CliRunner(mix_stderr=False)
    csv = os.path.join(filesystem_csv_2, "f1.csv")
    result = runner.invoke(
        cli,
        ["suite", "new", "-d", root_dir, "--empty", "--suite", "foo"],
        input=f"{csv}\n",
        catch_exceptions=False,
    )
    stdout = result.stdout

    assert result.exit_code == 0
    assert "Enter the path" in stdout
    assert "Name the new expectation suite" not in stdout
    assert (
        "Great Expectations will choose a couple of columns and generate expectations"
        not in stdout
    )
    assert "Generating example Expectation Suite..." not in stdout
    assert "The following Data Docs sites were built" not in stdout
    assert "A new Expectation suite 'foo' was added to your project" in stdout
    assert (
        "Because you requested an empty suite, we'll open a notebook for you now to edit it!"
        in stdout
    )

    expected_suite_path = os.path.join(root_dir, "expectations", "foo.json")
    assert os.path.isfile(expected_suite_path)
    expected_notebook = os.path.join(root_dir, "uncommitted", "foo.ipynb")
    assert os.path.isfile(expected_notebook)

    context = DataContext(root_dir)
    assert "foo" in context.list_expectation_suite_names()
    suite = context.get_expectation_suite("foo")
    assert suite.expectations == []
    citations = suite.get_citations()
    citations[0].pop("citation_date")
    assert citations[0] == {
        "batch_kwargs": {
            "datasource": "mydatasource",
            "path": csv,
            "reader_method": "read_csv",
        },
        "batch_markers": None,
        "batch_parameters": None,
        "comment": "New suite added via CLI",
    }

    assert mock_subprocess.call_count == 1
    call_args = mock_subprocess.call_args[0][0]
    assert call_args[0] == "jupyter"
    assert call_args[1] == "notebook"
    assert expected_notebook in call_args[2]

    assert mock_webbroser.call_count == 0
    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_cli_init_on_new_project_extra_whitespace_in_url(
    mock_webbrowser, caplog, tmp_path_factory, titanic_sqlite_db_file
):
    project_dir = str(tmp_path_factory.mktemp("test_cli_init_diff"))
    ge_dir = os.path.join(project_dir, "great_expectations")

    database_path = os.path.join(project_dir, "titanic.db")
    shutil.copy(titanic_sqlite_db_file, database_path)
    engine = create_engine("sqlite:///{}".format(database_path))
    engine_url_with_added_whitespace = " " + str(engine.url) + " "

    runner = CliRunner(mix_stderr=False)
    # Note: catch_exceptions belongs to runner.invoke; in the original it was
    # mistakenly passed as an (ignored) keyword argument to str.format.
    result = runner.invoke(
        cli,
        ["init", "-d", project_dir],
        input="Y\n2\n5\ntitanic\n{}\n1\nwarning\n\n".format(
            engine_url_with_added_whitespace
        ),
        catch_exceptions=False,
    )
    stdout = result.output

    assert len(stdout) < 3000, "CLI output is unreasonably long."
    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert "Which database backend are you using" in stdout
    assert "Give your new data source a short name" in stdout
    assert "What is the url/connection string for the sqlalchemy connection" in stdout
    assert "Attempting to connect to your database." in stdout
    assert "Great Expectations connected to your database" in stdout
    assert "Which table would you like to use?" in stdout
    assert "Name the new expectation suite [main.titanic.warning]" in stdout
    assert (
        "Great Expectations will choose a couple of columns and generate expectations about them"
        in stdout
    )
    assert "Generating example Expectation Suite..." in stdout
    assert "Building" in stdout
    assert "Data Docs" in stdout
    assert "A new Expectation suite 'warning' was added to your project" in stdout
    assert "Great Expectations is now set up" in stdout

    context = DataContext(ge_dir)
    assert len(context.list_datasources()) == 1
    assert context.list_datasources() == [
        {"class_name": "SqlAlchemyDatasource", "name": "titanic"}
    ]

    first_suite = context.list_expectation_suites()[0]
    suite = context.get_expectation_suite(first_suite.expectation_suite_name)
    assert len(suite.expectations) == 13

    assert os.path.isdir(ge_dir)
    config_path = os.path.join(project_dir, "great_expectations/great_expectations.yml")
    assert os.path.isfile(config_path)
    config = yaml.load(open(config_path, "r"))
    data_source_class = config["datasources"]["titanic"]["data_asset_type"]["class_name"]
    assert data_source_class == "SqlAlchemyDataset"

    assert_no_logging_messages_or_tracebacks(caplog, result)
    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 1
    assert (
        "{}/great_expectations/uncommitted/data_docs/local_site/validations/warning/".format(
            project_dir
        )
        in mock_webbrowser.call_args[0][0]
    )
def test_notebook_execution_rule_based_profiler_with_pandas_backend(
    titanic_v013_multi_datasource_pandas_data_context_with_checkpoints_v1_with_empty_store_stats_enabled,
    bobby_columnar_table_multi_batch,
):
    """
    To set this test up we:
    - create a suite using a Rule-Based Profiler
    - verify that no validations have happened
    - create the suite edit notebook by hijacking the private cli method

    We then:
    - execute that notebook (note this will raise various errors, like
      CellExecutionError, if any cell in the notebook fails)
    - create a new context from disk
    - verify that a validation has been run with our expectation suite
    """
    context: DataContext = titanic_v013_multi_datasource_pandas_data_context_with_checkpoints_v1_with_empty_store_stats_enabled
    root_dir: str = context.root_directory
    uncommitted_dir: str = os.path.join(root_dir, "uncommitted")
    expectation_suite_name: str = "warning"

    context.create_expectation_suite(expectation_suite_name=expectation_suite_name)
    batch_request: dict = {
        "datasource_name": "my_datasource",
        "data_connector_name": "my_basic_data_connector",
        "data_asset_name": "Titanic_1912",
    }

    # Sanity check test setup
    original_suite: ExpectationSuite = context.get_expectation_suite(
        expectation_suite_name=expectation_suite_name
    )
    assert len(original_suite.expectations) == 0
    assert context.list_expectation_suite_names() == [expectation_suite_name]
    assert context.list_datasources() == [
        {
            "name": "my_datasource",
            "class_name": "Datasource",
            "module_name": "great_expectations.datasource",
            "execution_engine": {
                "class_name": "PandasExecutionEngine",
                "module_name": "great_expectations.execution_engine",
            },
            "data_connectors": {
                "my_basic_data_connector": {
                    "module_name": "great_expectations.datasource.data_connector",
                    "base_directory": f"{root_dir}/../data/titanic",
                    "default_regex": {
                        "pattern": "(.*)\\.csv",
                        "group_names": ["data_asset_name"],
                    },
                    "class_name": "InferredAssetFilesystemDataConnector",
                },
                "my_special_data_connector": {
                    "glob_directive": "*.csv",
                    "assets": {
                        "users": {
                            "pattern": "(.+)_(\\d+)_(\\d+)\\.csv",
                            "group_names": ["name", "timestamp", "size"],
                            "class_name": "Asset",
                            "base_directory": f"{root_dir}/../data/titanic",
                            "module_name": "great_expectations.datasource.data_connector.asset",
                        }
                    },
                    "module_name": "great_expectations.datasource.data_connector",
                    "base_directory": f"{root_dir}/../data/titanic",
                    "default_regex": {"pattern": "(.+)\\.csv", "group_names": ["name"]},
                    "class_name": "ConfiguredAssetFilesystemDataConnector",
                },
                "my_other_data_connector": {
                    "glob_directive": "*.csv",
                    "assets": {
                        "users": {
                            "class_name": "Asset",
                            "module_name": "great_expectations.datasource.data_connector.asset",
                        }
                    },
                    "module_name": "great_expectations.datasource.data_connector",
                    "base_directory": f"{root_dir}/../data/titanic",
                    "default_regex": {"pattern": "(.+)\\.csv", "group_names": ["name"]},
                    "class_name": "ConfiguredAssetFilesystemDataConnector",
                },
                "my_runtime_data_connector": {
                    "module_name": "great_expectations.datasource.data_connector",
                    "batch_identifiers": ["pipeline_stage_name", "airflow_run_id"],
                    "class_name": "RuntimeDataConnector",
                },
            },
        },
        {
            "name": "my_additional_datasource",
            "class_name": "Datasource",
            "module_name": "great_expectations.datasource",
            "execution_engine": {
                "module_name": "great_expectations.execution_engine",
                "class_name": "PandasExecutionEngine",
            },
            "data_connectors": {
                "my_additional_data_connector": {
                    "module_name": "great_expectations.datasource.data_connector",
                    "default_regex": {
                        "pattern": "(.*)\\.csv",
                        "group_names": ["data_asset_name"],
                    },
                    "base_directory": f"{root_dir}/../data/titanic",
                    "class_name": "InferredAssetFilesystemDataConnector",
                }
            },
        },
    ]

    assert context.get_validation_result(expectation_suite_name="warning") == {}

    # Load profiler configs & loop (run tests for each one)
    yaml_config: str = bobby_columnar_table_multi_batch["profiler_config"]

    # Instantiate Profiler
    profiler_config: dict = yaml.load(yaml_config)

    # Roundtrip through schema validation to remove any illegal fields and/or restore any missing fields.
    deserialized_config: dict = ruleBasedProfilerConfigSchema.load(profiler_config)
    serialized_config: dict = ruleBasedProfilerConfigSchema.dump(deserialized_config)

    # `class_name`/`module_name` are generally consumed through `instantiate_class_from_config`
    # so we need to manually remove those values if we wish to use the **kwargs instantiation pattern
    serialized_config.pop("class_name")
    serialized_config.pop("module_name")

    profiler: RuleBasedProfiler = RuleBasedProfiler(
        **serialized_config,
        data_context=context,
    )

    profiler_name: str = "bobby_user_workflow"
    context.save_profiler(
        profiler=profiler,
        name=profiler_name,
    )

    # Create notebook
    # do not want to actually send usage_message, since the function call is not the result of actual usage
    _suite_edit_workflow(
        context=context,
        expectation_suite_name=expectation_suite_name,
        profile=True,
        profiler_name=profiler_name,
        usage_event="test_notebook_execution",
        interactive_mode=CLISuiteInteractiveFlagCombinations.UNPROMPTED_INTERACTIVE_FALSE_MANUAL_TRUE,
        no_jupyter=True,
        create_if_not_exist=False,
        datasource_name=None,
        batch_request=batch_request,
        additional_batch_request_args=None,
        suppress_usage_message=True,
        assume_yes=True,
    )
    edit_notebook_path: str = os.path.join(uncommitted_dir, "edit_warning.ipynb")
    assert os.path.isfile(edit_notebook_path)

    run_notebook(
        notebook_path=edit_notebook_path,
        notebook_dir=uncommitted_dir,
        string_to_be_replaced="context.open_data_docs(resource_identifier=validation_result_identifier)",
        replacement_string="",
    )

    # Assertions about output
    context = DataContext(context_root_dir=root_dir)
    obs_validation_result: ExpectationSuiteValidationResult = (
        context.get_validation_result(expectation_suite_name="warning")
    )
    assert obs_validation_result.statistics == {
        "evaluated_expectations": 13,
        "successful_expectations": 13,
        "unsuccessful_expectations": 0,
        "success_percent": 100.0,
    }

    expected_expectation_configurations: List[ExpectationConfiguration] = [
        ExpectationConfiguration(
            **{
                "meta": {
                    "profiler_details": {
                        "metric_configuration": {
                            "domain_kwargs": {},
                            "metric_dependencies": None,
                            "metric_name": "table.row_count",
                            "metric_value_kwargs": None,
                        },
                        "num_batches": 1,
                    }
                },
                "kwargs": {"max_value": 1313, "min_value": 1313},
                "expectation_type": "expect_table_row_count_to_be_between",
            }
        ),
        ExpectationConfiguration(
            **{
                "meta": {
                    "profiler_details": {
                        "metric_configuration": {
                            "domain_kwargs": {"column": "Unnamed: 0"},
                            "metric_dependencies": None,
                            "metric_name": "column.min",
                            "metric_value_kwargs": None,
                        },
                        "num_batches": 1,
                    }
                },
                "kwargs": {
                    "column": "Unnamed: 0",
                    "max_value": 1,
                    "min_value": 1,
                    "mostly": 1.0,
                },
                "expectation_type": "expect_column_min_to_be_between",
            }
        ),
        ExpectationConfiguration(
            **{
                "meta": {
                    "profiler_details": {
                        "metric_configuration": {
                            "domain_kwargs": {"column": "Unnamed: 0"},
                            "metric_dependencies": None,
                            "metric_name": "column.max",
                            "metric_value_kwargs": None,
                        },
                        "num_batches": 1,
                    }
                },
                "kwargs": {
                    "column": "Unnamed: 0",
                    "max_value": 1313,
                    "min_value": 1313,
                    "mostly": 1.0,
                },
                "expectation_type": "expect_column_max_to_be_between",
            }
        ),
        ExpectationConfiguration(
            **{
                "meta": {
                    "profiler_details": {
                        "metric_configuration": {
                            "domain_kwargs": {"column": "Age"},
                            "metric_dependencies": None,
                            "metric_name": "column.min",
                            "metric_value_kwargs": None,
                        },
                        "num_batches": 1,
                    }
                },
                "kwargs": {
                    "column": "Age",
                    "max_value": 0.17,
                    "min_value": 0.17,
                    "mostly": 1.0,
                },
                "expectation_type": "expect_column_min_to_be_between",
            }
        ),
        ExpectationConfiguration(
            **{
                "meta": {
                    "profiler_details": {
                        "metric_configuration": {
                            "domain_kwargs": {"column": "Age"},
                            "metric_dependencies": None,
                            "metric_name": "column.max",
                            "metric_value_kwargs": None,
                        },
                        "num_batches": 1,
                    }
                },
                "kwargs": {
                    "column": "Age",
                    "max_value": 71.0,
                    "min_value": 71.0,
                    "mostly": 1.0,
                },
                "expectation_type": "expect_column_max_to_be_between",
            }
        ),
        ExpectationConfiguration(
            **{
                "meta": {
                    "profiler_details": {
                        "metric_configuration": {
                            "domain_kwargs": {"column": "Survived"},
                            "metric_dependencies": None,
                            "metric_name": "column.min",
                            "metric_value_kwargs": None,
                        },
                        "num_batches": 1,
                    }
                },
                "kwargs": {
                    "column": "Survived",
                    "max_value": 0,
                    "min_value": 0,
                    "mostly": 1.0,
                },
                "expectation_type": "expect_column_min_to_be_between",
            }
        ),
        ExpectationConfiguration(
            **{
                "meta": {
                    "profiler_details": {
                        "metric_configuration": {
                            "domain_kwargs": {"column": "Survived"},
                            "metric_dependencies": None,
                            "metric_name": "column.max",
                            "metric_value_kwargs": None,
                        },
                        "num_batches": 1,
                    }
                },
                "kwargs": {
                    "column": "Survived",
                    "max_value": 1,
                    "min_value": 1,
                    "mostly": 1.0,
                },
                "expectation_type": "expect_column_max_to_be_between",
            }
        ),
        ExpectationConfiguration(
            **{
                "meta": {
                    "profiler_details": {
                        "metric_configuration": {
                            "domain_kwargs": {"column": "SexCode"},
                            "metric_dependencies": None,
                            "metric_name": "column.min",
                            "metric_value_kwargs": None,
                        },
                        "num_batches": 1,
                    }
                },
                "kwargs": {
                    "column": "SexCode",
                    "max_value": 0,
                    "min_value": 0,
                    "mostly": 1.0,
                },
                "expectation_type": "expect_column_min_to_be_between",
            }
        ),
        ExpectationConfiguration(
            **{
                "meta": {
                    "profiler_details": {
                        "metric_configuration": {
                            "domain_kwargs": {"column": "SexCode"},
                            "metric_dependencies": None,
                            "metric_name": "column.max",
                            "metric_value_kwargs": None,
                        },
                        "num_batches": 1,
                    }
                },
                "kwargs": {
                    "column": "SexCode",
                    "max_value": 1,
                    "min_value": 1,
                    "mostly": 1.0,
                },
                "expectation_type": "expect_column_max_to_be_between",
            }
        ),
        ExpectationConfiguration(
            **{
                "meta": {},
                "kwargs": {
                    "column": "PClass",
                    "value_set": ["*", "1st", "2nd", "3rd"],
                },
                "expectation_type": "expect_column_values_to_be_in_set",
            }
        ),
        ExpectationConfiguration(
            **{
                "meta": {},
                "kwargs": {"column": "Sex", "value_set": ["female", "male"]},
                "expectation_type": "expect_column_values_to_be_in_set",
            }
        ),
        ExpectationConfiguration(
            **{
                "meta": {},
                "kwargs": {"column": "Survived", "value_set": [0, 1]},
                "expectation_type": "expect_column_values_to_be_in_set",
            }
        ),
        ExpectationConfiguration(
            **{
                "meta": {},
                "kwargs": {"column": "SexCode", "value_set": [0, 1]},
                "expectation_type": "expect_column_values_to_be_in_set",
            }
        ),
    ]

    suite: ExpectationSuite = context.get_expectation_suite(
        expectation_suite_name=expectation_suite_name
    )

    expectation_configurations: List[ExpectationConfiguration] = []
    expectation_configuration: ExpectationConfiguration
    for expectation_configuration in suite.expectations:
        kwargs: dict = expectation_configuration.kwargs
        key: str
        value: Any
        kwargs = {
            key: sorted(value) if isinstance(value, (list, set, tuple)) else value
            for key, value in kwargs.items()
        }
        expectation_configuration.kwargs = kwargs
        expectation_configurations.append(expectation_configuration)

    assert expectation_configurations == expected_expectation_configurations
def test_notebook_execution_onboarding_data_assistant_pandas_backend(
    titanic_v013_multi_datasource_pandas_data_context_with_checkpoints_v1_with_empty_store_stats_enabled,
):
    """
    To set this test up we:
    - create a suite using a User-Configurable Profiler
    - verify that no validations have happened
    - create the suite edit notebook by hijacking the private cli method

    We then:
    - execute that notebook (note this will raise various errors, like
      CellExecutionError, if any cell in the notebook fails)
    - create a new context from disk
    - verify that a validation has been run with our expectation suite
    """
    context: DataContext = titanic_v013_multi_datasource_pandas_data_context_with_checkpoints_v1_with_empty_store_stats_enabled
    root_dir: str = context.root_directory
    uncommitted_dir: str = os.path.join(root_dir, "uncommitted")
    expectation_suite_name: str = "warning"

    context.create_expectation_suite(expectation_suite_name=expectation_suite_name)
    batch_request: dict = {
        "datasource_name": "my_datasource",
        "data_connector_name": "my_basic_data_connector",
        "data_asset_name": "Titanic_1912",
    }

    # Sanity check test setup
    original_suite: ExpectationSuite = context.get_expectation_suite(
        expectation_suite_name=expectation_suite_name
    )
    assert len(original_suite.expectations) == 0
    assert context.list_expectation_suite_names() == [expectation_suite_name]
    assert context.list_datasources() == [
        {
            "name": "my_datasource",
            "class_name": "Datasource",
            "module_name": "great_expectations.datasource",
            "execution_engine": {
                "class_name": "PandasExecutionEngine",
                "module_name": "great_expectations.execution_engine",
            },
            "data_connectors": {
                "my_basic_data_connector": {
                    "module_name": "great_expectations.datasource.data_connector",
                    "base_directory": f"{root_dir}/../data/titanic",
                    "default_regex": {
                        "pattern": "(.*)\\.csv",
                        "group_names": ["data_asset_name"],
                    },
                    "class_name": "InferredAssetFilesystemDataConnector",
                },
                "my_special_data_connector": {
                    "glob_directive": "*.csv",
                    "assets": {
                        "users": {
                            "pattern": "(.+)_(\\d+)_(\\d+)\\.csv",
                            "group_names": ["name", "timestamp", "size"],
                            "class_name": "Asset",
                            "base_directory": f"{root_dir}/../data/titanic",
                            "module_name": "great_expectations.datasource.data_connector.asset",
                        }
                    },
                    "module_name": "great_expectations.datasource.data_connector",
                    "base_directory": f"{root_dir}/../data/titanic",
                    "default_regex": {"pattern": "(.+)\\.csv", "group_names": ["name"]},
                    "class_name": "ConfiguredAssetFilesystemDataConnector",
                },
                "my_other_data_connector": {
                    "glob_directive": "*.csv",
                    "assets": {
                        "users": {
                            "class_name": "Asset",
                            "module_name": "great_expectations.datasource.data_connector.asset",
                        }
                    },
                    "module_name": "great_expectations.datasource.data_connector",
                    "base_directory": f"{root_dir}/../data/titanic",
                    "default_regex": {"pattern": "(.+)\\.csv", "group_names": ["name"]},
                    "class_name": "ConfiguredAssetFilesystemDataConnector",
                },
                "my_runtime_data_connector": {
                    "module_name": "great_expectations.datasource.data_connector",
                    "batch_identifiers": ["pipeline_stage_name", "airflow_run_id"],
                    "class_name": "RuntimeDataConnector",
                },
            },
        },
        {
            "name": "my_additional_datasource",
            "class_name": "Datasource",
            "module_name": "great_expectations.datasource",
            "execution_engine": {
                "module_name": "great_expectations.execution_engine",
                "class_name": "PandasExecutionEngine",
            },
            "data_connectors": {
                "my_additional_data_connector": {
                    "module_name": "great_expectations.datasource.data_connector",
                    "default_regex": {
                        "pattern": "(.*)\\.csv",
                        "group_names": ["data_asset_name"],
                    },
                    "base_directory": f"{root_dir}/../data/titanic",
                    "class_name": "InferredAssetFilesystemDataConnector",
                }
            },
        },
    ]

    assert context.get_validation_result(expectation_suite_name="warning") == {}

    # Create notebook
    # do not want to actually send usage_message, since the function call is not the result of actual usage
    _suite_edit_workflow(
        context=context,
        expectation_suite_name=expectation_suite_name,
        profile=True,
        profiler_name=None,
        usage_event="test_notebook_execution",
        interactive_mode=CLISuiteInteractiveFlagCombinations.UNPROMPTED_INTERACTIVE_FALSE_MANUAL_TRUE,
        no_jupyter=True,
        create_if_not_exist=False,
        datasource_name=None,
        batch_request=batch_request,
        additional_batch_request_args=None,
        suppress_usage_message=True,
        assume_yes=True,
    )
    edit_notebook_path: str = os.path.join(uncommitted_dir, "edit_warning.ipynb")
    assert os.path.isfile(edit_notebook_path)

    run_notebook(
        notebook_path=edit_notebook_path,
        notebook_dir=uncommitted_dir,
        string_to_be_replaced="context.open_data_docs(resource_identifier=validation_result_identifier)",
        replacement_string="",
    )

    # Assertions about output
    context = DataContext(context_root_dir=root_dir)
    obs_validation_result: ExpectationSuiteValidationResult = (
        context.get_validation_result(expectation_suite_name="warning")
    )
    assert obs_validation_result.statistics == {
        "evaluated_expectations": 2,
        "successful_expectations": 2,
        "unsuccessful_expectations": 0,
        "success_percent": 100.0,
    }

    expected_expectation_configurations: List[ExpectationConfiguration] = [
        ExpectationConfiguration(
            **{
                "kwargs": {"max_value": 1313, "min_value": 1313},
                "expectation_type": "expect_table_row_count_to_be_between",
                "meta": {
                    "profiler_details": {
                        "metric_configuration": {
                            "domain_kwargs": {},
                            "metric_dependencies": None,
                            "metric_name": "table.row_count",
                            "metric_value_kwargs": None,
                        },
                        "num_batches": 1,
                    }
                },
            }
        ),
        ExpectationConfiguration(
            **{
                "kwargs": {
                    "column_set": [
                        "Age",
                        "Name",
                        "PClass",
                        "Sex",
                        "SexCode",
                        "Survived",
                        "Unnamed: 0",
                    ],
                    "exact_match": None,
                },
                "expectation_type": "expect_table_columns_to_match_set",
                "meta": {"profiler_details": {"success_ratio": 1.0}},
            }
        ),
    ]

    suite: ExpectationSuite = context.get_expectation_suite(
        expectation_suite_name=expectation_suite_name
    )

    expectation_configurations: List[ExpectationConfiguration] = []
    expectation_configuration: ExpectationConfiguration
    for expectation_configuration in suite.expectations:
        kwargs: dict = expectation_configuration.kwargs
        key: str
        value: Any
        kwargs = {
            key: sorted(value) if isinstance(value, (list, set, tuple)) else value
            for key, value in kwargs.items()
        }
        expectation_configuration.kwargs = kwargs
        expectation_configurations.append(expectation_configuration)

    assert expectation_configurations == expected_expectation_configurations

    columns_with_expectations: Set[str]
    expectations_from_suite: Set[str]
    (
        columns_with_expectations,
        expectations_from_suite,
    ) = get_set_of_columns_and_expectations_from_suite(suite=suite)

    expected_expectations: Set[str] = {
        "expect_table_row_count_to_be_between",
        "expect_table_columns_to_match_set",
    }
    assert columns_with_expectations == set()
    assert expectations_from_suite == expected_expectations
pipeline.
"""
import sys

from great_expectations import DataContext

# checkpoint configuration
context = DataContext("{1}")
checkpoint = context.get_checkpoint("{0}")

# load batches of data
batches_to_validate = []
for batch in checkpoint["batches"]:
    batch_kwargs = batch["batch_kwargs"]
    for suite_name in batch["expectation_suite_names"]:
        suite = context.get_expectation_suite(suite_name)
        batch = context.get_batch(batch_kwargs, suite)
        batches_to_validate.append(batch)

# run the validation operator
results = context.run_validation_operator(
    checkpoint["validation_operator_name"],
    assets_to_validate=batches_to_validate,
    # TODO prepare for new RunID - checkpoint name and timestamp
    # run_id=RunID(checkpoint)
)

# take action based on results
if not results["success"]:
    print("Validation Failed!")
    sys.exit(1)
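# For reference, the script above expects the loaded checkpoint to behave like
# a mapping of roughly this shape. Every name below is invented for
# illustration; the real structure comes from the project's checkpoint config.
checkpoint = {
    "validation_operator_name": "action_list_operator",
    "batches": [
        {
            "batch_kwargs": {"datasource": "orders_db", "table": "orders"},
            "expectation_suite_names": ["orders.warning", "orders.critical"],
        }
    ],
}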
def test_suite_edit_multiple_datasources_with_generator_with_no_additional_args_with_suite_without_citations(
    mock_webbrowser,
    mock_subprocess,
    caplog,
    site_builder_data_context_with_html_store_titanic_random,
):
    """
    Here we verify that the "suite edit" command helps the user to specify the batch
    kwargs when it is called without the optional arguments that specify the batch.

    First, we call the "suite new" command to create the expectation suite our
    test will edit - this step is just a setup.

    We call the "suite edit" command without any optional arguments. This means
    that the command will help us specify the batch kwargs interactively.

    The data context has two datasources - we choose one of them. It has a
    generator configured. We choose to use the generator and select a generator
    asset from the list.

    The command should:
    - NOT open Data Docs
    - open jupyter
    """
    root_dir = site_builder_data_context_with_html_store_titanic_random.root_directory
    os.chdir(root_dir)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["suite", "demo", "-d", root_dir, "--suite", "foo_suite"],
        input="2\n1\n1\n\n",
        catch_exceptions=False,
    )
    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 2
    assert mock_subprocess.call_count == 0
    mock_webbrowser.reset_mock()
    mock_subprocess.reset_mock()

    # remove the citations from the suite
    context = DataContext(root_dir)
    suite = context.get_expectation_suite("foo_suite")
    assert isinstance(suite, ExpectationSuite)
    suite.meta.pop("citations")
    context.save_expectation_suite(suite)

    # Actual testing really starts here
    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["suite", "edit", "foo_suite", "-d", root_dir],
        input="2\n1\n1\n\n",
        catch_exceptions=False,
    )

    assert result.exit_code == 0
    stdout = result.stdout
    assert "A batch of data is required to edit the suite" in stdout
    assert "Select a datasource" in stdout
    assert "Which data would you like to use" in stdout

    expected_notebook_path = os.path.join(
        root_dir, "uncommitted", "edit_foo_suite.ipynb"
    )
    assert os.path.isfile(expected_notebook_path)

    expected_suite_path = os.path.join(root_dir, "expectations", "foo_suite.json")
    assert os.path.isfile(expected_suite_path)

    assert mock_webbrowser.call_count == 0
    assert mock_subprocess.call_count == 1

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_cli_init_on_new_project_extra_whitespace_in_url(
    mock_webbrowser, caplog, tmp_path_factory, titanic_sqlite_db_file, sa
):
    project_dir = str(tmp_path_factory.mktemp("test_cli_init_diff"))
    ge_dir = os.path.join(project_dir, "great_expectations")

    database_path = os.path.join(project_dir, "titanic.db")
    shutil.copy(titanic_sqlite_db_file, database_path)
    engine = sa.create_engine(f"sqlite:///{database_path}", pool_recycle=3600)
    engine_url_with_added_whitespace = " " + str(engine.url) + " "

    inspector = sa.inspect(engine)

    # get the default schema and table for testing
    schemas = inspector.get_schema_names()
    default_schema = schemas[0]

    tables = [
        table_name for table_name in inspector.get_table_names(schema=default_schema)
    ]
    default_table = tables[0]

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["init", "-d", project_dir],
        input="\n\n2\n6\ntitanic\n{url}\n\n\n1\n{schema}\n{table}\nwarning\n\n\n\n".format(
            url=engine_url_with_added_whitespace,
            schema=default_schema,
            table=default_table,
        ),
        catch_exceptions=False,
    )
    stdout = result.output

    assert len(stdout) < 6000, "CLI output is unreasonably long."
    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert "Which database backend are you using" in stdout
    assert "Give your new Datasource a short name" in stdout
    assert "What is the url/connection string for the sqlalchemy connection" in stdout
    assert "Attempting to connect to your database." in stdout
    assert "Great Expectations connected to your database" in stdout
    assert (
        "You have selected a datasource that is a SQL database. How would you like to specify the data?"
        in stdout
    )
    assert "Name the new Expectation Suite [main.titanic.warning]" in stdout
    assert (
        "Great Expectations will choose a couple of columns and generate expectations about them"
        in stdout
    )
    assert "Generating example Expectation Suite..." in stdout
    assert "Building" in stdout
    assert "Data Docs" in stdout
    assert "Great Expectations is now set up" in stdout

    context = DataContext(ge_dir)
    assert len(context.list_datasources()) == 1
    assert context.list_datasources() == [
        {
            "class_name": "SqlAlchemyDatasource",
            "name": "titanic",
            "module_name": "great_expectations.datasource",
            "credentials": {"url": str(engine.url)},
            "data_asset_type": {
                "class_name": "SqlAlchemyDataset",
                "module_name": "great_expectations.dataset",
            },
        }
    ]

    first_suite = context.list_expectation_suites()[0]
    suite = context.get_expectation_suite(first_suite.expectation_suite_name)
    assert len(suite.expectations) == 14

    assert os.path.isdir(ge_dir)
    config_path = os.path.join(project_dir, "great_expectations/great_expectations.yml")
    assert os.path.isfile(config_path)
    config = yaml.load(open(config_path))
    data_source_class = config["datasources"]["titanic"]["data_asset_type"]["class_name"]
    assert data_source_class == "SqlAlchemyDataset"

    assert_no_logging_messages_or_tracebacks(caplog, result)
    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 1
    assert (
        "{}/great_expectations/uncommitted/data_docs/local_site/validations/warning/".format(
            project_dir
        )
        in mock_webbrowser.call_args[0][0]
    )
def test_suite_edit_multiple_datasources_with_generator_with_no_additional_args_with_suite_containing_citations(
    mock_webbrowser,
    mock_subprocess,
    caplog,
    site_builder_data_context_with_html_store_titanic_random,
):
    """
    Here we verify that the "suite edit" command uses the batch kwargs found in
    citations in the existing suite when it is called without the optional
    arguments that specify the batch.

    First, we call the "suite new" command to create the expectation suite our
    test will edit - this step is just a setup.

    We call the "suite edit" command without any optional arguments.

    The command should:
    - NOT open Data Docs
    - open jupyter (note: the assertions below expect one subprocess call that
      launches the notebook)
    """
    root_dir = site_builder_data_context_with_html_store_titanic_random.root_directory
    os.chdir(root_dir)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["suite", "demo", "-d", root_dir, "--suite", "foo_suite"],
        input="2\n1\n1\n\n",
        catch_exceptions=False,
    )
    assert mock_webbrowser.call_count == 2
    assert mock_subprocess.call_count == 0
    mock_subprocess.reset_mock()
    mock_webbrowser.reset_mock()
    assert result.exit_code == 0
    context = DataContext(root_dir)
    suite = context.get_expectation_suite("foo_suite")
    assert isinstance(suite, ExpectationSuite)

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["suite", "edit", "foo_suite", "-d", root_dir],
        input="2\n1\n1\n\n",
        catch_exceptions=False,
    )

    assert result.exit_code == 0
    stdout = result.stdout
    assert "Select a datasource" not in stdout
    assert "Which data would you like to use" not in stdout

    expected_notebook_path = os.path.join(
        root_dir, "uncommitted", "edit_foo_suite.ipynb"
    )
    assert os.path.isfile(expected_notebook_path)

    expected_suite_path = os.path.join(root_dir, "expectations", "foo_suite.json")
    assert os.path.isfile(expected_suite_path)

    assert mock_webbrowser.call_count == 0
    assert mock_subprocess.call_count == 1

    assert_no_logging_messages_or_tracebacks(caplog, result)
def test_cli_init_on_new_project(
    mock_webbrowser, caplog, tmp_path_factory, titanic_sqlite_db_file, sa
):
    project_dir = str(tmp_path_factory.mktemp("test_cli_init_diff"))
    ge_dir = os.path.join(project_dir, "great_expectations")

    database_path = os.path.join(project_dir, "titanic.db")
    shutil.copy(titanic_sqlite_db_file, database_path)
    engine = sa.create_engine(f"sqlite:///{database_path}", pool_recycle=3600)

    inspector = sa.inspect(engine)

    # get the default schema and table for testing
    schemas = inspector.get_schema_names()
    default_schema = schemas[0]

    tables = [
        table_name for table_name in inspector.get_table_names(schema=default_schema)
    ]
    default_table = tables[0]

    runner = CliRunner(mix_stderr=False)
    result = runner.invoke(
        cli,
        ["init", "-d", project_dir],
        input="\n\n2\n6\ntitanic\n{url}\n\n\n1\n{schema}\n{table}\nwarning\n\n\n\n".format(
            url=engine.url, schema=default_schema, table=default_table
        ),
        catch_exceptions=False,
    )
    stdout = result.output

    assert len(stdout) < 6000, "CLI output is unreasonably long."
    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert "Which database backend are you using" in stdout
    assert "Give your new Datasource a short name" in stdout
    assert "What is the url/connection string for the sqlalchemy connection" in stdout
    assert "Attempting to connect to your database." in stdout
    assert "Great Expectations connected to your database" in stdout
    assert (
        "You have selected a datasource that is a SQL database. How would you like to specify the data?"
        in stdout
    )
    assert "Name the new Expectation Suite [main.titanic.warning]" in stdout
    assert (
        "Great Expectations will choose a couple of columns and generate expectations about them"
        in stdout
    )
    assert "Generating example Expectation Suite..." in stdout
    assert "Building" in stdout
    assert "Data Docs" in stdout
    assert "Great Expectations is now set up" in stdout

    context = DataContext(ge_dir)
    assert len(context.list_datasources()) == 1
    assert context.list_datasources()[0]["class_name"] == "SqlAlchemyDatasource"
    assert context.list_datasources()[0]["name"] == "titanic"

    first_suite = context.list_expectation_suites()[0]
    suite = context.get_expectation_suite(first_suite.expectation_suite_name)
    assert len(suite.expectations) == 14

    assert os.path.isdir(ge_dir)
    config_path = os.path.join(project_dir, "great_expectations/great_expectations.yml")
    assert os.path.isfile(config_path)
    config = yaml.load(open(config_path))
    data_source_class = config["datasources"]["titanic"]["data_asset_type"]["class_name"]
    assert data_source_class == "SqlAlchemyDataset"

    # Profilers are v014+ specific
    os.rmdir(os.path.join(ge_dir, "profilers"))

    obs_tree = gen_directory_tree_str(ge_dir)

    # Instead of monkey patching guids, just regex out the guids
    guid_safe_obs_tree = re.sub(
        r"[a-z0-9]{32}(?=\.(json|html))", "foobarbazguid", obs_tree
    )
    # print(guid_safe_obs_tree)
    assert (
        guid_safe_obs_tree
        == """\
great_expectations/
    .gitignore
    great_expectations.yml
    checkpoints/
    expectations/
        .ge_store_backend_id
        warning.json
    plugins/
        custom_data_docs/
            renderers/
            styles/
                data_docs_custom_styles.css
            views/
    uncommitted/
        config_variables.yml
        data_docs/
            local_site/
                index.html
                expectations/
                    warning.html
                static/
                    fonts/
                        HKGrotesk/
                            HKGrotesk-Bold.otf
                            HKGrotesk-BoldItalic.otf
                            HKGrotesk-Italic.otf
                            HKGrotesk-Light.otf
                            HKGrotesk-LightItalic.otf
                            HKGrotesk-Medium.otf
                            HKGrotesk-MediumItalic.otf
                            HKGrotesk-Regular.otf
                            HKGrotesk-SemiBold.otf
                            HKGrotesk-SemiBoldItalic.otf
                    images/
                        favicon.ico
                        glossary_scroller.gif
                        iterative-dev-loop.png
                        logo-long-vector.svg
                        logo-long.png
                        short-logo-vector.svg
                        short-logo.png
                        validation_failed_unexpected_values.gif
                    styles/
                        data_docs_custom_styles_template.css
                        data_docs_default_styles.css
                validations/
                    warning/
                        20190926T134241.000000Z/
                            20190926T134241.000000Z/
                                foobarbazguid.html
        validations/
            .ge_store_backend_id
            warning/
                20190926T134241.000000Z/
                    20190926T134241.000000Z/
                        foobarbazguid.json
"""
    )

    assert_no_logging_messages_or_tracebacks(caplog, result)
    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 1
    assert (
        "{}/great_expectations/uncommitted/data_docs/local_site/validations/warning/".format(
            project_dir
        )
        in mock_webbrowser.call_args[0][0]
    )
def test_cli_init_on_new_project(
    mock_webbrowser, caplog, tmp_path_factory, titanic_sqlite_db_file
):
    project_dir = str(tmp_path_factory.mktemp("test_cli_init_diff"))
    ge_dir = os.path.join(project_dir, "great_expectations")

    database_path = os.path.join(project_dir, "titanic.db")
    shutil.copy(titanic_sqlite_db_file, database_path)
    engine = create_engine("sqlite:///{}".format(database_path))

    runner = CliRunner(mix_stderr=False)
    # Note: catch_exceptions belongs to runner.invoke; in the original it was
    # mistakenly passed as an (ignored) keyword argument to str.format.
    result = runner.invoke(
        cli,
        ["init", "-d", project_dir],
        input="Y\n2\n5\ntitanic\n{}\n1\nwarning\n\n".format(engine.url),
        catch_exceptions=False,
    )
    stdout = result.output
    assert len(stdout) < 3000, "CLI output is unreasonably long."

    assert "Always know what to expect from your data" in stdout
    assert "What data would you like Great Expectations to connect to" in stdout
    assert "Which database backend are you using" in stdout
    assert "Give your new data source a short name" in stdout
    assert "What is the url/connection string for the sqlalchemy connection" in stdout
    assert "Attempting to connect to your database." in stdout
    assert "Great Expectations connected to your database" in stdout
    assert "Which table would you like to use?" in stdout
    assert "Name the new expectation suite [main.titanic.warning]" in stdout
    assert (
        "Great Expectations will choose a couple of columns and generate expectations about them"
        in stdout
    )
    assert "Generating example Expectation Suite..." in stdout
    assert "Building" in stdout
    assert "Data Docs" in stdout
    assert "A new Expectation suite 'warning' was added to your project" in stdout
    assert "Great Expectations is now set up" in stdout

    context = DataContext(ge_dir)
    assert len(context.list_datasources()) == 1
    assert context.list_datasources() == [
        {"class_name": "SqlAlchemyDatasource", "name": "titanic"}
    ]

    first_suite = context.list_expectation_suites()[0]
    suite = context.get_expectation_suite(first_suite.expectation_suite_name)
    assert len(suite.expectations) == 13

    assert os.path.isdir(ge_dir)
    config_path = os.path.join(project_dir, "great_expectations/great_expectations.yml")
    assert os.path.isfile(config_path)
    config = yaml.load(open(config_path, "r"))
    data_source_class = config["datasources"]["titanic"]["data_asset_type"]["class_name"]
    assert data_source_class == "SqlAlchemyDataset"

    obs_tree = gen_directory_tree_str(ge_dir)

    # Instead of monkey patching datetime, just regex out the time directories
    date_safe_obs_tree = re.sub(r"\d*T\d*\.\d*Z", "9999.9999", obs_tree)
    # Instead of monkey patching guids, just regex out the guids
    guid_safe_obs_tree = re.sub(
        r"[a-z0-9]{32}(?=\.(json|html))", "foobarbazguid", date_safe_obs_tree
    )
    assert (
        guid_safe_obs_tree
        == """\
great_expectations/
    .gitignore
    great_expectations.yml
    expectations/
        warning.json
    notebooks/
        pandas/
            validation_playground.ipynb
        spark/
            validation_playground.ipynb
        sql/
            validation_playground.ipynb
    plugins/
        custom_data_docs/
            renderers/
            styles/
                data_docs_custom_styles.css
            views/
    uncommitted/
        config_variables.yml
        data_docs/
            local_site/
                index.html
                expectations/
                    warning.html
                static/
                    fonts/
                        HKGrotesk/
                            HKGrotesk-Bold.otf
                            HKGrotesk-BoldItalic.otf
                            HKGrotesk-Italic.otf
                            HKGrotesk-Light.otf
                            HKGrotesk-LightItalic.otf
                            HKGrotesk-Medium.otf
                            HKGrotesk-MediumItalic.otf
                            HKGrotesk-Regular.otf
                            HKGrotesk-SemiBold.otf
                            HKGrotesk-SemiBoldItalic.otf
                    images/
                        favicon.ico
                        glossary_scroller.gif
                        iterative-dev-loop.png
                        logo-long-vector.svg
                        logo-long.png
                        short-logo-vector.svg
                        short-logo.png
                        validation_failed_unexpected_values.gif
                    styles/
                        data_docs_custom_styles_template.css
                        data_docs_default_styles.css
                validations/
                    warning/
                        9999.9999/
                            foobarbazguid.html
        validations/
            warning/
                9999.9999/
                    foobarbazguid.json
"""
    )
    assert_no_logging_messages_or_tracebacks(caplog, result)
    assert result.exit_code == 0
    assert mock_webbrowser.call_count == 1
    assert (
        "{}/great_expectations/uncommitted/data_docs/local_site/validations/warning/".format(
            project_dir
        )
        in mock_webbrowser.call_args[0][0]
    )
import sys

from great_expectations import DataContext

# checkpoint configuration
context = DataContext("/home/paulcrickard/peoplepipeline/great_expectations")
suite = context.get_expectation_suite("people.validate")
# You can modify your BatchKwargs to select different data
batch_kwargs = {
    "path": "/home/paulcrickard/peoplepipeline/people.csv",
    "datasource": "files_datasource",
    "reader_method": "read_csv",
}

# checkpoint validation process
batch = context.get_batch(batch_kwargs, suite)
results = context.run_validation_operator("action_list_operator", [batch])

if not results["success"]:
    print('{"result":"fail"}')
    sys.exit(0)

print('{"result":"pass"}')
sys.exit(0)