def test_sqlalchemy_datasource_processes_dataset_options(test_db_connection_string):
    datasource = SqlAlchemyDatasource(
        "SqlAlchemy", credentials={"url": test_db_connection_string}
    )
    batch_kwargs = datasource.process_batch_parameters(
        dataset_options={"caching": False}
    )
    batch_kwargs["query"] = "select * from table_1;"
    batch = datasource.get_batch(batch_kwargs)
    validator = BridgeValidator(batch, ExpectationSuite(expectation_suite_name="foo"))
    dataset = validator.get_dataset()
    assert dataset.caching is False

    batch_kwargs = datasource.process_batch_parameters(
        dataset_options={"caching": True}
    )
    batch_kwargs["query"] = "select * from table_1;"
    batch = datasource.get_batch(batch_kwargs)
    validator = BridgeValidator(batch, ExpectationSuite(expectation_suite_name="foo"))
    dataset = validator.get_dataset()
    assert dataset.caching is True

    batch_kwargs = {
        "query": "select * from table_1;",
        "dataset_options": {"caching": False},
    }
    batch = datasource.get_batch(batch_kwargs)
    validator = BridgeValidator(batch, ExpectationSuite(expectation_suite_name="foo"))
    dataset = validator.get_dataset()
    assert dataset.caching is False

def _profile(self, schema: Dict, suite_name: str = None) -> ExpectationSuite:
    if not suite_name:
        raise ValueError("Please provide a suite name when using this profiler.")
    expectations = []
    # TODO add recursion to allow creation of suites for nested schema files
    if schema["type"] == JsonSchemaTypes.OBJECT.value:
        for key, details in schema["properties"].items():
            expectations.append(self._create_existence_expectation(key, details))

            type_expectation = self._create_type_expectation(key, details)
            if type_expectation:
                expectations.append(type_expectation)

            range_expectation = self._create_range_expectation(key, details)
            if range_expectation:
                expectations.append(range_expectation)

            boolean_expectation = self._create_boolean_expectation(key, details)
            if boolean_expectation:
                expectations.append(boolean_expectation)

            set_expectation = self._create_set_expectation(key, details)
            if set_expectation:
                expectations.append(set_expectation)

            string_len_expectation = self._create_string_length_expectation(
                key, details
            )
            if string_len_expectation:
                expectations.append(string_len_expectation)

            null_or_not_null_expectation = (
                self._create_null_or_not_null_column_expectation(key, details)
            )
            if null_or_not_null_expectation:
                expectations.append(null_or_not_null_expectation)

    description = schema.get("description", None)
    meta = None
    if description:
        meta = {
            "notes": {
                "format": "markdown",
                "content": [f"### Description:\n{description}"],
            }
        }
    suite = ExpectationSuite(suite_name, expectations=expectations, meta=meta)
    suite.add_citation(
        comment=f"This suite was built by the {self.__class__.__name__}",
    )
    return suite

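# Hedged usage sketch for _profile (assumptions: the method lives on a
# JSON-schema profiler instance passed in here as `profiler`, and the schema
# dict follows the JSON Schema shape the method walks above; the names and
# schema contents below are illustrative, not taken from the source).
def _example_profile_usage(profiler) -> ExpectationSuite:
    schema = {
        "type": "object",  # must match JsonSchemaTypes.OBJECT.value for properties to be profiled
        "description": "Example user record",
        "properties": {
            "age": {"type": "integer", "minimum": 0, "maximum": 130},
        },
    }
    # A suite name is mandatory; omitting it raises ValueError.
    return profiler._profile(schema, suite_name="example_suite")
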
def add_citation_with_batch_request(
    data_context: DataContext,
    expectation_suite: ExpectationSuite,
    batch_request: Optional[Dict[str, Union[str, int, Dict[str, Any]]]] = None,
) -> None:
    if (
        expectation_suite is not None
        and batch_request
        and isinstance(batch_request, dict)
        and BatchRequest(**batch_request)
    ):
        expectation_suite.add_citation(
            comment="Created suite added via CLI",
            batch_request=batch_request,
        )
        data_context.save_expectation_suite(expectation_suite=expectation_suite)

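# Hedged usage sketch for add_citation_with_batch_request (assumptions:
# `context` is an initialized DataContext and `suite` an existing
# ExpectationSuite; the batch_request keys mirror the BatchRequest fields used
# elsewhere in this section and are illustrative).
def _example_add_citation(context: DataContext, suite: ExpectationSuite) -> None:
    add_citation_with_batch_request(
        data_context=context,
        expectation_suite=suite,
        batch_request={
            "datasource_name": "my_datasource",
            "data_connector_name": "my_data_connector",
            "data_asset_name": "my_data_asset",
        },
    )
    # The citation lands in suite.meta["citations"] and the suite is re-saved.
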
def test_validate_with_invalid_result(empty_data_context):
    context: DataContext = empty_data_context
    with open(
        file_relative_path(__file__, "./test_sets/titanic_expectations.json")
    ) as f:
        my_expectation_suite_dict: dict = expectationSuiteSchema.loads(f.read())
        my_expectation_suite: ExpectationSuite = ExpectationSuite(
            **my_expectation_suite_dict, data_context=context
        )

    with mock.patch("uuid.uuid1") as uuid:
        uuid.return_value = "1234"
        my_df = ge.read_csv(
            file_relative_path(__file__, "./test_sets/Titanic.csv"),
            expectation_suite=my_expectation_suite,
        )
    my_df.set_default_expectation_argument("result_format", "COMPLETE")

    results = my_df.validate()  # catch_exceptions=True is default

    with open(
        file_relative_path(
            __file__,
            "./test_sets/titanic_expected_data_asset_validate_results_with_exceptions.json",
        )
    ) as f:
        expected_results = expectationSuiteValidationResultSchema.loads(f.read())

    del results.meta["great_expectations_version"]
    del results.meta["expectation_suite_meta"]["great_expectations_version"]
    for result in results.results:
        result.exception_info.pop("exception_traceback")
    assert results.to_json_dict() == expected_results.to_json_dict()

def suite_with_table_and_column_expectations(
    exp1,
    exp2,
    exp3,
    exp4,
    column_pair_expectation,
    table_exp1,
    table_exp2,
    table_exp3,
):
    suite = ExpectationSuite(
        expectation_suite_name="warning",
        expectations=[
            exp1,
            exp2,
            exp3,
            exp4,
            column_pair_expectation,
            table_exp1,
            table_exp2,
            table_exp3,
        ],
        meta={"notes": "This is an expectation suite."},
    )
    assert suite.expectations == [
        exp1,
        exp2,
        exp3,
        exp4,
        column_pair_expectation,
        table_exp1,
        table_exp2,
        table_exp3,
    ]
    return suite

def test_spark_datasource_processes_dataset_options(
    test_folder_connection_path_csv, test_backends, empty_data_context
):
    context: DataContext = empty_data_context
    if "SparkDFDataset" not in test_backends:
        pytest.skip("Spark has not been enabled, so this test must be skipped.")
    datasource = SparkDFDatasource(
        "PandasCSV",
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": test_folder_connection_path_csv,
            }
        },
    )
    batch_kwargs = datasource.build_batch_kwargs(
        "subdir_reader", data_asset_name="test"
    )
    batch_kwargs["dataset_options"] = {"caching": False, "persist": False}
    batch = datasource.get_batch(batch_kwargs)
    validator = BridgeValidator(
        batch, ExpectationSuite(expectation_suite_name="foo", data_context=context)
    )
    dataset = validator.get_dataset()
    assert dataset.caching is False
    assert dataset._persist is False

def suite_with_column_pair_and_table_expectations(
    table_exp1,
    table_exp2,
    table_exp3,
    column_pair_expectation,
    empty_data_context_stats_enabled,
):
    context: DataContext = empty_data_context_stats_enabled
    suite = ExpectationSuite(
        expectation_suite_name="warning",
        expectations=[
            column_pair_expectation,
            table_exp1,
            table_exp2,
            table_exp3,
        ],
        meta={"notes": "This is an expectation suite."},
        data_context=context,
    )
    assert suite.expectations == [
        column_pair_expectation,
        table_exp1,
        table_exp2,
        table_exp3,
    ]
    return suite

def test_suite_does_not_overwrite_existing_version_metadata(empty_data_context):
    context: DataContext = empty_data_context
    suite = ExpectationSuite(
        "foo", meta={"great_expectations_version": "0.0.0"}, data_context=context
    )
    assert "great_expectations_version" in suite.meta.keys()
    assert suite.meta["great_expectations_version"] == "0.0.0"

def test_sqlalchemy_source_limit(sqlitedb_engine, empty_data_context):
    context: DataContext = empty_data_context
    df1 = pd.DataFrame({"col_1": [1, 2, 3, 4, 5], "col_2": ["a", "b", "c", "d", "e"]})
    df2 = pd.DataFrame({"col_1": [0, 1, 2, 3, 4], "col_2": ["b", "c", "d", "e", "f"]})
    df1.to_sql(name="table_1", con=sqlitedb_engine, index=True)
    df2.to_sql(name="table_2", con=sqlitedb_engine, index=True, schema="main")
    datasource = SqlAlchemyDatasource("SqlAlchemy", engine=sqlitedb_engine)
    limited_batch = datasource.get_batch({"table": "table_1", "limit": 1, "offset": 2})
    assert isinstance(limited_batch, Batch)
    limited_dataset = BridgeValidator(
        limited_batch,
        expectation_suite=ExpectationSuite("test", data_context=context),
        expectation_engine=SqlAlchemyDataset,
    ).get_dataset()
    assert limited_dataset._table.name.startswith(
        "ge_temp_"
    )  # we have generated a temporary table
    assert len(limited_dataset.head(10)) == 1  # and it is only one row long
    assert limited_dataset.head(10)["col_1"][0] == 3  # offset should have been applied

def test_sqlalchemy_source_templating(sqlitedb_engine, empty_data_context):
    context: DataContext = empty_data_context
    datasource = SqlAlchemyDatasource(
        engine=sqlitedb_engine,
        batch_kwargs_generators={"foo": {"class_name": "QueryBatchKwargsGenerator"}},
    )
    generator = datasource.get_batch_kwargs_generator("foo")
    generator.add_query(data_asset_name="test", query="select 'cat' as ${col_name};")
    batch = datasource.get_batch(
        generator.build_batch_kwargs(
            "test", query_parameters={"col_name": "animal_name"}
        )
    )
    dataset = BridgeValidator(
        batch,
        expectation_suite=ExpectationSuite("test", data_context=context),
        expectation_engine=SqlAlchemyDataset,
    ).get_dataset()
    res = dataset.expect_column_to_exist("animal_name")
    assert res.success is True
    res = dataset.expect_column_values_to_be_in_set("animal_name", ["cat"])
    assert res.success is True

def different_suite(exp1, exp4, empty_data_context):
    context: DataContext = empty_data_context
    return ExpectationSuite(
        expectation_suite_name="warning",
        expectations=[exp1, exp4],
        meta={"notes": "This is an expectation suite."},
        data_context=context,
    )

def equivalent_suite(exp1, exp3):
    return ExpectationSuite(
        expectation_suite_name="danger",
        expectations=[exp1, exp3],
        meta={
            "notes": "This is another expectation suite, with a different name and meta"
        },
    )

def baseline_suite(exp1, exp2, empty_data_context_stats_enabled):
    context: DataContext = empty_data_context_stats_enabled
    return ExpectationSuite(
        expectation_suite_name="warning",
        expectations=[exp1, exp2],
        meta={"notes": "This is an expectation suite."},
        data_context=context,
    )

def test_anonymize_object_info_with_core_ge_object(
    anonymizer_with_consistent_salt: Anonymizer,
):
    anonymized_result: dict = anonymizer_with_consistent_salt._anonymize_object_info(
        anonymized_info_dict={},
        object_=ExpectationSuite(expectation_suite_name="my_suite"),
    )
    assert anonymized_result == {"parent_class": "ExpectationSuite"}

def domain_success_runtime_suite(
    exp1, exp2, exp3, exp4, exp5, empty_data_context_stats_enabled
):
    context: DataContext = empty_data_context_stats_enabled
    return ExpectationSuite(
        expectation_suite_name="warning",
        expectations=[exp1, exp2, exp3, exp4, exp5],
        meta={"notes": "This is an expectation suite."},
        data_context=context,
    )

def equivalent_suite(exp1, exp3, empty_data_context):
    context: DataContext = empty_data_context
    return ExpectationSuite(
        expectation_suite_name="danger",
        expectations=[exp1, exp3],
        meta={
            "notes": "This is another expectation suite, with a different name and meta"
        },
        data_context=context,
    )

def ge_cloud_suite(ge_cloud_id, exp1, exp2, exp3, empty_data_context_stats_enabled):
    context: DataContext = empty_data_context_stats_enabled
    for exp in (exp1, exp2, exp3):
        exp.ge_cloud_id = ge_cloud_id
    return ExpectationSuite(
        expectation_suite_name="warning",
        expectations=[exp1, exp2, exp3],
        meta={"notes": "This is an expectation suite."},
        ge_cloud_id=ge_cloud_id,
        data_context=context,
    )

def test_data_asset_expectation_suite():
    asset = DataAsset()
    default_suite = ExpectationSuite(
        expectation_suite_name="default",
        data_asset_type="DataAsset",
        meta={"great_expectations_version": ge_version},
        expectations=[],
    )
    # We should have a default-initialized suite stored internally and available for getting
    assert asset._expectation_suite == default_suite
    assert asset.get_expectation_suite() == default_suite

def test_meta_version_warning():
    asset = ge.data_asset.DataAsset()

    with pytest.warns(UserWarning) as w:
        suite = ExpectationSuite(expectations=[], expectation_suite_name="test")
        # mangle the metadata
        suite.meta = {"foo": "bar"}
        out = asset.validate(expectation_suite=suite)
    assert (
        w[0].message.args[0]
        == "WARNING: No great_expectations version found in configuration object."
    )

    with pytest.warns(UserWarning) as w:
        suite = ExpectationSuite(
            expectations=[],
            expectation_suite_name="test",
            meta={"great_expectations_version": "0.0.0"},
        )
        # mangle the metadata
        suite.meta = {"great_expectations_version": "0.0.0"}
        out = asset.validate(expectation_suite=suite)
    assert (
        w[0].message.args[0]
        == "WARNING: This configuration object was built using version 0.0.0 of great_expectations, but is currently "
        "being validated by version %s." % ge.__version__
    )

def run(
    self,
    variables: Optional[Dict[str, Any]] = None,
    rules: Optional[Dict[str, Dict[str, Any]]] = None,
    expectation_suite_name: Optional[str] = None,
    include_citation: bool = True,
) -> ExpectationSuite:
    """
    Args:
        variables: attribute name/value pairs (overrides)
        rules: name/(configuration-dictionary) pairs (overrides)
        expectation_suite_name: a name for the returned ExpectationSuite
        include_citation: whether to include the Profiler config in the metadata
            of the ExpectationSuite produced by the Profiler

    Returns:
        The set of rule evaluation results in the form of an ExpectationSuite.
    """
    effective_variables: Optional[
        ParameterContainer
    ] = self.reconcile_profiler_variables(variables=variables)
    effective_rules: List[Rule] = self.reconcile_profiler_rules(rules=rules)
    if expectation_suite_name is None:
        expectation_suite_name = (
            f"tmp.profiler_{self.__class__.__name__}_suite_{str(uuid.uuid4())[:8]}"
        )
    expectation_suite: ExpectationSuite = ExpectationSuite(
        expectation_suite_name=expectation_suite_name,
        data_context=self._data_context,
    )
    if include_citation:
        expectation_suite.add_citation(
            comment="Suite created by Rule-Based Profiler with the configuration included.",
            profiler_config=self._citation,
        )
    rule: Rule
    for rule in effective_rules:
        expectation_configurations: List[ExpectationConfiguration] = rule.generate(
            variables=effective_variables,
        )
        expectation_configuration: ExpectationConfiguration
        for expectation_configuration in expectation_configurations:
            expectation_suite._add_expectation(
                expectation_configuration=expectation_configuration,
                send_usage_event=False,
            )
    return expectation_suite

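# Hedged usage sketch for run (assumptions: `profiler` is a fully configured
# Rule-Based Profiler instance; only the keyword arguments from the signature
# above are used, and the suite name is illustrative).
def _example_profiler_run(profiler) -> ExpectationSuite:
    suite = profiler.run(
        expectation_suite_name="my_profiled_suite",
        include_citation=True,  # embeds the profiler config in the suite's citations
    )
    return suite
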
def single_expectation_suite_with_expectation_ge_cloud_id(
    exp1, empty_data_context_stats_enabled
):
    exp1_with_ge_cloud_id = deepcopy(exp1)
    exp1_with_ge_cloud_id.ge_cloud_id = UUID("0faf94a9-f53a-41fb-8e94-32f218d4a774")
    context: DataContext = empty_data_context_stats_enabled
    return ExpectationSuite(
        expectation_suite_name="warning",
        expectations=[exp1_with_ge_cloud_id],
        meta={"notes": "This is an expectation suite."},
        data_context=context,
    )

def get_batch_kwargs(
    self, suite: ExpectationSuite, batch_kwargs: Union[dict, BatchKwargs]
):
    # Explicit batch_kwargs win over anything stored in the suite.
    if isinstance(batch_kwargs, dict):
        return self._fix_path_in_batch_kwargs(batch_kwargs)

    # Quick exit: a suite with no citations at all cannot supply batch_kwargs.
    citations = suite.meta.get("citations")
    if not citations:
        return self._fix_path_in_batch_kwargs(batch_kwargs)

    # Otherwise, fall back to the most recent citation that carries batch_kwargs.
    citations = suite.get_citations(require_batch_kwargs=True)
    if not citations:
        return None

    citation = citations[-1]
    batch_kwargs = citation.get("batch_kwargs")
    return self._fix_path_in_batch_kwargs(batch_kwargs)

def test_pandas_datasource_processes_dataset_options(test_folder_connection_path_csv):
    datasource = PandasDatasource(
        "PandasCSV",
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": test_folder_connection_path_csv,
            }
        },
    )
    batch_kwargs = datasource.build_batch_kwargs(
        "subdir_reader", data_asset_name="test"
    )
    batch_kwargs["dataset_options"] = {"caching": False}
    batch = datasource.get_batch(batch_kwargs)
    validator = BridgeValidator(batch, ExpectationSuite(expectation_suite_name="foo"))
    dataset = validator.get_dataset()
    assert dataset.caching is False

def test_validate_catch_non_existent_expectation():
    df = ge.dataset.PandasDataset({"x": [1, 2, 3, 4, 5]})
    validation_config_non_existent_expectation = ExpectationSuite(
        expectation_suite_name="default",
        meta={"great_expectations_version": ge.__version__},
        expectations=[
            ExpectationConfiguration(
                expectation_type="non_existent_expectation", kwargs={"column": "x"}
            )
        ],
    )
    results = df.validate(expectation_suite=validation_config_non_existent_expectation)
    assert (
        "object has no attribute 'non_existent_expectation'"
        in results.results[0].exception_info["exception_message"]
    )

def test_validate_catch_invalid_parameter():
    df = ge.dataset.PandasDataset({"x": [1, 2, 3, 4, 5]})
    validation_config_invalid_parameter = ExpectationSuite(
        expectation_suite_name="default",
        meta={"great_expectations_version": ge.__version__},
        expectations=[
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_be_between",
                kwargs={"column": "x", "min_value": 6, "max_value": 5},
            )
        ],
    )
    result = df.validate(expectation_suite=validation_config_invalid_parameter)
    assert (
        "min_value cannot be greater than max_value"
        in result.results[0].exception_info["exception_message"]
    )

def test_validate_with_invalid_result_catch_exceptions_false(empty_data_context):
    context: DataContext = empty_data_context
    with open(
        file_relative_path(__file__, "./test_sets/titanic_expectations.json")
    ) as f:
        my_expectation_suite_dict: dict = expectationSuiteSchema.loads(f.read())
        my_expectation_suite: ExpectationSuite = ExpectationSuite(
            **my_expectation_suite_dict, data_context=context
        )

    with mock.patch("uuid.uuid1") as uuid:
        uuid.return_value = "1234"
        my_df = ge.read_csv(
            file_relative_path(__file__, "./test_sets/Titanic.csv"),
            expectation_suite=my_expectation_suite,
        )
    my_df.set_default_expectation_argument("result_format", "COMPLETE")

    with pytest.raises(InvalidCacheValueError):
        with pytest.warns(Warning, match=r"No great_expectations version found"):
            my_df.validate(catch_exceptions=False)

def test_expectations_store(empty_data_context):
    context: DataContext = empty_data_context
    my_store = ExpectationsStore()

    with pytest.raises(TypeError):
        my_store.set("not_a_ValidationResultIdentifier")

    ns_1 = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.warning"))
    my_store.set(
        ns_1,
        ExpectationSuite(expectation_suite_name="a.b.c.warning", data_context=context),
    )
    ns_1_dict: dict = my_store.get(ns_1)
    ns_1_suite: ExpectationSuite = ExpectationSuite(**ns_1_dict, data_context=context)
    assert ns_1_suite == ExpectationSuite(
        expectation_suite_name="a.b.c.warning", data_context=context
    )

    ns_2 = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.failure"))
    my_store.set(
        ns_2,
        ExpectationSuite(expectation_suite_name="a.b.c.failure", data_context=context),
    )
    ns_2_dict: dict = my_store.get(ns_2)
    ns_2_suite: ExpectationSuite = ExpectationSuite(**ns_2_dict, data_context=context)
    assert ns_2_suite == ExpectationSuite(
        expectation_suite_name="a.b.c.failure", data_context=context
    )

    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }

def test_batch_request_sql_with_schema(
    data_context_with_sql_data_connectors_including_schema_for_testing_get_batch,
):
    context: DataContext = (
        data_context_with_sql_data_connectors_including_schema_for_testing_get_batch
    )

    df_table_expected_my_first_data_asset: pd.DataFrame = pd.DataFrame(
        {"col_1": [1, 2, 3, 4, 5], "col_2": ["a", "b", "c", "d", "e"]}
    )
    df_table_expected_my_second_data_asset: pd.DataFrame = pd.DataFrame(
        {"col_1": [0, 1, 2, 3, 4], "col_2": ["b", "c", "d", "e", "f"]}
    )

    batch_request: dict
    validator: Validator
    df_table_actual: pd.DataFrame

    # Exercise RuntimeDataConnector using SQL query against database table with empty schema name.
    batch_request = {
        "datasource_name": "test_sqlite_db_datasource",
        "data_connector_name": "my_runtime_data_connector",
        "data_asset_name": "test_asset",
        "runtime_parameters": {"query": "SELECT * FROM table_1"},
        "batch_identifiers": {
            "pipeline_stage_name": "core_processing",
            "airflow_run_id": 1234567890,
        },
    }
    validator = context.get_validator(
        batch_request=RuntimeBatchRequest(**batch_request),
        expectation_suite=ExpectationSuite(
            "my_expectation_suite", data_context=context
        ),
    )
    df_table_actual = validator.head(n_rows=0, fetch_all=True).drop(columns=["index"])
    assert df_table_actual.equals(df_table_expected_my_first_data_asset)

    # Exercise RuntimeDataConnector using SQL query against database table with non-empty ("main") schema name.
    batch_request = {
        "datasource_name": "test_sqlite_db_datasource",
        "data_connector_name": "my_runtime_data_connector",
        "data_asset_name": "test_asset",
        "runtime_parameters": {"query": "SELECT * FROM main.table_2"},
        "batch_identifiers": {
            "pipeline_stage_name": "core_processing",
            "airflow_run_id": 1234567890,
        },
    }
    validator = context.get_validator(
        batch_request=RuntimeBatchRequest(**batch_request),
        expectation_suite=ExpectationSuite(
            "my_expectation_suite", data_context=context
        ),
    )
    df_table_actual = validator.head(n_rows=0, fetch_all=True).drop(columns=["index"])
    assert df_table_actual.equals(df_table_expected_my_second_data_asset)

    # Exercise InferredAssetSqlDataConnector using data_asset_name introspected with schema from table, named "table_1".
    batch_request = {
        "datasource_name": "test_sqlite_db_datasource",
        "data_connector_name": "my_inferred_data_connector",
        "data_asset_name": "main.table_1",
    }
    validator = context.get_validator(
        batch_request=BatchRequest(**batch_request),
        expectation_suite=ExpectationSuite(
            "my_expectation_suite", data_context=context
        ),
    )
    df_table_actual = validator.head(n_rows=0, fetch_all=True).drop(columns=["index"])
    assert df_table_actual.equals(df_table_expected_my_first_data_asset)

    # Exercise InferredAssetSqlDataConnector using data_asset_name introspected with schema from table, named "table_2".
    batch_request = {
        "datasource_name": "test_sqlite_db_datasource",
        "data_connector_name": "my_inferred_data_connector",
        "data_asset_name": "main.table_2",
    }
    validator = context.get_validator(
        batch_request=BatchRequest(**batch_request),
        expectation_suite=ExpectationSuite(
            "my_expectation_suite", data_context=context
        ),
    )
    df_table_actual = validator.head(n_rows=0, fetch_all=True).drop(columns=["index"])
    assert df_table_actual.equals(df_table_expected_my_second_data_asset)

    # Exercise ConfiguredAssetSqlDataConnector using data_asset_name corresponding to "table_1" (implicitly).
    batch_request = {
        "datasource_name": "test_sqlite_db_datasource",
        "data_connector_name": "my_configured_data_connector",
        "data_asset_name": "my_first_data_asset",
    }
    validator = context.get_validator(
        batch_request=BatchRequest(**batch_request),
        expectation_suite=ExpectationSuite(
            "my_expectation_suite", data_context=context
        ),
    )
    df_table_actual = validator.head(n_rows=0, fetch_all=True).drop(columns=["index"])
    assert df_table_actual.equals(df_table_expected_my_first_data_asset)

    # Exercise ConfiguredAssetSqlDataConnector using data_asset_name corresponding to "table_2" (implicitly).
    batch_request = {
        "datasource_name": "test_sqlite_db_datasource",
        "data_connector_name": "my_configured_data_connector",
        "data_asset_name": "my_second_data_asset",
    }
    validator = context.get_validator(
        batch_request=BatchRequest(**batch_request),
        expectation_suite=ExpectationSuite(
            "my_expectation_suite", data_context=context
        ),
    )
    df_table_actual = validator.head(n_rows=0, fetch_all=True).drop(columns=["index"])
    assert df_table_actual.equals(df_table_expected_my_second_data_asset)

    # Exercise ConfiguredAssetSqlDataConnector using data_asset_name corresponding to "table_1" (explicitly).
    batch_request = {
        "datasource_name": "test_sqlite_db_datasource",
        "data_connector_name": "my_configured_data_connector",
        "data_asset_name": "table_1",
    }
    validator = context.get_validator(
        batch_request=BatchRequest(**batch_request),
        expectation_suite=ExpectationSuite(
            "my_expectation_suite", data_context=context
        ),
    )
    df_table_actual = validator.head(n_rows=0, fetch_all=True).drop(columns=["index"])
    assert df_table_actual.equals(df_table_expected_my_first_data_asset)

    # Exercise ConfiguredAssetSqlDataConnector using data_asset_name corresponding to "table_2" (explicitly).
    batch_request = {
        "datasource_name": "test_sqlite_db_datasource",
        "data_connector_name": "my_configured_data_connector",
        "data_asset_name": "table_2",
    }
    validator = context.get_validator(
        batch_request=BatchRequest(**batch_request),
        expectation_suite=ExpectationSuite(
            "my_expectation_suite", data_context=context
        ),
    )
    df_table_actual = validator.head(n_rows=0, fetch_all=True).drop(columns=["index"])
    assert df_table_actual.equals(df_table_expected_my_second_data_asset)

def test_get_and_save_expectation_suite(tmp_path_factory):
    directory_name = str(
        tmp_path_factory.mktemp("test_get_and_save_expectation_config")
    )
    df = ge.dataset.PandasDataset(
        {
            "x": [1, 2, 4],
            "y": [1, 2, 5],
            "z": ["hello", "jello", "mello"],
        }
    )

    df.expect_column_values_to_be_in_set("x", [1, 2, 4])
    df.expect_column_values_to_be_in_set(
        "y", [1, 2, 4], catch_exceptions=True, include_config=True
    )
    df.expect_column_values_to_match_regex("z", "ello")

    ### First test set ###
    output_config = ExpectationSuite(
        expectations=[
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_be_in_set",
                kwargs={"column": "x", "value_set": [1, 2, 4]},
            ),
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_match_regex",
                kwargs={"column": "z", "regex": "ello"},
            ),
        ],
        expectation_suite_name="default",
        data_asset_type="Dataset",
        meta={"great_expectations_version": ge.__version__},
    )
    assert output_config == df.get_expectation_suite()

    df.save_expectation_suite(directory_name + "/temp1.json")
    with open(directory_name + "/temp1.json") as infile:
        loaded_config = expectationSuiteSchema.loads(infile.read())
    assert output_config == loaded_config

    ### Second test set ###
    output_config = ExpectationSuite(
        expectations=[
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_be_in_set",
                kwargs={"column": "x", "value_set": [1, 2, 4]},
            ),
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_be_in_set",
                kwargs={"column": "y", "value_set": [1, 2, 4]},
            ),
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_match_regex",
                kwargs={"column": "z", "regex": "ello"},
            ),
        ],
        expectation_suite_name="default",
        data_asset_type="Dataset",
        meta={"great_expectations_version": ge.__version__},
    )
    assert output_config == df.get_expectation_suite(discard_failed_expectations=False)
    df.save_expectation_suite(
        directory_name + "/temp2.json", discard_failed_expectations=False
    )
    with open(directory_name + "/temp2.json") as infile:
        loaded_suite = expectationSuiteSchema.loads(infile.read())
    assert output_config == loaded_suite

    ### Third test set ###
    output_config = ExpectationSuite(
        expectations=[
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_be_in_set",
                kwargs={
                    "column": "x",
                    "value_set": [1, 2, 4],
                    "result_format": "BASIC",
                },
            ),
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_match_regex",
                kwargs={"column": "z", "regex": "ello", "result_format": "BASIC"},
            ),
        ],
        expectation_suite_name="default",
        data_asset_type="Dataset",
        meta={"great_expectations_version": ge.__version__},
    )
    assert output_config == df.get_expectation_suite(
        discard_result_format_kwargs=False,
        discard_include_config_kwargs=False,
        discard_catch_exceptions_kwargs=False,
    )
    df.save_expectation_suite(
        directory_name + "/temp3.json",
        discard_result_format_kwargs=False,
        discard_include_config_kwargs=False,
        discard_catch_exceptions_kwargs=False,
    )
    with open(directory_name + "/temp3.json") as infile:
        loaded_suite = expectationSuiteSchema.loads(infile.read())
    assert output_config == loaded_suite

def test_ExpectationSuitePageRenderer_render_expectation_suite_notes(
    empty_data_context,
):
    context: DataContext = empty_data_context

    result = ExpectationSuitePageRenderer._render_expectation_suite_notes(
        ExpectationSuite(
            expectation_suite_name="test", meta={"notes": "*hi*"}, data_context=context
        )
    )
    # print(RenderedContent.rendered_content_list_to_json(result.text))
    assert RenderedContent.rendered_content_list_to_json(result.text) == [
        "This Expectation suite currently contains 0 total Expectations across 0 columns.",
        "*hi*",
    ]

    result = ExpectationSuitePageRenderer._render_expectation_suite_notes(
        ExpectationSuite(
            expectation_suite_name="test",
            meta={"notes": ["*alpha*", "_bravo_", "charlie"]},
            data_context=context,
        )
    )
    # print(RenderedContent.rendered_content_list_to_json(result.text))
    assert RenderedContent.rendered_content_list_to_json(result.text) == [
        "This Expectation suite currently contains 0 total Expectations across 0 columns.",
        "*alpha*",
        "_bravo_",
        "charlie",
    ]

    result = ExpectationSuitePageRenderer._render_expectation_suite_notes(
        ExpectationSuite(
            expectation_suite_name="test",
            meta={
                "notes": {
                    "format": "string",
                    "content": ["*alpha*", "_bravo_", "charlie"],
                }
            },
            data_context=context,
        )
    )
    # print(RenderedContent.rendered_content_list_to_json(result.text))
    assert RenderedContent.rendered_content_list_to_json(result.text) == [
        "This Expectation suite currently contains 0 total Expectations across 0 columns.",
        "*alpha*",
        "_bravo_",
        "charlie",
    ]

    result = ExpectationSuitePageRenderer._render_expectation_suite_notes(
        ExpectationSuite(
            expectation_suite_name="test",
            meta={"notes": {"format": "markdown", "content": "*alpha*"}},
            data_context=context,
        )
    )
    # print(RenderedContent.rendered_content_list_to_json(result.text))
    try:
        mistune.markdown("*test*")
        assert RenderedContent.rendered_content_list_to_json(result.text) == [
            "This Expectation suite currently contains 0 total Expectations across 0 columns.",
            {
                "content_block_type": "markdown",
                "styling": {"parent": {}},
                "markdown": "*alpha*",
            },
        ]
    except OSError:
        assert RenderedContent.rendered_content_list_to_json(result.text) == [
            "This Expectation suite currently contains 0 total Expectations across 0 columns.",
            "*alpha*",
        ]

    result = ExpectationSuitePageRenderer._render_expectation_suite_notes(
        ExpectationSuite(
            expectation_suite_name="test",
            meta={
                "notes": {
                    "format": "markdown",
                    "content": ["*alpha*", "_bravo_", "charlie"],
                }
            },
            data_context=context,
        )
    )
    # print(RenderedContent.rendered_content_list_to_json(result.text))
    try:
        mistune.markdown("*test*")
        assert RenderedContent.rendered_content_list_to_json(result.text) == [
            "This Expectation suite currently contains 0 total Expectations across 0 columns.",
            {
                "content_block_type": "markdown",
                "styling": {"parent": {}},
                "markdown": "*alpha*",
            },
            {
                "content_block_type": "markdown",
                "styling": {"parent": {}},
                "markdown": "_bravo_",
            },
            {
                "content_block_type": "markdown",
                "styling": {"parent": {}},
                "markdown": "charlie",
            },
        ]
    except OSError:
        assert RenderedContent.rendered_content_list_to_json(result.text) == [
            "This Expectation suite currently contains 0 total Expectations across 0 columns.",
            "*alpha*",
            "_bravo_",
            "charlie",
        ]