def test_sqlalchemy_datasource_processes_dataset_options(test_db_connection_string):
    datasource = SqlAlchemyDatasource(
        "SqlAlchemy", credentials={"url": test_db_connection_string}
    )
    batch_kwargs = datasource.process_batch_parameters(
        dataset_options={"caching": False}
    )
    batch_kwargs["query"] = "select * from table_1;"
    batch = datasource.get_batch(batch_kwargs)
    validator = Validator(batch, ExpectationSuite(expectation_suite_name="foo"))
    dataset = validator.get_dataset()
    assert dataset.caching is False

    batch_kwargs = datasource.process_batch_parameters(
        dataset_options={"caching": True}
    )
    batch_kwargs["query"] = "select * from table_1;"
    batch = datasource.get_batch(batch_kwargs)
    validator = Validator(batch, ExpectationSuite(expectation_suite_name="foo"))
    dataset = validator.get_dataset()
    assert dataset.caching is True

    batch_kwargs = {
        "query": "select * from table_1;",
        "dataset_options": {"caching": False},
    }
    batch = datasource.get_batch(batch_kwargs)
    validator = Validator(batch, ExpectationSuite(expectation_suite_name="foo"))
    dataset = validator.get_dataset()
    assert dataset.caching is False
def test_ExpectationsStore_with_DatabaseStoreBackend():
    # Use sqlite so we don't require postgres for this test.
    connection_kwargs = {"drivername": "sqlite"}

    # First, demonstrate that we pick up default configuration
    my_store = ExpectationsStore(
        store_backend={
            "class_name": "DatabaseStoreBackend",
            "credentials": connection_kwargs,
        }
    )

    with pytest.raises(TypeError):
        my_store.get("not_a_ExpectationSuiteIdentifier")

    ns_1 = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.warning"))
    my_store.set(ns_1, ExpectationSuite(expectation_suite_name="a.b.c.warning"))
    assert my_store.get(ns_1) == ExpectationSuite(
        expectation_suite_name="a.b.c.warning"
    )

    ns_2 = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.failure"))
    my_store.set(ns_2, ExpectationSuite(expectation_suite_name="a.b.c.failure"))
    assert my_store.get(ns_2) == ExpectationSuite(
        expectation_suite_name="a.b.c.failure"
    )

    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }
def test_expectation_summary_in_ExpectationSuitePageRenderer_render_expectation_suite_notes():
    result = ExpectationSuitePageRenderer._render_expectation_suite_notes(
        ExpectationSuite(expectation_suite_name="test", meta={}, expectations=None)
    )
    # print(RenderedContent.rendered_content_list_to_json(result.text))
    assert RenderedContent.rendered_content_list_to_json(result.text) == [
        "This Expectation suite currently contains 0 total Expectations across 0 columns."
    ]

    result = ExpectationSuitePageRenderer._render_expectation_suite_notes(
        ExpectationSuite(
            expectation_suite_name="test",
            meta={"notes": {"format": "markdown", "content": ["hi"]}},
        )
    )
    # print(RenderedContent.rendered_content_list_to_json(result.text))
    try:
        mistune.markdown("*test*")
        assert RenderedContent.rendered_content_list_to_json(result.text) == [
            "This Expectation suite currently contains 0 total Expectations across 0 columns.",
            {
                "content_block_type": "markdown",
                "styling": {"parent": {}},
                "markdown": "hi",
            },
        ]
    except OSError:
        assert RenderedContent.rendered_content_list_to_json(result.text) == [
            "This Expectation suite currently contains 0 total Expectations across 0 columns.",
            "hi",
        ]

    result = ExpectationSuitePageRenderer._render_expectation_suite_notes(
        ExpectationSuite(
            expectation_suite_name="test",
            meta={},
            expectations=[
                ExpectationConfiguration(
                    expectation_type="expect_table_row_count_to_be_between",
                    kwargs={"min_value": 0, "max_value": None},
                ),
                ExpectationConfiguration(
                    expectation_type="expect_column_to_exist",
                    kwargs={"column": "x"},
                ),
                ExpectationConfiguration(
                    expectation_type="expect_column_to_exist",
                    kwargs={"column": "y"},
                ),
            ],
        )
    )
    # print(RenderedContent.rendered_content_list_to_json(result.text)[0])
    assert (
        RenderedContent.rendered_content_list_to_json(result.text)[0]
        == "This Expectation suite currently contains 3 total Expectations across 2 columns."
    )
def validate_expectations(
    cls, df_ge: PandasDataset, specs: SchemaParserResult
) -> DefaultDict[str, list]:
    """Validate the dynamic expectations from the schema via the great_expectations library."""
    invalid_elements = defaultdict(list)
    suite = ExpectationSuite(expectation_suite_name="custom_specifications")
    for column in specs.expectation_definitions.keys():
        for expectation in specs.expectation_definitions[column]:
            kwargs_extended = dict(expectation["kwargs"])
            kwargs_extended["column"] = column
            suite.append_expectation(
                ExpectationConfiguration(
                    expectation_type=expectation["expectation_type"],
                    kwargs=kwargs_extended,
                )
            )
    # noinspection PyTypeChecker
    result = df_ge.validate(expectation_suite=suite, result_format="BASIC")
    for expectation_result in result.results:
        # Skip expectations that raised an exception instead of producing a result.
        if expectation_result.exception_info["raised_exception"]:
            continue
        column_name = expectation_result.expectation_config.kwargs["column"]
        n_invalid = expectation_result.result["unexpected_count"]
        invalid_elements[column_name].append(n_invalid)
    return invalid_elements
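# Hedged usage sketch for validate_expectations above. "SchemaParserResult" and the
# owning class are specific to the surrounding project and not shown here, so this
# uses a stand-in namespace object exposing the one attribute the method reads
# (expectation_definitions) and a hypothetical owner named SchemaValidator.
from types import SimpleNamespace

import pandas as pd
from great_expectations.dataset import PandasDataset

specs = SimpleNamespace(
    expectation_definitions={
        "age": [
            {
                "expectation_type": "expect_column_values_to_be_between",
                "kwargs": {"min_value": 0, "max_value": 120},
            }
        ]
    }
)
df_ge = PandasDataset(pd.DataFrame({"age": [25, -3, 200]}))
invalid = SchemaValidator.validate_expectations(df_ge, specs)  # hypothetical owner
print(dict(invalid))  # expected: {"age": [2]} -- two out-of-range values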
def _profile(self, schema: dict, suite_name: str = None) -> ExpectationSuite:
    if not suite_name:
        raise ValueError("Please provide a suite name when using this profiler.")
    expectations = []
    # TODO add recursion to allow creation of suites for nested schema files
    if schema["type"] == JsonSchemaTypes.OBJECT.value:
        for key, details in schema["properties"].items():
            expectations.append(self._create_existence_expectation(key, details))

            type_expectation = self._create_type_expectation(key, details)
            if type_expectation:
                expectations.append(type_expectation)

            range_expectation = self._create_range_expectation(key, details)
            if range_expectation:
                expectations.append(range_expectation)

            boolean_expectation = self._create_boolean_expectation(key, details)
            if boolean_expectation:
                expectations.append(boolean_expectation)

            set_expectation = self._create_set_expectation(key, details)
            if set_expectation:
                expectations.append(set_expectation)

            string_len_expectation = self._create_string_length_expectation(
                key, details
            )
            if string_len_expectation:
                expectations.append(string_len_expectation)

            null_or_not_null_expectation = (
                self._create_null_or_not_null_column_expectation(key, details)
            )
            if null_or_not_null_expectation:
                expectations.append(null_or_not_null_expectation)

    description = schema.get("description", None)
    meta = None
    if description:
        meta = {
            "notes": {
                "format": "markdown",
                "content": [f"### Description:\n{description}"],
            }
        }
    suite = ExpectationSuite(suite_name, expectations=expectations, meta=meta)
    suite.add_citation(
        comment=f"This suite was built by the {self.__class__.__name__}",
    )
    return suite
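# Hedged usage sketch for _profile above. Assumption: the method belongs to
# great_expectations' JsonSchemaProfiler (JsonSchemaTypes and the _create_* helpers
# match that class), whose public profile() validates the schema and then delegates
# to _profile(). The schema literal is illustrative.
from great_expectations.profile.json_schema_profiler import JsonSchemaProfiler

schema = {
    "type": "object",
    "description": "Customer record",
    "properties": {"age": {"type": "integer", "minimum": 0, "maximum": 120}},
}
suite = JsonSchemaProfiler().profile(schema, suite_name="customer_suite")
print(len(suite.expectations))  # existence/type/range expectations for "age"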
def test_ge_cloud_validator_updates_self_suite_with_ge_cloud_ids_on_save(
    mock_emit,
    mock_context_get_suite,
    mock_context_save_suite,
    multi_batch_taxi_validator_ge_cloud_mode,
    empty_data_context_stats_enabled,
):
    """
    This checks that a Validator in ge_cloud_mode properly updates the underlying
    Expectation Suite on save. The multi_batch_taxi_validator_ge_cloud_mode fixture
    has a suite with a single expectation.
    :param mock_context_get_suite: Under normal circumstances, this would be the
        ExpectationSuite object returned from GE Cloud
    :param mock_context_save_suite: Under normal circumstances, this would trigger
        a POST or PATCH to GE Cloud
    """
    context: DataContext = empty_data_context_stats_enabled
    mock_suite = ExpectationSuite(
        expectation_suite_name="validating_taxi_data",
        expectations=[
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_be_between",
                kwargs={"column": "passenger_count", "min_value": 0, "max_value": 99},
                meta={"notes": "This is an expectation."},
                ge_cloud_id=UUID("0faf94a9-f53a-41fb-8e94-32f218d4a774"),
            ),
            ExpectationConfiguration(
                expectation_type="expect_column_values_to_be_between",
                kwargs={"column": "trip_distance", "min_value": 11, "max_value": 22},
                meta={"notes": "This is an expectation."},
                ge_cloud_id=UUID("3e8eee33-b425-4b36-a831-6e9dd31ad5af"),
            ),
        ],
        data_context=context,
        meta={"notes": "This is an expectation suite."},
    )
    mock_context_save_suite.return_value = True
    mock_context_get_suite.return_value = mock_suite
    multi_batch_taxi_validator_ge_cloud_mode.expect_column_values_to_be_between(
        column="trip_distance", min_value=11, max_value=22
    )
    multi_batch_taxi_validator_ge_cloud_mode.save_expectation_suite()
    assert (
        multi_batch_taxi_validator_ge_cloud_mode.get_expectation_suite().to_json_dict()
        == mock_suite.to_json_dict()
    )

    # add_expectation() will not send a usage_statistics event when called from a Validator
    assert mock_emit.call_count == 0
    assert mock_emit.call_args_list == []
def test_sqlalchemy_source_limit(sqlitedb_engine):
    df1 = pd.DataFrame({"col_1": [1, 2, 3, 4, 5], "col_2": ["a", "b", "c", "d", "e"]})
    df2 = pd.DataFrame({"col_1": [0, 1, 2, 3, 4], "col_2": ["b", "c", "d", "e", "f"]})
    df1.to_sql("table_1", con=sqlitedb_engine, index=True)
    df2.to_sql("table_2", con=sqlitedb_engine, index=True, schema="main")
    datasource = SqlAlchemyDatasource("SqlAlchemy", engine=sqlitedb_engine)
    limited_batch = datasource.get_batch({"table": "table_1", "limit": 1, "offset": 2})
    assert isinstance(limited_batch, Batch)
    limited_dataset = Validator(
        limited_batch,
        expectation_suite=ExpectationSuite("test"),
        expectation_engine=SqlAlchemyDataset,
    ).get_dataset()
    assert limited_dataset._table.name.startswith(
        "ge_tmp_"
    )  # we have generated a temporary table
    assert len(limited_dataset.head(10)) == 1  # and it is only one row long
    assert limited_dataset.head(10)["col_1"][0] == 3  # offset should have been applied
def save_expectation_suite_usage_statistics(
    data_context: "DataContext",  # noqa: F821
    expectation_suite: ExpectationSuite,
    expectation_suite_name: Optional[str] = None,
    **kwargs,
) -> dict:
    try:
        data_context_id = data_context.data_context_id
    except AttributeError:
        data_context_id = None
    anonymizer = _anonymizers.get(data_context_id, None)
    if anonymizer is None:
        anonymizer = Anonymizer(data_context_id)
        _anonymizers[data_context_id] = anonymizer
    payload = {}

    if expectation_suite_name is None:
        if isinstance(expectation_suite, ExpectationSuite):
            expectation_suite_name = expectation_suite.expectation_suite_name
        elif isinstance(expectation_suite, dict):
            expectation_suite_name = expectation_suite.get("expectation_suite_name")

    # noinspection PyBroadException
    try:
        payload["anonymized_expectation_suite_name"] = anonymizer.anonymize(
            obj=expectation_suite_name
        )
    except Exception as e:
        logger.debug(
            f"{UsageStatsExceptionPrefix.EMIT_EXCEPTION.value}: {e} type: {type(e)}, "
            "save_expectation_suite_usage_statistics: Unable to create "
            "anonymized_expectation_suite_name payload field"
        )

    return payload
def save_expectation_suite(
    self,
    expectation_suite: ExpectationSuite,
    expectation_suite_name: Optional[str] = None,
    overwrite_existing: bool = True,
    **kwargs: Dict[str, Any],
):
    """Save the provided expectation suite into the DataContext.

    Args:
        expectation_suite: the suite to save
        expectation_suite_name: the name of this expectation suite. If no name is provided, the name will \
            be read from the suite
        overwrite_existing: bool setting whether to overwrite an existing ExpectationSuite

    Returns:
        None
    """
    if expectation_suite_name is None:
        key: ExpectationSuiteIdentifier = ExpectationSuiteIdentifier(
            expectation_suite_name=expectation_suite.expectation_suite_name
        )
    else:
        expectation_suite.expectation_suite_name = expectation_suite_name
        key: ExpectationSuiteIdentifier = ExpectationSuiteIdentifier(
            expectation_suite_name=expectation_suite_name
        )
    if self.expectations_store.has_key(key) and not overwrite_existing:
        raise ge_exceptions.DataContextError(
            "expectation_suite with name {} already exists. If you would like to overwrite this "
            "expectation_suite, set overwrite_existing=True.".format(
                expectation_suite_name
            )
        )
    self._evaluation_parameter_dependencies_compiled = False
    return self.expectations_store.set(key, expectation_suite, **kwargs)
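# Hedged usage sketch for save_expectation_suite above, assuming `context` is a
# DataContext (e.g. obtained via great_expectations.get_context()):
import great_expectations as ge
from great_expectations.core import ExpectationSuite

context = ge.get_context()
suite = ExpectationSuite(expectation_suite_name="my_suite")
context.save_expectation_suite(suite)  # saved under the suite's own name
# An explicit name also renames the suite in place before saving:
context.save_expectation_suite(suite, expectation_suite_name="my_suite_v2")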
def suite_with_table_and_column_expectations(
    exp1, exp2, exp3, exp4, column_pair_expectation, table_exp1, table_exp2, table_exp3
):
    suite = ExpectationSuite(
        expectation_suite_name="warning",
        expectations=[
            exp1,
            exp2,
            exp3,
            exp4,
            column_pair_expectation,
            table_exp1,
            table_exp2,
            table_exp3,
        ],
        meta={"notes": "This is an expectation suite."},
    )
    assert suite.expectations == [
        exp1,
        exp2,
        exp3,
        exp4,
        column_pair_expectation,
        table_exp1,
        table_exp2,
        table_exp3,
    ]
    return suite
def test_golden_path_sql_datasource_configuration(
    sa, empty_data_context, test_connectable_postgresql_db
):
    """Tests the golden path for setting up a StreamlinedSQLDatasource using test_yaml_config"""
    context = empty_data_context

    os.chdir(context.root_directory)

    # Everything below this line (except for asserts) is what we expect users to run as part of the golden path.
    import great_expectations as ge

    context = ge.get_context()

    db_hostname = os.getenv("GE_TEST_LOCAL_DB_HOSTNAME", "localhost")
    yaml_config = f"""
class_name: SimpleSqlalchemyDatasource
credentials:
    drivername: postgresql
    username: postgres
    password: ""
    host: {db_hostname}
    port: 5432
    database: test_ci

introspection:
    whole_table_with_limits:
        sampling_method: _sample_using_limit
        sampling_kwargs:
            n: 10
"""
    # noinspection PyUnusedLocal
    report_object = context.test_yaml_config(
        name="my_datasource",
        yaml_config=yaml_config,
        return_mode="report_object",
    )
    print(json.dumps(report_object, indent=2))
    print(context.datasources)

    my_batch = context.get_batch(
        "my_datasource",
        "whole_table_with_limits",
        "test_df",
    )
    # assert len(my_batch.data.fetchall()) == 10

    with pytest.raises(KeyError):
        my_batch = context.get_batch(
            "my_datasource",
            "whole_table_with_limits",
            "DOES_NOT_EXIST",
        )

    my_validator = context.get_validator(
        datasource_name="my_datasource",
        data_connector_name="whole_table_with_limits",
        data_asset_name="test_df",
        expectation_suite=ExpectationSuite("my_expectation_suite"),
    )
    my_evr = my_validator.expect_table_columns_to_match_set(column_set=[])
    print(my_evr)
def get_expectation_suite(
    self,
    expectation_suite_name: Optional[str] = None,
    ge_cloud_id: Optional[str] = None,
) -> ExpectationSuite:
    """Get an Expectation Suite by name or GE Cloud ID.

    Args:
        expectation_suite_name (str): the name for the Expectation Suite
        ge_cloud_id (str): the GE Cloud ID for the Expectation Suite

    Returns:
        expectation_suite
    """
    key = GeCloudIdentifier(
        resource_type=GeCloudRESTResource.EXPECTATION_SUITE,
        ge_cloud_id=ge_cloud_id,
    )
    if self.expectations_store.has_key(key):
        expectations_schema_dict: dict = cast(dict, self.expectations_store.get(key))
        # create the ExpectationSuite from constructor
        return ExpectationSuite(**expectations_schema_dict, data_context=self)
    else:
        raise ge_exceptions.DataContextError(
            f"expectation_suite {expectation_suite_name} not found"
        )
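# Hedged usage sketch for the GE Cloud variant above: the store key is built only
# from ge_cloud_id, so expectation_suite_name serves just the error message.
# The UUID below is illustrative.
suite = context.get_expectation_suite(
    expectation_suite_name="taxi_suite",
    ge_cloud_id="0faf94a9-f53a-41fb-8e94-32f218d4a774",
)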
def create_expectation_suite(
    self,
    expectation_suite_name: str,
    overwrite_existing: bool = False,
    ge_cloud_id: Optional[str] = None,
    **kwargs: Optional[dict],
) -> ExpectationSuite:
    """Build a new expectation suite and save it into the data_context expectation store.

    Args:
        expectation_suite_name: The name of the expectation_suite to create
        overwrite_existing (boolean): Whether to overwrite the expectation suite if an
            expectation suite with the given name already exists.

    Returns:
        A new (empty) expectation suite.
    """
    if not isinstance(overwrite_existing, bool):
        raise ValueError("Parameter overwrite_existing must be of type BOOL")
    expectation_suite: ExpectationSuite = ExpectationSuite(
        expectation_suite_name=expectation_suite_name, data_context=self
    )
    key = GeCloudIdentifier(
        resource_type=GeCloudRESTResource.EXPECTATION_SUITE,
        ge_cloud_id=ge_cloud_id,
    )
    if self.expectations_store.has_key(key) and not overwrite_existing:
        raise ge_exceptions.DataContextError(
            "expectation_suite with GE Cloud ID {} already exists. If you would like to overwrite this "
            "expectation_suite, set overwrite_existing=True.".format(ge_cloud_id)
        )
    self.expectations_store.set(key, expectation_suite, **kwargs)
    return expectation_suite
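# Hedged usage sketch for create_expectation_suite above (GE Cloud variant).
# Without overwrite_existing=True, a second call with the same ge_cloud_id raises
# DataContextError. The UUID is illustrative.
suite = context.create_expectation_suite(
    expectation_suite_name="taxi_suite",
    ge_cloud_id="3e8eee33-b425-4b36-a831-6e9dd31ad5af",
    overwrite_existing=True,
)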
def test_ExpectationsStore_with_DatabaseStoreBackend_postgres(caplog):
    connection_kwargs = {
        "drivername": "postgresql",
        "username": "******",
        "password": "",
        "host": "localhost",
        "port": "5432",
        "database": "test_ci",
    }

    # First, demonstrate that we pick up default configuration
    my_store = ExpectationsStore(
        store_backend={
            "class_name": "DatabaseStoreBackend",
            "credentials": connection_kwargs,
        }
    )

    with pytest.raises(TypeError):
        my_store.get("not_a_ExpectationSuiteIdentifier")

    # first suite to add to db
    default_suite = ExpectationSuite(
        expectation_suite_name="a.b.c",
        meta={"test_meta_key": "test_meta_value"},
        expectations=[],
    )

    ns_1 = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c"))
    # initial set and check if first suite exists
    my_store.set(ns_1, default_suite)
    assert my_store.get(ns_1) == ExpectationSuite(
        expectation_suite_name="a.b.c",
        meta={"test_meta_key": "test_meta_value"},
        expectations=[],
    )

    # update suite and check if new value exists
    updated_suite = ExpectationSuite(
        expectation_suite_name="a.b.c",
        meta={"test_meta_key": "test_new_meta_value"},
        expectations=[],
    )
    my_store.set(ns_1, updated_suite)
    assert my_store.get(ns_1) == ExpectationSuite(
        expectation_suite_name="a.b.c",
        meta={"test_meta_key": "test_new_meta_value"},
        expectations=[],
    )
def empty_suite():
    return ExpectationSuite(
        expectation_suite_name="warning",
        expectations=[],
        meta={"notes": "This is an expectation suite."},
    )
def test_meta_version_warning():
    asset = ge.data_asset.DataAsset()

    with pytest.warns(UserWarning) as w:
        out = asset.validate(
            expectation_suite=ExpectationSuite(
                expectations=[], expectation_suite_name="test", meta={}
            )
        )
    assert (
        w[0].message.args[0]
        == "WARNING: No great_expectations version found in configuration object."
    )

    with pytest.warns(UserWarning) as w:
        out = asset.validate(
            expectation_suite=ExpectationSuite(
                expectations=[],
                expectation_suite_name="test",
                meta={"great_expectations.__version__": "0.0.0"},
            )
        )
    assert (
        w[0].message.args[0]
        == "WARNING: This configuration object was built using version 0.0.0 of great_expectations, but is currently "
        "being validated by version %s." % ge.__version__
    )
def equivalent_suite(exp1, exp3):
    return ExpectationSuite(
        expectation_suite_name="danger",
        expectations=[exp1, exp3],
        meta={
            "notes": "This is another expectation suite, with a different name and meta"
        },
    )
def render(self, expectations):
    if isinstance(expectations, dict):
        expectations = ExpectationSuite(**expectations, data_context=None)
    (
        columns,
        ordered_columns,
    ) = expectations.get_grouped_and_ordered_expectations_by_column()
    expectation_suite_name = expectations.expectation_suite_name

    overview_content_blocks = [
        self._render_expectation_suite_header(),
        self._render_expectation_suite_info(expectations),
    ]

    table_level_expectations_content_block = self._render_table_level_expectations(
        columns
    )
    if table_level_expectations_content_block is not None:
        overview_content_blocks.append(table_level_expectations_content_block)

    asset_notes_content_block = self._render_expectation_suite_notes(expectations)
    if asset_notes_content_block is not None:
        overview_content_blocks.append(asset_notes_content_block)

    sections = [
        RenderedSectionContent(
            **{
                "section_name": "Overview",
                "content_blocks": overview_content_blocks,
            }
        )
    ]

    sections += [
        self._column_section_renderer.render(expectations=columns[column])
        for column in ordered_columns
        if column != "_nocolumn"
    ]
    return RenderedDocumentContent(
        **{
            "renderer_type": "ExpectationSuitePageRenderer",
            "page_title": f"Expectations / {str(expectation_suite_name)}",
            "expectation_suite_name": expectation_suite_name,
            "utm_medium": "expectation-suite-page",
            "sections": sections,
        }
    )
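# Hedged usage sketch for render above. The renderer_type it emits suggests this is
# ExpectationSuitePageRenderer.render; it accepts either an ExpectationSuite or a
# plain dict of suite fields.
from great_expectations.render.renderer import ExpectationSuitePageRenderer

document = ExpectationSuitePageRenderer().render(suite)
print(document.page_title)  # "Expectations / <suite name>"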
def self_check(self, pretty_print):
    return_obj = {}

    if pretty_print:
        print("Checking for existing keys...")

    return_obj["keys"] = self.list_keys()
    return_obj["len_keys"] = len(return_obj["keys"])
    len_keys = return_obj["len_keys"]

    if pretty_print:
        if return_obj["len_keys"] == 0:
            print(f"\t{len_keys} keys found")
        else:
            print(f"\t{len_keys} keys found:")
            for key in return_obj["keys"][:10]:
                print("\t\t" + str(key))
        if len_keys > 10:
            print("\t\t...")
        print()

    test_key_name = "test-key-" + "".join(
        [random.choice(list("0123456789ABCDEF")) for i in range(20)]
    )
    test_key = self._key_class(test_key_name)
    test_value = ExpectationSuite(test_key_name)

    if pretty_print:
        print(f"Attempting to add a new test key: {test_key}...")
    self.set(key=test_key, value=test_value)
    if pretty_print:
        print("\tTest key successfully added.")
        print()

    if pretty_print:
        print(
            f"Attempting to retrieve the test value associated with key: {test_key}..."
        )
    test_value = self.get(
        key=test_key,
    )
    if pretty_print:
        print("\tTest value successfully retrieved.")
        print()

    if pretty_print:
        print(f"Cleaning up test key and value: {test_key}...")
    test_value = self.remove_key(
        # key=self.key_to_tuple(test_key),
        key=self.key_to_tuple(test_key),
    )
    if pretty_print:
        print("\tTest key and value successfully removed.")
        print()

    return return_obj
def titanic_profiled_expectations_1(empty_data_context_stats_enabled):
    context: DataContext = empty_data_context_stats_enabled
    with open(
        file_relative_path(
            __file__, "./fixtures/BasicDatasetProfiler_expectations.json"
        ),
    ) as infile:
        expectation_suite_dict: dict = expectationSuiteSchema.load(json.load(infile))
        return ExpectationSuite(**expectation_suite_dict, data_context=context)
def titanic_dataset_profiler_expectations(empty_data_context_module_scoped):
    context: DataContext = empty_data_context_module_scoped
    with open(
        file_relative_path(
            __file__, "./fixtures/BasicDatasetProfiler_expectations.json"
        ),
    ) as infile:
        expectations_dict: dict = expectationSuiteSchema.load(
            json.load(fp=infile, object_pairs_hook=OrderedDict)
        )
        return ExpectationSuite(**expectations_dict, data_context=context)
def get_or_create_expectation_suite(
    data_context: "BaseDataContext",  # noqa: F821
    expectation_suite: Optional["ExpectationSuite"] = None,  # noqa: F821
    expectation_suite_name: Optional[str] = None,
    component_name: Optional[str] = None,
    persist: bool = False,
) -> "ExpectationSuite":  # noqa: F821
    """
    Use "expectation_suite" if provided. If not, and "expectation_suite_name" is
    specified, create an "ExpectationSuite" from it. Otherwise, generate a temporary
    "expectation_suite_name" using the supplied "component_name".
    """
    generate_temp_expectation_suite_name: bool
    create_expectation_suite: bool

    if expectation_suite is not None and expectation_suite_name is not None:
        if expectation_suite.expectation_suite_name != expectation_suite_name:
            raise ValueError(
                'Mutually inconsistent "expectation_suite" and "expectation_suite_name" were specified.'
            )
        return expectation_suite
    elif expectation_suite is None and expectation_suite_name is not None:
        generate_temp_expectation_suite_name = False
        create_expectation_suite = True
    elif expectation_suite is not None and expectation_suite_name is None:
        generate_temp_expectation_suite_name = False
        create_expectation_suite = False
    else:
        generate_temp_expectation_suite_name = True
        create_expectation_suite = True

    if generate_temp_expectation_suite_name:
        if not component_name:
            component_name = "test"
        expectation_suite_name = f"{TEMPORARY_EXPECTATION_SUITE_NAME_PREFIX}.{component_name}.{TEMPORARY_EXPECTATION_SUITE_NAME_STEM}.{str(uuid.uuid4())[:8]}"

    if create_expectation_suite:
        if persist:
            try:
                # noinspection PyUnusedLocal
                expectation_suite = data_context.get_expectation_suite(
                    expectation_suite_name=expectation_suite_name
                )
            except ge_exceptions.DataContextError:
                expectation_suite = data_context.create_expectation_suite(
                    expectation_suite_name=expectation_suite_name
                )
                logger.info(
                    f'Created ExpectationSuite "{expectation_suite.expectation_suite_name}".'
                )
        else:
            expectation_suite = ExpectationSuite(
                expectation_suite_name=expectation_suite_name,
                data_context=data_context,
            )

    return expectation_suite
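# Hedged sketch of the four dispatch branches in get_or_create_expectation_suite:
#   suite + matching name -> return the given suite unchanged
#   name only             -> build (or, with persist=True, fetch/create) a suite of that name
#   suite only            -> return the given suite
#   neither               -> build a temporary "<prefix>.<component>.<stem>.<uuid8>" suite
suite = get_or_create_expectation_suite(
    data_context=context,
    expectation_suite_name=None,
    component_name="profiler",  # illustrative component name
)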
def parameterized_expectation_suite(empty_data_context_stats_enabled):
    context: DataContext = empty_data_context_stats_enabled
    fixture_path = file_relative_path(
        __file__,
        "../test_fixtures/expectation_suites/parameterized_expression_expectation_suite_fixture.json",
    )
    with open(fixture_path) as suite:
        expectation_suite_dict: dict = expectationSuiteSchema.load(json.load(suite))
        return ExpectationSuite(**expectation_suite_dict, data_context=context)
def test_data_asset_expectation_suite():
    asset = DataAsset()
    default_suite = ExpectationSuite(
        expectation_suite_name="default",
        data_asset_type="DataAsset",
        meta={"great_expectations.__version__": ge_version},
        expectations=[],
    )

    # We should have a default-initialized suite stored internally and available for getting
    assert asset._expectation_suite == default_suite
    assert asset.get_expectation_suite() == default_suite
def test_meta_version_warning():
    asset = ge.data_asset.DataAsset()

    with pytest.warns(UserWarning) as w:
        suite = ExpectationSuite(expectations=[], expectation_suite_name="test")
        # mangle the metadata
        suite.meta = {"foo": "bar"}
        out = asset.validate(expectation_suite=suite)
    assert (
        w[0].message.args[0]
        == "WARNING: No great_expectations version found in configuration object."
    )

    with pytest.warns(UserWarning) as w:
        suite = ExpectationSuite(
            expectations=[],
            expectation_suite_name="test",
            meta={"great_expectations_version": "0.0.0"},
        )
        # mangle the metadata
        suite.meta = {"great_expectations_version": "0.0.0"}
        out = asset.validate(expectation_suite=suite)
    assert (
        w[0].message.args[0]
        == "WARNING: This configuration object was built using version 0.0.0 of great_expectations, but is currently "
        "being validated by version %s." % ge.__version__
    )
def test_expectations_store():
    my_store = ExpectationsStore()

    with pytest.raises(TypeError):
        my_store.set("not_a_ValidationResultIdentifier")

    ns_1 = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.warning"))
    my_store.set(ns_1, ExpectationSuite(expectation_suite_name="a.b.c.warning"))
    assert my_store.get(ns_1) == ExpectationSuite(
        expectation_suite_name="a.b.c.warning"
    )

    ns_2 = ExpectationSuiteIdentifier.from_tuple(tuple("a.b.c.failure"))
    my_store.set(ns_2, ExpectationSuite(expectation_suite_name="a.b.c.failure"))
    assert my_store.get(ns_2) == ExpectationSuite(
        expectation_suite_name="a.b.c.failure"
    )

    assert set(my_store.list_keys()) == {
        ns_1,
        ns_2,
    }
def test_sqlalchemy_source_templating(sqlitedb_engine):
    datasource = SqlAlchemyDatasource(
        engine=sqlitedb_engine,
        generators={"foo": {"class_name": "QueryBatchKwargsGenerator"}},
    )
    generator = datasource.get_generator("foo")
    generator.add_query("test", "select 'cat' as ${col_name};")
    batch = datasource.get_batch(
        generator.build_batch_kwargs(
            "test", query_parameters={"col_name": "animal_name"}
        )
    )
    dataset = Validator(
        batch,
        expectation_suite=ExpectationSuite("test"),
        expectation_engine=SqlAlchemyDataset,
    ).get_dataset()
    res = dataset.expect_column_to_exist("animal_name")
    assert res.success is True
    res = dataset.expect_column_values_to_be_in_set("animal_name", ["cat"])
    assert res.success is True
def test_spark_datasource_processes_dataset_options(test_folder_connection_path):
    # This test exercises SparkDFDatasource (and the Spark-only "persist" option),
    # so it is named accordingly despite the "PandasCSV" datasource label.
    datasource = SparkDFDatasource(
        "PandasCSV",
        generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": test_folder_connection_path,
            }
        },
    )
    batch_kwargs = datasource.build_batch_kwargs("subdir_reader", name="test")
    batch_kwargs["dataset_options"] = {"caching": False, "persist": False}
    batch = datasource.get_batch(batch_kwargs)
    validator = Validator(batch, ExpectationSuite(expectation_suite_name="foo"))
    dataset = validator.get_dataset()
    assert dataset.caching is False
    assert dataset._persist is False
def get_batch_kwargs(
    self, suite: ExpectationSuite, batch_kwargs: Union[dict, BatchKwargs]
):
    if isinstance(batch_kwargs, dict):
        return self._fix_path_in_batch_kwargs(batch_kwargs)

    citations = suite.meta.get("citations")
    if not citations:
        return self._fix_path_in_batch_kwargs(batch_kwargs)

    citations = suite.get_citations(require_batch_kwargs=True)
    if not citations:
        return None

    citation = citations[-1]
    batch_kwargs = citation.get("batch_kwargs")
    return self._fix_path_in_batch_kwargs(batch_kwargs)
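# Hedged usage sketch for get_batch_kwargs above; `helper` stands in for whatever
# object owns the method (it is not named in this snippet). A plain dict
# short-circuits the citation lookup entirely; otherwise the most recent
# batch_kwargs-bearing citation on the suite wins.
kwargs = helper.get_batch_kwargs(suite=my_suite, batch_kwargs={"path": "data.csv"})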