def test_standalone_sqlalchemy_datasource(test_db_connection_string, sa):
    """A datasource built from a connection string exposes table assets and
    hands back a Batch wrapping a SqlAlchemyBatchReference that can be
    materialized into a SqlAlchemyDataset."""
    datasource = SqlAlchemyDatasource(
        "SqlAlchemy",
        connection_string=test_db_connection_string,
        echo=False,
        batch_kwargs_generators={
            "default": {"class_name": "TableBatchKwargsGenerator"}
        },
    )

    # Both tables in the fixture DB should be discovered as "table" assets.
    discovered = datasource.get_available_data_asset_names()["default"]["names"]
    assert set(discovered) == {
        ("main.table_1", "table"),
        ("main.table_2", "table"),
    }

    kwargs = datasource.build_batch_kwargs("default", "main.table_1")
    fetched = datasource.get_batch(batch_kwargs=kwargs)
    assert isinstance(fetched, Batch)

    payload = fetched.data
    # The batch carries a lazy reference, not the data itself.
    assert isinstance(
        payload,
        great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyBatchReference,
    )

    materialized = SqlAlchemyDataset(**fetched.data.get_init_kwargs())
    # Fixture table main.table_1 holds 5 rows; head(10) returns them all.
    assert len(materialized.head(10)) == 5
def test_sqlalchemy_source_limit(sqlitedb_engine):
    """get_batch with limit/offset batch_kwargs builds a temporary table
    containing only the requested window of rows."""
    frame_one = pd.DataFrame(
        {"col_1": [1, 2, 3, 4, 5], "col_2": ["a", "b", "c", "d", "e"]}
    )
    frame_two = pd.DataFrame(
        {"col_1": [0, 1, 2, 3, 4], "col_2": ["b", "c", "d", "e", "f"]}
    )
    frame_one.to_sql('table_1', con=sqlitedb_engine, index=True)
    frame_two.to_sql('table_2', con=sqlitedb_engine, index=True, schema='main')

    datasource = SqlAlchemyDatasource('SqlAlchemy', engine=sqlitedb_engine)
    limited_batch = datasource.get_batch(
        {"table": "table_1", "limit": 1, "offset": 2}
    )
    assert isinstance(limited_batch, Batch)

    limited_dataset = Validator(
        limited_batch,
        expectation_suite=ExpectationSuite("test"),
        expectation_engine=SqlAlchemyDataset,
    ).get_dataset()

    # Limiting materializes into a generated temp table...
    assert limited_dataset._table.name.startswith("ge_tmp_")
    # ...containing exactly limit=1 row...
    assert len(limited_dataset.head(10)) == 1
    # ...starting after offset=2, i.e. the third value of col_1.
    assert limited_dataset.head(10)['col_1'][0] == 3
def test_sqlalchemy_source_templating(sqlitedb_engine, empty_data_context):
    """A ${...} placeholder in a stored query is substituted from
    query_parameters when building batch kwargs."""
    context: DataContext = empty_data_context
    datasource = SqlAlchemyDatasource(
        engine=sqlitedb_engine,
        batch_kwargs_generators={
            "foo": {"class_name": "QueryBatchKwargsGenerator"}
        },
    )

    generator = datasource.get_batch_kwargs_generator("foo")
    # The column alias is left as a template variable to be filled in later.
    generator.add_query(
        data_asset_name="test", query="select 'cat' as ${col_name};"
    )
    templated_kwargs = generator.build_batch_kwargs(
        "test", query_parameters={"col_name": "animal_name"}
    )
    batch = datasource.get_batch(templated_kwargs)

    dataset = BridgeValidator(
        batch,
        expectation_suite=ExpectationSuite("test", data_context=context),
        expectation_engine=SqlAlchemyDataset,
    ).get_dataset()

    # The substituted alias must exist and hold the literal value.
    assert dataset.expect_column_to_exist("animal_name").success is True
    assert (
        dataset.expect_column_values_to_be_in_set("animal_name", ["cat"]).success
        is True
    )
def test_standalone_sqlalchemy_datasource_legacy_api(test_db_connection_string):
    """Legacy-API variant: asset names come back as a plain set and
    get_batch accepts a bare asset name, returning SqlAlchemyDataset directly.

    NOTE(review): renamed from ``test_standalone_sqlalchemy_datasource`` —
    that name is already defined earlier in this file, and Python keeps only
    the last definition, so one of the two tests was silently never collected
    by pytest (flake8/ruff F811). Renaming lets both run.
    """
    datasource = SqlAlchemyDatasource(
        'SqlAlchemy',
        connection_string=test_db_connection_string,
        echo=False,
    )

    # Older API shape: a set of names per generator, no (name, type) tuples.
    assert datasource.get_available_data_asset_names() == {
        "default": {"main.table_1", "main.table_2"}
    }

    dataset1 = datasource.get_batch("main.table_1")
    dataset2 = datasource.get_batch("main.table_2")
    assert isinstance(dataset1, SqlAlchemyDataset)
    assert isinstance(dataset2, SqlAlchemyDataset)
def test_query_generator_view(sqlite_view_engine):
    """TableBatchKwargsGenerator introspection surfaces tables and durable
    views, but skips temporary views."""
    # Build a datasource with a queries generator to introspect our database with a view
    datasource = SqlAlchemyDatasource(
        engine=sqlite_view_engine,
        batch_kwargs_generators={
            "table": {"class_name": "TableBatchKwargsGenerator"}
        },
    )

    asset_names = datasource.get_available_data_asset_names()["table"]["names"]
    # We should see both the table *and* the primary view, but *not* the temp view
    assert set(asset_names) == {
        ("main.test_table", "table"),
        ("main.test_view", "view"),
    }
def test_sqlalchemy_source_limit_legacy_api(sqlitedb_engine):
    """Legacy-API variant: ``get_data_asset`` with limit/offset keywords
    returns a SqlAlchemyDataset built from a temporary table.

    NOTE(review): renamed from ``test_sqlalchemy_source_limit`` — that name
    is already defined earlier in this file, so this later definition was
    shadowing the earlier test and only one of the two ever ran under pytest
    (flake8/ruff F811). Renaming lets both run.
    """
    df1 = pd.DataFrame(
        {'col_1': [1, 2, 3, 4, 5], 'col_2': ['a', 'b', 'c', 'd', 'e']}
    )
    df2 = pd.DataFrame(
        {'col_1': [0, 1, 2, 3, 4], 'col_2': ['b', 'c', 'd', 'e', 'f']}
    )
    df1.to_sql('table_1', con=sqlitedb_engine, index=True)
    df2.to_sql('table_2', con=sqlitedb_engine, index=True, schema='main')

    datasource = SqlAlchemyDatasource('SqlAlchemy', engine=sqlitedb_engine)
    limited_dataset = datasource.get_data_asset(
        "table_1", "default", limit=1, offset=2
    )
    assert isinstance(limited_dataset, SqlAlchemyDataset)

    # we have generated a temporary table
    assert limited_dataset._table.name.startswith("ge_tmp_")
    # and it is only one row long
    assert len(limited_dataset.head(10)) == 1
    # offset should have been applied
    assert limited_dataset.head(10)['col_1'][0] == 3