def test_standalone_sqlalchemy_datasource(test_db_connection_string, sa):
    datasource = SqlAlchemyDatasource(
        "SqlAlchemy",
        connection_string=test_db_connection_string,
        echo=False,
        batch_kwargs_generators={
            "default": {
                "class_name": "TableBatchKwargsGenerator"
            }
        },
    )

    assert set(
        datasource.get_available_data_asset_names()["default"]["names"]) == {
            ("main.table_1", "table"),
            ("main.table_2", "table"),
        }
    batch_kwargs = datasource.build_batch_kwargs("default", "main.table_1")
    batch = datasource.get_batch(batch_kwargs=batch_kwargs)
    assert isinstance(batch, Batch)
    batch_data = batch.data
    assert isinstance(
        batch_data,
        great_expectations.dataset.sqlalchemy_dataset.SqlAlchemyBatchReference,
    )
    dataset = SqlAlchemyDataset(**batch.data.get_init_kwargs())
    assert len(dataset.head(10)) == 5
Exemple #2
0
def test_sqlalchemy_source_limit(sqlitedb_engine):
    df1 = pd.DataFrame({
        'col_1': [1, 2, 3, 4, 5],
        'col_2': ['a', 'b', 'c', 'd', 'e']
    })
    df2 = pd.DataFrame({
        'col_1': [0, 1, 2, 3, 4],
        'col_2': ['b', 'c', 'd', 'e', 'f']
    })
    df1.to_sql('table_1', con=sqlitedb_engine, index=True)
    df2.to_sql('table_2', con=sqlitedb_engine, index=True, schema='main')
    datasource = SqlAlchemyDatasource('SqlAlchemy', engine=sqlitedb_engine)
    limited_batch = datasource.get_batch({
        "table": "table_1",
        "limit": 1,
        "offset": 2
    })
    assert isinstance(limited_batch, Batch)
    limited_dataset = Validator(
        limited_batch,
        expectation_suite=ExpectationSuite("test"),
        expectation_engine=SqlAlchemyDataset).get_dataset()
    assert limited_dataset._table.name.startswith(
        "ge_tmp_")  # we have generated a temporary table
    assert len(limited_dataset.head(10)) == 1  # and it is only one row long
    assert limited_dataset.head(
        10)['col_1'][0] == 3  # offset should have been applied
def test_sqlalchemy_source_templating(sqlitedb_engine, empty_data_context):
    context: DataContext = empty_data_context
    datasource = SqlAlchemyDatasource(
        engine=sqlitedb_engine,
        batch_kwargs_generators={
            "foo": {
                "class_name": "QueryBatchKwargsGenerator"
            }
        },
    )
    generator = datasource.get_batch_kwargs_generator("foo")
    generator.add_query(data_asset_name="test",
                        query="select 'cat' as ${col_name};")
    batch = datasource.get_batch(
        generator.build_batch_kwargs(
            "test", query_parameters={"col_name": "animal_name"}))
    dataset = BridgeValidator(
        batch,
        expectation_suite=ExpectationSuite("test", data_context=context),
        expectation_engine=SqlAlchemyDataset,
    ).get_dataset()
    res = dataset.expect_column_to_exist("animal_name")
    assert res.success is True
    res = dataset.expect_column_values_to_be_in_set("animal_name", ["cat"])
    assert res.success is True
def test_standalone_sqlalchemy_datasource(test_db_connection_string):
    datasource = SqlAlchemyDatasource(
        'SqlAlchemy', connection_string=test_db_connection_string, echo=False)

    assert datasource.get_available_data_asset_names() == {"default": {"main.table_1", "main.table_2"}}
    dataset1 = datasource.get_batch("main.table_1")
    dataset2 = datasource.get_batch("main.table_2")
    assert isinstance(dataset1, SqlAlchemyDataset)
    assert isinstance(dataset2, SqlAlchemyDataset)
def test_query_generator_view(sqlite_view_engine):
    datasource = SqlAlchemyDatasource(
        engine=sqlite_view_engine,
        batch_kwargs_generators={"table": {"class_name": "TableBatchKwargsGenerator"}},
    )  # Build a datasource with a queries generator to introspect our database with a view
    names = set(datasource.get_available_data_asset_names()["table"]["names"])

    # We should see both the table *and* the primary view, but *not* the temp view
    assert names == {("main.test_table", "table"), ("main.test_view", "view")}
Exemple #6
0
def test_sqlalchemy_source_limit(sqlitedb_engine):
    df1 = pd.DataFrame({
        'col_1': [1, 2, 3, 4, 5],
        'col_2': ['a', 'b', 'c', 'd', 'e']
    })
    df2 = pd.DataFrame({
        'col_1': [0, 1, 2, 3, 4],
        'col_2': ['b', 'c', 'd', 'e', 'f']
    })
    df1.to_sql('table_1', con=sqlitedb_engine, index=True)
    df2.to_sql('table_2', con=sqlitedb_engine, index=True, schema='main')
    datasource = SqlAlchemyDatasource('SqlAlchemy', engine=sqlitedb_engine)
    limited_dataset = datasource.get_data_asset("table_1",
                                                "default",
                                                limit=1,
                                                offset=2)
    assert isinstance(limited_dataset, SqlAlchemyDataset)
    assert limited_dataset._table.name.startswith(
        "ge_tmp_")  # we have generated a temporary table
    assert len(limited_dataset.head(10)) == 1  # and it is only one row long
    assert limited_dataset.head(
        10)['col_1'][0] == 3  # offset should have been applied