def test_get_batch_with_query_in_runtime_parameters_using_runtime_data_connector(
    sa,
    data_context_with_runtime_sql_datasource_for_testing_get_batch,
    sqlite_view_engine,
):
    """Fetch a batch from a runtime SQL query, with and without a temp table.

    The same query-based ``RuntimeBatchRequest`` is issued twice: once with
    default settings and once with ``create_temp_table`` switched off through
    ``batch_spec_passthrough``. After each request the engine is expected to
    report exactly one temp table.
    """

    def make_batch_request(**extra_kwargs):
        # Fresh dict literals are built on every call so that the two
        # requests never share a mutable object.
        return RuntimeBatchRequest(
            datasource_name="my_runtime_sql_datasource",
            data_connector_name="my_runtime_data_connector",
            data_asset_name="IN_MEMORY_DATA_ASSET",
            runtime_parameters={
                "query": "SELECT * FROM table_partitioned_by_date_column__A"
            },
            batch_identifiers={
                "pipeline_stage_name": "core_processing",
                "airflow_run_id": 1234567890,
            },
            **extra_kwargs,
        )

    context: DataContext = (
        data_context_with_runtime_sql_datasource_for_testing_get_batch
    )

    # First request: rely on the default create_temp_table behaviour.
    batch: Batch = context.get_batch(batch_request=make_batch_request())

    assert batch.batch_spec is not None
    assert batch.batch_definition["data_asset_name"] == "IN_MEMORY_DATA_ASSET"
    assert isinstance(batch.data, SqlAlchemyBatchData)

    # Count the rows reachable through the selectable backing the batch.
    sa_engine = batch.data.execution_engine.engine
    row_count_sql = f"select count(*) from {batch.data.selectable.name}"

    assert sa_engine.execute(row_count_sql).scalar() == 120
    assert batch.batch_markers.get("ge_load_time") is not None
    # since create_temp_table defaults to True, there should be 1 temp table
    assert len(get_sqlite_temp_table_names(sa_engine)) == 1

    # Second request: with create_temp_table=False in batch_spec_passthrough,
    # no new temp tables should be created, so the count stays at 1.
    batch = context.get_batch(
        batch_request=make_batch_request(
            batch_spec_passthrough={"create_temp_table": False}
        ),
    )
    assert len(get_sqlite_temp_table_names(batch.data.execution_engine.engine)) == 1
 def validate_tmp_tables():
     """Assert that no ``ge_temp_``-prefixed tables are listed for the engine.

     Both the temp-table listing and the regular table listing are checked.
     ``engine`` comes from the enclosing scope, which is not visible in this
     excerpt.
     """

     def is_ge_temp(table_name):
         return table_name.startswith("ge_temp_")

     # Collect both listings before asserting, so each lookup always runs.
     leftover_temp_tables = list(
         filter(is_ge_temp, get_sqlite_temp_table_names(engine.engine))
     )
     leftover_tables = list(
         filter(is_ge_temp, get_sqlite_table_names(engine.engine))
     )

     assert not leftover_temp_tables
     assert not leftover_tables
def test_get_batch_data_and_markers_using_query(sqlite_view_engine, test_df):
    """Load batch data from a raw SQL query and check the resulting markers.

    ``test_df`` is written to ``test_table_0`` and then read back through a
    ``RuntimeQueryBatchSpec``. Afterwards the engine is expected to list two
    temp tables and the markers must carry a ``ge_load_time`` entry.
    """
    engine_under_test: SqlAlchemyExecutionEngine = SqlAlchemyExecutionEngine(
        engine=sqlite_view_engine
    )
    test_df.to_sql("test_table_0", con=engine_under_test.engine)

    query_spec = RuntimeQueryBatchSpec(query="SELECT * FROM test_table_0")
    # Unpacking also verifies that exactly two values come back.
    batch_data, batch_markers = engine_under_test.get_batch_data_and_markers(
        batch_spec=query_spec
    )

    temp_table_names = get_sqlite_temp_table_names(sqlite_view_engine)
    assert len(temp_table_names) == 2
    assert batch_markers.get("ge_load_time") is not None
Exemple #4
0
def test_instantiation_with_query(sqlite_view_engine, test_df):
    """Build query-based batch data without creating a temp table.

    With ``create_temp_table=False`` the temp-table count reported for the
    engine is expected to remain at 1 after instantiation.
    """
    source_table = "test_table_0"
    test_df.to_sql(source_table, con=sqlite_view_engine)

    # If create_temp_table=False, a new temp table should NOT be created.
    # The instance is kept bound only so it stays alive during the check.
    # noinspection PyUnusedLocal
    batch_data: SqlAlchemyBatchData = SqlAlchemyBatchData(
        execution_engine=sqlite_view_engine,
        query="SELECT * FROM test_table_0",
        create_temp_table=False,
    )

    temp_table_names = get_sqlite_temp_table_names(sqlite_view_engine)
    assert len(temp_table_names) == 1
Exemple #5
0
def test_instantiation_with_and_without_temp_table(sqlite_view_engine, sa):
    """Track how ``SqlAlchemyBatchData`` instantiation affects temp tables.

    The engine starts out listing a single temp entry, ``test_temp_view``.
    The test then instantiates batch data from a table name and from
    selectables, with ``create_temp_table`` set to True, False, or omitted,
    and checks the temp-table count after every step. It finishes by loading
    a schema-qualified table through the execution engine.
    """

    def temp_table_count():
        # Re-query the engine on every call so each check sees current state.
        return len(get_sqlite_temp_table_names(sqlite_view_engine))

    print(get_sqlite_temp_table_names(sqlite_view_engine))
    assert temp_table_count() == 1
    assert get_sqlite_temp_table_names(sqlite_view_engine) == {"test_temp_view"}

    execution_engine: SqlAlchemyExecutionEngine = SqlAlchemyExecutionEngine(
        engine=sqlite_view_engine
    )

    # When the SqlAlchemyBatchData object is based on a table, a new temp
    # table is NOT created, even if create_temp_table=True
    SqlAlchemyBatchData(
        execution_engine=execution_engine,
        table_name="test_table",
        create_temp_table=True,
    )
    assert temp_table_count() == 1

    text_selectable = sa.select("*").select_from(sa.text("main.test_table"))

    # If create_temp_table=False, a new temp table should NOT be created
    SqlAlchemyBatchData(
        execution_engine=execution_engine,
        selectable=text_selectable,
        create_temp_table=False,
    )
    assert temp_table_count() == 1

    # If create_temp_table=True, a new temp table should be created
    SqlAlchemyBatchData(
        execution_engine=execution_engine,
        selectable=text_selectable,
        create_temp_table=True,
    )
    assert temp_table_count() == 2

    # create_temp_table is omitted here; it defaults to True, so a further
    # temp table should be created
    SqlAlchemyBatchData(
        execution_engine=execution_engine,
        selectable=text_selectable,
    )
    assert temp_table_count() == 3

    # testing whether schema is supported
    schema_selectable = sa.select("*").select_from(
        sa.table(name="test_table", schema="main")
    )
    # create_temp_table defaults to True again
    SqlAlchemyBatchData(
        execution_engine=execution_engine,
        selectable=schema_selectable,
    )
    assert temp_table_count() == 4

    # test schema with execution engine
    # TODO : Will20210222 Add tests for specifying schema with non-sqlite backend that actually supports new schema creation
    schema_batch_spec = SqlAlchemyDatasourceBatchSpec(
        table_name="test_table",
        batch_identifiers={},
        schema_name="main",
    )
    result = execution_engine.get_batch_data_and_markers(
        batch_spec=schema_batch_spec
    )
    assert len(result) == 2