def test_get_batch_with_query_in_runtime_parameters_using_runtime_data_connector(
    sa,
    data_context_with_runtime_sql_datasource_for_testing_get_batch,
    sqlite_view_engine,
):
    """Request the same runtime-query batch twice and track SQLite temp tables.

    The first request leaves ``create_temp_table`` at its default; the second
    passes ``create_temp_table=False`` through ``batch_spec_passthrough``.
    The temp table count is expected to be 1 after each request.
    """
    context: DataContext = (
        data_context_with_runtime_sql_datasource_for_testing_get_batch
    )

    def make_batch_request(**extra_kwargs):
        # Fresh dict literals on every call, so the two requests share no state.
        return RuntimeBatchRequest(
            datasource_name="my_runtime_sql_datasource",
            data_connector_name="my_runtime_data_connector",
            data_asset_name="IN_MEMORY_DATA_ASSET",
            runtime_parameters={
                "query": "SELECT * FROM table_partitioned_by_date_column__A"
            },
            batch_identifiers={
                "pipeline_stage_name": "core_processing",
                "airflow_run_id": 1234567890,
            },
            **extra_kwargs,
        )

    batch: Batch = context.get_batch(batch_request=make_batch_request())

    assert batch.batch_spec is not None
    assert batch.batch_definition["data_asset_name"] == "IN_MEMORY_DATA_ASSET"
    assert isinstance(batch.data, SqlAlchemyBatchData)

    # Count rows through whatever selectable backs the batch data.
    backing_table_name = batch.data.selectable.name
    row_count_sql = f"select count(*) from {backing_table_name}"
    batch_engine = batch.data.execution_engine.engine
    row_count = batch_engine.execute(row_count_sql).scalar()
    assert row_count == 120

    load_time_marker = batch.batch_markers.get("ge_load_time")
    assert load_time_marker is not None

    # since create_temp_table defaults to True, there should be 1 temp table
    temp_tables_after_first = get_sqlite_temp_table_names(
        batch.data.execution_engine.engine
    )
    assert len(temp_tables_after_first) == 1

    # if create_temp_table in batch_spec_passthrough is set to False,
    # no new temp tables should be created
    batch = context.get_batch(
        batch_request=make_batch_request(
            batch_spec_passthrough={"create_temp_table": False},
        ),
    )
    temp_tables_after_second = get_sqlite_temp_table_names(
        batch.data.execution_engine.engine
    )
    assert len(temp_tables_after_second) == 1
def validate_tmp_tables():
    """Assert that no ``ge_temp_``-prefixed temp tables or tables exist.

    Takes no arguments; it reads ``engine`` from outside this function and
    inspects ``engine.engine`` with the SQLite table-name helpers.
    """

    def is_ge_temp(table_name):
        return table_name.startswith("ge_temp_")

    # Run both lookups before asserting, so each listing is always collected.
    leftover_temp_tables = list(
        filter(is_ge_temp, get_sqlite_temp_table_names(engine.engine))
    )
    leftover_tables = list(
        filter(is_ge_temp, get_sqlite_table_names(engine.engine))
    )

    assert not leftover_temp_tables
    assert not leftover_tables
def test_get_batch_data_and_markers_using_query(sqlite_view_engine, test_df):
    """Load a DataFrame into SQLite and fetch batch data via a runtime query.

    Checks that two temp tables exist afterwards and that the returned batch
    markers include ``ge_load_time``.
    """
    execution_engine: SqlAlchemyExecutionEngine = SqlAlchemyExecutionEngine(
        engine=sqlite_view_engine
    )
    test_df.to_sql("test_table_0", con=execution_engine.engine)

    query_batch_spec = RuntimeQueryBatchSpec(query="SELECT * FROM test_table_0")
    _batch_data, batch_markers = execution_engine.get_batch_data_and_markers(
        batch_spec=query_batch_spec
    )

    # NOTE(review): presumably the fixture's existing temp view plus one temp
    # table created for the query - confirm against the fixture.
    temp_table_names = get_sqlite_temp_table_names(sqlite_view_engine)
    assert len(temp_table_names) == 2

    load_time_marker = batch_markers.get("ge_load_time")
    assert load_time_marker is not None
def test_instantiation_with_query(sqlite_view_engine, test_df):
    """Build ``SqlAlchemyBatchData`` from a query without a temp table.

    With ``create_temp_table=False`` the number of SQLite temp tables is
    expected to stay at 1.
    """
    test_df.to_sql("test_table_0", con=sqlite_view_engine)

    # If create_temp_table=False, a new temp table should NOT be created
    # NOTE(review): the raw engine fixture is passed as ``execution_engine``
    # here, while sibling tests wrap it in SqlAlchemyExecutionEngine - confirm
    # this is intentional.
    # noinspection PyUnusedLocal
    batch_data: SqlAlchemyBatchData = SqlAlchemyBatchData(
        execution_engine=sqlite_view_engine,
        query="SELECT * FROM test_table_0",
        create_temp_table=False,
    )

    temp_table_names = get_sqlite_temp_table_names(sqlite_view_engine)
    assert len(temp_table_names) == 1
def test_instantiation_with_and_without_temp_table(sqlite_view_engine, sa):
    """Check when ``SqlAlchemyBatchData`` adds a temp table on SQLite.

    Walks through table-based and selectable-based construction with
    ``create_temp_table`` set to True, set to False, and left at its default,
    asserting the running number of temp tables after each step.
    """

    def temp_table_names():
        # Re-query on every call; the listing grows as the test proceeds.
        return get_sqlite_temp_table_names(sqlite_view_engine)

    # Starting state: only the fixture's temp view is present.
    print(temp_table_names())
    assert len(temp_table_names()) == 1
    assert temp_table_names() == {"test_temp_view"}

    execution_engine: SqlAlchemyExecutionEngine = SqlAlchemyExecutionEngine(
        engine=sqlite_view_engine
    )

    # When the SqlAlchemyBatchData object is based on a table, a new temp
    # table is NOT created, even if create_temp_table=True
    SqlAlchemyBatchData(
        execution_engine=execution_engine,
        table_name="test_table",
        create_temp_table=True,
    )
    assert len(temp_table_names()) == 1

    text_selectable = sa.select("*").select_from(sa.text("main.test_table"))

    # If create_temp_table=False, a new temp table should NOT be created
    SqlAlchemyBatchData(
        execution_engine=execution_engine,
        selectable=text_selectable,
        create_temp_table=False,
    )
    assert len(temp_table_names()) == 1

    # If create_temp_table=True, a new temp table should be created
    SqlAlchemyBatchData(
        execution_engine=execution_engine,
        selectable=text_selectable,
        create_temp_table=True,
    )
    assert len(temp_table_names()) == 2

    # Leaving create_temp_table unset (it defaults to True) should also
    # create a new temp table
    SqlAlchemyBatchData(
        execution_engine=execution_engine,
        selectable=text_selectable,
    )
    assert len(temp_table_names()) == 3

    # testing whether schema is supported
    schema_selectable = sa.select("*").select_from(
        sa.table(name="test_table", schema="main")
    )
    SqlAlchemyBatchData(
        execution_engine=execution_engine,
        selectable=schema_selectable,
        # create_temp_table defaults to True
    )
    assert len(temp_table_names()) == 4

    # test schema with execution engine
    # TODO : Will20210222 Add tests for specifying schema with non-sqlite
    # backend that actually supports new schema creation
    schema_batch_spec = SqlAlchemyDatasourceBatchSpec(
        table_name="test_table",
        batch_identifiers={},
        schema_name="main",
    )
    batch_data_and_markers = execution_engine.get_batch_data_and_markers(
        batch_spec=schema_batch_spec
    )
    assert len(batch_data_and_markers) == 2