import pytest

# Import paths below follow the legacy (v2-style) Great Expectations layout
# these tests were written against.
from great_expectations.core import ExpectationSuite
from great_expectations.data_context import DataContext
from great_expectations.datasource import (
    PandasDatasource,
    SparkDFDatasource,
    SqlAlchemyDatasource,
)
from great_expectations.validator.validator import BridgeValidator


def test_sqlalchemy_datasource_processes_dataset_options(test_db_connection_string):
    datasource = SqlAlchemyDatasource(
        "SqlAlchemy", credentials={"url": test_db_connection_string}
    )

    # dataset_options passed through process_batch_parameters should propagate
    # to the resulting dataset: first with caching disabled...
    batch_kwargs = datasource.process_batch_parameters(
        dataset_options={"caching": False}
    )
    batch_kwargs["query"] = "select * from table_1;"
    batch = datasource.get_batch(batch_kwargs)
    validator = BridgeValidator(batch, ExpectationSuite(expectation_suite_name="foo"))
    dataset = validator.get_dataset()
    assert dataset.caching is False

    # ...then with caching enabled.
    batch_kwargs = datasource.process_batch_parameters(
        dataset_options={"caching": True}
    )
    batch_kwargs["query"] = "select * from table_1;"
    batch = datasource.get_batch(batch_kwargs)
    validator = BridgeValidator(batch, ExpectationSuite(expectation_suite_name="foo"))
    dataset = validator.get_dataset()
    assert dataset.caching is True

    # dataset_options supplied directly in batch_kwargs should be honored too.
    batch_kwargs = {
        "query": "select * from table_1;",
        "dataset_options": {"caching": False},
    }
    batch = datasource.get_batch(batch_kwargs)
    validator = BridgeValidator(batch, ExpectationSuite(expectation_suite_name="foo"))
    dataset = validator.get_dataset()
    assert dataset.caching is False
def test_spark_datasource_processes_dataset_options(
    test_folder_connection_path_csv, test_backends, empty_data_context
):
    context: DataContext = empty_data_context
    if "SparkDFDataset" not in test_backends:
        pytest.skip("Spark has not been enabled, so this test must be skipped.")
    datasource = SparkDFDatasource(
        "SparkCSV",
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": test_folder_connection_path_csv,
            }
        },
    )
    batch_kwargs = datasource.build_batch_kwargs(
        "subdir_reader", data_asset_name="test"
    )
    # Spark datasets expose both caching and persist options.
    batch_kwargs["dataset_options"] = {"caching": False, "persist": False}
    batch = datasource.get_batch(batch_kwargs)
    validator = BridgeValidator(
        batch, ExpectationSuite(expectation_suite_name="foo", data_context=context)
    )
    dataset = validator.get_dataset()
    assert dataset.caching is False
    assert dataset._persist is False
def test_pandas_datasource_processes_dataset_options(test_folder_connection_path_csv):
    datasource = PandasDatasource(
        "PandasCSV",
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": test_folder_connection_path_csv,
            }
        },
    )
    batch_kwargs = datasource.build_batch_kwargs(
        "subdir_reader", data_asset_name="test"
    )
    batch_kwargs["dataset_options"] = {"caching": False}
    batch = datasource.get_batch(batch_kwargs)
    validator = BridgeValidator(batch, ExpectationSuite(expectation_suite_name="foo"))
    dataset = validator.get_dataset()
    assert dataset.caching is False