Example #1
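All three variants exercise the same Great Expectations behavior: dataset_options placed into batch_kwargs should be forwarded to the dataset constructor, disabling Spark caching and persistence on the resulting dataset. The snippets assume imports along the following lines; exact module paths shift between Great Expectations releases, so treat this as a sketch:

import pytest

from great_expectations.core import ExpectationSuite
from great_expectations.data_context import DataContext
from great_expectations.datasource import SparkDFDatasource
from great_expectations.validator.validator import BridgeValidator, Validator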
def test_spark_datasource_processes_dataset_options(
        test_folder_connection_path_csv, test_backends, empty_data_context):
    context: DataContext = empty_data_context
    if "SparkDFDataset" not in test_backends:
        pytest.skip(
            "Spark has not been enabled, so this test must be skipped.")
    # Spark datasource reading CSVs discovered under a base directory.
    datasource = SparkDFDatasource(
        "PandasCSV",
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": test_folder_connection_path_csv,
            }
        },
    )
    batch_kwargs = datasource.build_batch_kwargs("subdir_reader",
                                                 data_asset_name="test")
    # dataset_options ride along in batch_kwargs and are forwarded to the
    # dataset constructor.
    batch_kwargs["dataset_options"] = {"caching": False, "persist": False}
    batch = datasource.get_batch(batch_kwargs)
    validator = BridgeValidator(
        batch,
        ExpectationSuite(expectation_suite_name="foo", data_context=context))
    dataset = validator.get_dataset()
    # Both options must be reflected on the resulting dataset.
    assert dataset.caching is False
    assert dataset._persist is False
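The fixtures used above (test_folder_connection_path_csv, test_backends, empty_data_context) are defined in the project's conftest.py. A minimal stand-in for the directory fixture might look like the sketch below; the fixture name matches the test, but the CSV contents are an assumption:

import pytest

@pytest.fixture
def test_folder_connection_path_csv(tmp_path):
    # Write one small CSV so SubdirReaderBatchKwargsGenerator can discover
    # a data asset named "test" under this base directory.
    (tmp_path / "test.csv").write_text("col_1,col_2\n1,a\n2,b\n")
    return str(tmp_path)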
Example #2
def test_pandas_datasource_processes_dataset_options(test_folder_connection_path):
    # Note: despite the "pandas" name, this older variant constructs a
    # SparkDFDatasource; later versions (Example #3) add a Spark skip guard.
    datasource = SparkDFDatasource(
        "PandasCSV",
        generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": test_folder_connection_path,
            }
        },
    )
    batch_kwargs = datasource.build_batch_kwargs("subdir_reader", name="test")
    batch_kwargs["dataset_options"] = {"caching": False, "persist": False}
    batch = datasource.get_batch(batch_kwargs)
    validator = Validator(batch, ExpectationSuite(expectation_suite_name="foo"))
    dataset = validator.get_dataset()
    assert dataset.caching is False
    assert dataset._persist is False
Example #3
def test_pandas_datasource_processes_dataset_options(test_folder_connection_path, test_backends):
    if "SparkDFDataset" not in test_backends:
        pytest.skip("Spark has not been enabled, so this test must be skipped.")
    datasource = SparkDFDatasource(
        "PandasCSV",
        batch_kwargs_generators={
            "subdir_reader": {
                "class_name": "SubdirReaderBatchKwargsGenerator",
                "base_directory": test_folder_connection_path,
            }
        },
    )
    batch_kwargs = datasource.build_batch_kwargs("subdir_reader", name="test")
    batch_kwargs["dataset_options"] = {"caching": False, "persist": False}
    batch = datasource.get_batch(batch_kwargs)
    validator = Validator(batch, ExpectationSuite(expectation_suite_name="foo"))
    dataset = validator.get_dataset()
    assert dataset.caching is False
    assert dataset._persist is False
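Read together, the three examples track the same test across Great Expectations releases: the generators keyword in Example #2 becomes batch_kwargs_generators in Examples #1 and #3, build_batch_kwargs takes name in the older variants but data_asset_name in Example #1, and the newest variant swaps Validator for BridgeValidator with an ExpectationSuite that carries a DataContext. The assertion under test never changes: caching and persist values passed via dataset_options must appear on the resulting dataset.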