def test_head(test_backend):
    dataset = get_dataset(
        test_backend, data, schemas=schemas.get(test_backend), caching=True
    )
    dataset.expect_column_mean_to_be_between("b", 5, 5)
    head = dataset.head(1)
    assert isinstance(head, PandasDataset)
    assert len(head) == 1
    assert list(head.columns) == ["a", "b", "c", "d"]
    assert head["a"][0] == 2.0
    suite = head.get_expectation_suite()
    assert len(suite.expectations) == 5

    # Interestingly, the original implementation failed to work for a single
    # column (it would always name the column "*").
    # This should also work if we only get a single column
    dataset = get_dataset(
        test_backend, {"a": data["a"]}, schemas=schemas.get(test_backend), caching=True
    )
    head = dataset.head(1)
    assert isinstance(head, PandasDataset)
    assert len(head) == 1
    assert list(head.columns) == ["a"]

    # We also needed special handling for empty tables in SqlalchemyDataset
    dataset = get_dataset(
        test_backend, {"a": []}, schemas=schemas.get(test_backend), caching=True
    )
    head = dataset.head(1)
    assert isinstance(head, PandasDataset)
    assert len(head) == 0
    assert list(head.columns) == ["a"]
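
# Hedged sketch of the head() contract the assertions above exercise, reduced to
# plain pandas. PandasDataset subclasses pandas.DataFrame in this library, so
# iloc-based slicing is a reasonable mental model; _sketch_head is a hypothetical
# helper, not the actual implementation.
def _sketch_head(df, n):
    # iloc slicing keeps the column labels even when the frame is empty, which
    # is why the empty-table case above still reports columns == ["a"].
    return df.iloc[:n]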
def test_caching(test_backend):
    dataset = get_dataset(
        test_backend, data, schemas=schemas.get(test_backend), caching=True
    )
    dataset.get_column_max("a")
    dataset.get_column_max("a")
    dataset.get_column_max("b")
    assert dataset.get_column_max.cache_info().hits == 1
    assert dataset.get_column_max.cache_info().misses == 2

    # Without caching, the method is never wrapped, so cache_info does not exist.
    dataset = get_dataset(
        test_backend, data, schemas=schemas.get(test_backend), caching=False
    )
    with pytest.raises(AttributeError):
        dataset.get_column_max.cache_info()
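
# Hedged sketch of the caching semantics asserted above, assuming caching=True
# wraps column metrics with functools.lru_cache (which is what provides
# cache_info()). _sketch_column_max is a hypothetical stand-in, not the dataset
# method itself.
import functools

@functools.lru_cache(maxsize=None)
def _sketch_column_max(column):
    # Two calls with "a" and one with "b" would yield hits == 1 and misses == 2,
    # matching test_caching; an unwrapped function has no cache_info attribute,
    # hence the AttributeError when caching=False.
    return max(data[column])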
def pytest_generate_tests(metafunc):
    # Load all the JSON files in the directory
    dir_path = os.path.dirname(os.path.realpath(__file__))
    test_configuration_files = glob.glob(dir_path + "/*.json")

    parametrized_tests = []
    ids = []
    for c in contexts:
        for filename in test_configuration_files:
            with open(filename) as file:
                test_configuration = json.load(file)

            if candidate_test_is_on_temporary_notimplemented_list(
                c, test_configuration["expectation_type"]
            ):
                warnings.warn(
                    "Skipping generation of tests for expectation "
                    + test_configuration["expectation_type"]
                    + " and context "
                    + c
                )
            else:
                for d in test_configuration["datasets"]:
                    my_dataset = get_dataset(c, d["data"])
                    for test in d["tests"]:
                        parametrized_tests.append(
                            {
                                "expectation_type": test_configuration["expectation_type"],
                                "dataset": my_dataset,
                                "test": test,
                            }
                        )
                        ids.append(
                            c
                            + ":"
                            + test_configuration["expectation_type"]
                            + ":"
                            + test["title"]
                        )

    metafunc.parametrize("test_case", parametrized_tests, ids=ids)
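
# Hedged sketch of the JSON shape pytest_generate_tests expects: the keys below
# are exactly the ones the loop reads; the values are invented for illustration.
_EXAMPLE_TEST_CONFIGURATION = {
    "expectation_type": "expect_column_values_to_be_in_set",
    "datasets": [
        {
            "data": {"a": [1, 2, 3]},
            "tests": [
                {"title": "basic_positive_case"},
            ],
        }
    ],
}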
# Assumption: this helper is consumed as a pytest fixture (it takes the `sa`
# fixture and is not collected as a test), so the decorator is added here.
@pytest.fixture
def unexpected_count_df(sa):
    return get_dataset("sqlite", {"a": [1, 2, 1, 2, 1, 2, 1, 2, 1, 2]})
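
# Hedged usage sketch for the fixture above. expect_column_values_to_be_in_set
# is a real expectation in this library, but the test body and the result-object
# access are illustrative assumptions.
def test_unexpected_count_df_sketch(unexpected_count_df):
    # Every value in the column is either 1 or 2, so this should succeed.
    result = unexpected_count_df.expect_column_values_to_be_in_set("a", [1, 2])
    assert result.success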