Example #1
def test_head(test_backend):
    dataset = get_dataset(test_backend,
                          data,
                          schemas=schemas.get(test_backend),
                          caching=True)
    dataset.expect_column_mean_to_be_between("b", 5, 5)
    head = dataset.head(1)
    assert isinstance(head, PandasDataset)
    assert len(head) == 1
    assert list(head.columns) == ["a", "b", "c", "d"]
    assert head["a"][0] == 2.0
    suite = head.get_expectation_suite()
    assert len(suite.expectations) == 5

    # Interestingly, the original implementation failed to work for a single
    # column (it would always name the column "*").
    # This should also work if we only get a single column
    dataset = get_dataset(test_backend, {"a": data["a"]},
                          schemas=schemas.get(test_backend),
                          caching=True)
    head = dataset.head(1)
    assert isinstance(head, PandasDataset)
    assert len(head) == 1
    assert list(head.columns) == ["a"]

    # We also needed special handling for empty tables in SqlalchemyDataset
    dataset = get_dataset(test_backend, {"a": []},
                          schemas=schemas.get(test_backend),
                          caching=True)
    head = dataset.head(1)
    assert isinstance(head, PandasDataset)
    assert len(head) == 0
    assert list(head.columns) == ["a"]
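
These tests lean on module-level data and schemas objects and a get_dataset helper from the surrounding test module, none of which appear in the excerpt. A hypothetical data dictionary, shaped only to satisfy the assertions above (the real values live in the test module):

# Invented fixture data: columns "a".."d", first value of "a" equal to 2.0,
# and a mean of exactly 5 for column "b", matching the checks in test_head.
data = {
    "a": [2.0, 5.0],
    "b": [5, 5],
    "c": [0, 10],
    "d": [None, None],
}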
Example #2
def test_caching(test_backend):
    dataset = get_dataset(test_backend, data, schemas=schemas.get(test_backend), caching=True)
    dataset.get_column_max('a')
    dataset.get_column_max('a')
    dataset.get_column_max('b')
    assert dataset.get_column_max.cache_info().hits == 1
    assert dataset.get_column_max.cache_info().misses == 2

    dataset = get_dataset(test_backend, data, schemas=schemas.get(test_backend), caching=False)
    with pytest.raises(AttributeError):
        dataset.get_column_max.cache_info()
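
The behaviour asserted here matches the interface of functools.lru_cache: a wrapped method gains cache_info(), while an unwrapped one raises AttributeError when asked for it. A minimal, self-contained sketch of that mechanism, independent of the actual great_expectations internals:

import functools

class Metrics:
    def __init__(self, values, caching=True):
        self._values = values
        if caching:
            # Wrapping the bound method as an instance attribute adds
            # cache_info() / cache_clear() to it.
            self.column_max = functools.lru_cache(maxsize=None)(self.column_max)

    def column_max(self, column):
        return max(self._values[column])

m = Metrics({"a": [1, 2, 3], "b": [4, 5]})
m.column_max("a")
m.column_max("a")
m.column_max("b")
assert m.column_max.cache_info().hits == 1
assert m.column_max.cache_info().misses == 2

# Without the wrapper there is no cache_info attribute at all.
plain = Metrics({"a": [1]}, caching=False)
try:
    plain.column_max.cache_info()
except AttributeError:
    pass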
Example #3
def pytest_generate_tests(metafunc):

    # Load all the JSON files in the directory
    dir_path = os.path.dirname(os.path.realpath(__file__))
    test_configuration_files = glob.glob(dir_path+'/*.json')

    parametrized_tests = []
    ids = []
    for c in contexts:
        for filename in test_configuration_files:
            with open(filename) as file:
                test_configuration = json.load(file)

            if candidate_test_is_on_temporary_notimplemented_list(c, test_configuration["expectation_type"]):
                warnings.warn("Skipping generation of tests for expectation " + test_configuration["expectation_type"] +
                              " and context " + c)
            else:
                for d in test_configuration['datasets']:
                    my_dataset = get_dataset(c, d["data"])

                    for test in d["tests"]:
                        parametrized_tests.append({
                            "expectation_type": test_configuration["expectation_type"],
                            "dataset": my_dataset,
                            "test": test,
                        })

                        ids.append(c+":"+test_configuration["expectation_type"]+":"+test["title"])

    metafunc.parametrize(
        "test_case",
        parametrized_tests,
        ids=ids
    )
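
The generator only reads a handful of keys from each JSON file: a top-level expectation_type, a list of datasets, and per dataset a data block plus a list of tests whose entries carry a title. A sketch of one configuration, written as the Python structure json.load would produce; the expectation name and values are placeholders, and real files may carry additional per-test fields that the downstream runner consumes:

# Shape of a single configuration file as pytest_generate_tests reads it.
example_configuration = {
    "expectation_type": "expect_column_values_to_not_be_null",
    "datasets": [
        {
            "data": {"a": [1, 2, None]},          # passed to get_dataset(c, d["data"])
            "tests": [
                {"title": "basic_positive_test"}, # used to build the pytest id
            ],
        },
    ],
}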
Example #4
def unexpected_count_df(sa):
    return get_dataset("sqlite", {"a": [1, 2, 1, 2, 1, 2, 1, 2, 1, 2]})
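
Taken on its own this reads like a pytest fixture: the sa parameter presumably requests a sqlalchemy fixture so the sqlite-backed dataset is only built when sqlalchemy is available. A hedged usage sketch, assuming the function is registered with pytest.fixture in the surrounding module:

import pytest

@pytest.fixture
def unexpected_count_df(sa):
    return get_dataset("sqlite", {"a": [1, 2, 1, 2, 1, 2, 1, 2, 1, 2]})

# Any test that names the fixture receives the ten-row sqlite dataset.
def test_uses_unexpected_count_df(unexpected_count_df):
    assert unexpected_count_df is not None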