Ejemplo n.º 1
0
def test_load_dataset_returns_cached_despite_update(upstream, repo, dataset):
    """A reload after an upstream update still returns the first-loaded data.

    A modified copy of the loaded dataset is added to ``upstream``; a second
    ``repo.load_dataset`` call must nevertheless compare equal to the dataset
    obtained by the first call.
    """
    first_load = repo.load_dataset('dataset')

    # Build a changed variant (the 'b' entry of its df incremented by one)
    # and add it upstream.
    changed = first_load.copy()
    changed.df['b'] += 1
    upstream.add_dataset(changed, commit_message="Update dataset")

    # Nothing is pulled in between, so per the test's name the repo is
    # expected to keep serving the copy it already has.
    second_load = repo.load_dataset('dataset')
    assert second_load.equals(first_load)
Ejemplo n.º 2
0
def test_load_dataset_cached(upstream, repo, dataset):
    """A dataset loaded once can be loaded again after the cloud is cleared."""
    # Prime the cache with an initial load; its result is not needed here.
    repo.load_dataset('dataset')

    # With the cloud emptied, a successful reload has to come from the cache.
    upstream.clear_cloud()

    reloaded = repo.load_dataset('dataset')
    assert reloaded.equals(dataset)
Ejemplo n.º 3
0
def test_pull_dataset_enables_loading(repo, uncached_dataset):
    """After ``pull_datasets``, the uncached dataset can be loaded by id."""
    repo.pull_datasets()

    identifier = uncached_dataset.identifier
    fetched = repo.load_dataset(identifier)
    assert fetched.equals(uncached_dataset)
Ejemplo n.º 4
0
def test_load_dataset_fail_before_pull(repo, uncached_dataset):
    """Loading the uncached dataset without pulling first must raise.

    The expected exception type is ``NoOutputOrStageError``.
    """
    expected_failure = pytest.raises(NoOutputOrStageError)
    with expected_failure:
        repo.load_dataset(uncached_dataset.identifier)
Ejemplo n.º 5
0
def test_load_dataset_does_not_exist(repo):
    """Loading the identifier 'does-not-exist' must raise.

    The expected exception type is ``NoOutputOrStageError``.
    """
    expected_failure = pytest.raises(NoOutputOrStageError)
    with expected_failure:
        repo.load_dataset('does-not-exist')
Ejemplo n.º 6
0
def test_load_dataset_not_cached(repo, cache, dataset):
    """Loading a dataset that is absent from the cache adds it to the cache."""
    # Precondition: the cache does not contain the dataset yet.
    assert dataset not in cache

    result = repo.load_dataset(dataset.identifier)

    # The load must both populate the cache and return matching data.
    assert dataset in cache
    assert result.equals(dataset)