예제 #1
0
def test_series_shallow_pull(size, direction, shallow):
    """Check that a (possibly shallow) repo-level sync carries a series over.

    Two frames of ``size`` rows are written to a single series of a local
    repo. The content is then transferred to a second, empty repo: the
    second repo pulls when ``direction`` is ``"pull"``, otherwise the local
    repo pushes. ``shallow`` is forwarded as-is to that call.

    The receiving collection must list one changelog entry after a shallow
    transfer and two otherwise, and its series must read back equal to the
    local one.
    """
    collection_name = "a_collection"
    series_name = "LABEL"

    source_repo = Repo()
    target_repo = Repo()
    source_series = (
        source_repo.create_collection(schema, collection_name) / series_name
    )

    # Two successive writes, so that the source holds two revisions
    first_payload = {"timestamp": arange(size), "value": arange(size)}
    source_series.write(first_payload)
    second_payload = {"timestamp": arange(size), "value": arange(size) * 2}
    source_series.write(second_payload)

    # Anything other than "pull" is treated as a push
    if direction != "pull":
        source_repo.push(target_repo, shallow=shallow)
    else:
        target_repo.pull(source_repo, shallow=shallow)

    target_coll = target_repo / collection_name
    revisions = target_coll.changelog.log()
    if shallow:
        expected_count = 1
    else:
        expected_count = 2
    assert len(revisions) == expected_count

    target_series = target_coll / series_name
    reference = source_series.frame()
    assert target_series.frame() == reference
예제 #2
0
def test_pull(threaded, large):
    """Exercise collection-level pull/push and the repo-level schema check.

    A series in a "remote" repo receives ten writes of ``N`` rows each
    (``N`` is 100_000 when ``large`` is true, 10 otherwise). The test then
    checks, in order:

    * the number of entries listed by the remote repo's pod;
    * pulling the remote collection into a fresh collection;
    * pushing the remote collection to a fresh collection;
    * re-opening an already populated collection with
      ``raise_if_exists=False`` and reading the series through both the old
      and the new handle;
    * pulling into a series that already holds local data;
    * pulling between repos whose same-named collections use different
      schemas, which must raise ``ValueError``.

    ``threaded`` is not referenced in the body; presumably it is a
    fixture/parameter that configures the execution mode (TODO confirm).
    """
    c_label = "a_collection"
    s_label = "a_series"
    remote_repo = Repo()
    remote_coll = remote_repo.create_collection(schema, c_label)
    rseries = remote_coll / s_label

    # Test support of both small datasets (where data is embedded in
    # commits) and large ones (arrays are saved on their own)
    N = 100_000 if large else 10
    for i in range(10):
        # Ten successive writes of N rows to the same series, each one
        # shifted by one step
        rseries.write({
            "timestamp": range(i, i + N),
            "value": range(i + 100, i + 100 + N),
        })
    nb_items = len(remote_repo.pod.ls())
    if large:
        assert nb_items > 2
    else:
        # For small arrays we have only two folders (one for the repo
        # registry, one for the collection)
        assert nb_items == 2
    expected = rseries.frame()

    # Test pull
    local_repo = Repo()
    local_coll = local_repo.create_collection(schema, c_label)
    local_coll.pull(remote_coll)
    lseries = local_coll / s_label
    assert lseries.frame() == expected

    # Test push
    other_repo = Repo()
    other_coll = other_repo.create_collection(schema, c_label)
    remote_coll.push(other_coll)
    oseries = other_coll / s_label
    assert oseries.frame() == expected

    # Test with existing series: re-open the collection that already
    # received the push and read the series through the fresh handle
    local_repo = Repo()
    local_coll = local_repo.create_collection(schema, c_label)
    local_coll.pull(remote_coll)
    lseries = (
        other_repo.create_collection(schema, c_label, raise_if_exists=False) /
        s_label)
    # The handle obtained before the re-open must still see the data ...
    assert oseries.frame() == expected
    # ... and so must the handle obtained through the re-opened collection
    # (previously assigned but never checked)
    assert lseries.frame() == expected

    # Test with existing series with existing data
    local_repo = Repo()
    local_coll = local_repo.create_collection(schema, c_label)
    lseries = local_coll / s_label
    frm = Frame(
        schema,
        {
            "timestamp": range(0, 20),
            "value": range(0, 20),
        },
    )
    lseries.write(frm)
    local_coll.pull(remote_coll)
    # After the pull the local series is expected to read back exactly the
    # locally written frame
    assert lseries.frame() == frm

    # Test with existing series with other schema
    local_repo = Repo()
    other_schema = Schema(timestamp="int*", value="int")
    local_coll = local_repo.create_collection(other_schema, c_label)
    # Only a series handle is taken here, nothing is written to it
    lseries = local_coll / s_label

    # Same collection label on both sides but different schemas: the
    # repo-level pull must be rejected
    with pytest.raises(ValueError):
        local_repo.pull(remote_repo)