def test_series_shallow_pull(size, direction, shallow):
    """Sync a twice-written series between two repos, optionally shallow.

    A series is written twice in a source repo, then transferred to a second
    repo either by pulling from the destination side or by pushing from the
    source side. The test expects the destination collection changelog to
    hold a single entry after a shallow sync and two entries otherwise, and
    the destination series to read back the same frame as the source.

    Args:
        size: Number of rows generated for each write (passed to ``arange``).
        direction: ``"pull"`` to pull from the destination repo; any other
            value pushes from the source repo.
        shallow: Forwarded as the ``shallow`` keyword of ``pull``/``push``.

    NOTE(review): the arguments are presumably provided by fixtures or
    parametrization defined outside this block -- confirm.
    """
    series_label = "LABEL"
    source_repo = Repo()
    target_repo = Repo()

    source_clc = source_repo.create_collection(schema, "a_collection")
    source_series = source_clc / series_label

    # Two writes over the same timestamps; the second one doubles the values.
    first_payload = {"timestamp": arange(size), "value": arange(size)}
    second_payload = {"timestamp": arange(size), "value": arange(size) * 2}
    for payload in (first_payload, second_payload):
        source_series.write(payload)

    # Transfer source -> target, driven from whichever side is requested.
    if direction != "pull":
        source_repo.push(target_repo, shallow=shallow)
    else:
        target_repo.pull(source_repo, shallow=shallow)

    target_clc = target_repo / "a_collection"
    if shallow:
        expected_revisions = 1
    else:
        expected_revisions = 2
    assert len(target_clc.changelog.log()) == expected_revisions

    # Whatever the history depth, the visible data must match the source.
    target_series = target_clc / series_label
    expected_frame = source_series.frame()
    assert target_series.frame() == expected_frame
def test_pull(threaded, large):
    """Exercise collection/repo pull and push across several scenarios.

    A remote series is filled with ten overlapping writes, then the test
    checks, in order:

    1. pulling the remote collection into a fresh local collection,
    2. pushing the remote collection into another fresh collection,
    3. re-opening an already existing collection
       (``raise_if_exists=False``) and reading the series back,
    4. pulling into a series that already holds local data,
    5. pulling between repos whose collections have different schemas,
       which is expected to raise ``ValueError``.

    Args:
        threaded: Not referenced in the body.
            NOTE(review): presumably a fixture requested for its side
            effect -- confirm.
        large: When truthy each write holds 100_000 rows, otherwise 10.
    """
    c_label = "a_collection"
    s_label = "a_series"
    remote_repo = Repo()
    remote_coll = remote_repo.create_collection(schema, c_label)
    rseries = remote_coll / s_label

    # Test support of both small datasets (where data is embedded in
    # commits) and large ones (arrays are saved on their own)
    N = 100_000 if large else 10
    for i in range(10):
        # Write 10 overlapping chunks of size N into the same series
        rseries.write({
            "timestamp": range(i, i + N),
            "value": range(i + 100, i + 100 + N),
        })

    nb_items = len(remote_repo.pod.ls())
    if large:
        assert nb_items > 2
    else:
        # for small arrays we have only two folders (one for the repo
        # registry, one for the collection)
        assert nb_items == 2
    expected = rseries.frame()

    # Test pull
    local_repo = Repo()
    local_coll = local_repo.create_collection(schema, c_label)
    local_coll.pull(remote_coll)
    lseries = local_coll / s_label
    assert lseries.frame() == expected

    # Test push
    other_repo = Repo()
    other_coll = other_repo.create_collection(schema, c_label)
    remote_coll.push(other_coll)
    oseries = other_coll / s_label
    assert oseries.frame() == expected

    # Test with existing series
    local_repo = Repo()
    local_coll = local_repo.create_collection(schema, c_label)
    local_coll.pull(remote_coll)
    lseries = (
        other_repo.create_collection(schema, c_label, raise_if_exists=False)
        / s_label
    )
    # Assert on the series obtained through the re-opened collection; the
    # previous version re-checked `oseries`, leaving `lseries` unverified.
    assert lseries.frame() == expected

    # Test with existing series with existing data
    local_repo = Repo()
    local_coll = local_repo.create_collection(schema, c_label)
    lseries = local_coll / s_label
    frm = Frame(
        schema,
        {
            "timestamp": range(0, 20),
            "value": range(0, 20),
        },
    )
    lseries.write(frm)
    local_coll.pull(remote_coll)
    # The test expects the locally written frame to read back unchanged
    # after the pull.
    assert lseries.frame() == frm

    # Test with existing series with other schema
    local_repo = Repo()
    other_schema = Schema(timestamp="int*", value="int")
    local_coll = local_repo.create_collection(other_schema, c_label)
    # NOTE(review): the result is unused; kept in case `/` has a side
    # effect the scenario relies on -- confirm.
    lseries = local_coll / s_label
    with pytest.raises(ValueError):
        local_repo.pull(remote_repo)