Beispiel #1
0
def test_generic_serialization(manifests, manifest_type, format, compressed):
    """Round-trip a manifest through a temporary file and expect equality.

    The manifest picked from ``manifests`` by ``manifest_type`` is written
    with ``store_manifest`` and read back with ``load_manifest``. The
    temporary file's suffix is ``.<format>``, with ``.gz`` appended when
    ``compressed`` is truthy.
    """
    original = manifests[manifest_type]
    # Build the file suffix step by step: '.<format>' plus optional '.gz'.
    extension = '.' + format
    if compressed:
        extension += '.gz'
    with NamedTemporaryFile(suffix=extension) as tmp:
        store_manifest(original, tmp.name)
        reloaded = load_manifest(tmp.name)
        assert original == reloaded
Beispiel #2
0
def test_lazy_jsonl_to_arrow_serialization(manifests, manifest_type, format,
                                           compressed):
    """Check that a manifest converted JSONL -> Arrow yields the same items.

    The manifest picked from ``manifests`` by ``manifest_type`` is stored to
    a temporary file (suffix ``.<format>``, plus ``.gz`` when ``compressed``
    is truthy), opened with ``from_jsonl_lazy``, written out with
    ``to_arrow`` and finally re-read with ``from_arrow``. The items of the
    re-read manifest must match the original ones pairwise *and* in count.
    """
    # Function-scope import keeps this block self-contained.
    from itertools import zip_longest

    manifest = manifests[manifest_type]
    with NamedTemporaryFile(suffix='.' + format + ('.gz' if compressed else '')) as jsonl_f, \
            NamedTemporaryFile(suffix='.arrow') as arrow_f:
        store_manifest(manifest, jsonl_f.name)
        # For now, we have to first create a JSONL so that we can create mmapped arrow...
        lazy_temp_manifest = type(manifest).from_jsonl_lazy(jsonl_f.name)
        lazy_temp_manifest.to_arrow(arrow_f.name)
        # Now read the real mmapped arrow manifest.
        lazy_manifest = type(manifest).from_arrow(arrow_f.name)
        # A plain zip() stops at the shorter iterable, so a truncated (or
        # empty) lazy manifest would pass unnoticed; zip_longest pads the
        # shorter side with a sentinel, which we reject explicitly.
        missing = object()
        for eager_obj, lazy_obj in zip_longest(manifest, lazy_manifest,
                                               fillvalue=missing):
            assert eager_obj is not missing and lazy_obj is not missing, \
                'eager and lazy manifests yield a different number of items'
            assert eager_obj == lazy_obj
Beispiel #3
0
def test_lazy_jsonl_deserialization(manifests, manifest_type, format,
                                    compressed):
    """Check that a lazily opened JSONL manifest mirrors the eager one.

    The manifest picked from ``manifests`` by ``manifest_type`` is stored to
    a temporary file (suffix ``.<format>``, plus ``.gz`` when ``compressed``
    is truthy) and re-opened with ``from_jsonl_lazy``. The test then checks
    that iteration yields equal items in equal number, and that every item
    can be looked up in the lazy manifest by its ``id``.
    """
    # Function-scope import keeps this block self-contained.
    from itertools import zip_longest

    manifest = manifests[manifest_type]
    with NamedTemporaryFile(suffix='.' + format +
                            ('.gz' if compressed else '')) as f:
        store_manifest(manifest, f.name)
        lazy_manifest = type(manifest).from_jsonl_lazy(f.name)
        # Test iteration. A plain zip() stops at the shorter iterable, so a
        # truncated (or empty) lazy manifest would pass unnoticed;
        # zip_longest pads the shorter side with a sentinel we reject.
        missing = object()
        for eager_obj, lazy_obj in zip_longest(manifest, lazy_manifest,
                                               fillvalue=missing):
            assert eager_obj is not missing and lazy_obj is not missing, \
                'eager and lazy manifests yield a different number of items'
            assert eager_obj == lazy_obj
        # Test accessing elements by ID (the lookup itself must not raise;
        # the returned value is intentionally not inspected here).
        for lazy_obj in lazy_manifest:
            lazy_manifest[lazy_obj.id]