Esempio n. 1
0
def test_seek(orc_data):
    """Exercise Reader.seek with start-, current- and end-relative offsets.

    The reader is built from ``orc_data(50)``; the assertions below expect
    rows whose first column equals the row index (presumably what the
    fixture generates -- confirm against the fixture definition).
    """
    # Second argument of Reader.seek, named for readability.
    from_start, from_current, from_end = 0, 1, 2
    reader = Reader(orc_data(50))

    # Single-argument seek: the return value and current_row both equal the target.
    for target in (0, 10):
        assert reader.seek(target) == target
        assert reader.current_row == target
    assert next(reader)[0] == 10

    # Seeking to the very end leaves nothing to iterate over.
    assert reader.seek(0, from_end) == len(reader)
    with pytest.raises(StopIteration):
        next(reader)[0]

    # Negative offsets counted back from the end.
    assert reader.seek(-1, from_end) == 49
    assert next(reader)[0] == 49
    assert reader.seek(-10, from_end) == 40

    # Offsets relative to the current row, forwards and backwards.
    assert reader.seek(1, from_current) == 41
    assert next(reader)[0] == 41
    reader.seek(10)
    assert reader.seek(8, from_current) == 18
    assert reader.seek(-5, from_current) == 13
    assert next(reader)[0] == 13

    # A negative start-relative position and an unknown second argument
    # are both expected to be rejected.
    with pytest.raises(ValueError):
        reader.seek(-1, from_start)
    with pytest.raises(ValueError):
        reader.seek(10, 10)
Esempio n. 2
0
from pyorc import Reader
from pyorc.enums import StructRepr

ORC_FILE = 'data/orc/userdata1.orc'

with open(ORC_FILE, 'rb') as orc_file:
    reader = Reader(orc_file)

    # Show the schema embedded in the file.
    print(reader.schema)

    # Read the whole file in a single call.
    rows = reader.read()
    print(rows)

    # Rewind to the first row before reading again.
    reader.seek(0)

    # Walk through userdata1.orc in batches of 100 records.
    # Passing a batch size is highly recommended for large ORC files!
    while True:
        rows = reader.read(100)
        if not rows:
            break
        print(rows)

    # Build a second reader over the same file that returns rows as dictionaries.
    reader = Reader(orc_file, struct_repr=StructRepr.DICT)
    print(next(reader))

    print(reader.num_of_stripes)