def test_seek(orc_data):
    """Check Reader.seek for each whence mode and for invalid arguments.

    The assertions below imply a 50-row input whose first field equals the
    row index (presumably what ``orc_data(50)`` produces -- confirm against
    the fixture).
    """
    # Local names for the numeric ``whence`` values passed to seek().
    from_start, from_current, from_end = 0, 1, 2

    reader = Reader(orc_data(50))

    # Default whence: seek returns the new row and current_row tracks it.
    assert reader.seek(0) == 0
    assert reader.current_row == 0
    assert reader.seek(10) == 10
    assert reader.current_row == 10
    assert next(reader)[0] == 10

    # Relative to the end: offset 0 lands past the last row, so iterating
    # from there is immediately exhausted.
    assert reader.seek(0, from_end) == len(reader)
    with pytest.raises(StopIteration):
        next(reader)[0]
    assert reader.seek(-1, from_end) == 49
    assert next(reader)[0] == 49
    assert reader.seek(-10, from_end) == 40

    # Relative to the current row, moving forwards and backwards.
    assert reader.seek(1, from_current) == 41
    assert next(reader)[0] == 41
    reader.seek(10)
    assert reader.seek(8, from_current) == 18
    assert reader.seek(-5, from_current) == 13
    assert next(reader)[0] == 13

    # A negative absolute row and an unknown whence are both rejected.
    with pytest.raises(ValueError):
        reader.seek(-1, from_start)
    with pytest.raises(ValueError):
        reader.seek(10, 10)
from pyorc import Reader
from pyorc.enums import StructRepr

# Path of the sample ORC file used throughout this example.
ORC_FILE = 'data/orc/userdata1.orc'

with open(ORC_FILE, 'rb') as orc_file:
    reader = Reader(orc_file)

    # Show the schema embedded in the file.
    print(reader.schema)

    # Load every row in a single call.
    rows = reader.read()
    print(rows)

    # Rewind to the first row before reading again.
    reader.seek(0)

    # Read the file in batches of 100 records.
    # Passing a batch size is highly recommended for large ORC files!
    while True:
        rows = reader.read(100)
        if not rows:
            break
        print(rows)

    # Open a second reader on the same file that represents structs as
    # dictionaries (StructRepr.DICT), then print one row and the stripe count.
    reader = Reader(orc_file, struct_repr=StructRepr.DICT)
    print(next(reader))
    print(reader.num_of_stripes)