def test_str_py3():
    """Round-trip records of random string fields through fastavro.

    Builds 50 records, each with four 100-character string fields drawn
    from uppercase letters and digits, writes them to an in-memory buffer
    with ``fastavro.writer`` and reads them back, checking the record count
    and the last record.
    """
    letters = ascii_uppercase + digits
    id_size = 100

    seed('str_py3')  # Repeatable results

    def gen_id():
        return ''.join(choice(letters) for _ in range(id_size))

    keys = ['first', 'second', 'third', 'fourth']

    testdata = [dict((key, gen_id()) for key in keys) for _ in range(50)]

    schema = {
        "fields": [{'name': key, 'type': 'string'} for key in keys],
        "namespace": "namespace",
        "name": "zerobyte",
        "type": "record",
    }

    buf = BytesIO()
    fastavro.writer(buf, schema, testdata)
    buf.seek(0, SEEK_SET)

    # Materialize the records instead of relying on loop variables: if the
    # reader yields nothing, the count assertion fails with its own message
    # rather than a NameError on an unbound loop variable.
    records = list(fastavro.iter_avro(buf))

    assert len(records) == len(testdata), 'bad number of records'
    assert records[-1] == testdata[-1], 'bad last record'
def read_chunk(chunk, head):
    """Get rows from raw bytes block.

    Parameters
    ----------
    chunk : bytes
        Raw bytes of the block to decode.
    head : dict
        Must contain ``'head_bytes'``; those bytes are prepended to
        ``chunk`` when it does not already start with ``MAGIC``.

    Returns
    -------
    list
        Every record the fastavro reader yields from the block.
    """
    import fastavro

    # ``iter_avro`` is the legacy name of the reader; fall back to
    # ``fastavro.reader`` on releases that no longer expose the alias.
    reader = getattr(fastavro, 'iter_avro', None)
    if reader is None:
        reader = fastavro.reader

    head_bytes = head['head_bytes']
    # A block that lacks the leading MAGIC marker needs the saved header
    # in front of it before the reader is given the bytes.
    # NOTE(review): MAGIC is defined elsewhere in the file; presumably the
    # Avro container magic bytes - confirm.
    if not chunk.startswith(MAGIC):
        chunk = head_bytes + chunk
    return list(reader(io.BytesIO(chunk)))
def read_chunk(fobj, off, l, head):
    """Get rows from raw bytes block.

    Parameters
    ----------
    fobj : context manager
        Entering it must yield the file-like object to read from.
    off, l :
        Forwarded to ``read_block`` as its second and third arguments
        (presumably byte offset and length - confirm against dask).
    head : dict
        Must contain ``'sync'`` (passed to ``read_block`` as the fourth
        argument) and ``'head_bytes'`` (prepended to the block when it does
        not already start with ``MAGIC``).

    Returns
    -------
    list
        Every record the fastavro reader yields from the block.
    """
    import fastavro
    from dask.bytes.core import read_block

    # ``iter_avro`` is the legacy name of the reader; fall back to
    # ``fastavro.reader`` on releases that no longer expose the alias.
    reader = getattr(fastavro, 'iter_avro', None)
    if reader is None:
        reader = fastavro.reader

    # Only the raw read needs the file; release it before decoding.
    with fobj as f:
        chunk = read_block(f, off, l, head['sync'])

    head_bytes = head['head_bytes']
    # A block that lacks the leading MAGIC marker needs the saved header
    # in front of it before the reader is given the bytes.
    if not chunk.startswith(MAGIC):
        chunk = head_bytes + chunk
    return list(reader(io.BytesIO(chunk)))
def test_str_py3():
    """Round-trip ``testdata`` through fastavro and check what comes back.

    Writes ``testdata`` under ``schema`` (both defined outside this
    function) to an in-memory buffer, reads it back and verifies the record
    count and the last record.
    """
    buf = BytesIO()
    fastavro.writer(buf, schema, testdata)
    buf.seek(0, SEEK_SET)

    # Materialize the records instead of relying on loop variables: if the
    # reader yields nothing, the count assertion fails with its own message
    # rather than a NameError on an unbound loop variable.
    records = list(fastavro.iter_avro(buf))

    assert len(records) == len(testdata), 'bad number of records'
    assert records[-1] == testdata[-1], 'bad last record'
def read_file(fo):
    """Get rows from file-like.

    Parameters
    ----------
    fo : context manager
        Entering it must yield the file-like object handed to the fastavro
        reader; it is exited once all records have been read.

    Returns
    -------
    list
        Every record the fastavro reader yields from the file.
    """
    import fastavro

    # ``iter_avro`` is the legacy name of the reader; fall back to
    # ``fastavro.reader`` on releases that no longer expose the alias.
    reader = getattr(fastavro, 'iter_avro', None)
    if reader is None:
        reader = fastavro.reader

    with fo as f:
        return list(reader(f))
def __call__(self, string):
    """Resolve ``string`` via the parent class and wrap it in ``iter_avro``.

    Whatever the parent's ``__call__`` returns for ``string`` is passed
    straight to ``iter_avro``, and the resulting reader is returned.
    """
    return iter_avro(super(AvroFile, self).__call__(string))