def test_round_tripping(source_vals, schema):
    """Write ``source_vals`` to an in-memory container and read them back.

    The round trip is repeated once per codec ('null', 'deflate', 'snappy');
    each time the decoded records must compare equal to ``source_vals``.
    """
    codecs = ['null', 'deflate', 'snappy']
    for codec_name in codecs:
        stream = BytesIO()
        parsed_schema = cavro.Schema(schema)
        with cavro.ContainerWriter(stream, parsed_schema, codec_name) as out:
            out.write_many(source_vals)
        # Rewind the buffer before handing it to the reader.
        stream.seek(0)
        decoded = list(cavro.ContainerReader(stream))
        assert decoded == source_vals
def test_weather():
    """Check that the plain and snappy weather fixtures hold the same records.

    For each fixture the container schema's canonical form must equal
    ``WEATHER_SCHEMA``. Every record is JSON-encoded through the container's
    schema and collected into a set; the sets of all fixtures must be equal.
    """
    exts = [".avro", "-snappy.avro"]
    records = {}
    for ext in exts:
        # Use a context manager so the fixture file is closed once it has
        # been fully consumed.
        with (DATA_DIR / f'weather{ext}').open('rb') as fo:
            container = cavro.ContainerReader(fo)
            schema = container.schema
            assert schema.canonical_form == WEATHER_SCHEMA
            # Keep the encoded records as a set: set equality does not depend
            # on iteration order, whereas list(set) ordering is not guaranteed
            # to match between two sets with the same members.
            records[ext] = {schema.json_encode(r) for r in container}
    baseline = next(iter(records.values()))
    for ext, encoded in records.items():
        assert encoded == baseline, f"weather{ext} records differ from baseline"
def main(count):
    """Fuzz container round-tripping with generated schemas and values.

    Each iteration builds a schema via ``schema.make_schema_json``, generates
    ``randint(0, 2000)`` values for it, writes them to an in-memory container,
    reads them back and compares the two sides with ``values.almost_equal``.
    On the first mismatch the schema JSON and the comparison info are printed
    and the function returns.

    Args:
        count: Number of iterations to run. A negative value iterates without
            bound (until a mismatch, an error, or an interrupt).

    Raises:
        Exception: Any error raised during an iteration is re-raised after the
            bytes written so far have been printed.
    """
    counter = itertools.count() if count < 0 else range(count)
    for _ in tqdm.tqdm(counter):
        # Created outside the try so the handler below can always read it.
        tmp = io.BytesIO()
        try:
            sch_json = schema.make_schema_json(5)
            sch = cavro.Schema(sch_json)
            vals = [
                values.make_value_for_type(sch.type, 5)
                for _ in range(randint(0, 2000))
            ]
            with cavro.ContainerWriter(tmp, sch) as writer:
                writer.write_many(vals)
            tmp.seek(0)
            reader = cavro.ContainerReader(tmp)
            decoded = [values.de_record(v) for v in reader]
            expected = [values.de_record(v) for v in vals]
            info = []
            equal = values.almost_equal(decoded, expected, info)
            if not equal:
                print("----------- SCHEMA -------------")
                print(sch_json)
                print("\n----------- INFO ---------------")
                print(info)
                return
        except Exception:
            # Only dump the buffer for real failures. A bare ``except`` would
            # also fire on KeyboardInterrupt, which is the normal way to stop
            # an unbounded run, and flood the terminal with container bytes.
            print(tmp.getvalue())
            raise
def test_weather_sorted():
    """Construct a ``ContainerReader`` for the ``weather-sorted.avro`` fixture.

    As visible here, the test only builds the reader from the fixture path;
    no assertions are made about its contents.
    """
    sorted_fixture = DATA_DIR / 'weather-sorted.avro'
    container = cavro.ContainerReader(sorted_fixture)
def cavro(self):
    """Return the set of ``record.file.project`` values in ``self.BULK_FILE``.

    The file is opened in binary mode and iterated with
    ``cavro.ContainerReader``.
    """
    # NOTE(review): presumably a method, in which case ``cavro`` below
    # resolves to the module-level name rather than this function - confirm.
    with open(self.BULK_FILE, 'rb') as bulk:
        return {rec.file.project for rec in cavro.ContainerReader(bulk)}
def test_container_reading_from_fileobj():
    """Read ``data/weather.avro`` through an open binary file object.

    The fixture is located relative to this test module. The third record
    decoded from the container must have ``temp == -11``.
    """
    container_file = Path(__file__).parent / 'data' / 'weather.avro'
    # Open via a context manager so the handle is closed after reading; all
    # records are materialised while the file is still open.
    with container_file.open('rb') as fo:
        records = list(cavro.ContainerReader(fo))
    assert records[2].temp == -11
def test_container_invalid_codec():
    """Expect a ``ValueError`` when the container names an unknown codec.

    The input is ``SIMPLE_CONTAINER`` with every ``b'null'`` replaced by
    ``b'xxxx'``; the error message must match the unsupported-codec pattern.
    """
    corrupted = SIMPLE_CONTAINER.replace(b'null', b'xxxx')
    expect_unsupported = pytest.raises(
        ValueError,
        match=r"Unsupported codec: 'xxxx'",
    )
    with expect_unsupported:
        cavro.ContainerReader(corrupted)
def test_container_invalid_magic():
    """Expect a ``ValueError`` when the container's leading bytes are altered.

    The first four bytes of ``SIMPLE_CONTAINER`` are swapped for a different
    four-byte prefix, and constructing a reader must fail with a message
    matching "Invalid file header".
    """
    remainder = SIMPLE_CONTAINER[4:]
    corrupted = b"Obj\x02" + remainder
    with pytest.raises(ValueError, match="Invalid file header"):
        cavro.ContainerReader(corrupted)
def test_simplest_container_invalid_magic():
    """Read ``SIMPLE_CONTAINER`` and check it yields the single value ``1``.

    Despite the name, the body exercises a successful read of the unmodified
    container rather than an invalid-header case.
    """
    decoded = list(cavro.ContainerReader(SIMPLE_CONTAINER))
    assert decoded == [1]