def encode_record_with_schema_id(self, schema_id, schema, record):
    """
    Encode a record with a given schema id.

    Wire format: 1 magic byte, then the schema id as a 4-byte big-endian
    unsigned int, then the Avro-encoded record body.

    :param int schema_id: registry id identifying the writer schema
    :param schema: Avro schema used when fast_avro is enabled
    :param dict record: the record to encode; must be a python dictionary
    :returns: the encoded bytes
    :raises SerializerError: if record is not a dict, or the schema
        cannot be fetched from the registry
    """
    if not isinstance(record, dict):
        raise SerializerError("record must be a dictionary")
    if not self.fast_avro:
        # Slow path: lazily build and cache a DatumWriter per schema id.
        if schema_id not in self.id_to_writers:
            try:
                schema = self.registry_client.get_by_id(schema_id)
                if not schema:
                    raise SerializerError("Schema does not exist")
                self.id_to_writers[schema_id] = avro.io.DatumWriter(schema)
            except ClientError as e:
                # FIX: chain the registry error so the root cause is not
                # lost (the original swallowed `e` entirely).
                raise SerializerError(
                    "Error fetching schema from registry") from e
    with ContextBytesIO() as outf:
        # write the header
        # magic byte
        outf.write(struct.pack('b', MAGIC_BYTE))
        # write the schema ID in network byte order (big end)
        outf.write(struct.pack('>I', schema_id))
        if self.fast_avro:
            dump(outf, record, schema.to_json())
        else:
            writer = self.id_to_writers[schema_id]
            encoder = avro.io.BinaryEncoder(outf)
            writer.write(record, encoder)
        return outf.getvalue()
def test_dump_load(tmpdir):
    """
    Round-trip one record through a temp file: serialize it with
    fastavro.dump() and read it back with fastavro.load(), then verify
    the deserialized record equals the original.
    """
    schema = {
        "type": "record",
        "name": "Test",
        "namespace": "test",
        "fields": [{
            "name": "field",
            "type": {
                "type": "string"
            }
        }]
    }
    original = {"field": "foobar"}
    avro_path = tmpdir.join('test_dump.avro')
    with avro_path.open('wb') as handle:
        fastavro.dump(handle, original, schema)
    with avro_path.open('rb') as handle:
        restored = fastavro.load(handle, schema)
    assert original == restored
def encode_record_with_schema_id(self, schema_id, schema, record):
    """
    Encode a record with a given schema id.

    Emits the magic byte, the schema id in network (big-endian) byte
    order, and the fastavro-encoded record body.

    :param int schema_id: registry id of the writer schema
    :param schema: Avro schema whose JSON form is handed to dump()
    :param dict record: the record to encode; must be a python dictionary
    :returns: the encoded bytes
    :raises SerializerError: if record is not a dict
    """
    if not isinstance(record, dict):
        raise SerializerError("record must be a dictionary")
    with ContextBytesIO() as buf:
        # Header: magic byte followed by the big-endian schema ID.
        buf.write(struct.pack('b', MAGIC_BYTE) + struct.pack('>I', schema_id))
        # Avro body.
        dump(buf, record, schema.to_json())
        return buf.getvalue()
#!/usr/bin/env python # encoding: utf-8 """Fastavro.""" from io import BytesIO from itertools import repeat from time import time from fastavro import dump, load, acquaint_schema, reader as avro_reader import sys LOOPS = 2 with open(sys.argv[1]) as reader: records = avro_reader(reader) SCHEMA = records.schema RECORDS = list(records) start = time() n = 0 buf = BytesIO() for _ in repeat(None, LOOPS): for record in RECORDS: n += 1 dump(buf, record, SCHEMA) buf.seek(0) print 1000. * (time() - start) / n
#!/usr/bin/env python2.7 # encoding: utf-8 """Fastavro.""" from io import BytesIO from itertools import repeat from time import time from fastavro import dump, load, acquaint_schema, reader as avro_reader import sys LOOPS = 2 with open(sys.argv[1]) as reader: records = avro_reader(reader) SCHEMA = records.schema RECORDS = list(records) buf = BytesIO() m = 0 n = 0 start = time() for _ in repeat(None, LOOPS): for record in RECORDS: dump(buf, record, SCHEMA) m += buf.tell() n += 1 buf.seek(0) if m <= 0: raise Exception('no') print 1000. * (time() - start) / n
def JsonToAvro(record, schema):
    """
    Serialize ``record`` to Avro bytes using ``schema``.

    :param record: the record to serialize, passed straight to dump()
    :param schema: the Avro schema describing the record
    :returns: the Avro-encoded bytes
    """
    stream = io.BytesIO()
    dump(stream, record, schema)
    return stream.getvalue()