Example #1
0
    def encode_record_with_schema_id(self, schema_id, schema, record):
        """
        Encode a record with a given schema id.

        The output is the Confluent wire format: a magic byte, the 4-byte
        big-endian schema id, then the Avro-encoded record body.

        :param schema_id: registry id of the schema to encode with
        :param schema: parsed Avro schema (used directly on the fastavro path)
        :param record: record to serialize; must be a python dictionary
        :returns: encoded bytes
        :raises SerializerError: if *record* is not a dict, or the schema
            cannot be fetched from the registry
        """
        if not isinstance(record, dict):
            raise SerializerError("record must be a dictionary")

        # Slow (avro-python) path: lazily fetch the schema and cache a
        # DatumWriter per schema id.
        if not self.fast_avro and schema_id not in self.id_to_writers:
            try:
                schema = self.registry_client.get_by_id(schema_id)
                if not schema:
                    raise SerializerError("Schema does not exist")
                self.id_to_writers[schema_id] = avro.io.DatumWriter(schema)
            except ClientError as e:
                # Chain the original client error so the registry failure
                # is not lost from the traceback.
                raise SerializerError(
                    "Error fetching schema from registry") from e

        with ContextBytesIO() as outf:
            # write the header
            # magic byte
            outf.write(struct.pack('b', MAGIC_BYTE))
            # write the schema ID in network byte order (big end)
            outf.write(struct.pack('>I', schema_id))
            if self.fast_avro:
                dump(outf, record, schema.to_json())
            else:
                writer = self.id_to_writers[schema_id]
                encoder = avro.io.BinaryEncoder(outf)
                writer.write(record, encoder)
            return outf.getvalue()
def test_dump_load(tmpdir):
    """
    Round-trip a record through dump() and load(): serialize it to a temp
    file, read it back, and check the two records are equal.
    """
    test_schema = {
        "type": "record",
        "name": "Test",
        "namespace": "test",
        "fields": [{
            "name": "field",
            "type": {
                "type": "string"
            }
        }]
    }
    original = {"field": "foobar"}

    avro_path = tmpdir.join('test_dump.avro')

    # Serialize the record to disk.
    with avro_path.open('wb') as out_file:
        fastavro.dump(out_file, original, test_schema)

    # Deserialize it again and compare.
    with avro_path.open('rb') as in_file:
        round_tripped = fastavro.load(in_file, test_schema)

    assert original == round_tripped
    def encode_record_with_schema_id(self, schema_id, schema, record):
        """
        Serialize *record* (a python dictionary) using *schema*, prefixed
        with the wire-format header: the magic byte followed by the 4-byte
        big-endian schema id.

        :raises SerializerError: if *record* is not a dictionary
        :returns: the encoded bytes
        """
        if not isinstance(record, dict):
            raise SerializerError("record must be a dictionary")

        with ContextBytesIO() as buf:
            # Header: magic byte + schema id in network byte order,
            # packed in a single call ('>' disables padding, so this is
            # the same 5 bytes as two separate packs).
            buf.write(struct.pack('>bI', MAGIC_BYTE, schema_id))
            # Avro-encoded record body.
            dump(buf, record, schema.to_json())
            return buf.getvalue()
Example #4
0
#!/usr/bin/env python
# encoding: utf-8

"""Fastavro."""

from io import BytesIO
from itertools import repeat
from time import time
from fastavro import dump, load, acquaint_schema, reader as avro_reader
import sys

LOOPS = 2

with open(sys.argv[1]) as reader:
  records = avro_reader(reader)
  SCHEMA = records.schema
  RECORDS = list(records)

start = time()
n = 0
buf = BytesIO()
for _ in repeat(None, LOOPS):
  for record in RECORDS:
    n += 1
    dump(buf, record, SCHEMA)
    buf.seek(0)
print 1000. * (time() - start) / n
Example #5
0
#!/usr/bin/env python2.7
# encoding: utf-8
"""Fastavro."""

from io import BytesIO
from itertools import repeat
from time import time
from fastavro import dump, load, acquaint_schema, reader as avro_reader
import sys

LOOPS = 2

with open(sys.argv[1]) as reader:
    records = avro_reader(reader)
    SCHEMA = records.schema
    RECORDS = list(records)

buf = BytesIO()
m = 0
n = 0
start = time()
for _ in repeat(None, LOOPS):
    for record in RECORDS:
        dump(buf, record, SCHEMA)
        m += buf.tell()
        n += 1
        buf.seek(0)
if m <= 0:
    raise Exception('no')
print 1000. * (time() - start) / n
Example #6
0
 def JsonToAvro(record, schema):
     """Serialize *record* with *schema* and return the Avro bytes."""
     with io.BytesIO() as out:
         dump(out, record, schema)
         return out.getvalue()