def main(args):
    log = logging.getLogger(__name__)
    log.setLevel(logging.INFO)

    sys_log = logging.handlers.SysLogHandler("/dev/log")
    sys_format = logging.Formatter('%(name)s[%(process)d]: %(levelname)s %(message)s')
    sys_log.setFormatter(sys_format)

    log.addHandler(sys_log)

    reader = DataFileReader(open(args.avro_file, "rb"), DatumReader())

    schema = reader.datum_reader.writers_schema

    for i, row in enumerate(reader):
        log.debug("Consumer row:" + str(row))
        writer = DatumWriter(schema)
        bytes_writer = io.BytesIO()
        encoder = BinaryEncoder(bytes_writer)
        writer.write(row, encoder)
        raw_bytes = bytes_writer.getvalue()
        b64enc = base64.b64encode(raw_bytes).decode("utf-8")
        msg = {"messages": [{"data": b64enc}]}

        json_str = json.dumps(msg)
        log.debug("json msg:" + json_str)
        publish(json_str, args.ams_endpoint, args.ams_project, args.ams_topic, args.ams_key, log)
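For reference, a consumer on the receiving side reverses the two encodings: base64-decode the message payload, then decode the Avro bytes against the same writer's schema. A minimal sketch, assuming avro.io's DatumReader and BinaryDecoder are imported alongside the names used above:

def decode_ams_message(data_b64, schema):
    # reverse of the publish path: base64 text -> raw Avro bytes -> record
    raw_bytes = base64.b64decode(data_b64)
    return DatumReader(schema).read(BinaryDecoder(io.BytesIO(raw_bytes)))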
def send_avro_record_to_kafka(topic, value, bootstrap_servers, avro_schema_json):
    value_schema = avro.schema.parse(avro_schema_json)

    producer_config = {
        "bootstrap.servers": bootstrap_servers,
        "request.timeout.ms": "1000",
    }

    producer = Producer(producer_config)

    writer = DatumWriter(value_schema)
    bytes_writer = io.BytesIO()
    encoder = BinaryEncoder(bytes_writer)

    writer.write(value, encoder)

    try:
        producer.produce(topic=topic, value=bytes_writer.getvalue())
    except Exception as e:
        print(
            f"Exception while producing record value - {value} to topic - {topic}: {e}"
        )
    else:
        print(f"Successfully producing record value - {value} to topic - {topic}")

    producer.flush()
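Note that produce() is asynchronous: the try/except above only catches local errors such as a full internal queue, while broker-side failures are reported later. A sketch of the same call with a delivery report callback, assuming the confluent-kafka Producer used above:

def delivery_report(err, msg):
    # invoked from flush()/poll() once the broker acks or rejects the message
    if err is not None:
        print(f"Delivery failed to {msg.topic()}: {err}")
    else:
        print(f"Record delivered to {msg.topic()} [{msg.partition()}]")

producer.produce(topic=topic, value=bytes_writer.getvalue(),
                 on_delivery=delivery_report)
producer.flush()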
Example #3
def toKey(self, x, avroType):
    x = jsonEncoder(avroType, x, False)
    buf = io.BytesIO()
    writer = DatumWriter(avroType.schema)
    writer.write(x, BinaryEncoder(buf))
    buf.flush()
    return base64.b64encode(buf.getvalue())
Example #4
class Meta:
    def __init__(self,
                 callback,
                 service_name,
                 param_schema,
                 result_schema,
                 version=0):
        self.callback = callback
        self.service_name = service_name
        self.param_schema = SchemaFromJSONData(param_schema, Names())
        self.result_schema = SchemaFromJSONData(result_schema, Names())
        self.version = version
        self._param_writer = DatumWriter(self.param_schema)
        self._param_reader = DatumReader(self.param_schema)
        self._result_writer = DatumWriter(self.result_schema)
        self._result_reader = DatumReader(self.result_schema)

    def decode_param(self, byte_mem):
        return self._param_reader.read(BinaryDecoder(BytesIO(byte_mem)))

    def encode_param(self, param):
        logger.info(param)
        buf = BytesIO()
        self._param_writer.write(param, BinaryEncoder(buf))
        return buf.getbuffer().tobytes()

    def decode_result(self, byte_mem):
        return self._result_reader.read(BinaryDecoder(BytesIO(byte_mem)))

    def encode_result(self, result):
        buf = BytesIO()
        self._result_writer.write(result, BinaryEncoder(buf))
        return buf.getbuffer().tobytes()
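A round trip through the param helpers is a quick sanity check. A sketch with a hypothetical two-int record schema (the callback and service name are placeholders, and the module-level logger the class references is assumed to be configured):

add_params_schema = {
    "type": "record",
    "name": "AddParams",
    "fields": [{"name": "a", "type": "int"}, {"name": "b", "type": "int"}],
}
meta = Meta(callback=lambda p: p, service_name="add",
            param_schema=add_params_schema, result_schema=add_params_schema)
payload = meta.encode_param({"a": 1, "b": 2})
assert meta.decode_param(payload) == {"a": 1, "b": 2}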
        def _avro_serialize(msg):
            avro_writer = DatumWriter(self.shared.topic['schema'])
            bytesio = BytesIO()
            encoder = BinaryEncoder(bytesio)
            avro_writer.write(msg, encoder)

            return bytesio.getvalue()
def serialize(thing):
    writer = DatumWriter(SCHEMA)
    buf = BytesIO()
    writer.write(thing, BinaryEncoder(buf))
    v = buf.getvalue()
    buf.close()
    return v
    def serialize(
        self,
        data,  # type: ObjectType
        schema,  # type: Union[str, bytes, avro.schema.Schema]
    ):
        # type: (ObjectType, Union[str, bytes, avro.schema.Schema]) -> bytes
        """Convert the provided value to it's binary representation and write it to the stream.
        Schema must be a Avro RecordSchema:
        https://avro.apache.org/docs/1.10.0/gettingstartedpython.html#Defining+a+schema
        :param data: An object to serialize
        :type data: ObjectType
        :param schema: An Avro RecordSchema
        :type schema: Union[str, bytes, avro.schema.Schema]
        :returns: Encoded bytes
        :rtype: bytes
        """
        if not schema:
            raise ValueError("Schema is required in Avro serializer.")

        if not isinstance(schema, avro.schema.Schema):
            schema = avro.schema.parse(schema)

        try:
            writer = self._schema_writer_cache[str(schema)]
        except KeyError:
            writer = DatumWriter(schema)
            self._schema_writer_cache[str(schema)] = writer

        stream = BytesIO()
        with stream:
            writer.write(data, BinaryEncoder(stream))
            encoded_data = stream.getvalue()
        return encoded_data
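Usage then only needs a record and its schema; a sketch assuming an instance of the class above named serializer whose _schema_writer_cache starts as an empty dict:

user_schema = """
{"type": "record", "name": "User",
 "fields": [{"name": "name", "type": "string"}]}
"""
encoded = serializer.serialize({"name": "Ada"}, schema=user_schema)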
Example #10
def encode_avro_message(data):
    datum_writer = DatumWriter(get_media_avro_schema())
    bytes_writer = io.BytesIO()
    encoder = avro.io.BinaryEncoder(bytes_writer)
    datum_writer.write(data, encoder)
    raw_bytes = bytes_writer.getvalue()
    return raw_bytes
    def run(self, n):
        # JSON Serializer
        # serializer = ajs.AvroJsonSerializer(self.movies_schema)
        # json_data = serializer.to_json(self.movies_data)
        total_ser = 0
        total_deser = 0
        bytes_len = 0
        for i in range(0, n):
            datum_writer = DatumWriter(self.movies_schema)
            bytes_writer = io.BytesIO()

            encoder = BinaryEncoder(bytes_writer)
            tic = timeit.default_timer()
            datum_writer.write(self.movies_data, encoder)
            elapsed = timeit.default_timer() - tic
            payload = bytes_writer.getvalue()
            total_ser = total_ser + elapsed
            bytes_len = len(payload)

            bytes_reader = io.BytesIO(payload)
            decoder = BinaryDecoder(bytes_reader)
            reader = DatumReader(self.movies_schema)
            tic2 = timeit.default_timer()
            movies = reader.read(decoder)
            elapsed2 = timeit.default_timer() - tic2
            total_deser = total_deser + elapsed2

        self.logger.log(logging.INFO, "serialized len: %s bytes", bytes_len)
        avg_ser = (total_ser*(10**9))/n
        avg_deser = (total_deser*(10**9))/n
        self.logger.log(logging.INFO, "Serialization time: \n%s", avg_ser)
        self.logger.log(logging.INFO, "De-serialization time: \n%s", avg_deser)
Example #12
def serialize(myschema, myobject):
    buf = io.BytesIO()
    encoder = BinaryEncoder(buf)
    writer = DatumWriter(writer_schema=myschema)
    writer.write(myobject, encoder)
    buf.seek(0)
    return buf.read()
Example #13
def __call__(self, state, scope, pos, paramTypes, x):
    schema = avro.schema.parse(json.dumps(paramTypes[0]))
    x = untagUnion(x, paramTypes[0])
    bytes_io = io.BytesIO()
    writer = DatumWriter(schema)
    writer.write(x, BinaryEncoder(bytes_io))
    bytes_io.flush()
    return bytes_io.getvalue()
Example #14
def __call__(self, state, scope, pos, paramTypes, x):
    schema = avro.schema.Parse(json.dumps(paramTypes[0]))
    x = untagUnion(x, paramTypes[0])
    bytes_io = io.BytesIO()
    writer = DatumWriter(schema)
    writer.write(x, BinaryEncoder(bytes_io))
    bytes_io.flush()
    return bytesToString(bytes_io.getvalue())
Example #15
def to_avro(self):
    writer = DatumWriter()
    writer.set_writer_schema(SYSLOG_AVRO_SCHEMA)
    buffer = BytesIO()
    encoder = BinaryEncoder(buffer)
    writer.write(self.to_avro_dict(), encoder)
    data = buffer.getbuffer().tobytes()
    return bytearray(data)
Example #16
    def _serialize_message(content, schema_path: Path) -> ByteString:
        schema = avro.schema.parse(schema_path.read_text())

        bytes_writer = io.BytesIO()
        writer = DatumWriter(schema)
        encoder = avro.io.BinaryEncoder(bytes_writer)
        writer.write(content, encoder)

        return bytes_writer.getvalue()
Example #17
def objToBin():
    bytes_writer = io.BytesIO()
    encoder = BinaryEncoder(bytes_writer)
    writer_binary = DatumWriter(sc)
    for d in datum:
        writer_binary.write(d, encoder)

    ab = bytes_writer.getvalue()
    return ab
def compose_data(timestamp, src_vmtype, host_ip, account_id, dest_ip):
    writer = DatumWriter(get_schema())
    bytes_writer = io.BytesIO()
    encoder = avro.io.BinaryEncoder(bytes_writer)
    message = '{"eventName": "Neighbour_Unreachable", "accountId":"%s", "destIp":"%s"}' \
              % (account_id, dest_ip)
    raw_data = message.encode("utf-8")
    writer.write({"timestamp": timestamp, "src": src_vmtype, "host_ip": host_ip, "rawdata":raw_data}, encoder)
    raw_bytes = bytes_writer.getvalue()
    return raw_bytes
class AvroSerializer(object):
    def __init__(self, schema_str):
        schema = avro.schema.parse(schema_str)
        self.writer = DatumWriter(schema)

    def serialize(self, record):
        f = BytesIO()
        encoder = BinaryEncoder(f)
        self.writer.write(record, encoder)
        return f.getvalue()
Example #20
def avro_view(request):
    data = DataSource().data
    buffer = BytesIO()

    schema = SchemaFromJSONData(avro_api_schema)
    writer = DatumWriter(schema)
    encoder = BinaryEncoder(buffer)
    writer.write(data, encoder)

    return HttpResponse(buffer.getvalue(), content_type='application/octet-stream')
def compose_data(timestamp, src_vmtype, host_ip, account_id, proc_name):
    writer = DatumWriter(get_schema())
    bytes_writer = io.BytesIO()
    encoder = avro.io.BinaryEncoder(bytes_writer)
    message = '{"eventName": "Process_Down", "accountId":"%s", "ProcName":"%s"}' \
              % (account_id, proc_name)
    raw_data = message.encode("utf-8")
    writer.write({"timestamp": timestamp, "src": src_vmtype, "host_ip": host_ip, "rawdata":raw_data}, encoder)
    raw_bytes = bytes_writer.getvalue()
    return raw_bytes
def encode(self, data):
    raw_bytes = None
    try:
        writer = DatumWriter(self.schema)
        bytes_writer = io.BytesIO()
        encoder = BinaryEncoder(bytes_writer)
        writer.write(data, encoder)
        raw_bytes = bytes_writer.getvalue()
    except Exception:
        print("Error encoding data", sys.exc_info())
    return raw_bytes
Example #23
class Serializer(object):

    def __init__(self, schema_str):
        schema = avro.schema.parse(schema_str)
        self.writer = DatumWriter(schema)

    def serialize(self, record):
        f = BytesIO()
        encoder = BinaryEncoder(f)
        self.writer.write(record, encoder)
        return f.getvalue()
Example #24
class AvroSerializer(object):

    def __init__(self, schema):
        self.schema = schema
        self.datum_writer = DatumWriter(schema)

    def serialize(self, record):
        f = BytesIO()
        encoder = BinaryEncoder(f)
        self.datum_writer.write(record, encoder)
        return f.getvalue()
Example #25
def avro_encode(cls, json_data, schema=None):
    """Serialize JSON data to Avro binary.
    :param json_data:
    :param schema:
    :return:
    """
    bio = BytesIO()
    binary_encoder = BinaryEncoder(bio)
    dw = DatumWriter(writer_schema=schema or cls.RESPONSE_SCHEMA)
    dw.write(json_data, binary_encoder)
    return bio.getvalue()
def encode(self, schema_file, data):
    raw_bytes = None
    try:
        schema = avro.schema.Parse(open(schema_file).read())
        writer = DatumWriter(schema)
        bytes_writer = io.BytesIO()
        encoder = BinaryEncoder(bytes_writer)
        writer.write(data, encoder)
        raw_bytes = bytes_writer.getvalue()
    except Exception:
        print("Error encoding data", sys.exc_info())
    return raw_bytes
Example #27
        def _avro_serialize(msg):
            opened_schema = load_schema(schema)
            avro_writer = DatumWriter(opened_schema)
            bytesio = BytesIO()
            encoder = BinaryEncoder(bytesio)
            if isinstance(msg, list):
                for m in msg:
                    avro_writer.write(m, encoder)
            else:
                avro_writer.write(msg, encoder)

            return bytesio.getvalue()
Example #28
def serialize_val(val, serializer, schema=None):
    if serializer == "Avro":
        writer = DatumWriter(schema)
        bytes_writer = io.BytesIO()
        encoder = avro.io.BinaryEncoder(bytes_writer)
        writer.write(val, encoder)
        return_val = bytes_writer.getvalue()
    elif serializer == "JSON":
        return_val = json.dumps(val)
    else:
        return_val = val
    return return_val
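For example, the same value sent through each branch (the one-field record schema below is hypothetical, and in newer avro releases parse is spelled Parse):

val_schema = avro.schema.parse(
    '{"type": "record", "name": "V", "fields": [{"name": "x", "type": "int"}]}')
avro_bytes = serialize_val({"x": 1}, "Avro", schema=val_schema)
json_str = serialize_val({"x": 1}, "JSON")
unchanged = serialize_val({"x": 1}, "raw")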
Example #29
def test_sanity():
  """

  Ensures that our "base" and "good" schemas are actually forwards- and
  backwards-compatible

  """
  # fst schema / record
  fst_schema = schema.parse(open("%s/MyRecord.base.avsc" % BASE_DIR).read())
  fst_writer = DatumWriter(writers_schema=fst_schema)
  fst_record = {
      "fieldWithoutDefaultValue": 0,
      "properField": 0,
      "enumField": "A",
      "unionField": None,
      "arrayField": ["world"],
      "mapField": {"hello": "world"},
      "fixedField": "aaaaaaaaaaaaaaaa"
  }

  # sec schema / record
  sec_schema = schema.parse(open("%s/MyRecord.good.avsc" % BASE_DIR).read())
  sec_writer = DatumWriter(writers_schema=sec_schema)
  sec_record = {
      "fieldWithoutDefaultValue": 0,
      "properField2": 0,
      "enumField": "B",
      "unionField": None,
      "arrayField": ["world"],
      "fixedField": "bbbbbbbbbbbbbbbb"
  }

  # Encode record w/ fst
  fst_buf = StringIO.StringIO()
  fst_encoder = BinaryEncoder(fst_buf)
  fst_writer.write(fst_record, fst_encoder)
  fst_data = fst_buf.getvalue()

  # Encode record w/ sec
  sec_buf = StringIO.StringIO()
  sec_encoder = BinaryEncoder(sec_buf)
  sec_writer.write(sec_record, sec_encoder)
  sec_data = sec_buf.getvalue()

  # writers == fst, readers == sec
  sec_reader = DatumReader(writers_schema=fst_schema, readers_schema=sec_schema)
  sec_decoder = BinaryDecoder(StringIO.StringIO(fst_data))
  sec_from_fst = sec_reader.read(sec_decoder) # no exception -> good

  # writers == sec, readers == fst
  fst_reader = DatumReader(writers_schema=sec_schema, readers_schema=fst_schema)
  fst_decoder = BinaryDecoder(StringIO.StringIO(sec_data))
  fst_from_sec = fst_reader.read(fst_decoder) # no exception -> good
Example #30
def write_value(schema: TypedSchema, bio: io.BytesIO, value: dict):
    if schema.schema_type is SchemaType.AVRO:
        writer = DatumWriter(schema.schema)
        writer.write(value, BinaryEncoder(bio))
    elif schema.schema_type is SchemaType.JSONSCHEMA:
        try:
            schema.schema.validate(value)
        except ValidationError as e:
            raise InvalidPayload from e
        bio.write(json_encode(value, binary=True))
    else:
        raise ValueError("Unknown schema type")
Example #31
class Serializer(object):
    def __init__(self, schema_str):
        if sys.version_info >= (3, ):
            schema = avro.schema.Parse(schema_str)
        else:
            schema = avro.schema.parse(schema_str)
        self.writer = DatumWriter(schema)

    def serialize(self, record):
        f = string_io()
        encoder = BinaryEncoder(f)
        self.writer.write(record, encoder)
        return f.getvalue()
Example #32
def produce(self):
    writer = DatumWriter(self.schema)
    for i in range(0, 100):
        # use a fresh buffer per record so each message holds exactly one
        # record instead of all records written so far
        bytes_writer = io.BytesIO()
        encoder = avro.io.BinaryEncoder(bytes_writer)
        writer.write(
            {
                "name": self.names[random.randint(0, 9)],
                "favorite_color": self.colors[random.randint(0, 4)],
                "favorite_number": random.randint(0, 10)
            }, encoder)
        self.kafka_producer.send(topic=self.topic_name,
                                 value=bytes_writer.getvalue())
Example #33
def serialize_avro(payload_str, schema):
    """
    Function used to serialize a json event to binary format based on avro schema
    :param schema: avro schema of payload
    :param payload_str: event data in json string format
    :return: avro serialized binary data and corresponding schema
    """
    payload_json = json.loads(payload_str)
    writer = DatumWriter(schema)
    bytes_writer = io.BytesIO()
    encoder = BinaryEncoder(bytes_writer)
    writer.write(payload_json, encoder)
    raw_bytes = bytes_writer.getvalue()
    return raw_bytes, schema
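Calling it is then straightforward; a sketch with a hypothetical one-field event schema (in newer avro releases parse is spelled Parse):

event_schema = avro.schema.parse(
    '{"type": "record", "name": "Event", "fields": [{"name": "id", "type": "string"}]}')
raw, used_schema = serialize_avro('{"id": "e-1"}', event_schema)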
Example #34
    def __encode(self, data, schema):

        raw_bytes = None
        try:
            writer = DatumWriter(schema)
            bytes_writer = io.BytesIO()
            encoder = BinaryEncoder(bytes_writer)
            writer.write(data, encoder)
            raw_bytes = bytes_writer.getvalue()

        except Exception as e:
            print(f'Error encoding data: {repr(e)}')

        return raw_bytes
Example #36
def publish_avro_records(project_id, topic_id, avsc_file):
    """Pulbish a BINARY or JSON encoded message to a topic configured with an Avro schema."""
    # [START pubsub_publish_avro_records]
    from avro.io import BinaryEncoder, DatumWriter
    import avro
    import io
    import json
    from google.api_core.exceptions import NotFound
    from google.cloud.pubsub import PublisherClient
    from google.pubsub_v1.types import Encoding

    # TODO(developer): Replace these variables before running the sample.
    # project_id = "your-project-id"
    # topic_id = "your-topic-id"
    # avsc_file = "path/to/an/avro/schema/file/(.avsc)/formatted/in/json"

    publisher_client = PublisherClient()
    topic_path = publisher_client.topic_path(project_id, topic_id)

    # Prepare to write Avro records to the binary output stream.
    avro_schema = avro.schema.parse(open(avsc_file, "rb").read())
    writer = DatumWriter(avro_schema)
    bout = io.BytesIO()

    # Prepare some data using a Python dictionary that matches the Avro schema
    record = {"name": "Alaska", "post_abbr": "AK"}

    try:
        # Get the topic encoding type.
        topic = publisher_client.get_topic(request={"topic": topic_path})
        encoding = topic.schema_settings.encoding

        # Encode the data according to the message serialization type.
        if encoding == Encoding.BINARY:
            encoder = BinaryEncoder(bout)
            writer.write(record, encoder)
            data = bout.getvalue()
            print(f"Preparing a binary-encoded message:\n{data}")
        elif encoding == Encoding.JSON:
            data = json.dumps(record).encode("utf-8")
            print(f"Preparing a JSON-encoded message:\n{data}")
        else:
            print(f"No encoding specified in {topic_path}. Abort.")
            exit(0)

        future = publisher_client.publish(topic_path, data)
        print(f"Published message ID: {future.result()}")

    except NotFound:
        print(f"{topic_id} not found.")
Example #38
    def __encode(self, data, schema=None):
        if schema is None:
            out_schema = self.out_schema
        else:
            out_schema = schema

        raw_bytes = None
        try:
            writer = DatumWriter(out_schema)
            bytes_writer = io.BytesIO()
            encoder = BinaryEncoder(bytes_writer)
            writer.write(data, encoder)
            raw_bytes = bytes_writer.getvalue()
        except Exception:
            print("Error encoding data", sys.exc_info())
        return raw_bytes
def avroSerialize(message):
    """Takes a message, maps its fields onto our Avro schema,
    and returns the Avro-serialized bytes."""

    # set up a writer to serialize data
    writer = DatumWriter(schema)

    # we use a try block because the RTM may return messages
    # that are not actual messages that fit our Avro schema
    try:

        # set up a new converted message
        new_message = {}
        new_message['user_id'] = message['user']
        new_message['record_type'] = message['type']
        new_message['text'] = message['text']
        new_message['channel'] = message['channel']
        new_message['time_stamp'] = message['ts']

        # serialize the message into a byte buffer and return the bytes
        buf = io.BytesIO()
        writer.write(new_message, BinaryEncoder(buf))
        return buf.getvalue()

    # if we fail to write successfully, it's probably because we were
    # attempting to serialize a message we don't care about, like a
    # status change; in that case return nothing
    except Exception:
        return None
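A sketch of the happy path, with a minimal stand-in for a Slack RTM message:

rtm_message = {"user": "U123", "type": "message", "text": "hello",
               "channel": "C456", "ts": "1700000000.000100"}
avro_bytes = avroSerialize(rtm_message)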
#
# NB: the AvroOutputReader specific portion begins here
#

def new_column(name, value):
    column = dict()
    column['name'] = '%s' % name
    column['value'] = '%s' % value
    column['timestamp'] = int(time.time() * 1e6)
    column['ttl'] = 0
    return column

# parse the current avro schema
proto = avro.protocol.parse(open('cassandra.avpr').read())
schema = proto.types_dict['StreamingMutation']
# open an avro encoder and writer for stdout
enc = BinaryEncoder(sys.stdout.buffer)
writer = DatumWriter(schema)

# output a series of objects matching 'StreamingMutation' in the Avro interface
smutation = dict()
try:
    for word, count in word2count.items():
        smutation['key'] = word
        smutation['mutation'] = {'column_or_supercolumn': {'column': new_column('count', count)}}
        writer.write(smutation, enc)
finally:
    sys.stdout.flush()

producer = KafkaProducer(bootstrap_servers = "localhost:9092", compression_type = "gzip")

# Kafka topic
topic = "tnx"

# Path to user.avsc avro schema
schema_path = "/home/cloudera/workspace/kafka-clients-python/transactions.avsc"
schema = avro.schema.Parse(open(schema_path).read())
print("Schema", schema.to_json())

writer = DatumWriter(schema)

def get_record():
    return {"id": "123"
            , "merchant_id": "m123"
            , "customer_id": "c345"
            , "amount": 100.1
            , "category": "pos"
            , "timestamp": int(time())}


for i in range(10):
    record = get_record()
    # fresh buffer per message, otherwise each send would repeat all
    # previously written records
    bytes_writer = io.BytesIO()
    encoder = avro.io.BinaryEncoder(bytes_writer)
    writer.write(record, encoder)
    producer.send(topic, bytes_writer.getvalue())
producer.flush()

Example #42
"""Python avro official implementation encoding benchmark."""

from io import BytesIO
from itertools import repeat
from time import time
from avro.datafile import DataFileReader, DataFileWriter
from avro.io import DatumReader, DatumWriter, BinaryEncoder, BinaryDecoder
import sys


LOOPS = 1

with open(sys.argv[1], "rb") as reader:
  datum_reader = DatumReader()
  file_reader = DataFileReader(reader, datum_reader)
  SCHEMA = datum_reader.writers_schema
  RECORDS = list(file_reader)

buf = BytesIO()
datum_writer = DatumWriter(SCHEMA)
start = time()
n = 0
for _ in repeat(None, LOOPS):
  for record in RECORDS:
    buf.seek(0)
    encoder = BinaryEncoder(buf)
    datum_writer.write(record, encoder)
    n += 1
print(1000. * (time() - start) / n)
Example #43
def serialize(data):
    writer = DatumWriter(schema)
    bytes_writer = io.BytesIO()
    encoder = avro.io.BinaryEncoder(bytes_writer)
    writer.write(data, encoder)
    return bytes_writer.getvalue()
def createAvroMemoryRecord(data, schema):
    f = BytesIO()
    encoder = BinaryEncoder(f)
    writer = DatumWriter(schema)
    writer.write(dict(data), encoder)
    return f.getvalue()
Example #45
def toKey(self, x, schema):
    buf = io.BytesIO()
    writer = DatumWriter(schema)
    writer.write(x, BinaryEncoder(buf))
    buf.flush()
    return base64.b64encode(buf.getvalue())