コード例 #1
0
def test_sanity():
  """
  Ensures that our "base" and "good" schemas are actually forwards- and
  backwards-compatible.

  Each record is encoded with its own schema, then decoded using the other
  schema as the reader; an incompatibility would raise during read().
  """
  # fst schema / record.  Use a context manager so the schema file is
  # closed promptly instead of leaking until garbage collection.
  with open("%s/MyRecord.base.avsc" % BASE_DIR) as schema_file:
    fst_schema = schema.parse(schema_file.read())
  fst_writer = DatumWriter(writers_schema=fst_schema)
  fst_record = {
      "fieldWithoutDefaultValue": 0,
      "properField": 0,
      "enumField": "A",
      "unionField": None,
      "arrayField": ["world"],
      "mapField": {"hello": "world"},
      "fixedField": "aaaaaaaaaaaaaaaa"
  }

  # sec schema / record
  with open("%s/MyRecord.good.avsc" % BASE_DIR) as schema_file:
    sec_schema = schema.parse(schema_file.read())
  sec_writer = DatumWriter(writers_schema=sec_schema)
  sec_record = {
      "fieldWithoutDefaultValue": 0,
      "properField2": 0,
      "enumField": "B",
      "unionField": None,
      "arrayField": ["world"],
      "fixedField": "bbbbbbbbbbbbbbbb"
  }

  # Encode record w/ fst
  fst_buf = StringIO.StringIO()
  fst_encoder = BinaryEncoder(fst_buf)
  fst_writer.write(fst_record, fst_encoder)
  fst_data = fst_buf.getvalue()

  # Encode record w/ sec
  sec_buf = StringIO.StringIO()
  sec_encoder = BinaryEncoder(sec_buf)
  sec_writer.write(sec_record, sec_encoder)
  sec_data = sec_buf.getvalue()

  # writers == fst, readers == sec
  sec_reader = DatumReader(writers_schema=fst_schema, readers_schema=sec_schema)
  sec_decoder = BinaryDecoder(StringIO.StringIO(fst_data))
  sec_from_fst = sec_reader.read(sec_decoder)  # no exception -> good

  # writers == sec, readers == fst
  fst_reader = DatumReader(writers_schema=sec_schema, readers_schema=fst_schema)
  fst_decoder = BinaryDecoder(StringIO.StringIO(sec_data))
  fst_from_sec = fst_reader.read(fst_decoder)  # no exception -> good
コード例 #2
0
    def respond(self, call_request):
        """Handle one Avro RPC call and return the encoded response bytes.

        Decodes the handshake and request from *call_request*, invokes the
        matching local message handler, and encodes the response metadata
        plus either the response datum or a remote exception.
        """
        buffer_reader = io.BytesIO(call_request)
        buffer_decoder = BinaryDecoder(buffer_reader)
        buffer_writer = io.BytesIO()
        buffer_encoder = BinaryEncoder(buffer_writer)
        error = None
        response_metadata = {}
        try:
            remote_protocol = self.process_handshake(buffer_decoder,
                                                     buffer_encoder)
            # Handshake incomplete or no local protocol: reply with whatever
            # the handshake already wrote (possibly an empty buffer).
            if remote_protocol is None or self.local_protocol is None:
                return buffer_writer.getvalue()

            # Read (and discard) the per-call request metadata map.
            DatumReader(schema.parse(
                '{"type": "map", "values": "bytes"}')).read(buffer_decoder)
            remote_message_name = buffer_decoder.read_utf8()

            # The requested message must exist in both protocols.
            remote_message = remote_protocol.messages.get(remote_message_name)
            if remote_message is None:
                fail_msg = 'Unknown remote message: %s' % remote_message_name
                raise schema.AvroException(fail_msg)
            local_message = self.local_protocol.messages.get(
                remote_message_name)
            if local_message is None:
                fail_msg = 'Unknown local message: %s' % remote_message_name
                raise schema.AvroException(fail_msg)
            # Decode the request with the remote (writer's) schema resolved
            # against the local (reader's) schema.
            writers_schema = remote_message.request
            readers_schema = local_message.request
            request = self.read_request(writers_schema, readers_schema,
                                        buffer_decoder)

            response = None
            try:
                response = self.invoke(self.local_protocol, local_message,
                                       request)
            except AvroRemoteException as e:
                error = e
            except Exception as e:
                # Wrap unexpected handler failures so they can be reported to
                # the caller as a declared Avro error.
                error = AvroRemoteException(str(e))

            # Response framing: metadata map, then an error flag, then either
            # the response datum or the error datum.
            DatumWriter(
                schema.parse('{"type": "map", "values": "bytes"}')).write(
                    response_metadata, buffer_encoder)
            buffer_encoder.write_boolean(error is not None)
            if error is None:
                writers_schema = local_message.response
                self.write_response(writers_schema, response, buffer_encoder)
            else:
                writers_schema = local_message.errors
                self.write_error(writers_schema, error, buffer_encoder)
        except schema.AvroException as e:
            # Protocol-level failure: start a fresh buffer (discarding any
            # partial payload) containing only metadata + the error string.
            error = AvroRemoteException(str(e))
            buffer_encoder = BinaryEncoder(io.BytesIO())
            DatumWriter(
                schema.parse('{"type": "map", "values": "bytes"}')).write(
                    response_metadata, buffer_encoder)
            buffer_encoder.write_boolean(True)
            self.write_error(schema.parse('["string"]'), error, buffer_encoder)
            return buffer_encoder.writer.getvalue()
        return buffer_writer.getvalue()
コード例 #3
0
def serialize(myschema, myobject):
    """Encode *myobject* with *myschema* and return the raw Avro bytes."""
    out = io.BytesIO()
    DatumWriter(writer_schema=myschema).write(myobject, BinaryEncoder(out))
    return out.getvalue()
コード例 #4
0
    def serialize(
            self,
            data,  # type: ObjectType
            schema,  # type: Union[str, bytes, avro.schema.Schema]
    ):
        # type: (ObjectType, Union[str, bytes, avro.schema.Schema]) -> bytes
        """Encode *data* into its Avro binary representation.

        Schema must be an Avro RecordSchema:
        https://avro.apache.org/docs/1.10.0/gettingstartedpython.html#Defining+a+schema
        :param data: An object to serialize
        :type data: ObjectType
        :param schema: An Avro RecordSchema
        :type schema: str
        :returns: Encoded bytes
        :rtype: bytes
        """
        if not schema:
            raise ValueError("Schema is required in Avro serializer.")

        datum_writer = self.get_schema_writer(schema)

        # The stream is closed on exit; its contents are captured first.
        with BytesIO() as buffer_stream:
            datum_writer.write(data, BinaryEncoder(buffer_stream))
            return buffer_stream.getvalue()
コード例 #5
0
def serialize(thing):
    """Serialize *thing* with the module-level SCHEMA and return the payload."""
    out = StringIO()
    try:
        DatumWriter(SCHEMA).write(thing, BinaryEncoder(out))
        return out.getvalue()
    finally:
        out.close()
コード例 #6
0
        def _avro_serialize(msg):
            """Encode *msg* with the shared topic's schema; return raw bytes."""
            out = BytesIO()
            writer = DatumWriter(self.shared.topic['schema'])
            writer.write(msg, BinaryEncoder(out))
            return out.getvalue()
コード例 #7
0
def send_avro_record_to_kafka(topic, value, bootstrap_servers, avro_schema_json):
    """Serialize *value* with the given Avro schema and publish it to *topic*."""
    value_schema = avro.schema.parse(avro_schema_json)

    producer = Producer({
        "bootstrap.servers": bootstrap_servers,
        "request.timeout.ms": "1000",
    })

    payload = io.BytesIO()
    DatumWriter(value_schema).write(value, BinaryEncoder(payload))

    try:
        producer.produce(topic=topic, value=payload.getvalue())
    except Exception as e:
        print(
            f"Exception while producing record value - {value} to topic - {topic}: {e}"
        )
    else:
        print(f"Successfully producing record value - {value} to topic - {topic}")

    producer.flush()
コード例 #8
0
def main(args):
    """Read records from an Avro container file, re-encode each as
    base64-wrapped Avro binary, and publish them as JSON messages to AMS.
    """
    log = logging.getLogger(__name__)
    log.setLevel(logging.INFO)

    sys_log = logging.handlers.SysLogHandler("/dev/log")
    sys_format = logging.Formatter(
        '%(name)s[%(process)d]: %(levelname)s %(message)s')
    sys_log.setFormatter(sys_format)

    log.addHandler(sys_log)

    # Avro container files are binary: open with "rb" (text mode breaks on
    # Python 3), and use a context manager so the reader is closed.
    with DataFileReader(open(args.avro_file, "rb"), DatumReader()) as reader:
        schema = reader.datum_reader.writers_schema
        # One writer serves every row; no need to rebuild it per record.
        writer = DatumWriter(schema)

        for i, row in enumerate(reader):
            log.debug("Consumer row:" + str(row))
            bytes_writer = io.BytesIO()
            encoder = BinaryEncoder(bytes_writer)
            writer.write(row, encoder)
            raw_bytes = bytes_writer.getvalue()
            # b64encode returns bytes; decode so json.dumps can serialize it.
            b64enc = base64.b64encode(raw_bytes).decode("ascii")
            msg = {"messages": [{"data": b64enc}]}

            json_str = json.dumps(msg)
            log.debug("json msg:" + json_str)
            publish(json_str, args.ams_endpoint, args.ams_project, args.ams_topic,
                    args.ams_key, log)
コード例 #9
0
 def toKey(self, x, avroType):
     """JSON-normalize *x*, Avro-encode it, and return it base64-encoded."""
     x = jsonEncoder(avroType, x, False)
     # Renamed from ``bytes``: don't shadow the builtin type.
     buf = io.BytesIO()
     writer = DatumWriter(avroType.schema)
     writer.write(x, BinaryEncoder(buf))
     buf.flush()
     return base64.b64encode(buf.getvalue())
コード例 #10
0
ファイル: main.py プロジェクト: robidev/rsyslog_avro
 def to_avro(self):
     """Serialize this record (via to_avro_dict) to an Avro binary bytearray."""
     datum_writer = DatumWriter()
     datum_writer.set_writer_schema(SYSLOG_AVRO_SCHEMA)
     out = BytesIO()
     datum_writer.write(self.to_avro_dict(), BinaryEncoder(out))
     return bytearray(out.getvalue())
コード例 #11
0
def avro_serialization(value_schema, x):
    """Avro-encode *x* against *value_schema*; return the serialized bytes."""
    buf = BytesIO()
    try:
        writer = DatumWriter(writer_schema=value_schema)
        writer.write_data(value_schema, x, BinaryEncoder(buf))
        return buf.getvalue()
    finally:
        buf.close()
コード例 #12
0
 def __call__(self, state, scope, pos, paramTypes, x):
     """Encode *x* (whose Avro type is paramTypes[0]) and return it as a string."""
     parsed_schema = avro.schema.Parse(json.dumps(paramTypes[0]))
     datum = untagUnion(x, paramTypes[0])
     buf = io.BytesIO()
     DatumWriter(parsed_schema).write(datum, BinaryEncoder(buf))
     buf.flush()
     return bytesToString(buf.getvalue())
コード例 #13
0
def objToBin():
    """Encode every record in the module-level ``datum`` list into one buffer."""
    buf = io.BytesIO()
    binary_encoder = BinaryEncoder(buf)
    datum_writer = DatumWriter(sc)
    for record in datum:
        datum_writer.write(record, binary_encoder)

    return buf.getvalue()
コード例 #14
0
def avro_view(request):
    """Return the current DataSource payload Avro-encoded as an octet-stream."""
    payload = DataSource().data
    out = BytesIO()

    writer = DatumWriter(SchemaFromJSONData(avro_api_schema))
    writer.write(payload, BinaryEncoder(out))

    return HttpResponse(out.getvalue(), content_type='application/octet-stream')
コード例 #15
0
ファイル: avro_utils.py プロジェクト: wgy1109/python-utils
 def avro_encode(cls, json_data, schema=None):
     """Serialize *json_data* to Avro binary.

     :param json_data: mapping to encode
     :param schema: writer schema; falls back to ``cls.RESPONSE_SCHEMA``
     :return: serialized bytes
     """
     out = BytesIO()
     encoder = BinaryEncoder(out)
     writer = DatumWriter(writer_schema=schema or cls.RESPONSE_SCHEMA)
     writer.write(json_data, encoder)
     return out.getvalue()
コード例 #16
0
 def encode(self, data):
     """Avro-encode *data* with ``self.schema``.

     Returns the serialized bytes, or None when encoding fails (the error
     is printed, preserving the original best-effort behavior).
     """
     raw_bytes = None
     try:
         writer = DatumWriter(self.schema)
         bytes_writer = io.BytesIO()
         encoder = BinaryEncoder(bytes_writer)
         writer.write(data, encoder)
         raw_bytes = bytes_writer.getvalue()
     except Exception:
         # Narrowed from a bare ``except:`` so SystemExit and
         # KeyboardInterrupt still propagate.
         print("Error encoding data", sys.exc_info())
     return raw_bytes
コード例 #17
0
 def encode(self, schema_file, data):
     """Parse the Avro schema at *schema_file* and encode *data* with it.

     Returns the serialized bytes, or None on failure (errors are printed,
     preserving the original best-effort behavior).
     """
     raw_bytes = None
     try:
         # Close the schema file instead of leaking the handle.
         with open(schema_file) as f:
             schema = avro.schema.Parse(f.read())
         writer = DatumWriter(schema)
         bytes_writer = io.BytesIO()
         encoder = BinaryEncoder(bytes_writer)
         writer.write(data, encoder)
         raw_bytes = bytes_writer.getvalue()
     except Exception:
         # Narrowed from a bare ``except:`` so SystemExit and
         # KeyboardInterrupt still propagate.
         print("Error encode data", sys.exc_info())
     return raw_bytes
コード例 #18
0
        def _avro_serialize(msg):
            """Encode one message — or each message of a list — with the
            loaded schema, returning a single concatenated byte string."""
            avro_writer = DatumWriter(load_schema(schema))
            out = BytesIO()
            encoder = BinaryEncoder(out)
            records = msg if isinstance(msg, list) else [msg]
            for record in records:
                avro_writer.write(record, encoder)

            return out.getvalue()
コード例 #19
0
def write_value(schema: TypedSchema, bio: io.BytesIO, value: dict):
    """Serialize *value* into *bio* according to the schema's type.

    Raises InvalidPayload for JSON-schema validation failures and
    ValueError for unsupported schema types.
    """
    if schema.schema_type is SchemaType.AVRO:
        DatumWriter(schema.schema).write(value, BinaryEncoder(bio))
        return
    if schema.schema_type is SchemaType.JSONSCHEMA:
        try:
            schema.schema.validate(value)
        except ValidationError as e:
            raise InvalidPayload from e
        bio.write(json_encode(value, binary=True))
        return
    raise ValueError("Unknown schema type")
コード例 #20
0
ファイル: avrolib.py プロジェクト: wtj/pydoop
 def __serialize_as_needed(self, key, value):
     """Avro-serialize key and/or value when AVRO_OUTPUT is configured.

     Serialization happens only in the reducer (or a map-only job) and only
     for the modes ('K'/'V') that have a registered datum writer; other
     values pass through unchanged.
     """
     out_kv = {'K': key, 'V': value}
     jc = self.job_conf
     if AVRO_OUTPUT in jc and (self.is_reducer() or self.__is_map_only()):
         # ``items()`` works on both Python 2 and 3; ``iteritems()`` is 2-only.
         for mode, record in out_kv.items():
             datum_writer = self.__datum_writers.get(mode)
             if datum_writer is not None:
                 f = StringIO()
                 encoder = BinaryEncoder(f)
                 datum_writer.write(record, encoder)
                 out_kv[mode] = f.getvalue()
     return out_kv['K'], out_kv['V']
コード例 #21
0
def serialize(tweets):
    """Avro-encode a list of tweets as an Avro array.

    Returns the serialized bytes, or None when *tweets* is None.
    """
    if tweets is None:
        return None

    # Close the schema file instead of leaking the handle.
    with open(dir_path + "/tweet.schema.avsc", "rb") as f:
        schema_tweet = avro.schema.Parse(f.read())

    writer = DatumWriter()
    bytes_writer = BytesIO()
    encoder = BinaryEncoder(bytes_writer)
    writer.write_array(schema_tweet, tweets, encoder)
    return bytes_writer.getvalue()
コード例 #22
0
ファイル: schema.py プロジェクト: renovate-bot/python-pubsub
def publish_avro_records(project_id, topic_id, avsc_file):
    """Publish a BINARY or JSON encoded message to a topic configured with an Avro schema."""
    # [START pubsub_publish_avro_records]
    from avro.io import BinaryEncoder, DatumWriter
    import avro
    import io
    import json
    from google.api_core.exceptions import NotFound
    from google.cloud.pubsub import PublisherClient
    from google.pubsub_v1.types import Encoding

    # TODO(developer): Replace these variables before running the sample.
    # project_id = "your-project-id"
    # topic_id = "your-topic-id"
    # avsc_file = "path/to/an/avro/schema/file/(.avsc)/formatted/in/json"

    publisher_client = PublisherClient()
    topic_path = publisher_client.topic_path(project_id, topic_id)

    # Prepare to write Avro records to the binary output stream.  Use a
    # context manager so the schema file handle is closed, not leaked.
    with open(avsc_file, "rb") as f:
        avro_schema = avro.schema.parse(f.read())
    writer = DatumWriter(avro_schema)
    bout = io.BytesIO()

    # Prepare some data using a Python dictionary that matches the Avro schema
    record = {"name": "Alaska", "post_abbr": "AK"}

    try:
        # Get the topic encoding type.
        topic = publisher_client.get_topic(request={"topic": topic_path})
        encoding = topic.schema_settings.encoding

        # Encode the data according to the message serialization type.
        if encoding == Encoding.BINARY:
            encoder = BinaryEncoder(bout)
            writer.write(record, encoder)
            data = bout.getvalue()
            print(f"Preparing a binary-encoded message:\n{data}")
        elif encoding == Encoding.JSON:
            data = json.dumps(record).encode("utf-8")
            print(f"Preparing a JSON-encoded message:\n{data}")
        else:
            print(f"No encoding specified in {topic_path}. Abort.")
            exit(0)

        future = publisher_client.publish(topic_path, data)
        print(f"Published message ID: {future.result()}")

    except NotFound:
        print(f"{topic_id} not found.")
コード例 #23
0
ファイル: test_pyspark.py プロジェクト: CX000/pyspark_avro
def serialize_avro(payload_str, schema):
    """
    Serialize a JSON event string to binary format based on an Avro schema.
    :param schema: avro schema of payload
    :param payload_str: event data in json string format
    :return: avro serialized binary data and corresponding schema
    """
    event = json.loads(payload_str)
    out = io.BytesIO()
    DatumWriter(schema).write(event, BinaryEncoder(out))
    return out.getvalue(), schema
コード例 #24
0
    def __encode(self, data, schema):
        """Best-effort Avro encoding of *data*; returns bytes or None on error."""
        raw_bytes = None
        try:
            out = io.BytesIO()
            DatumWriter(schema).write(data, BinaryEncoder(out))
            raw_bytes = out.getvalue()

        except Exception as e:
            print(f'Error encoding data: {repr(e)}')

        return raw_bytes
コード例 #25
0
    def __init__(self,
                 boostrap_servers,
                 topic,
                 deployment_id,
                 data_scheme_filename,
                 label_scheme_filename,
                 description='',
                 validation_rate=0,
                 control_topic='control',
                 group_id='sink'):
        """Avro sink: loads the data and label schemas and prepares
        reusable writers/encoders for each.

        :param data_scheme_filename: path to the Avro schema for data records
        :param label_scheme_filename: path to the Avro schema for labels
        (remaining parameters are forwarded to the base class unchanged)
        """
        input_format = 'AVRO'
        super().__init__(boostrap_servers, topic, deployment_id, input_format,
                         description, validation_rate, control_topic, group_id)

        self.data_scheme_filename = data_scheme_filename

        # Read the schema files with context managers so the file handles
        # are closed instead of leaking.
        with open(self.data_scheme_filename, "r") as f:
            self.data_schema = f.read()

        self.avro_data_schema = avro.schema.Parse(self.data_schema)
        self.data_writer = DatumWriter(self.avro_data_schema)

        self.label_scheme_filename = label_scheme_filename
        with open(self.label_scheme_filename, "r") as f:
            self.label_schema = f.read()

        self.avro_label_schema = avro.schema.Parse(self.label_schema)
        self.label_writer = DatumWriter(self.avro_label_schema)

        self.data_io = io.BytesIO()
        self.label_io = io.BytesIO()
        self.data_encoder = BinaryEncoder(self.data_io)
        self.label_encoder = BinaryEncoder(self.label_io)

        self.input_config = {
            'data_scheme': self.data_schema,
            'label_scheme': self.label_schema,
        }
コード例 #26
0
ファイル: KafkaPC.py プロジェクト: PriiOmega/KOARCH
    def __encode(self, data, schema=None):
        """Avro-encode *data* with *schema* (default: ``self.out_schema``).

        Returns the serialized bytes, or None when encoding fails (the
        error is printed, preserving the original best-effort behavior).
        """
        if schema is None:
            out_schema = self.out_schema
        else:
            out_schema = schema

        raw_bytes = None
        try:
            writer = DatumWriter(out_schema)
            bytes_writer = io.BytesIO()
            encoder = BinaryEncoder(bytes_writer)
            writer.write(data, encoder)
            raw_bytes = bytes_writer.getvalue()
        except Exception:
            # Narrowed from a bare ``except:`` so SystemExit and
            # KeyboardInterrupt still propagate.
            print("Error encoding data", sys.exc_info())
        return raw_bytes
コード例 #27
0
def encode_avro(msg, writer):
    """Serialize a message dict to Avro binary bytes.

    Args:
        msg ([dict]): dictionary with the data to serialize
        writer ([DatumWriter]): Avro datum writer configured with the schema

    Returns:
        [bytes]: the encoded message
    """

    out = io.BytesIO()
    writer.write(msg, BinaryEncoder(out))
    return out.getvalue()
コード例 #28
0
ファイル: kfkpywrapper.py プロジェクト: hamedhsn/wikirace
    def produce(self, msg):
        """Publish *msg* to Kafka using the configured serialization format."""
        if self.ser_type == kfkcfg.SERIALIZATIO_JSON:
            payload = json.dumps(msg, default=json_util.default)
            future = self.kfkprod.produce(bytes(payload, 'utf-8'))

        elif self.ser_type == kfkcfg.SERIALIZATIO_AVRO:
            buf = io.BytesIO()
            DatumWriter(self.avro_schema).write(msg, BinaryEncoder(buf))
            future = self.kfkprod.produce(buf.getvalue())
コード例 #29
0
ファイル: avro_inference.py プロジェクト: paperwait/kafka-ml
    def __init__(self,
                 boostrap_servers,
                 topic,
                 data_scheme_filename,
                 group_id='sink'):
        """Inference sink: loads the data schema and prepares a reusable
        Avro writer/encoder plus a Kafka producer.

        :param boostrap_servers: Kafka bootstrap servers
        :param topic: destination topic name
        :param data_scheme_filename: path to the Avro schema for data records
        :param group_id: consumer group id (default 'sink')
        """
        self.boostrap_servers = boostrap_servers
        self.topic = topic

        self.data_scheme_filename = data_scheme_filename

        # Context manager so the schema file handle is closed, not leaked.
        with open(self.data_scheme_filename, "r") as f:
            self.data_schema = f.read()

        self.avro_data_schema = avro.schema.Parse(self.data_schema)
        self.data_writer = DatumWriter(self.avro_data_schema)

        self.data_io = io.BytesIO()
        self.data_encoder = BinaryEncoder(self.data_io)
        self.__producer = KafkaProducer(
            bootstrap_servers=self.boostrap_servers)
コード例 #30
0
def send_avro_record_to_kafka(topic, value, bootstrap_servers,
                              avro_schema_json):
    """Avro-encode *value* and send it to *topic* via kafka-python."""
    value_schema = avro.schema.parse(avro_schema_json)

    producer = KafkaProducer(bootstrap_servers=bootstrap_servers)

    buf = io.BytesIO()
    DatumWriter(value_schema).write(value, BinaryEncoder(buf))

    try:
        producer.send(topic=topic, value=buf.getvalue())
    except Exception as e:
        print(
            f"Exception while producing record value - {value} to topic - {topic}: {e}"
        )
    else:
        print(
            f"Successfully producing record value - {value} to topic - {topic}"
        )

    producer.flush()