Example #1
    def _get_decoder_func(self, schema_id, payload, is_key=False):
        if schema_id in self.id_to_decoder_func:
            return self.id_to_decoder_func[schema_id]

        # fetch the writer schema from the schema registry
        try:
            writer_schema_obj = self.registry_client.get_by_id(schema_id)
        except ClientError as e:
            raise SerializerError("unable to fetch schema with id %d: %s" %
                                  (schema_id, str(e)))

        if writer_schema_obj is None:
            raise SerializerError("unable to fetch schema with id %d" %
                                  schema_id)

        curr_pos = payload.tell()

        reader_schema_obj = (self.reader_key_schema
                             if is_key else self.reader_value_schema)

        if HAS_FAST:
            # try to use fast avro
            try:
                fast_avro_writer_schema = parse_schema(
                    writer_schema_obj.to_json())
                fast_avro_reader_schema = parse_schema(
                    reader_schema_obj.to_json())
                schemaless_reader(payload, fast_avro_writer_schema)

                # If we reach this point, this means we have fastavro and it can
                # do this deserialization. Rewind since this method just determines
                # the reader function and we need to deserialize again along the
                # normal path.
                payload.seek(curr_pos)

                self.id_to_decoder_func[schema_id] = (
                    lambda p: schemaless_reader(
                        p, fast_avro_writer_schema, fast_avro_reader_schema))
                return self.id_to_decoder_func[schema_id]
            except Exception:
                # fastavro failed; fall through to standard avro below.
                pass

        # Reaching here means fastavro is unavailable or could not handle the
        # schema, so delegate to the slower standard avro path.
        # rewind to the start of the Avro body
        payload.seek(curr_pos)
        # The DatumReader keyword names differ between the py2 and py3 avro
        # packages, hence no keyword arguments here; revisit later:
        # https://github.com/apache/avro/blob/master/lang/py3/avro/io.py#L459
        # https://github.com/apache/avro/blob/master/lang/py/src/avro/io.py#L423
        # def __init__(self, writers_schema=None, readers_schema=None)
        # def __init__(self, writer_schema=None, reader_schema=None)
        avro_reader = avro.io.DatumReader(writer_schema_obj, reader_schema_obj)

        def decoder(p):
            bin_decoder = avro.io.BinaryDecoder(p)
            return avro_reader.read(bin_decoder)

        self.id_to_decoder_func[schema_id] = decoder
        return self.id_to_decoder_func[schema_id]
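The cached decoder above relies on Avro schema resolution: each record is read with the writer's schema and projected onto the configured reader schema. A minimal standalone sketch of that resolution using fastavro (the User schemas here are illustrative, not taken from the class above):

import io
from fastavro import parse_schema, schemaless_reader, schemaless_writer

writer = parse_schema({
    "type": "record", "name": "User",
    "fields": [{"name": "name", "type": "string"},
               {"name": "age", "type": "int"}]})
# The reader schema drops "age" and adds a defaulted "email" field.
reader = parse_schema({
    "type": "record", "name": "User",
    "fields": [{"name": "name", "type": "string"},
               {"name": "email", "type": "string", "default": ""}]})

buf = io.BytesIO()
schemaless_writer(buf, writer, {"name": "ada", "age": 36})
buf.seek(0)
# Resolution fills the new field from its default and skips the dropped one.
print(schemaless_reader(buf, writer, reader))  # {'name': 'ada', 'email': ''}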
Example #2
    def produce(self, **kwargs):
        """
            Sends a message to Kafka, encoding it with the specified Avro schema.
            :param str topic: topic name
            :param dict value: value to serialize
            :param value_schema: Avro schema for the value
            :param dict key: key to serialize
            :param key_schema: Avro schema for the key
            :raises SerializerError: if a schema is missing or encoding fails
        """
        # get schemas from kwargs if defined
        key_schema = kwargs.pop('key_schema', self._key_schema)
        value_schema = kwargs.pop('value_schema', self._value_schema)
        topic = kwargs.pop('topic', None)
        if not topic:
            raise ClientError("Topic name not specified.")
        value = kwargs.pop('value', None)
        key = kwargs.pop('key', None)
        if value is not None:
            if value_schema:
                value = self._serializer.encode_record_with_schema(topic, value_schema, value)
            else:
                raise SerializerError("Avro schema required for value")

        if key is not None:
            if key_schema:
                key = self._serializer.encode_record_with_schema(topic, key_schema, key, True)
            else:
                raise SerializerError("Avro schema required for key")

        super(AvroProducer, self).produce(topic, value, key, **kwargs)
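A minimal usage sketch for this producer, assuming a local broker and schema registry (the addresses, topic, and User schema are illustrative):

from confluent_kafka import avro
from confluent_kafka.avro import AvroProducer

value_schema = avro.loads('''
    {"type": "record", "name": "User",
     "fields": [{"name": "name", "type": "string"}]}
''')

producer = AvroProducer({
    'bootstrap.servers': 'localhost:9092',
    'schema.registry.url': 'http://localhost:8081'
}, default_value_schema=value_schema)

producer.produce(topic='users', value={'name': 'ada'})
producer.flush()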
Example #3
    def _get_decoder_func(self, schema_id, is_key):
        if schema_id in self.id_to_decoder_func:
            return self.id_to_decoder_func[schema_id]

        # fetch the schema from the schema registry
        try:
            schema = self.registry_client.get_by_id(schema_id)
        except ClientError as e:
            raise SerializerError("unable to fetch schema with id %d: %s" %
                                  (schema_id, str(e)))

        if schema is None:
            raise SerializerError("unable to fetch schema with id %d" %
                                  schema_id)

        if sys.version_info[0] < 3:
            avro_reader = avro.io.DatumReader(readers_schema=self.key_schema
                                              if is_key else self.value_schema,
                                              writers_schema=schema)
        else:
            avro_reader = avro.io.DatumReader(
                reader_schema=self.key_schema if is_key else self.value_schema,
                writer_schema=schema)

        def decoder(p):
            bin_decoder = avro.io.BinaryDecoder(p)
            return avro_reader.read(bin_decoder)

        self.id_to_decoder_func[schema_id] = decoder
        return self.id_to_decoder_func[schema_id]
Example #4
    def _load_schema(self, schema_id):
        # fetch the schema from the schema registry
        try:
            schema = self.registry_client.get_by_id(schema_id)
        except ClientError as e:
            raise SerializerError("unable to fetch schema with id %d: %s" % (schema_id, str(e)))

        if schema is None:
            raise SerializerError("unable to fetch schema with id %d" % schema_id)

        return schema
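For context, a hedged sketch of calling the registry client this helper wraps; the URL is illustrative, and depending on the client version the constructor takes a plain URL string instead of a config dict:

from confluent_kafka.avro.cached_schema_registry_client import CachedSchemaRegistryClient

client = CachedSchemaRegistryClient({'url': 'http://localhost:8081'})
schema = client.get_by_id(1)   # avro schema object, or None if unknown
print(schema)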
Example #5
    def decode(self, message):
        if len(message) <= 5:
            raise SerializerError("Message is too small to decode")
        with ContextStringIO(message) as payload:
            magic, schema_id = struct.unpack(">bI", payload.read(5))
            if magic != MAGIC_BYTE:
                raise SerializerError("message does not start with magic byte")
            # Note: schema_id is ignored here; this decoder always reads with
            # the single schema configured on the instance.
            avro_reader = avro.io.DatumReader(self.schema)
            return avro_reader.read(avro.io.BinaryDecoder(payload))
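The 5-byte header being unpacked is the Confluent wire format: one magic byte (0) followed by a big-endian 4-byte schema id, then the Avro binary body. A standalone sketch (the schema id and body are placeholders):

import struct

MAGIC_BYTE = 0
schema_id = 42        # illustrative
avro_body = b'...'    # Avro binary-encoded record would go here

message = struct.pack('>bI', MAGIC_BYTE, schema_id) + avro_body

magic, sid = struct.unpack('>bI', message[:5])
assert magic == MAGIC_BYTE and sid == 42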
Example #6
    def decode_message(self, message):
        """
        Decode a message from Kafka that has been encoded for use with
        the schema registry.
        :param str|bytes message: message to be decoded
        """
        if len(message) <= 5:
            raise SerializerError("message is too small to decode")

        with ContextStringIO(message) as payload:
            magic, schema_id = struct.unpack('>bI', payload.read(5))
            if magic != MAGIC_BYTE:
                raise SerializerError("message does not start with magic byte")
            decoder_func = self._get_decoder_func(schema_id, payload)
            return decoder_func(payload)
Example #7
0
    def poll(self, timeout=None):
        """
        This is an overridden method from the confluent_kafka.Consumer class. It handles
        message deserialization using the Avro schema.

        :param float timeout: Poll timeout in seconds (default: indefinite)
        :returns: message object with deserialized key and value as dict objects
        :rtype: Message
        """
        if timeout is None:
            timeout = -1
        message = super(AvroConsumer, self).poll(timeout)
        if message is None:
            return None

        if not message.error():
            try:
                if message.value() is not None:
                    decoded_value = self._serializer.decode_message(
                        message.value(), is_key=False)
                    message.set_value(decoded_value)
                if message.key() is not None:
                    decoded_key = self._serializer.decode_message(
                        message.key(), is_key=True)
                    message.set_key(decoded_key)
            except SerializerError as e:
                raise SerializerError(
                    "Message deserialization failed for message at {} [{}] offset {}: {}"
                    .format(message.topic(), message.partition(),
                            message.offset(), e))
        return message
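A minimal consumer-side sketch of using this poll override (configuration values and topic are illustrative):

from confluent_kafka.avro import AvroConsumer

consumer = AvroConsumer({
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'example-group',
    'schema.registry.url': 'http://localhost:8081'
})
consumer.subscribe(['users'])

msg = consumer.poll(10)
if msg is not None and not msg.error():
    print(msg.value())   # key and value arrive already decoded to dicts
consumer.close()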
Example #8
    def _get_decoder_func(self, schema_id, payload):
        if schema_id in self.id_to_decoder_func:
            return self.id_to_decoder_func[schema_id]

        # fetch the schema from the schema registry
        try:
            schema = self.registry_client.get_by_id(schema_id)
        except ClientError as e:
            raise SerializerError("unable to fetch schema with id %d: %s" %
                                  (schema_id, str(e)))

        if schema is None:
            raise SerializerError("unable to fetch schema with id %d" %
                                  schema_id)

        curr_pos = payload.tell()
        if HAS_FAST:
            # try to use fast avro
            try:
                schema_dict = schema.to_json()
                read_data(payload, schema_dict)

                # If we reach this point, this means we have fastavro and it can
                # do this deserialization. Rewind since this method just determines
                # the reader function and we need to deserialize again along the
                # normal path.
                payload.seek(curr_pos)

                self.id_to_decoder_func[schema_id] = lambda p: read_data(
                    p, schema_dict)
                return self.id_to_decoder_func[schema_id]
            except Exception:
                # fastavro failed; fall through to standard avro below.
                pass

        # Reaching here means fastavro is unavailable or could not handle the
        # schema, so delegate to the slower standard avro path.
        # rewind to the start of the Avro body
        payload.seek(curr_pos)
        avro_reader = avro.io.DatumReader(schema)

        def decoder(p):
            bin_decoder = avro.io.BinaryDecoder(p)
            return avro_reader.read(bin_decoder)

        self.id_to_decoder_func[schema_id] = decoder
        return self.id_to_decoder_func[schema_id]
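A note on the design these decoder factories share: they probe once by fully reading the payload with fastavro, rewind, and only then cache a decoder keyed by schema id. The throwaway read is a one-time cost per schema, and every later message carrying the same id goes straight to the cached function.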
Example #9
    def produce(self, **kwargs):
        '''
            Sends a message to Kafka, encoding it with the specified Avro schema.
            :param str topic: topic name
            :param dict value: value to serialize
            :param value_schema: Avro schema for the value
            :param dict key: key to serialize
            :param key_schema: Avro schema for the key
            :raises SerializerError: if a schema is missing or encoding fails
        '''
        # get schemas from kwargs if defined
        key_schema = kwargs.pop('key_schema', None)
        value_schema = kwargs.pop('value_schema', None)
        topic = kwargs.pop('topic', None)
        if not topic:
            log.error("Topic name not specified.")
            raise ClientError("Topic name not specified.")
        value = kwargs.pop('value', None)
        key = kwargs.pop('key', None)

        # if key_schema was not passed in, fall back to the default key_schema given at construction time.
        if not key_schema:
            key_schema = self.key_schema

        # if value_schema was not passed in, fall back to the default value_schema given at construction time.
        if not value_schema:
            value_schema = self.value_schema

        if value is not None:
            if value_schema:
                value = self._serializer.encode_record_with_schema(
                    topic, value_schema, value)
            else:
                log.error("Schema required for value serialization")
                raise SerializerError("Avro schema required for value")

        if key is not None:
            if key_schema:
                key = self._serializer.encode_record_with_schema(
                    topic, key_schema, key, True)
            else:
                log.error("Schema required for key serialization")
                raise SerializerError("Avro schema required for key")

        self._producer.produce(topic, value, key, **kwargs)
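Building on the producer sketch above, per-call schemas override the constructor defaults; the names carry over from that sketch and are illustrative (key_schema/value_schema would be schema objects loaded with avro.loads):

producer.produce(
    topic='users',
    key={'id': 1},
    value={'name': 'ada'},
    key_schema=key_schema,      # overrides the default passed at construction
    value_schema=value_schema,
)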
Example #10
    def decode_message(self, message, is_key=False):
        """
        Decode a message from kafka that has been encoded for use with
        the schema registry.
        :param str|bytes or None message: message key or value to be decoded
        :returns: Decoded message contents.
        :rtype: dict
        """

        if message is None:
            return None

        if len(message) <= 5:
            raise SerializerError("message is too small to decode")

        with ContextStringIO(message) as payload:
            magic, schema_id = struct.unpack('>bI', payload.read(5))
            if magic != MAGIC_BYTE:
                raise SerializerError("message does not start with magic byte")
            decoder_func = self._get_decoder_func(schema_id, payload, is_key)
            return decoder_func(payload)
Example #11
    def deserialize(self, message):
        # Apply any decoding, like from an AvroConsumer
        if hasattr(self, '_serializer') and self._serializer:
            try:
                if message.value() is not None:
                    decoded_value = self._serializer.decode_message(
                        message.value(), is_key=False)
                    message.set_value(decoded_value)
                if message.key() is not None:
                    decoded_key = self._serializer.decode_message(
                        message.key(), is_key=True)
                    message.set_key(decoded_key)
            except SerializerError as e:
                raise SerializerError(
                    "Message de-serialization failed for message at {} [{}] offset {}: {}"
                    .format(message.topic(), message.partition(),
                            message.offset(), e))

        return message
Example #12
    def _get_decoder_func(self, schema_id, payload):
        if schema_id in self.id_to_decoder_func:
            return self.id_to_decoder_func[schema_id]

        # fetch the schema from the schema registry
        try:
            schema = self.registry_client.get_by_id(schema_id)
        except Exception:
            schema = None

        if not schema:
            err = "unable to fetch schema with id %d" % (schema_id)
            raise SerializerError(err)

        curr_pos = payload.tell()
        if HAS_FAST:
            # try to use fast avro
            try:
                schema_dict = schema.to_json()
                read_data(payload, schema_dict)
                # Reaching here means fastavro can handle this schema. Seek
                # back, since the same payload will be read again by the
                # cached decoder - a one-time cost per schema id.
                payload.seek(curr_pos)
                decoder_func = lambda p: read_data(p, schema_dict)
                self.id_to_decoder_func[schema_id] = decoder_func
                return self.id_to_decoder_func[schema_id]
            except Exception:
                # fastavro failed; fall through to standard avro below.
                pass

        # Reaching here means fastavro is unavailable or could not handle the
        # schema, so delegate to the slower standard avro path.
        # rewind to the start of the Avro body
        payload.seek(curr_pos)
        avro_reader = avro.io.DatumReader(schema)

        def decoder(p):
            bin_decoder = avro.io.BinaryDecoder(p)
            return avro_reader.read(bin_decoder)

        self.id_to_decoder_func[schema_id] = decoder
        return self.id_to_decoder_func[schema_id]
Example #13
    def poll(
        self,
        group_id,
        timeout=1,
        max_records=1,
        poll_attempts=10,
        only_value=True,
        auto_create_topics=True,
        decode_format=None,
        fail_on_deserialization=False
    ):
        """Fetch and return messages from assigned topics / partitions as list.
        - ``timeout`` (int): Seconds spent waiting in poll if data is not available in the buffer.\n
        - ``max_records`` (int): maximum number of messages to get from poll. Default: 1.
        If 0, returns immediately with any records that are available currently in the buffer,
        else returns empty. Must not be negative. Default: `1`
        - ``poll_attempts`` (int): Attempts to consume messages and endless looping prevention.
        Sometimes the first messages are None or the topic could be empty. Default: `10`.
        - ``only_value`` (bool): Return only message.value(). Default: `True`.
        - ``decode_format`` (str) - If you need to decode data to specific format
            (See https://docs.python.org/3/library/codecs.html#standard-encodings). Default: None.
        - ``auto_create_topics`` (bool): Consumers no longer trigger auto creation of topics,
            will be removed in future release. If True then the error message UNKNOWN_TOPIC_OR_PART is ignored.
            Default: `True`.
        - ``fail_on_deserialization`` (bool): If True and message deserialization fails, will raise a SerializerError
            exception; on False will just stop the current poll and return the message so far. Default: `False`.
        """

        messages = []
        while poll_attempts > 0:
            msg = None
            try:
                msg = self.consumers[group_id].poll(timeout=timeout)
            except SerializerError as err:
                error = 'Message deserialization failed for {}: {}'.format(msg, err)
                if fail_on_deserialization:
                    raise SerializerError(error)

                print(error)
                break

            if msg is None:
                poll_attempts -= 1
                continue

            if msg.error():
                # Workaround: consumers no longer trigger auto creation of topics
                # (deprecated), so optionally ignore UNKNOWN_TOPIC_OR_PART.
                if int(msg.error().code()) == KafkaError.UNKNOWN_TOPIC_OR_PART and auto_create_topics:
                    continue
                raise KafkaException(msg.error())

            if only_value:
                messages.append(msg.value())
            else:
                messages.append(msg)

            if len(messages) == max_records:
                break

        if decode_format:
            messages = self._decode_data(data=messages, decode_format=decode_format)

        return messages
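A hedged sketch of driving this keyword-style method straight from Python; `kafka_lib` and `group_id` stand in for a library instance and a group id obtained from its consumer-creation call:

records = kafka_lib.poll(group_id=group_id, timeout=2, max_records=5,
                         only_value=True, decode_format='utf-8')
for value in records:
    print(value)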
Example #14
    def test_message(self):
        try:
            raise SerializerError("message")
        except SerializerError as e:
            assert e.message == 'message'