Example 1
class AvroConsumer(Consumer):
    """
    Kafka Consumer client which does avro schema decoding of messages.
    Handles message deserialization.

    The constructor takes the following parameters:

    :param dict config: Config parameters containing the schema registry URL (``schema.registry.url``)
                        and the standard Kafka client configuration (``bootstrap.servers`` et al.).
    """
    def __init__(self, config, schema_registry=None):

        schema_registry_url = config.pop("schema.registry.url", None)
        schema_registry_ca_location = config.pop("schema.registry.ssl.ca.location", None)
        schema_registry_certificate_location = config.pop("schema.registry.ssl.certificate.location", None)
        schema_registry_key_location = config.pop("schema.registry.ssl.key.location", None)

        if schema_registry is None:
            if schema_registry_url is None:
                raise ValueError("Missing parameter: schema.registry.url")

            schema_registry = CachedSchemaRegistryClient(url=schema_registry_url,
                                                         ca_location=schema_registry_ca_location,
                                                         cert_location=schema_registry_certificate_location,
                                                         key_location=schema_registry_key_location)
        elif schema_registry_url is not None:
            raise ValueError("Cannot pass schema_registry along with schema.registry.url config")

        super(AvroConsumer, self).__init__(config)
        self._serializer = MessageSerializer(schema_registry)

    def poll(self, timeout=None, with_schema=False):
        """
        Overridden from the confluent_kafka.Consumer class; handles message
        deserialization using the Avro schema.

        :param float timeout: Poll timeout in seconds (default: indefinite)
        :param boolean with_schema: If true, the key_schema and value_schema are added as properties of the message
                                    (default: False)
        :returns: message object with deserialized key and value as dict objects
        :rtype: Message or AvroMessage
        """
        if timeout is None:
            timeout = -1
        message = super(AvroConsumer, self).poll(timeout)
        key_schema = value_schema = None
        if message is None:
            return None
        if not message.value() and not message.key():
            return message
        if not message.error():
            if message.value() is not None:
                decoded_value, value_schema = self._serializer.decode_message(message.value())
                message.set_value(decoded_value)
            if message.key() is not None:
                decoded_key, key_schema = self._serializer.decode_message(message.key())
                message.set_key(decoded_key)
        return message if not with_schema else AvroMessage(key_schema, value_schema, message)
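A minimal usage sketch for the consumer above; the broker address, group id, registry URL and topic name are placeholder values, not taken from the source:

consumer = AvroConsumer({
    'bootstrap.servers': 'localhost:9092',            # placeholder broker
    'group.id': 'example-group',                      # placeholder group id
    'schema.registry.url': 'http://localhost:8081',   # placeholder registry
})
consumer.subscribe(['example-topic'])

msg = consumer.poll(timeout=1.0)
if msg is not None and not msg.error():
    # key() and value() are already decoded to dicts by the override above
    print(msg.key(), msg.value())
consumer.close()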
Example 2
class TestMessageSerializer(unittest.TestCase):
    def setUp(self):
        # need to set up the serializer
        self.client = MockSchemaRegistryClient()
        self.ms = MessageSerializer(self.client)

    def assertMessageIsSame(self, message, expected, schema_id, schema):
        self.assertTrue(message)
        self.assertTrue(len(message) > 5)
        magic, sid = struct.unpack('>bI', message[0:5])
        self.assertEqual(magic, 0)
        self.assertEqual(sid, schema_id)
        decoded_msg, decoded_schema = self.ms.decode_message(message)
        self.assertTrue(decoded_msg)
        self.assertEqual(decoded_msg, expected)
        self.assertEqual(decoded_schema, schema)

    def test_encode_with_schema_id(self):
        adv = avro.loads(data_gen.ADVANCED_SCHEMA)
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        subject = 'test'
        schema_id = self.client.register(subject, basic)

        records = data_gen.BASIC_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema_id(schema_id, record)
            self.assertMessageIsSame(message, record, schema_id, basic)

        subject = 'test_adv'
        adv_schema_id = self.client.register(subject, adv)
        self.assertNotEqual(adv_schema_id, schema_id)
        records = data_gen.ADVANCED_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema_id(
                adv_schema_id, record)
            self.assertMessageIsSame(message, record, adv_schema_id, adv)

    def test_encode_record_with_schema(self):
        topic = 'test'
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        subject = 'test-value'
        schema_id = self.client.register(subject, basic)
        records = data_gen.BASIC_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema(topic, basic, record)
            self.assertMessageIsSame(message, record, schema_id, basic)

    def test_decode_none(self):
        """"null/None messages should decode to None"""

        self.assertIsNone(self.ms.decode_message(None))

    def hash_func(self):
        return hash(str(self))
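The header assertions in assertMessageIsSame above follow the Confluent wire format: one zero magic byte, a 4-byte big-endian schema id, then the Avro-encoded payload. A small sketch of splitting that framing by hand:

import struct

def split_confluent_frame(message_bytes):
    # Confluent framing: '>bI' = magic byte (0) + big-endian 4-byte schema id
    magic, schema_id = struct.unpack('>bI', message_bytes[:5])
    if magic != 0:
        raise ValueError("Unexpected magic byte: {}".format(magic))
    return schema_id, message_bytes[5:]  # remainder is the Avro binary payload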
Example 3
class TestMessageSerializer(unittest.TestCase):
    def setUp(self):
        # need to set up the serializer
        self.client = MockSchemaRegistryClient()
        self.ms = MessageSerializer(self.client)

    def assertMessageIsSame(self, message, expected, schema_id):
        self.assertTrue(message)
        self.assertTrue(len(message) > 5)
        magic, sid = struct.unpack('>bI', message[0:5])
        self.assertEqual(magic, 0)
        self.assertEqual(sid, schema_id)
        decoded = self.ms.decode_message(message)
        self.assertTrue(decoded)
        self.assertEqual(decoded, expected)

    def test_encode_with_schema_id(self):
        adv = avro.loads(data_gen.ADVANCED_SCHEMA)
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        subject = 'test'
        schema_id = self.client.register(subject, basic)

        records = data_gen.BASIC_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema_id(schema_id, record)
            self.assertMessageIsSame(message, record, schema_id)

        subject = 'test_adv'
        adv_schema_id = self.client.register(subject, adv)
        self.assertNotEqual(adv_schema_id, schema_id)
        records = data_gen.ADVANCED_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema_id(adv_schema_id, record)
            self.assertMessageIsSame(message, record, adv_schema_id)

    def test_encode_record_with_schema(self):
        topic = 'test'
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        subject = 'test-value'
        schema_id = self.client.register(subject, basic)
        records = data_gen.BASIC_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema(topic, basic, record)
            self.assertMessageIsSame(message, record, schema_id)

    def test_decode_none(self):
        """"null/None messages should decode to None"""

        self.assertIsNone(self.ms.decode_message(None))

    def hash_func(self):
        return hash(str(self))
Example 4
class AvroConsumer(Consumer):
    """
    Kafka Consumer client which does avro schema decoding of messages.
    Handles message deserialization.

    The constructor takes the following parameters:

    :param dict config: Config parameters containing the schema registry URL (``schema.registry.url``).
    """
    def __init__(self, config, schema_registry=None):
        self._enable_key_decoding = config.pop("enable.key.decoding", True)

        schema_registry_url = config.pop("schema.registry.url", None)
        if schema_registry is None:
            if schema_registry_url is None:
                raise ValueError("Missing parameter: schema.registry.url")
            schema_registry = CachedSchemaRegistryClient(
                url=schema_registry_url)
        elif schema_registry_url is not None:
            raise ValueError(
                "Cannot pass schema_registry along with schema.registry.url config"
            )

        super(AvroConsumer, self).__init__(config)
        self._serializer = MessageSerializer(schema_registry)

    def poll(self, timeout=None):
        """
        Overridden from the confluent_kafka.Consumer class; handles message
        deserialization using the Avro schema.

        :param float timeout: Poll timeout in seconds (default: indefinite)
        :returns: message object with deserialized key and value as dict objects
        """
        if timeout is None:
            timeout = -1
        message = super(AvroConsumer, self).poll(timeout)
        if message is None:
            return None
        if not message.value() and not message.key():
            return message
        if not message.error():
            if message.value() is not None:
                decoded_value = self._serializer.decode_message(
                    message.value())
                message.set_value(decoded_value)
            if self._enable_key_decoding and message.key() is not None:
                decoded_key = self._serializer.decode_message(message.key())
                message.set_key(decoded_key)
        return message
Example 5
class SimpleAvroDeserializer(Deserializer):
    def __init__(self, schema_registry_url):
        schema_registry = CachedSchemaRegistryClient(
            {'url': schema_registry_url})
        self._serializer = MessageSerializer(schema_registry, None, None)

    def __call__(self, value, ctx=None):
        if value is None:
            return None

        if ctx is not None and ctx.field == 'key':
            decoded = self._serializer.decode_message(value, is_key=True)
        else:
            decoded = self._serializer.decode_message(value, is_key=False)

        return decoded
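A hedged wiring sketch for the deserializer above, using confluent_kafka's DeserializingConsumer; broker, group id, registry URL and topic are placeholders:

from confluent_kafka import DeserializingConsumer

value_deserializer = SimpleAvroDeserializer('http://localhost:8081')
consumer = DeserializingConsumer({
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'example-group',
    'value.deserializer': value_deserializer,
})
consumer.subscribe(['example-topic'])
msg = consumer.poll(1.0)
if msg is not None and msg.error() is None:
    print(msg.value())  # the decoded Avro record (a dict)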
Example 6
def consume(config, topic, handler):
    """
    Starts a consumer and calls the given handler for each consumed message.
    Assumes that keys are serialized as strings and values are serialized
    as Avro objects with their schemas stored in a Confluent Schema Registry.
    """
    c_conf = {}
    for key, value in config.items():
        if not key.startswith("schema.registry"):
            if value is not None:
                c_conf[key] = value.strip()

    if "auto.offset.reset" in c_conf:
        print("offset provided")
    else:
        c_conf['auto.offset.reset'] = 'earliest'

    if "group.id" in c_conf:
        print("group id provided")
    else:
        c_conf['group.id'] = 'sme_test'

    c = Consumer(c_conf)
    c.subscribe([topic])

    sr_conf = {
        key.replace("schema.registry.", ""): value.strip()
        for key, value in config.items() if key.startswith("schema.registry")
    }

    sr = CachedSchemaRegistryClient(sr_conf)
    ser = MessageSerializer(sr)

    while True:
        try:
            msg = c.poll(10)
            if msg is None:
                print('No Messages')
                continue
            if msg.error():
                log.error("Consumer error: {}".format(msg.error()))
                continue
            key = msg.key().decode('utf-8')
            value = ser.decode_message(msg.value(), is_key=False)
        except Exception as e:
            log.error("Message consumption failed: {}".format(e))
            break
        try:
            handler(key, value)
        except Exception as e:
            log.error("Message handler failed: {}".format(e))
            break
    c.close()
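A minimal sketch of driving consume(); every config value below is a placeholder:

def print_handler(key, value):
    # key arrives as a utf-8 string, value as the decoded Avro record
    print(key, value)

consume({
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'example-group',
    'schema.registry.url': 'http://localhost:8081',
}, 'example-topic', print_handler)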
Example 7
class TestMessageSerializer(unittest.TestCase):
    def setUp(self):
        # need to set up the serializer
        # Make RecordSchema and PrimitiveSchema hashable
        schema.RecordSchema.__hash__ = self.hash_func
        schema.PrimitiveSchema.__hash__ = self.hash_func
        self.client = MockSchemaRegistryClient()
        self.ms = MessageSerializer(self.client)

    def assertMessageIsSame(self, message, expected, schema_id):
        self.assertTrue(message)
        self.assertTrue(len(message) > 5)
        magic, sid = struct.unpack('>bI', message[0:5])
        self.assertEqual(magic, 0)
        self.assertEqual(sid, schema_id)
        decoded = self.ms.decode_message(message)
        self.assertTrue(decoded)
        self.assertEqual(decoded, expected)

    def test_encode_with_schema_id(self):
        adv = util.parse_schema_from_string(data_gen.ADVANCED_SCHEMA)
        basic = util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
        subject = 'test'
        schema_id = self.client.register(subject, basic)

        records = data_gen.BASIC_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema_id(schema_id, record)
            self.assertMessageIsSame(message, record, schema_id)

        subject = 'test_adv'
        adv_schema_id = self.client.register(subject, adv)
        self.assertNotEqual(adv_schema_id, schema_id)
        records = data_gen.ADVANCED_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema_id(
                adv_schema_id, record)
            self.assertMessageIsSame(message, record, adv_schema_id)

    def test_encode_record_with_schema(self):
        topic = 'test'
        basic = util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
        subject = 'test-value'
        schema_id = self.client.register(subject, basic)
        records = data_gen.BASIC_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema(topic, basic, record)
            self.assertMessageIsSame(message, record, schema_id)

    def hash_func(self):
        return hash(str(self))
Example 8
class AvroConsumer(Consumer):
    """
    Kafka Consumer client which does avro schema decoding of messages.
    Handles message deserialization.

    The constructor takes the following parameters:

    :param dict config: Config parameters containing the schema registry URL (``schema.registry.url``).
    """
    def __init__(self, config):

        if 'schema.registry.url' not in config:
            raise ValueError("Missing parameter: schema.registry.url")
        schema_registry_url = config.pop("schema.registry.url")

        super(AvroConsumer, self).__init__(config)
        self._serializer = MessageSerializer(CachedSchemaRegistryClient(url=schema_registry_url))

    def poll(self, timeout):
        """
        Overridden from the confluent_kafka.Consumer class; handles message
        deserialization using the Avro schema.

        :param float timeout: Poll timeout in seconds
        :returns: message object with deserialized key and value as dict objects
        """
        message = super(AvroConsumer, self).poll(timeout)
        if not message:
            return message
        if not message.error():
            if message.value() is not None:
                decoded_value = self._serializer.decode_message(message.value())
                message.set_value(decoded_value)
            if message.key() is not None:
                decoded_key = self._serializer.decode_message(message.key())
                message.set_key(decoded_key)
        return message
Example 9
    # Excerpt: a method from a larger class (the class definition is not shown).
    def printAndProduceMessages(self):
        consumer = AvroConsumer({
            'bootstrap.servers': BOOTSTRAP_SERVERS,
            'group.id': GROUP_ID,
            'auto.offset.reset': AUTO_OFFSET_RESET,
            'enable.auto.commit': False,
            'schema.registry.url': SCHEMA_REGISTRY_URL
        })
        schema_registry = CachedSchemaRegistryClient(
            os.environ.get('SCHEMA_REGISTRY', SCHEMA_REGISTRY_URL))
        avro_serde = AvroSerde(schema_registry)

        consumer.subscribe([INPUT_TOPIC_NAME])

        while True:
            try:
                consumedMessages = consumer.consume(
                    num_messages=CONSUMER_BATCH_SIZE, timeout=1)
            except Exception as e:
                logging.error("Message pool failed: {}".format(e))
                break

            messages = []
            for consumedMessage in consumedMessages:
                consumedMessageValue = avro_serde.decode_message(
                    consumedMessage.value())

                # Fields outside the array are shared by every output message.
                baseValue = {}
                for attr, value in consumedMessageValue.items():
                    if attr != ARRAY_NAME:
                        baseValue[DOCUMENT_FIELD_PREFIX + attr] = value

                # Build a fresh dict per array item; reusing one dict would
                # make every appended message alias the same object.
                for arrayItem in consumedMessageValue[ARRAY_NAME]:
                    message = {"key": {}, "value": dict(baseValue)}
                    message["key"]["id"] = consumedMessageValue["id"] + \
                        "-" + arrayItem["id"]
                    for attr, value in arrayItem.items():
                        message["value"][attr] = value
                    messages.append(message)

            self.produceMessages(messages)
            consumer.commit()
        consumer.close()
Example 10
class _AvroIORegistry:
    def __init__(self, schema_registry_url):
        """Private implementation class for Avro IO using the registry"""
        log.info(
            f"Using registry with schema_url/id {schema_registry_url}/{config.SCHEMA_ID}"
        )
        try:
            self.client = CachedSchemaRegistryClient(url=schema_registry_url)
            self.schema = self.client.get_by_id(config.SCHEMA_ID)
            self.serializer = MessageSerializer(self.client)
        except Exception:
            raise ValueError("Client id or schema id not found")

    def decode(self, data):
        return self.serializer.decode_message(data)

    def encode(self, record):
        return self.serializer.encode_record_with_schema_id(
            config.SCHEMA_ID, record)
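A hedged round-trip sketch for the registry helper above; config.SCHEMA_ID comes from the surrounding application, so the registry URL and the record fields here are hypothetical:

io = _AvroIORegistry('http://localhost:8081')  # placeholder registry URL
payload = io.encode({'id': '42', 'name': 'example'})  # fields must match the registered schema
record = io.decode(payload)
assert record == {'id': '42', 'name': 'example'}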
Example 11
class TestMessageSerializer(unittest.TestCase):
    def setUp(self):
        # need to set up the serializer
        self.client = MockSchemaRegistryClient()
        self.ms = MessageSerializer(self.client)

    def assertMessageIsSame(self, message, expected, schema_id):
        self.assertTrue(message)
        self.assertTrue(len(message) > 5)
        magic, sid = struct.unpack('>bI', message[0:5])
        self.assertEqual(magic, 0)
        self.assertEqual(sid, schema_id)
        decoded = self.ms.decode_message(message)
        self.assertTrue(decoded)
        self.assertEqual(decoded, expected)

    def test_encode_with_schema_id(self):
        adv = avro.loads(data_gen.ADVANCED_SCHEMA)
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        subject = 'test'
        schema_id = self.client.register(subject, basic)

        records = data_gen.BASIC_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema_id(schema_id, record)
            self.assertMessageIsSame(message, record, schema_id)

        subject = 'test_adv'
        adv_schema_id = self.client.register(subject, adv)
        self.assertNotEqual(adv_schema_id, schema_id)
        records = data_gen.ADVANCED_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema_id(
                adv_schema_id, record)
            self.assertMessageIsSame(message, record, adv_schema_id)

    def test_encode_record_with_schema(self):
        topic = 'test'
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        subject = 'test-value'
        schema_id = self.client.register(subject, basic)
        records = data_gen.BASIC_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema(topic, basic, record)
            self.assertMessageIsSame(message, record, schema_id)

    def test_decode_none(self):
        """"null/None messages should decode to None"""
        self.assertIsNone(self.ms.decode_message(None))

    def test_decode_with_schema(self):
        topic = 'test_specific'

        schema_v1 = avro.loads(
            data_gen.load_schema_file('evolution_schema_v1.avsc'))
        schema_v2 = avro.loads(
            data_gen.load_schema_file('evolution_schema_v2.avsc'))

        dsv1 = SpecificRecordMessageDeserializer(self.client,
                                                 value_schema=schema_v1)
        dsv2 = SpecificRecordMessageDeserializer(self.client,
                                                 value_schema=schema_v2)

        record_v1 = {"name": "suzyq", "age": 27}
        record_v2 = dict(record_v1)
        record_v2['gender'] = 'NONE'

        encoded_v1 = self.ms.encode_record_with_schema(topic, schema_v1,
                                                       record_v1)
        decoded_v1_v1 = dsv1.decode_message(encoded_v1, is_key=False)
        self.assertDictEqual(record_v1, decoded_v1_v1)
        decoded_v1_v2 = dsv2.decode_message(encoded_v1, is_key=False)
        self.assertDictEqual(record_v2, decoded_v1_v2)

        encoded_v2 = self.ms.encode_record_with_schema(topic, schema_v2,
                                                       record_v2)
        decoded_v2_v2 = dsv2.decode_message(encoded_v2, is_key=False)
        self.assertDictEqual(record_v2, decoded_v2_v2)
        decoded_v2_v1 = dsv1.decode_message(encoded_v2, is_key=False)
        self.assertDictEqual(record_v1, decoded_v2_v1)

    def hash_func(self):
        return hash(str(self))
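The evolution schema files are not shown here, but for the v1/v2 round trips in test_decode_with_schema above to work both ways, the field added in v2 must carry a default. A hypothetical pair consistent with the test:

schema_v1 = avro.loads("""
{"type": "record", "name": "User", "fields": [
    {"name": "name", "type": "string"},
    {"name": "age", "type": "int"}]}
""")
# v2 adds 'gender' with a default, so v1 data reads as gender='NONE'
# under v2, and v2 data reads under v1 with the extra field dropped.
schema_v2 = avro.loads("""
{"type": "record", "name": "User", "fields": [
    {"name": "name", "type": "string"},
    {"name": "age", "type": "int"},
    {"name": "gender", "type": "string", "default": "NONE"}]}
""")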
Example 12
class TestMessageSerializer(unittest.TestCase):
    def setUp(self):
        # need to set up the serializer
        self.client = MockSchemaRegistryClient()
        self.ms = MessageSerializer(self.client)

    def assertMessageIsSame(self, message, expected, schema_id):
        self.assertTrue(message)
        self.assertTrue(len(message) > 5)
        magic, sid = struct.unpack('>bI', message[0:5])
        self.assertEqual(magic, 0)
        self.assertEqual(sid, schema_id)
        decoded = self.ms.decode_message(message)
        self.assertTrue(decoded)
        self.assertEqual(decoded, expected)

    def test_encode_with_schema_id(self):
        adv = avro.loads(data_gen.ADVANCED_SCHEMA)
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        subject = 'test'
        schema_id = self.client.register(subject, basic)

        records = data_gen.BASIC_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema_id(schema_id, record)
            self.assertMessageIsSame(message, record, schema_id)

        subject = 'test_adv'
        adv_schema_id = self.client.register(subject, adv)
        self.assertNotEqual(adv_schema_id, schema_id)
        records = data_gen.ADVANCED_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema_id(
                adv_schema_id, record)
            self.assertMessageIsSame(message, record, adv_schema_id)

    def test_encode_record_with_schema(self):
        topic = 'test'
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        subject = 'test-value'
        schema_id = self.client.register(subject, basic)
        records = data_gen.BASIC_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema(topic, basic, record)
            self.assertMessageIsSame(message, record, schema_id)

    @skipIf(version_info < (3, ),
            'unittest.mock.patch not available in Python 2')
    def test_encode_record_with_schema_sets_writers_cache_once(self):
        topic = 'test'
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        subject = 'test-value'
        self.client.register(subject, basic)
        records = data_gen.BASIC_ITEMS
        with patch.object(self.ms, "_get_encoder_func") as encoder_func_mock:
            for record in records:
                self.ms.encode_record_with_schema(topic, basic, record)
        encoder_func_mock.assert_called_once_with(basic)

    def test_decode_none(self):
        """null/None messages should decode to None"""

        self.assertIsNone(self.ms.decode_message(None))

    def hash_func(self):
        return hash(str(self))
Example 13
class TestMessageSerializer(unittest.TestCase):
    def setUp(self):
        # need to set up the serializer
        self.client = MockSchemaRegistryClient()
        self.ms = MessageSerializer(self.client)

    def assertMessageIsSame(self, message, expected, schema_id):
        self.assertTrue(message)
        self.assertTrue(len(message) > 5)
        magic, sid = struct.unpack('>bI', message[0:5])
        self.assertEqual(magic, 0)
        self.assertEqual(sid, schema_id)
        decoded = self.ms.decode_message(message)
        self.assertTrue(decoded)
        self.assertEqual(decoded, expected)

    def test_encode_with_schema_id(self):
        adv = avro.loads(data_gen.ADVANCED_SCHEMA)
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        subject = 'test'
        schema_id = self.client.register(subject, basic)

        records = data_gen.BASIC_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema_id(schema_id, record)
            self.assertMessageIsSame(message, record, schema_id)

        subject = 'test_adv'
        adv_schema_id = self.client.register(subject, adv)
        self.assertNotEqual(adv_schema_id, schema_id)
        records = data_gen.ADVANCED_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema_id(adv_schema_id, record)
            self.assertMessageIsSame(message, record, adv_schema_id)

    def test_encode_record_with_schema(self):
        topic = 'test'
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        subject = 'test-value'
        schema_id = self.client.register(subject, basic)
        records = data_gen.BASIC_ITEMS
        for record in records:
            message = self.ms.encode_record_with_schema(topic, basic, record)
            self.assertMessageIsSame(message, record, schema_id)

    def test_decode_none(self):
        """"null/None messages should decode to None"""

        self.assertIsNone(self.ms.decode_message(None))

    def test__get_subject_for_key_with_topic_name_strategy(self):
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        topic = "topic"
        self.ms.registry_client.key_subject_name_strategy_func = topic_name_strategy  # noqa
        subject = self.ms._get_subject(topic=topic, schema=basic, is_key=True)

        expected = "topic-key"
        self.assertEqual(expected, subject)

    def test__get_subject_for_key_with_record_name_strategy(self):
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        topic = "topic"
        self.ms.registry_client.key_subject_name_strategy_func = record_name_strategy  # noqa
        subject = self.ms._get_subject(topic=topic, schema=basic, is_key=True)

        expected = "python.test.basic.basic"
        self.assertEqual(expected, subject)

    def test__get_subject_for_key_with_topic_record_name_strategy(self):
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        topic = "topic"
        self.ms.registry_client.key_subject_name_strategy_func = topic_record_name_strategy  # noqa
        subject = self.ms._get_subject(topic=topic, schema=basic, is_key=True)

        expected = "topic-python.test.basic.basic"
        self.assertEqual(expected, subject)

    def test__get_subject_for_value_with_topic_name_strategy(self):
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        topic = "topic"
        self.ms.registry_client.value_subject_name_strategy_func = topic_name_strategy  # noqa
        subject = self.ms._get_subject(topic=topic, schema=basic, is_key=False)

        expected = "topic-value"
        self.assertEqual(expected, subject)

    def test__get_subject_for_value_with_record_name_strategy(self):
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        topic = "topic"
        self.ms.registry_client.value_subject_name_strategy_func = record_name_strategy  # noqa
        subject = self.ms._get_subject(topic=topic, schema=basic, is_key=False)

        expected = "python.test.basic.basic"
        self.assertEqual(expected, subject)

    def test__get_subject_for_value_with_topic_record_name_strategy(self):
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        topic = "topic"
        self.ms.registry_client.value_subject_name_strategy_func = topic_record_name_strategy  # noqa
        subject = self.ms._get_subject(topic=topic, schema=basic, is_key=False)

        expected = "topic-python.test.basic.basic"
        self.assertEqual(expected, subject)

    def hash_func(self):
        return hash(str(self))
Example 14
class AvroConsumer(Consumer):
    """
    Kafka Consumer client which does avro schema decoding of messages.
    Handles message deserialization.

    The constructor takes the following parameters:

    :param dict config: Config parameters containing the schema registry URL (``schema.registry.url``)
                        and the standard Kafka client configuration (``bootstrap.servers`` et al.)
    :param schema reader_key_schema: a reader schema for the message key
    :param schema reader_value_schema: a reader schema for the message value
    :raises ValueError: For invalid configurations
    """

    def __init__(self, config, schema_registry=None, reader_key_schema=None, reader_value_schema=None):

        sr_conf = {key.replace("schema.registry.", ""): value
                   for key, value in config.items() if key.startswith("schema.registry")}

        if sr_conf.get("basic.auth.credentials.source") == 'SASL_INHERIT':
            sr_conf['sasl.mechanisms'] = config.get('sasl.mechanisms', '')
            sr_conf['sasl.username'] = config.get('sasl.username', '')
            sr_conf['sasl.password'] = config.get('sasl.password', '')

        ap_conf = {key: value
                   for key, value in config.items() if not key.startswith("schema.registry")}

        if schema_registry is None:
            schema_registry = CachedSchemaRegistryClient(sr_conf)
        elif sr_conf.get("url", None) is not None:
            raise ValueError("Cannot pass schema_registry along with schema.registry.url config")

        super(AvroConsumer, self).__init__(ap_conf)
        self._serializer = MessageSerializer(schema_registry, reader_key_schema, reader_value_schema)

    def poll(self, timeout=None):
        """
        Overridden from the confluent_kafka.Consumer class; handles message
        deserialization using the Avro schema.

        :param float timeout: Poll timeout in seconds (default: indefinite)
        :returns: message object with deserialized key and value as dict objects
        :rtype: Message
        """
        if timeout is None:
            timeout = -1
        message = super(AvroConsumer, self).poll(timeout)
        if message is None:
            return None

        if not message.error():
            try:
                if message.value() is not None:
                    decoded_value = self._serializer.decode_message(message.value(), is_key=False)
                    message.set_value(decoded_value)
                if message.key() is not None:
                    decoded_key = self._serializer.decode_message(message.key(), is_key=True)
                    message.set_key(decoded_key)
            except SerializerError as e:
                raise SerializerError("Message deserialization failed for message at {} [{}] offset {}: {}".format(
                    message.topic(),
                    message.partition(),
                    message.offset(),
                    e))
        return message
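A hedged construction sketch for the consumer above, passing a reader schema so values are decoded into the shape this consumer expects; the schema and all config values are placeholders (avro here is confluent_kafka.avro):

reader_value_schema = avro.loads("""
{"type": "record", "name": "User", "fields": [
    {"name": "name", "type": "string"}]}
""")

consumer = AvroConsumer({
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'example-group',
    'schema.registry.url': 'http://localhost:8081',
}, reader_value_schema=reader_value_schema)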
Example 15
class AvroConsumer(Consumer):
    """
    Kafka Consumer client which does avro schema decoding of messages.
    Handles message deserialization.

    The constructor takes the following parameters:

    :param dict config: Config parameters containing the schema registry URL (``schema.registry.url``)
                        and the standard Kafka client configuration (``bootstrap.servers`` et al.)
    :param schema reader_key_schema: a reader schema for the message key
    :param schema reader_value_schema: a reader schema for the message value
    :raises ValueError: For invalid configurations
    """
    def __init__(self,
                 config,
                 schema_registry=None,
                 reader_key_schema=None,
                 reader_value_schema=None,
                 **kwargs):

        sr_conf = {
            key.replace("schema.registry.", ""): value
            for key, value in config.items()
            if key.startswith("schema.registry")
        }

        if sr_conf.get("basic.auth.credentials.source") == 'SASL_INHERIT':
            # Fallback to plural 'mechanisms' for backward compatibility
            sr_conf['sasl.mechanism'] = config.get(
                'sasl.mechanism', config.get('sasl.mechanisms', ''))
            sr_conf['sasl.username'] = config.get('sasl.username', '')
            sr_conf['sasl.password'] = config.get('sasl.password', '')

        ap_conf = {
            key: value
            for key, value in config.items()
            if not key.startswith("schema.registry")
        }

        if schema_registry is None:
            schema_registry = CachedSchemaRegistryClient(sr_conf)
        elif sr_conf.get("url", None) is not None:
            raise ValueError(
                "Cannot pass schema_registry along with schema.registry.url config"
            )

        super(AvroConsumer, self).__init__(ap_conf, **kwargs)
        self._serializer = MessageSerializer(schema_registry,
                                             reader_key_schema,
                                             reader_value_schema)

    def poll(self, timeout=None):
        """
        Overridden from the confluent_kafka.Consumer class; handles message
        deserialization using the Avro schema.

        :param float timeout: Poll timeout in seconds (default: indefinite)
        :returns: message object with deserialized key and value as dict objects
        :rtype: Message
        """
        if timeout is None:
            timeout = -1
        message = super(AvroConsumer, self).poll(timeout)
        if message is None:
            return None

        if not message.error():
            try:
                if message.value() is not None:
                    decoded_value = self._serializer.decode_message(
                        message.value(), is_key=False)
                    message.set_value(decoded_value)
                if message.key() is not None:
                    decoded_key = self._serializer.decode_message(
                        message.key(), is_key=True)
                    message.set_key(decoded_key)
            except SerializerError as e:
                raise SerializerError(
                    "Message deserialization failed for message at {} [{}] offset {}: {}"
                    .format(message.topic(), message.partition(),
                            message.offset(), e))
        return message
Example 16
class KafkaDriver(object):

    '''
    Kafka driver class.

    Can send JSON and Avro messages.
    '''

    last_offset = 0

    def __init__(self, topic, server='kafka1', schema_registry='kafka-schema-registry'):
        self.server = server
        self.topic = topic
        self.schema_registry = schema_registry
        schema_registryObj = CachedSchemaRegistryClient(url='http://' + self.schema_registry + ':8081')
        self.serializer = MessageSerializer(schema_registryObj)

    def create_topic(self, topic):

        '''
        Create topic.
        '''

        client = kafka.KafkaClient(hosts=self.server + ':9092')
        res = client.ensure_topic_exists(topic)
        return res

    def delete_topic(self, topic=None, server='zoo1'):

        '''
        Delete topic.
        '''
        if topic is None:
            topic = self.topic
        cmd = '/nis-test/bin/kafka_2.12-1.1.0/bin/kafka-topics.sh --delete --topic ' + topic + ' --zookeeper ' + server
        logging.debug('[KafkaDriver][delete_topic] cmd: ' + str(cmd))
        ret = os.system(cmd)
        logging.debug('[KafkaDriver][delete_topic] ret: ' + str(ret))
        #assert ret == 0
        return ret

    @pytest.allure.step('kafka_list')
    def list(self):

        '''
        Kafka list topics.

        List existing topics.
        '''

        consumer = KafkaConsumer(bootstrap_servers=self.server + ':9092',
                                 auto_offset_reset='earliest',
                                 consumer_timeout_ms=1000)
        log.debug("[KafkaDriver][list] list start")
        topics = consumer.topics()
        for topic in topics:
            log.debug("[KafkaDriver][list] topic: " + str(topic))
        log.debug("[KafkaDriver][list] self.topic: " + str(self.topic))
        assert self.topic in topics
    @pytest.allure.step('get_last_offset')
    def get_last_offset(self):

        '''
        Kafka get last offset.

        Get the last message offset.
        '''

        log.debug("[KafkaDriver][get_last_offset] start")
        # consumer = KafkaConsumer(bootstrap_servers=self.server + ':9092',
        #                          group_id = None,
        #                          enable_auto_commit = False)
        consumer = KafkaConsumer(bootstrap_servers=self.server + ':9092',
                                 consumer_timeout_ms=1000)
        log.debug("[KafkaDriver][get_last_offset] TopicPartition")
        tp = TopicPartition(self.topic, 0)
        log.debug("[KafkaDriver][get_last_offset] assign")
        consumer.assign([tp])
        log.debug("[KafkaDriver][get_last_offset] seek_to_end")
        consumer.seek_to_end(tp)
        log.debug("[KafkaDriver][get_last_offset] position")
        last_offset = consumer.position(tp)
        log.debug("[KafkaDriver][get_last_offset] topic: " + str(self.topic))
        log.debug("[KafkaDriver][get_last_offset] last_offset: " + str(last_offset))
        #consumer.close(autocommit=False)
        consumer.close()
        return last_offset


    @pytest.allure.step('read_from_offset')
    def read_from_offset(self, offset=0, lang='json', schema=None):

        '''
        Kafka read message.

        Read JSON and Avro messages from the consumer.
        '''
        log.debug("[KafkaDriver][read_from_offset] lang: " + str(lang))
        log.debug("[KafkaDriver][read_from_offset] offset: " + str(offset))

        def outputJSON(obj):

            '''
            Default JSON serializer.
            '''

            if isinstance(obj, datetime.datetime):
                return int(obj.strftime("%s%f")[:-3])
            return obj


        ret = None
        log.debug("[KafkaDriver][read_from_offset] read start: " + str(self.server))
        consumer = KafkaConsumer(bootstrap_servers=self.server + ':9092',
                                 auto_offset_reset='earliest',
                                 consumer_timeout_ms=1000)

        partition = TopicPartition(self.topic, 0)
        consumer.assign([partition])
        consumer.seek_to_end(partition)
        start = int(offset)
        consumer.seek(partition, start)

        # Build the Avro serializer once rather than once per message.
        if lang == 'avro':
            schema_registry = CachedSchemaRegistryClient(url='http://' + self.schema_registry + ':8081')
            self._serializer = MessageSerializer(schema_registry)

        for msg in consumer:
            if lang == 'avro':
                message = self._serializer.decode_message(msg.value)
                message = json.dumps(message, indent=4, sort_keys=True, default=outputJSON)
                ret = message
            else:
                message = msg.value
                ret = msg.value
            log.debug("[KafkaDriver][read_from_offset] msg: " + str(message) + " msg.offset: " + str(msg.offset))
        consumer.close()
        log.debug("[KafkaDriver][read_from_offset] read end")
        return ret

    @pytest.allure.step('kafka_producer')
    def send(self, topic=None, msg="{'foo':'bar'}", lang='json', schema=None):

        '''
        Kafka send message.

        Send JSON and Avro messages.
        '''

        log.debug("[KafkaDriver][send] producer start: " + str(self.server))
        log.debug("[KafkaDriver][send] send message: " + str(msg))
        if topic is None:
            topic = self.topic
        log.debug("[KafkaDriver][send] topic: " + str(topic))
        if lang == 'json':
            producer = KafkaProducer(bootstrap_servers=self.server + ':9092')
            log.debug("[KafkaDriver][send] json msg")
            # Use the resolved topic parameter, not always self.topic
            res = producer.send(topic, key=None, value=msg)
            log.debug("[KafkaDriver][send] produce result: " + str(res.get()))
            time.sleep(1)
            producer.close()
            log.debug("[KafkaDriver][send] end")
        elif lang == 'avro':
            log.debug("[KafkaDriver][send] avro msg")
            log.debug("[KafkaDriver][send] schema: " + str(schema))
            value_schema = avro.loads(schema)
            avroProducer = AvroProducer({
                 'bootstrap.servers': self.server,
                 'schema.registry.url': 'http://' + self.schema_registry + ':8081'
                 }, default_value_schema=value_schema)

            res = avroProducer.produce(topic=topic, value=msg)
            log.debug("[KafkaDriver][send] produce result: " + str(res))
            time.sleep(1)
            avroProducer.flush()
            log.debug("[KafkaDriver][send] end")
print("Start: avro-python-consumer")

props = {
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'avro-python-consumer_5',
    'auto.offset.reset': 'earliest'
}

consumer = Consumer(props)

consumer.subscribe(['avro-python-producer-topic'])

# connect to the schema_registry
schema_registry = CachedSchemaRegistryClient("http://localhost:8081")
# define avro serde - to be used to decode msg value against the avro schema
avro_serde = MessageSerializer(schema_registry)

try:
    while True:
        msg = consumer.poll(1.0)
        if msg is None:
            continue
        if msg.error():
            print("Consumer error: {}".format(msg.error()))
            continue
        print('Received message: {}'.format(avro_serde.decode_message(
            msg.value())))
except KeyboardInterrupt:
    pass
finally:
    # Without the try/finally, close() after a bare `while True` was unreachable.
    consumer.close()

print("End: avro-python-consumer")