class AvroConsumer(Consumer):
    """
    Kafka Consumer client which does avro schema decoding of messages.
    Handles message deserialization.

    Constructor takes below parameters

    :param dict config: Config parameters containing url for schema registry
        (``schema.registry.url``) and the standard Kafka client configuration
        (``bootstrap.servers`` et.al).
    """
    def __init__(self, config, schema_registry=None):
        # pop() deliberately strips the registry-specific keys so that the
        # remaining dict is a valid plain Consumer configuration.
        # NOTE: this mutates the caller's config dict.
        schema_registry_url = config.pop("schema.registry.url", None)
        schema_registry_ca_location = config.pop("schema.registry.ssl.ca.location", None)
        schema_registry_certificate_location = config.pop("schema.registry.ssl.certificate.location", None)
        schema_registry_key_location = config.pop("schema.registry.ssl.key.location", None)
        if schema_registry is None:
            if schema_registry_url is None:
                raise ValueError("Missing parameter: schema.registry.url")
            schema_registry = CachedSchemaRegistryClient(url=schema_registry_url,
                                                         ca_location=schema_registry_ca_location,
                                                         cert_location=schema_registry_certificate_location,
                                                         key_location=schema_registry_key_location)
        elif schema_registry_url is not None:
            # Passing both an explicit client and a URL is ambiguous -- refuse.
            raise ValueError("Cannot pass schema_registry along with schema.registry.url config")
        super(AvroConsumer, self).__init__(config)
        self._serializer = MessageSerializer(schema_registry)

    def poll(self, timeout=None, with_schema=False):
        """
        This is an overriden method from confluent_kafka.Consumer class. This
        handles message deserialization using avro schema

        :param float timeout: Poll timeout in seconds (default: indefinite)
        :param boolean with_schema: If true, the key_schema and value_schema
            are added as properties of the message (default: False)
        :returns: message object with deserialized key and value as dict objects
        :rtype: Message or AvroMessage
        """
        if timeout is None:
            timeout = -1  # librdkafka convention: -1 blocks indefinitely
        message = super(AvroConsumer, self).poll(timeout)
        key_schema = value_schema = None
        if message is None:
            return None
        # Both key and value empty: nothing to decode, hand back as-is.
        # NOTE(review): this path returns a plain Message even when
        # with_schema=True -- confirm callers tolerate the mixed return type.
        if not message.value() and not message.key():
            return message
        if not message.error():
            if message.value() is not None:
                decoded_value, value_schema = self._serializer.decode_message(message.value())
                message.set_value(decoded_value)
            if message.key() is not None:
                decoded_key, key_schema = self._serializer.decode_message(message.key())
                message.set_key(decoded_key)
        return message if not with_schema else AvroMessage(key_schema, value_schema, message)
class TestMessageSerializer(unittest.TestCase):
    """Round-trip tests for MessageSerializer backed by a mock schema registry."""

    def setUp(self):
        # A mock registry keeps the tests hermetic -- no network involved.
        self.client = MockSchemaRegistryClient()
        self.ms = MessageSerializer(self.client)

    def assertMessageIsSame(self, message, expected, schema_id, schema):
        """Verify wire framing (magic byte 0 + big-endian schema id) and that
        decoding restores both the record and the schema it was written with."""
        self.assertTrue(message)
        self.assertTrue(len(message) > 5)
        magic_byte, embedded_id = struct.unpack('>bI', message[0:5])
        self.assertEqual(magic_byte, 0)
        self.assertEqual(embedded_id, schema_id)
        payload, payload_schema = self.ms.decode_message(message)
        self.assertTrue(payload)
        self.assertEqual(payload, expected)
        self.assertEqual(payload_schema, schema)

    def test_encode_with_schema_id(self):
        advanced_schema = avro.loads(data_gen.ADVANCED_SCHEMA)
        basic_schema = avro.loads(data_gen.BASIC_SCHEMA)
        basic_id = self.client.register('test', basic_schema)
        for item in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(basic_id, item)
            self.assertMessageIsSame(encoded, item, basic_id, basic_schema)
        advanced_id = self.client.register('test_adv', advanced_schema)
        self.assertNotEqual(advanced_id, basic_id)
        for item in data_gen.ADVANCED_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(advanced_id, item)
            self.assertMessageIsSame(encoded, item, advanced_id, advanced_schema)

    def test_encode_record_with_schema(self):
        basic_schema = avro.loads(data_gen.BASIC_SCHEMA)
        registered_id = self.client.register('test-value', basic_schema)
        for item in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema('test', basic_schema, item)
            self.assertMessageIsSame(encoded, item, registered_id, basic_schema)

    def test_decode_none(self):
        """null/None messages should decode to None"""
        self.assertIsNone(self.ms.decode_message(None))

    def hash_func(self):
        # Stand-in __hash__ helper for schema objects; not itself a test.
        return hash(str(self))
class TestMessageSerializer(unittest.TestCase):
    """Encode/decode round-trip tests for MessageSerializer with a mock registry."""

    def setUp(self):
        # Hermetic setup: the mock registry avoids any network dependency.
        self.client = MockSchemaRegistryClient()
        self.ms = MessageSerializer(self.client)

    def assertMessageIsSame(self, message, expected, schema_id):
        """Check the Confluent wire format header, then decode and compare."""
        self.assertTrue(message)
        self.assertTrue(len(message) > 5)
        magic_byte, embedded_id = struct.unpack('>bI', message[0:5])
        self.assertEqual(magic_byte, 0)
        self.assertEqual(embedded_id, schema_id)
        round_tripped = self.ms.decode_message(message)
        self.assertTrue(round_tripped)
        self.assertEqual(round_tripped, expected)

    def test_encode_with_schema_id(self):
        advanced_schema = avro.loads(data_gen.ADVANCED_SCHEMA)
        basic_schema = avro.loads(data_gen.BASIC_SCHEMA)
        basic_id = self.client.register('test', basic_schema)
        for item in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(basic_id, item)
            self.assertMessageIsSame(encoded, item, basic_id)
        advanced_id = self.client.register('test_adv', advanced_schema)
        self.assertNotEqual(advanced_id, basic_id)
        for item in data_gen.ADVANCED_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(advanced_id, item)
            self.assertMessageIsSame(encoded, item, advanced_id)

    def test_encode_record_with_schema(self):
        basic_schema = avro.loads(data_gen.BASIC_SCHEMA)
        registered_id = self.client.register('test-value', basic_schema)
        for item in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema('test', basic_schema, item)
            self.assertMessageIsSame(encoded, item, registered_id)

    def test_decode_none(self):
        """null/None messages should decode to None"""
        self.assertIsNone(self.ms.decode_message(None))

    def hash_func(self):
        # Stand-in __hash__ helper for schema objects; not itself a test.
        return hash(str(self))
class AvroConsumer(Consumer):
    """
    Kafka Consumer client which does avro schema decoding of messages.
    Handles message deserialization.

    Constructor takes below parameters

    @:param: config: dict object with config parameters containing url for
        schema registry (schema.registry.url).  Also honors the custom key
        ``enable.key.decoding`` (default True): when False, message keys are
        passed through without avro decoding.
    """
    def __init__(self, config, schema_registry=None):
        # Custom switch: allows consuming topics whose keys are not avro-framed.
        self._enable_key_decoding = config.pop("enable.key.decoding", True)
        # pop() removes the registry URL so the remaining dict is a plain
        # Consumer config (mutates the caller's dict).
        schema_registry_url = config.pop("schema.registry.url", None)
        if schema_registry is None:
            if schema_registry_url is None:
                raise ValueError("Missing parameter: schema.registry.url")
            schema_registry = CachedSchemaRegistryClient(
                url=schema_registry_url)
        elif schema_registry_url is not None:
            # Explicit client plus URL is ambiguous -- refuse.
            raise ValueError(
                "Cannot pass schema_registry along with schema.registry.url config"
            )
        super(AvroConsumer, self).__init__(config)
        self._serializer = MessageSerializer(schema_registry)

    def poll(self, timeout=None):
        """
        This is an overriden method from confluent_kafka.Consumer class. This
        handles message deserialization using avro schema

        :param float timeout: poll timeout in seconds (default: indefinite)
        :returns: message object with deserialized key and value as dict
            objects (key left raw when key decoding is disabled)
        """
        if timeout is None:
            timeout = -1  # librdkafka convention: -1 blocks indefinitely
        message = super(AvroConsumer, self).poll(timeout)
        if message is None:
            return None
        # Both key and value empty: nothing to decode.
        if not message.value() and not message.key():
            return message
        if not message.error():
            if message.value() is not None:
                decoded_value = self._serializer.decode_message(
                    message.value())
                message.set_value(decoded_value)
            # Keys are only decoded when the feature flag is on.
            if self._enable_key_decoding and message.key() is not None:
                decoded_key = self._serializer.decode_message(message.key())
                message.set_key(decoded_key)
        return message
class SimpleAvroDeserializer(Deserializer):
    """Deserializer that decodes Confluent-framed avro payloads via a schema registry."""

    def __init__(self, schema_registry_url):
        # Build a registry-backed serializer; no reader schemas are pinned.
        registry = CachedSchemaRegistryClient(
            {'url': schema_registry_url})
        self._serializer = MessageSerializer(registry, None, None)

    def __call__(self, value, ctx=None):
        """Decode ``value``; ``ctx.field == 'key'`` selects key decoding. None passes through."""
        if value is None:
            return None
        decoding_key = ctx is not None and ctx.field == 'key'
        return self._serializer.decode_message(value, is_key=decoding_key)
def consume(config, topic, handler):
    """
    Starts a consumer and calls the given handler for each consumed message.
    Assumes that keys are serialized as strings and values are serialized
    as Avro objects with their schemas stored in a Confluent Schema Registry.

    :param dict config: mixed consumer settings and ``schema.registry.*`` keys
    :param str topic: topic to subscribe to
    :param callable handler: invoked as ``handler(key, value)`` per message;
        an exception from the handler stops consumption
    """
    # Split the plain Kafka consumer settings from the schema-registry ones.
    c_conf = {}
    for key, value in config.items():
        if not key.startswith("schema.registry"):
            if value is not None:  # fixed anti-idiom: was `not value is None`
                c_conf[key] = value.strip()

    if "auto.offset.reset" in c_conf:
        print("offset provided")
    else:
        c_conf['auto.offset.reset'] = 'earliest'

    if "group.id" in c_conf:
        print("group id provided")
    else:
        c_conf['group.id'] = 'sme_test'

    c = Consumer(c_conf)
    c.subscribe([topic])

    sr_conf = {
        key.replace("schema.registry.", ""): value.strip()
        for key, value in config.items()
        if key.startswith("schema.registry")
    }
    sr = CachedSchemaRegistryClient(sr_conf)
    ser = MessageSerializer(sr)

    while True:
        try:
            msg = c.poll(10)
            if msg is None:
                print('No Messages')
                continue
            if msg.error():
                log.error("Consumer error: {}".format(msg.error()))
                continue
            # Fixed: a message with a null key used to raise AttributeError on
            # .decode(), which the broad except turned into a loop abort.
            raw_key = msg.key()
            key = raw_key.decode('utf-8') if raw_key is not None else None
            value = ser.decode_message(msg.value(), is_key=False)
        except Exception as e:
            log.error("Message consumption failed: {}".format(e))
            break
        try:
            handler(key, value)
        except Exception as e:
            log.error("Message handler failed: {}".format(e))
            break
    c.close()
class TestMessageSerializer(unittest.TestCase):
    """Round-trip tests for MessageSerializer using a mock schema registry."""

    def setUp(self):
        # Avro schema objects are not hashable by default; patch in a
        # string-based hash so the serializer can cache by schema.
        schema.RecordSchema.__hash__ = self.hash_func
        schema.PrimitiveSchema.__hash__ = self.hash_func
        self.client = MockSchemaRegistryClient()
        self.ms = MessageSerializer(self.client)

    def assertMessageIsSame(self, message, expected, schema_id):
        """Check the wire header (magic byte 0 + big-endian schema id), then decode."""
        self.assertTrue(message)
        self.assertTrue(len(message) > 5)
        magic_byte, embedded_id = struct.unpack('>bI', message[0:5])
        self.assertEqual(magic_byte, 0)
        self.assertEqual(embedded_id, schema_id)
        round_tripped = self.ms.decode_message(message)
        self.assertTrue(round_tripped)
        self.assertEqual(round_tripped, expected)

    def test_encode_with_schema_id(self):
        advanced_schema = util.parse_schema_from_string(data_gen.ADVANCED_SCHEMA)
        basic_schema = util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
        basic_id = self.client.register('test', basic_schema)
        for item in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(basic_id, item)
            self.assertMessageIsSame(encoded, item, basic_id)
        advanced_id = self.client.register('test_adv', advanced_schema)
        self.assertNotEqual(advanced_id, basic_id)
        for item in data_gen.ADVANCED_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(advanced_id, item)
            self.assertMessageIsSame(encoded, item, advanced_id)

    def test_encode_record_with_schema(self):
        basic_schema = util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
        registered_id = self.client.register('test-value', basic_schema)
        for item in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema('test', basic_schema, item)
            self.assertMessageIsSame(encoded, item, registered_id)

    def hash_func(self):
        # Stand-in __hash__ for the schema classes patched in setUp.
        return hash(str(self))
class AvroConsumer(Consumer):
    """
    Kafka Consumer client which does avro schema decoding of messages.
    Handles message deserialization.

    Constructor takes below parameters

    :param dict config: config parameters containing url for schema registry
        (``schema.registry.url``) plus the standard Kafka consumer settings.
    :raises ValueError: if ``schema.registry.url`` is missing
    """
    def __init__(self, config):
        # `key in dict` replaces the old `key not in config.keys()` anti-idiom.
        if 'schema.registry.url' not in config:
            raise ValueError("Missing parameter: schema.registry.url")
        # Fixed misspelled local (`schem_registry_url`); pop() removes the key
        # so the remaining dict is a valid plain Consumer configuration.
        schema_registry_url = config.pop("schema.registry.url")
        super(AvroConsumer, self).__init__(config)
        self._serializer = MessageSerializer(CachedSchemaRegistryClient(url=schema_registry_url))

    def poll(self, timeout=None):
        """
        This is an overriden method from confluent_kafka.Consumer class. This
        handles message deserialization using avro schema

        :param float timeout: poll timeout in seconds.  Now optional
            (backward-compatible generalization -- previously required);
            default None blocks indefinitely.
        :returns: message object with deserialized key and value as dict
            objects, or None/falsy result from the base poll
        """
        if timeout is None:
            timeout = -1  # librdkafka convention: -1 blocks indefinitely
        message = super(AvroConsumer, self).poll(timeout)
        if not message:
            return message
        if not message.error():
            if message.value() is not None:
                decoded_value = self._serializer.decode_message(message.value())
                message.set_value(decoded_value)
            if message.key() is not None:
                decoded_key = self._serializer.decode_message(message.key())
                message.set_key(decoded_key)
        return message
def printAndProduceMessages(self):
    # Consume avro records in batches, flatten each record's embedded array
    # into per-record key/value messages, hand them to produceMessages(),
    # then commit offsets manually (auto-commit is disabled below).
    consumer = AvroConsumer({
        'bootstrap.servers': BOOTSTRAP_SERVERS,
        'group.id': GROUP_ID,
        'auto.offset.reset': AUTO_OFFSET_RESET,
        'enable.auto.commit': False,  # offsets committed only after a successful produce
        'schema.registry.url': SCHEMA_REGISTRY_URL
    })
    schema_registry = CachedSchemaRegistryClient(
        os.environ.get('SCHEMA_REGISTRY', SCHEMA_REGISTRY_URL))
    avro_serde = AvroSerde(schema_registry)
    consumer.subscribe([INPUT_TOPIC_NAME])
    while True:
        try:
            consumedMessages = consumer.consume(
                num_messages=CONSUMER_BATCH_SIZE, timeout=1)
        except Exception as e:
            logging.error("Message pool failed: {}".format(e))
            break
        messages = []
        for consumedMessage in consumedMessages:
            # assumes the avro value decodes to a dict with an "id" field and
            # an ARRAY_NAME field holding dicts with their own "id" -- TODO confirm
            consumedMessageValue = avro_serde.decode_message(
                consumedMessage.value())
            message = {}
            message["key"] = {}
            message["value"] = {}
            # Top-level attributes (except the array) become prefixed value fields.
            for attr, value in consumedMessageValue.items():
                if attr != ARRAY_NAME:
                    message["value"][DOCUMENT_FIELD_PREFIX + attr] = value
            # Each array item overlays its fields; the key id combines the
            # parent id and the (last) item id.
            for arrayItem in consumedMessageValue[ARRAY_NAME]:
                message["key"]["id"] = consumedMessageValue["id"] + \
                    "-" + arrayItem["id"]
                for attr, value in arrayItem.items():
                    message["value"][attr] = value
            messages.append(message)
        self.produceMessages(messages)
        consumer.commit()
    consumer.close()
class _AvroIORegistry:
    """Private implementation class for Avro IO using the schema registry."""

    def __init__(self, schema_registry_url):
        """
        Connect to the registry and cache the schema for ``config.SCHEMA_ID``.

        :param str schema_registry_url: base URL of the Confluent schema registry
        :raises ValueError: if the client cannot be created or the schema id
            is not found in the registry
        """
        log.info(
            f"Using registry with schema_url/id {schema_registry_url}/{config.SCHEMA_ID}"
        )
        try:
            self.client = CachedSchemaRegistryClient(url=schema_registry_url)
            self.schema = self.client.get_by_id(config.SCHEMA_ID)
            self.serializer = MessageSerializer(self.client)
        except Exception as e:
            # Fixed: was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt; chain the cause so the root error is not lost.
            raise ValueError("Client id or schema id not found") from e

    def decode(self, bytes):
        """Decode a registry-framed avro payload into a record."""
        # NOTE: parameter name shadows the builtin `bytes`; kept for API compatibility.
        return self.serializer.decode_message(bytes)

    def encode(self, record):
        """Encode ``record`` with the configured schema id (magic byte + id + avro body)."""
        return self.serializer.encode_record_with_schema_id(
            config.SCHEMA_ID, record)
class TestMessageSerializer(unittest.TestCase):
    """MessageSerializer round-trip and schema-evolution tests with a mock registry."""

    def setUp(self):
        # Hermetic setup: the mock registry avoids any network dependency.
        self.client = MockSchemaRegistryClient()
        self.ms = MessageSerializer(self.client)

    def assertMessageIsSame(self, message, expected, schema_id):
        """Check the wire header (magic byte 0 + big-endian schema id), then decode."""
        self.assertTrue(message)
        self.assertTrue(len(message) > 5)
        magic_byte, embedded_id = struct.unpack('>bI', message[0:5])
        self.assertEqual(magic_byte, 0)
        self.assertEqual(embedded_id, schema_id)
        round_tripped = self.ms.decode_message(message)
        self.assertTrue(round_tripped)
        self.assertEqual(round_tripped, expected)

    def test_encode_with_schema_id(self):
        advanced_schema = avro.loads(data_gen.ADVANCED_SCHEMA)
        basic_schema = avro.loads(data_gen.BASIC_SCHEMA)
        basic_id = self.client.register('test', basic_schema)
        for item in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(basic_id, item)
            self.assertMessageIsSame(encoded, item, basic_id)
        advanced_id = self.client.register('test_adv', advanced_schema)
        self.assertNotEqual(advanced_id, basic_id)
        for item in data_gen.ADVANCED_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(advanced_id, item)
            self.assertMessageIsSame(encoded, item, advanced_id)

    def test_encode_record_with_schema(self):
        basic_schema = avro.loads(data_gen.BASIC_SCHEMA)
        registered_id = self.client.register('test-value', basic_schema)
        for item in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema('test', basic_schema, item)
            self.assertMessageIsSame(encoded, item, registered_id)

    def test_decode_none(self):
        """null/None messages should decode to None"""
        self.assertIsNone(self.ms.decode_message(None))

    def test_decode_with_schema(self):
        """Reader schemas: v1 and v2 readers must both handle v1 and v2 payloads."""
        topic = 'test_specific'
        v1_schema = avro.loads(
            data_gen.load_schema_file('evolution_schema_v1.avsc'))
        v2_schema = avro.loads(
            data_gen.load_schema_file('evolution_schema_v2.avsc'))
        reader_v1 = SpecificRecordMessageDeserializer(self.client,
                                                      value_schema=v1_schema)
        reader_v2 = SpecificRecordMessageDeserializer(self.client,
                                                      value_schema=v2_schema)
        v1_record = {"name": "suzyq", "age": 27}
        v2_record = dict(v1_record)
        v2_record['gender'] = 'NONE'  # v2 adds a defaulted field
        encoded_v1 = self.ms.encode_record_with_schema(topic, v1_schema, v1_record)
        self.assertDictEqual(v1_record, reader_v1.decode_message(encoded_v1, is_key=False))
        self.assertDictEqual(v2_record, reader_v2.decode_message(encoded_v1, is_key=False))
        encoded_v2 = self.ms.encode_record_with_schema(topic, v2_schema, v2_record)
        self.assertDictEqual(v2_record, reader_v2.decode_message(encoded_v2, is_key=False))
        self.assertDictEqual(v1_record, reader_v1.decode_message(encoded_v2, is_key=False))

    def hash_func(self):
        # Stand-in __hash__ helper for schema objects; not itself a test.
        return hash(str(self))
class TestMessageSerializer(unittest.TestCase):
    """MessageSerializer round-trip and encoder-cache tests with a mock registry."""

    def setUp(self):
        # Hermetic setup: the mock registry avoids any network dependency.
        self.client = MockSchemaRegistryClient()
        self.ms = MessageSerializer(self.client)

    def assertMessageIsSame(self, message, expected, schema_id):
        """Check the wire header (magic byte 0 + big-endian schema id), then decode."""
        self.assertTrue(message)
        self.assertTrue(len(message) > 5)
        magic_byte, embedded_id = struct.unpack('>bI', message[0:5])
        self.assertEqual(magic_byte, 0)
        self.assertEqual(embedded_id, schema_id)
        round_tripped = self.ms.decode_message(message)
        self.assertTrue(round_tripped)
        self.assertEqual(round_tripped, expected)

    def test_encode_with_schema_id(self):
        advanced_schema = avro.loads(data_gen.ADVANCED_SCHEMA)
        basic_schema = avro.loads(data_gen.BASIC_SCHEMA)
        basic_id = self.client.register('test', basic_schema)
        for item in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(basic_id, item)
            self.assertMessageIsSame(encoded, item, basic_id)
        advanced_id = self.client.register('test_adv', advanced_schema)
        self.assertNotEqual(advanced_id, basic_id)
        for item in data_gen.ADVANCED_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(advanced_id, item)
            self.assertMessageIsSame(encoded, item, advanced_id)

    def test_encode_record_with_schema(self):
        basic_schema = avro.loads(data_gen.BASIC_SCHEMA)
        registered_id = self.client.register('test-value', basic_schema)
        for item in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema('test', basic_schema, item)
            self.assertMessageIsSame(encoded, item, registered_id)

    @skipIf(version_info < (3, ),
            'unittest.mock.patch not available in Python 2')
    def test_encode_record_with_schema_sets_writers_cache_once(self):
        """The encoder function must be built once and reused across records."""
        topic = 'test'
        basic_schema = avro.loads(data_gen.BASIC_SCHEMA)
        self.client.register('test-value', basic_schema)
        with patch.object(self.ms, "_get_encoder_func") as encoder_func_mock:
            for item in data_gen.BASIC_ITEMS:
                self.ms.encode_record_with_schema(topic, basic_schema, item)
        encoder_func_mock.assert_called_once_with(basic_schema)

    def test_decode_none(self):
        """null/None messages should decode to None"""
        self.assertIsNone(self.ms.decode_message(None))

    def hash_func(self):
        # Stand-in __hash__ helper for schema objects; not itself a test.
        return hash(str(self))
class TestMessageSerializer(unittest.TestCase):
    """MessageSerializer round-trip tests plus subject-name-strategy checks."""

    def setUp(self):
        # Hermetic setup: the mock registry avoids any network dependency.
        self.client = MockSchemaRegistryClient()
        self.ms = MessageSerializer(self.client)

    def assertMessageIsSame(self, message, expected, schema_id):
        """Check the wire header (magic byte 0 + big-endian schema id), then decode."""
        self.assertTrue(message)
        self.assertTrue(len(message) > 5)
        magic_byte, embedded_id = struct.unpack('>bI', message[0:5])
        self.assertEqual(magic_byte, 0)
        self.assertEqual(embedded_id, schema_id)
        round_tripped = self.ms.decode_message(message)
        self.assertTrue(round_tripped)
        self.assertEqual(round_tripped, expected)

    def test_encode_with_schema_id(self):
        advanced_schema = avro.loads(data_gen.ADVANCED_SCHEMA)
        basic_schema = avro.loads(data_gen.BASIC_SCHEMA)
        basic_id = self.client.register('test', basic_schema)
        for item in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(basic_id, item)
            self.assertMessageIsSame(encoded, item, basic_id)
        advanced_id = self.client.register('test_adv', advanced_schema)
        self.assertNotEqual(advanced_id, basic_id)
        for item in data_gen.ADVANCED_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(advanced_id, item)
            self.assertMessageIsSame(encoded, item, advanced_id)

    def test_encode_record_with_schema(self):
        basic_schema = avro.loads(data_gen.BASIC_SCHEMA)
        registered_id = self.client.register('test-value', basic_schema)
        for item in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema('test', basic_schema, item)
            self.assertMessageIsSame(encoded, item, registered_id)

    def test_decode_none(self):
        """null/None messages should decode to None"""
        self.assertIsNone(self.ms.decode_message(None))

    def _check_subject(self, strategy, is_key, expected):
        """Install `strategy` on the registry client and verify the derived subject."""
        basic_schema = avro.loads(data_gen.BASIC_SCHEMA)
        if is_key:
            self.ms.registry_client.key_subject_name_strategy_func = strategy
        else:
            self.ms.registry_client.value_subject_name_strategy_func = strategy
        actual = self.ms._get_subject(topic="topic", schema=basic_schema, is_key=is_key)
        self.assertEqual(expected, actual)

    def test__get_subject_for_key_with_topic_name_strategy(self):
        self._check_subject(topic_name_strategy, True, "topic-key")

    def test__get_subject_for_key_with_record_name_strategy(self):
        self._check_subject(record_name_strategy, True, "python.test.basic.basic")

    def test__get_subject_for_key_with_topic_record_name_strategy(self):
        self._check_subject(topic_record_name_strategy, True,
                            "topic-python.test.basic.basic")

    def test__get_subject_for_value_with_topic_name_strategy(self):
        self._check_subject(topic_name_strategy, False, "topic-value")

    def test__get_subject_for_value_with_record_name_strategy(self):
        self._check_subject(record_name_strategy, False, "python.test.basic.basic")

    def test__get_subject_for_value_with_topic_record_name_strategy(self):
        self._check_subject(topic_record_name_strategy, False,
                            "topic-python.test.basic.basic")

    def hash_func(self):
        # Stand-in __hash__ helper for schema objects; not itself a test.
        return hash(str(self))
class AvroConsumer(Consumer):
    """
    Kafka Consumer client which does avro schema decoding of messages.
    Handles message deserialization.

    Constructor takes below parameters

    :param dict config: Config parameters containing url for schema registry
        (``schema.registry.url``) and the standard Kafka client configuration
        (``bootstrap.servers`` et.al)
    :param schema reader_key_schema: a reader schema for the message key
    :param schema reader_value_schema: a reader schema for the message value
    :raises ValueError: For invalid configurations
    """
    def __init__(self, config, schema_registry=None,
                 reader_key_schema=None, reader_value_schema=None):
        # Split registry settings (prefix stripped) from the plain consumer config.
        sr_conf = {key.replace("schema.registry.", ""): value
                   for key, value in config.items() if key.startswith("schema.registry")}
        # SASL_INHERIT: reuse the consumer's SASL credentials for the registry.
        if sr_conf.get("basic.auth.credentials.source") == 'SASL_INHERIT':
            sr_conf['sasl.mechanisms'] = config.get('sasl.mechanisms', '')
            sr_conf['sasl.username'] = config.get('sasl.username', '')
            sr_conf['sasl.password'] = config.get('sasl.password', '')
        ap_conf = {key: value
                   for key, value in config.items() if not key.startswith("schema.registry")}
        if schema_registry is None:
            schema_registry = CachedSchemaRegistryClient(sr_conf)
        elif sr_conf.get("url", None) is not None:
            # Explicit client plus registry URL is ambiguous -- refuse.
            raise ValueError("Cannot pass schema_registry along with schema.registry.url config")
        super(AvroConsumer, self).__init__(ap_conf)
        self._serializer = MessageSerializer(schema_registry, reader_key_schema, reader_value_schema)

    def poll(self, timeout=None):
        """
        This is an overriden method from confluent_kafka.Consumer class. This
        handles message deserialization using avro schema

        :param float timeout: Poll timeout in seconds (default: indefinite)
        :returns: message object with deserialized key and value as dict objects
        :rtype: Message
        :raises SerializerError: wrapped with topic/partition/offset context
        """
        if timeout is None:
            timeout = -1  # librdkafka convention: -1 blocks indefinitely
        message = super(AvroConsumer, self).poll(timeout)
        if message is None:
            return None
        if not message.error():
            try:
                if message.value() is not None:
                    decoded_value = self._serializer.decode_message(message.value(), is_key=False)
                    message.set_value(decoded_value)
                if message.key() is not None:
                    decoded_key = self._serializer.decode_message(message.key(), is_key=True)
                    message.set_key(decoded_key)
            except SerializerError as e:
                # Re-raise with message coordinates so failures are traceable.
                raise SerializerError("Message deserialization failed for message at {} [{}] offset {}: {}".format(
                    message.topic(), message.partition(), message.offset(), e))
        return message
class AvroConsumer(Consumer):
    """
    Kafka Consumer client which does avro schema decoding of messages.
    Handles message deserialization.

    Constructor takes below parameters

    :param dict config: Config parameters containing url for schema registry
        (``schema.registry.url``) and the standard Kafka client configuration
        (``bootstrap.servers`` et.al)
    :param schema reader_key_schema: a reader schema for the message key
    :param schema reader_value_schema: a reader schema for the message value
    :raises ValueError: For invalid configurations
    """
    def __init__(self, config, schema_registry=None, reader_key_schema=None,
                 reader_value_schema=None, **kwargs):
        # Split registry settings (prefix stripped) from the plain consumer config.
        sr_conf = {
            key.replace("schema.registry.", ""): value
            for key, value in config.items()
            if key.startswith("schema.registry")
        }
        # SASL_INHERIT: reuse the consumer's SASL credentials for the registry.
        if sr_conf.get("basic.auth.credentials.source") == 'SASL_INHERIT':
            # Fallback to plural 'mechanisms' for backward compatibility
            sr_conf['sasl.mechanism'] = config.get(
                'sasl.mechanism', config.get('sasl.mechanisms', ''))
            sr_conf['sasl.username'] = config.get('sasl.username', '')
            sr_conf['sasl.password'] = config.get('sasl.password', '')
        ap_conf = {
            key: value
            for key, value in config.items()
            if not key.startswith("schema.registry")
        }
        if schema_registry is None:
            schema_registry = CachedSchemaRegistryClient(sr_conf)
        elif sr_conf.get("url", None) is not None:
            # Explicit client plus registry URL is ambiguous -- refuse.
            raise ValueError(
                "Cannot pass schema_registry along with schema.registry.url config"
            )
        # Extra kwargs are forwarded unchanged to the underlying Consumer.
        super(AvroConsumer, self).__init__(ap_conf, **kwargs)
        self._serializer = MessageSerializer(schema_registry, reader_key_schema,
                                             reader_value_schema)

    def poll(self, timeout=None):
        """
        This is an overriden method from confluent_kafka.Consumer class. This
        handles message deserialization using avro schema

        :param float timeout: Poll timeout in seconds (default: indefinite)
        :returns: message object with deserialized key and value as dict objects
        :rtype: Message
        :raises SerializerError: wrapped with topic/partition/offset context
        """
        if timeout is None:
            timeout = -1  # librdkafka convention: -1 blocks indefinitely
        message = super(AvroConsumer, self).poll(timeout)
        if message is None:
            return None
        if not message.error():
            try:
                if message.value() is not None:
                    decoded_value = self._serializer.decode_message(
                        message.value(), is_key=False)
                    message.set_value(decoded_value)
                if message.key() is not None:
                    decoded_key = self._serializer.decode_message(
                        message.key(), is_key=True)
                    message.set_key(decoded_key)
            except SerializerError as e:
                # Re-raise with message coordinates so failures are traceable.
                raise SerializerError(
                    "Message deserialization failed for message at {} [{}] offset {}: {}"
                    .format(message.topic(), message.partition(),
                            message.offset(), e))
        return message
class KafkaDriver(object):
    '''
    Kafka driver class
    Can send json and avro mesages

    :param str topic: default topic this driver operates on
    :param str server: Kafka broker hostname (port 9092 assumed)
    :param str schema_registry: schema-registry hostname (port 8081 assumed)
    '''
    # Last observed message offset (class-level default).
    last_offset = 0

    def __init__(self, topic, server='kafka1', schema_registry='kafka-schema-registry'):
        self.server = server
        self.topic = topic
        self.schema_registry = schema_registry
        schema_registryObj = CachedSchemaRegistryClient(url='http://' + self.schema_registry + ':8081')
        self.serializer = MessageSerializer(schema_registryObj)

    def create_topic(self, topic):
        '''
        Create topic
        '''
        client = kafka.KafkaClient(hosts=self.server + ':9092')
        res = client.ensure_topic_exists(topic)
        return res

    def delete_topic(self, topic=None, server='zoo1'):
        '''
        Delete topic via the kafka-topics.sh CLI (returns the shell exit code).
        '''
        if topic is None:
            topic = self.topic
        cmd = '/nis-test/bin/kafka_2.12-1.1.0/bin/kafka-topics.sh --delete --topic ' + topic + ' --zookeeper ' + server
        logging.debug('[KafkaDriver][delete_topic] cmd: ' + str(cmd))
        ret = os.system(cmd)
        logging.debug('[KafkaDriver][delete_topic] ret: ' + str(ret))
        #assert ret == 0
        return ret

    @pytest.allure.step('kafka_list')
    def list(self):
        '''
        Kafka list topics
        List exist topics; asserts that self.topic is among them.
        '''
        consumer = KafkaConsumer(bootstrap_servers=self.server + ':9092',
                                 auto_offset_reset='earliest',
                                 consumer_timeout_ms=1000)
        log.debug("[KafkaDriver][list] list start")
        list = consumer.topics()
        for topic in list:
            log.debug("[KafkaDriver][list] topic: " + str(topic))
        log.debug("[KafkaDriver][list] self.topic: " + str(self.topic))
        assert self.topic in list

    @pytest.allure.step('get_last_offset')
    def get_last_offset(self):
        '''
        Kafka get last offset
        Get last message offset of partition 0 of self.topic.
        '''
        log.debug("[KafkaDriver][get_last_offset] start")
        consumer = KafkaConsumer(bootstrap_servers=self.server + ':9092',
                                 consumer_timeout_ms=1000)
        log.debug("[KafkaDriver][get_last_offset] TopicPartition")
        tp = TopicPartition(self.topic, 0)
        log.debug("[KafkaDriver][get_last_offset] assign")
        consumer.assign([tp])
        log.debug("[KafkaDriver][get_last_offset] seek_to_end")
        consumer.seek_to_end(tp)
        log.debug("[KafkaDriver][get_last_offset] position")
        last_offset = consumer.position(tp)
        log.debug("[KafkaDriver][get_last_offset] topic: " + str(self.topic))
        log.debug("[KafkaDriver][get_last_offset] last_offset: " + str(last_offset))
        consumer.close()
        return last_offset

    @pytest.allure.step('read_from_offset')
    def read_from_offset(self, offset=0, lang='json', schema=None):
        '''
        Kafka read message
        Read json and avro messages from consumer starting at `offset`;
        returns the last message read (decoded for avro), or None.
        '''
        log.debug("[KafkaDriver][read_from_offset] lang: " + str(lang))
        log.debug("[KafkaDriver][read_from_offset] offset: " + str(offset))

        def outputJSON(obj):
            '''
            Default JSON serializer.
            '''
            if isinstance(obj, datetime.datetime):
                return int(obj.strftime("%s%f")[:-3])
            return obj

        ret = None
        log.debug("[KafkaDriver][read_from_offset] read start: " + str(self.server))
        consumer = KafkaConsumer(bootstrap_servers=self.server + ':9092',
                                 auto_offset_reset='earliest',
                                 consumer_timeout_ms=1000)
        partition = TopicPartition(self.topic, 0)
        consumer.assign([partition])
        consumer.seek_to_end(partition)
        consumer.seek(partition, offset)
        # Hoisted out of the loop: the registry client/serializer are loop
        # invariants and were previously rebuilt for every message.
        if lang == 'avro':
            schema_registry = CachedSchemaRegistryClient(url='http://' + self.schema_registry + ':8081')
            self._serializer = MessageSerializer(schema_registry)
        for msg in consumer:
            if lang == 'avro':
                message = self._serializer.decode_message(msg.value)
                message = json.dumps(message, indent=4, sort_keys=True, default=outputJSON)
                ret = message
            else:
                message = msg.value
                ret = msg.value
            log.debug("[KafkaDriver][read_from_offset] msg: " + str(message) + " msg.offset: " + str(msg.offset))
        consumer.close()
        log.debug("[KafkaDriver][read_from_offset] read end")
        return ret

    @pytest.allure.step('kafka_producer')
    def send(self, topic=None, msg="{'foo':'bar'}", lang='json', schema=None):
        '''
        Kafka send message
        Send json and avro messages to `topic` (defaults to self.topic).
        '''
        log.debug("[KafkaDriver][send] producer start: " + str(self.server))
        log.debug("[KafkaDriver][send] send message: " + str(msg))
        if topic is None:
            topic = self.topic
        log.debug("[KafkaDriver][send] topic: " + str(topic))
        if lang == 'json':
            producer = KafkaProducer(bootstrap_servers=self.server + ':9092')
            log.debug("[KafkaDriver][send] json msg")
            # Fixed: was `self.topic`, silently ignoring an explicit `topic` argument.
            res = producer.send(topic, key=None, value=msg)
            log.debug("[KafkaDriver][send] produce result: " + str(res.get()))
            time.sleep(1)
            # Fixed: was `producer.close` without parentheses -- the producer
            # was never actually closed.
            producer.close()
            log.debug("[KafkaDriver][send] end")
        elif lang == 'avro':
            log.debug("[KafkaDriver][send] avro msg")
            log.debug("[KafkaDriver][send] schema: " + str(schema))
            value_schema = avro.loads(schema)
            avroProducer = AvroProducer({
                'bootstrap.servers': self.server,
                'schema.registry.url': 'http://' + self.schema_registry + ':8081'
            }, default_value_schema=value_schema)
            # Fixed: was `self.topic`, silently ignoring an explicit `topic` argument.
            res = avroProducer.produce(topic=topic, value=msg)
            log.debug("[KafkaDriver][send] produce result: " + str(res))
            time.sleep(1)
            avroProducer.flush()
            log.debug("[KafkaDriver][send] end")
# Simple avro-decoding console consumer: subscribe, poll forever, print decoded values.
print("Start: avro-python-consumer")

consumer_config = {
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'avro-python-consumer_5',
    'auto.offset.reset': 'earliest'
}
avro_consumer = Consumer(consumer_config)
avro_consumer.subscribe(['avro-python-producer-topic'])

# connect to the schema_registry
registry_client = CachedSchemaRegistryClient("http://localhost:8081")
# define avro serde - to be used to decode msg value against the avro schema
value_serde = MessageSerializer(registry_client)

while True:
    record = avro_consumer.poll()
    if record is None:
        continue
    if record.error():
        print("Consumer error: {}".format(record.error()))
        continue
    decoded = value_serde.decode_message(record.value())
    print('Received message: {}'.format(decoded))

# NOTE: only reached if the loop above ever exits.
avro_consumer.close()
print("End: avro-python-consumer")