def consume(ctx, group, client, topics):
    """Consume Avro-serialized messages and print them until interrupted.

    An ``avro.AvroConsumer`` is subscribed to ``topics`` and polled in a
    loop. Every successfully decoded message is printed. The loop stops on
    ``KeyboardInterrupt`` or on the first ``SerializerError``; in every case
    the consumer is closed before returning.

    Args:
        ctx: Context object; ``ctx.parent`` is handed to ``get_broker_url``
            and ``get_registry_url`` to resolve the connection settings.
        group: Value used for the ``group.id`` setting.
        client: Value used for the ``client.id`` setting.
        topics: Iterable of topic names; each entry is converted with ``str``.
    """
    topics = [str(t) for t in topics]
    print('Starting consumer\n\tGroup: {group}\n\tClient: {client}\n\t'
          'Topic(s): {topics}'.format(group=group, client=client,
                                      topics=', '.join(topics)))

    settings = {
        'bootstrap.servers': get_broker_url(ctx.parent),
        'schema.registry.url': get_registry_url(ctx.parent),
        # Identify the consumer group
        'group.id': group,
        # Identify the client within the consumer group
        'client.id': client,
        # Automatically commit the current offset for this consumer
        'enable.auto.commit': True,
        'session.timeout.ms': 6000,
        'default.topic.config': {
            # "smallest" means that old messages aren't ignored if an offset
            # hasn't been committed yet. Otherwise the default is "latest",
            # which has the consumer only pick up new messages.
            'auto.offset.reset': 'smallest'
        }
    }
    c = avro.AvroConsumer(settings)

    try:
        # Subscribing inside the try guarantees close() runs even if the
        # subscription itself fails.
        c.subscribe(topics)
        while True:
            try:
                msg = c.poll(0.1)
            except SerializerError as e:
                # `msg` must not be referenced here: when poll() raises, the
                # assignment never happened, so the name is either unbound
                # (first iteration) or still holds the previous message.
                print("Message deserialization failed: {}".format(e))
                break

            if msg is None:
                continue
            elif not msg.error():
                print('Received message (topic={topic}, key={key}):'
                      '\n\t{message}'.format(topic=msg.topic(),
                                             key=msg.key(),
                                             message=msg.value()))
            elif msg.error().code() == KafkaError._PARTITION_EOF:
                print('End of partition reached {0}/{1}'.format(
                    msg.topic(), msg.partition()))
            else:
                print('Error occured: {0}'.format(msg.error().str()))
    except KeyboardInterrupt:
        pass
    finally:
        # Close the consumer so that the consumer group can rebalance
        c.close()
def connect(self):
    """Create ``self.cons`` for the configured source type and subscribe it.

    ``self.type`` selects the behavior:

    * ``"None"``    - nothing is created.
    * ``"Kafka"``   - a plain ``Consumer`` is built from ``self.brokers`` and
      ``self.group``.
    * ``"CFKafka"`` - an ``avro.AvroConsumer`` is built, additionally using
      ``self.schema_reg`` as the schema registry URL.

    For the two Kafka variants the new consumer is subscribed to
    ``self.topic``. Any other ``self.type`` value leaves the object untouched.
    """
    log = logging.getLogger()
    kind = self.type

    if kind == "None":
        return

    if kind == "Kafka":
        log.debug("brokers: {}, group: {}, topic: {}".format(
            self.brokers, self.group, self.topic))
        settings = {
            'bootstrap.servers': self.brokers,
            'group.id': self.group,
            'default.topic.config': {
                'auto.offset.reset': 'smallest',
            },
        }
        self.cons = Consumer(settings)
        self.cons.subscribe([self.topic])
        return

    if kind == "CFKafka":
        log.debug(
            "brokers: {}, schema_reg: {}, group: {}, topic: {}".format(
                self.brokers, self.schema_reg, self.group, self.topic))
        settings = {
            'bootstrap.servers': self.brokers,
            'schema.registry.url': self.schema_reg,
            'group.id': self.group,
            'default.topic.config': {
                'auto.offset.reset': 'smallest',
            },
        }
        self.cons = avro.AvroConsumer(settings)
        self.cons.subscribe([self.topic])
def verify_avro():
    """Produce Avro messages and read each one back with an Avro consumer.

    For every key/value/schema combination a message is produced to a fresh
    random topic. A new ``AvroConsumer`` is then subscribed to that topic and
    polled until a ``_PARTITION_EOF`` event is seen; every message received
    is printed and committed synchronously. The consumer is closed before the
    next combination is processed.

    When the module-level ``schema_registry_url`` is falsy, an
    ``InMemorySchemaRegistry`` instance is passed to the producer and
    consumer instead of a registry URL.
    """
    from confluent_kafka import avro

    avsc_dir = os.path.join(os.path.dirname(__file__), os.pardir, 'tests', 'avro')

    # Producer config
    conf = {
        'bootstrap.servers': bootstrap_servers,
        'error_cb': error_cb,
        'api.version.request': api_version_request,
        'default.topic.config': {
            'produce.offset.report': True
        }
    }

    # Create producer
    if schema_registry_url:
        conf['schema.registry.url'] = schema_registry_url
        p = avro.AvroProducer(conf)
    else:
        p = avro.AvroProducer(conf, schema_registry=InMemorySchemaRegistry())

    prim_float = avro.load(os.path.join(avsc_dir, "primitive_float.avsc"))
    prim_string = avro.load(os.path.join(avsc_dir, "primitive_string.avsc"))
    basic = avro.load(os.path.join(avsc_dir, "basic_schema.avsc"))
    str_value = 'abc'
    float_value = 32.

    combinations = [
        dict(key=float_value, key_schema=prim_float),
        dict(value=float_value, value_schema=prim_float),
        dict(key={'name': 'abc'}, key_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic, key=float_value, key_schema=prim_float),
        dict(value={'name': 'abc'}, value_schema=basic, key=str_value, key_schema=prim_string),
        dict(value=float_value, value_schema=prim_float, key={'name': 'abc'}, key_schema=basic),
        dict(value=float_value, value_schema=prim_float, key=str_value, key_schema=prim_string),
        dict(value=str_value, value_schema=prim_string, key={'name': 'abc'}, key_schema=basic),
        dict(value=str_value, value_schema=prim_string, key=float_value, key_schema=prim_float),
    ]

    # Consumer config
    cons_conf = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': 'test.py',
        'session.timeout.ms': 6000,
        'enable.auto.commit': False,
        'api.version.request': api_version_request,
        'on_commit': print_commit_result,
        'error_cb': error_cb,
        'default.topic.config': {
            'auto.offset.reset': 'earliest'
        }
    }

    for combo in combinations:
        combo['topic'] = str(uuid.uuid4())
        p.produce(**combo)
        p.poll(0)
        p.flush()

        # Create consumer
        conf = copy(cons_conf)
        if schema_registry_url:
            conf['schema.registry.url'] = schema_registry_url
            c = avro.AvroConsumer(conf)
        else:
            c = avro.AvroConsumer(conf, schema_registry=InMemorySchemaRegistry())

        try:
            c.subscribe([combo['topic']])

            while True:
                msg = c.poll(0)
                if msg is None:
                    continue

                if msg.error():
                    if msg.error().code() == confluent_kafka.KafkaError._PARTITION_EOF:
                        break
                    else:
                        continue

                tstype, timestamp = msg.timestamp()
                print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s' %
                      (msg.topic(), msg.partition(), msg.offset(),
                       msg.key(), msg.value(), tstype, timestamp))

                # `async` is a reserved word from Python 3.7 on, so the
                # keyword must be spelled `asynchronous`.
                c.commit(msg, asynchronous=False)
        finally:
            # Close consumer, even when polling or committing raised
            c.close()
def verify_avro_https():
    """Round-trip Avro messages using the ``schema_registry_https`` settings.

    One message per key/value/schema combination is produced to its own
    random topic and tagged with an ``index`` header holding its position in
    the combination list. A single consumer subscribed to all of those topics
    then reads until it has handled one message per combination, asserting
    that each decoded key and value equals what was produced. For dict
    payloads, entries decoded as ``None`` are dropped before comparing.
    """
    from confluent_kafka import avro

    schema_dir = os.path.join(os.path.dirname(__file__), os.pardir, 'tests', 'avro')

    # Producer settings, extended with whatever the test configuration
    # provides under 'schema_registry_https'.
    producer_conf = {
        'bootstrap.servers': bootstrap_servers,
        'error_cb': error_cb,
        'api.version.request': api_version_request,
    }
    producer_conf.update(testconf.get('schema_registry_https', {}))
    p = avro.AvroProducer(producer_conf)

    prim_float = avro.load(os.path.join(schema_dir, "primitive_float.avsc"))
    prim_string = avro.load(os.path.join(schema_dir, "primitive_string.avsc"))
    basic = avro.load(os.path.join(schema_dir, "basic_schema.avsc"))

    text = 'abc'
    number = 32.0
    record = {'name': 'abc'}

    combinations = [
        dict(key=number, key_schema=prim_float),
        dict(value=number, value_schema=prim_float),
        dict(key={'name': 'abc'}, key_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic, key=number, key_schema=prim_float),
        dict(value={'name': 'abc'}, value_schema=basic, key=text, key_schema=prim_string),
        dict(value=number, value_schema=prim_float, key={'name': 'abc'}, key_schema=basic),
        dict(value=number, value_schema=prim_float, key=text, key_schema=prim_string),
        dict(value=text, value_schema=prim_string, key={'name': 'abc'}, key_schema=basic),
        dict(value=text, value_schema=prim_string, key=number, key_schema=prim_float),
        # Falsy payloads (0, empty string) must still be handled (issue #342)
        dict(value='', value_schema=prim_string, key=0.0, key_schema=prim_float),
        dict(value=0.0, value_schema=prim_float, key='', key_schema=prim_string),
    ]
    del record  # only kept above as a reading aid for the literal payloads

    for position, combo in enumerate(combinations):
        combo.update(topic=str(uuid.uuid4()),
                     headers=[('index', str(position))])
        p.produce(**combo)
    p.flush()

    consumer_conf = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': generate_group_id(),
        'session.timeout.ms': 6000,
        'enable.auto.commit': False,
        'api.version.request': api_version_request,
        'on_commit': print_commit_result,
        'error_cb': error_cb,
        'auto.offset.reset': 'earliest',
    }
    consumer_conf.update(testconf.get('schema_registry_https', {}))

    c = avro.AvroConsumer(consumer_conf)
    c.subscribe([combo['topic'] for combo in combinations])

    handled = 0
    while handled < len(combinations):
        msg = c.poll(0)
        if msg is None:
            continue
        if msg.error():
            continue

        ts_type, ts_value = msg.timestamp()
        print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s' %
              (msg.topic(), msg.partition(), msg.offset(),
               msg.key(), msg.value(), ts_type, ts_value))

        # The 'index' header identifies which combination this message is.
        expected = combinations[int(dict(msg.headers())['index'])]

        # Omit empty Avro fields from dict payloads before comparing.
        got_key = msg.key()
        if isinstance(got_key, dict):
            got_key = {k: v for k, v in got_key.items() if v is not None}
        got_value = msg.value()
        if isinstance(got_value, dict):
            got_value = {k: v for k, v in got_value.items() if v is not None}

        assert expected.get('key') == got_key
        assert expected.get('value') == got_value

        c.commit()
        handled += 1

    # Close consumer
    c.close()
def verify_avro():
    """Round-trip Avro messages one topic at a time and check the payloads.

    For each key/value/schema combination: produce it to a fresh random
    topic, flush, create a new ``AvroConsumer`` for that topic and poll until
    a ``_PARTITION_EOF`` event arrives. Each received message is printed,
    compared against the produced key and value (dict entries decoded as
    ``None`` are dropped first) and committed synchronously.

    When the module-level ``schema_registry_url`` is falsy, an
    ``InMemorySchemaRegistry`` instance is used instead of a registry URL.
    """
    from confluent_kafka import avro

    schema_dir = os.path.join(os.path.dirname(__file__), os.pardir, 'tests', 'avro')

    def load_schema(filename):
        # Load one .avsc file from the test schema directory.
        return avro.load(os.path.join(schema_dir, filename))

    def without_nulls(payload):
        # Dict payloads are compared with None-valued entries removed;
        # anything else is compared unchanged.
        if isinstance(payload, dict):
            return {k: v for k, v in payload.items() if v is not None}
        return payload

    # Producer config
    producer_conf = {
        'bootstrap.servers': bootstrap_servers,
        'error_cb': error_cb,
        'api.version.request': api_version_request,
    }

    # Create producer
    if schema_registry_url:
        producer_conf['schema.registry.url'] = schema_registry_url
        p = avro.AvroProducer(producer_conf)
    else:
        p = avro.AvroProducer(producer_conf, schema_registry=InMemorySchemaRegistry())

    prim_float = load_schema("primitive_float.avsc")
    prim_string = load_schema("primitive_string.avsc")
    basic = load_schema("basic_schema.avsc")

    text = 'abc'
    number = 32.0

    combinations = [
        dict(key=number, key_schema=prim_float),
        dict(value=number, value_schema=prim_float),
        dict(key={'name': 'abc'}, key_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic, key=number, key_schema=prim_float),
        dict(value={'name': 'abc'}, value_schema=basic, key=text, key_schema=prim_string),
        dict(value=number, value_schema=prim_float, key={'name': 'abc'}, key_schema=basic),
        dict(value=number, value_schema=prim_float, key=text, key_schema=prim_string),
        dict(value=text, value_schema=prim_string, key={'name': 'abc'}, key_schema=basic),
        dict(value=text, value_schema=prim_string, key=number, key_schema=prim_float),
        # Falsy payloads (0, empty string) must still be handled (issue #342)
        dict(value='', value_schema=prim_string, key=0.0, key_schema=prim_float),
        dict(value=0.0, value_schema=prim_float, key='', key_schema=prim_string),
    ]

    # Consumer config (copied for every consumer created below)
    cons_conf = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': 'test.py',
        'session.timeout.ms': 6000,
        'enable.auto.commit': False,
        'api.version.request': api_version_request,
        'on_commit': print_commit_result,
        'error_cb': error_cb,
        'auto.offset.reset': 'earliest',
    }

    for combo in combinations:
        combo['topic'] = str(uuid.uuid4())
        p.produce(**combo)
        p.flush()

        # Create consumer
        consumer_conf = copy(cons_conf)
        extra_kwargs = {}
        if schema_registry_url:
            consumer_conf['schema.registry.url'] = schema_registry_url
        else:
            extra_kwargs['schema_registry'] = InMemorySchemaRegistry()
        c = avro.AvroConsumer(consumer_conf, **extra_kwargs)
        c.subscribe([combo['topic']])

        while True:
            msg = c.poll(0)
            if msg is None:
                continue

            failure = msg.error()
            if failure:
                if failure.code() == confluent_kafka.KafkaError._PARTITION_EOF:
                    break
                continue

            ts_type, ts_value = msg.timestamp()
            print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s' %
                  (msg.topic(), msg.partition(), msg.offset(),
                   msg.key(), msg.value(), ts_type, ts_value))

            # Omit empty Avro fields from payload for comparison
            assert combo.get('key') == without_nulls(msg.key())
            assert combo.get('value') == without_nulls(msg.value())

            c.commit(msg, asynchronous=False)

        # Close consumer
        c.close()
def verify_avro_explicit_read_schema():
    """Verify that consuming Avro with explicit reader schemas works.

    Two combinations are exercised: a value written with ``user_v2.avsc`` and
    read with ``user_v1.avsc``, and the reverse. Each message goes to a fresh
    random topic and is read back by a new ``AvroConsumer`` constructed with
    the combination's reader key/value schemas. Polling continues until a
    ``_PARTITION_EOF`` event; for every message the key must match and the
    value's ``name`` field must match what was produced.
    """
    from confluent_kafka import avro

    schema_dir = os.path.join(os.path.dirname(__file__), os.pardir, 'tests', 'avro')

    # Producer config
    producer_conf = {
        'bootstrap.servers': bootstrap_servers,
        'error_cb': error_cb,
        'api.version.request': api_version_request,
        'default.topic.config': {'produce.offset.report': True},
    }

    # Create producer
    if schema_registry_url:
        producer_conf['schema.registry.url'] = schema_registry_url
        p = avro.AvroProducer(producer_conf)
    else:
        p = avro.AvroProducer(producer_conf, schema_registry=InMemorySchemaRegistry())

    key_schema = avro.load(os.path.join(schema_dir, "primitive_float.avsc"))
    schema1 = avro.load(os.path.join(schema_dir, "user_v1.avsc"))
    schema2 = avro.load(os.path.join(schema_dir, "user_v2.avsc"))

    float_value = 32.0
    full_user = {"name": "abc", "favorite_number": 42, "favorite_colo": "orange"}
    minimal_user = {"name": "abc"}

    combinations = [
        dict(value=full_user, value_schema=schema2,
             key=float_value, key_schema=key_schema,
             reader_value_schema=schema1, reader_key_schema=key_schema),
        dict(value=minimal_user, value_schema=schema1,
             key=float_value, key_schema=key_schema,
             reader_value_schema=schema2, reader_key_schema=key_schema),
    ]

    # Consumer config (copied for every consumer created below)
    cons_conf = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': 'test.py',
        'session.timeout.ms': 6000,
        'enable.auto.commit': False,
        'api.version.request': api_version_request,
        'on_commit': print_commit_result,
        'error_cb': error_cb,
        'default.topic.config': {'auto.offset.reset': 'earliest'},
    }

    for combo in combinations:
        # The reader schemas are consumer-side options; strip them so the
        # remaining entries can be passed straight to produce().
        reader_key_schema = combo.pop("reader_key_schema")
        reader_value_schema = combo.pop("reader_value_schema")
        combo['topic'] = str(uuid.uuid4())

        p.produce(**combo)
        p.poll(0)
        p.flush()

        # Create consumer
        consumer_conf = copy(cons_conf)
        consumer_kwargs = {}
        if schema_registry_url:
            consumer_conf['schema.registry.url'] = schema_registry_url
        else:
            consumer_kwargs['schema_registry'] = InMemorySchemaRegistry()
        consumer_kwargs['reader_key_schema'] = reader_key_schema
        consumer_kwargs['reader_value_schema'] = reader_value_schema
        c = avro.AvroConsumer(consumer_conf, **consumer_kwargs)
        c.subscribe([combo['topic']])

        while True:
            msg = c.poll(0)
            if msg is None:
                continue

            failure = msg.error()
            if failure:
                if failure.code() == confluent_kafka.KafkaError._PARTITION_EOF:
                    break
                continue

            ts_type, ts_value = msg.timestamp()
            print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s' %
                  (msg.topic(), msg.partition(), msg.offset(),
                   msg.key(), msg.value(), ts_type, ts_value))

            # Omit empty Avro fields from payload for comparison
            got_key = msg.key()
            if isinstance(got_key, dict):
                got_key = {k: v for k, v in got_key.items() if v is not None}
            got_value = msg.value()
            if isinstance(got_value, dict):
                got_value = {k: v for k, v in got_value.items() if v is not None}

            assert combo.get('key') == got_key
            assert combo.get('value')['name'] == got_value['name']

            c.commit(msg, asynchronous=False)

        # Close consumer
        c.close()
def run_avro_loop(producer_conf, consumer_conf):
    """Produce a fixed set of Avro messages and verify them on consumption.

    Each key/value/schema combination is produced to its own random topic
    with an ``index`` header recording its position in the list. One consumer
    subscribed to all of those topics polls until it has handled one message
    per combination; consumer errors are printed and skipped. Every message
    is compared with the combination named by its header (dict entries
    decoded as ``None`` are dropped first) and then committed.

    Args:
        producer_conf: Configuration passed to ``avro.AvroProducer``.
        consumer_conf: Configuration passed to ``avro.AvroConsumer``.
    """
    from confluent_kafka import avro

    schema_dir = os.path.join(os.path.dirname(__file__), os.pardir, 'avro')

    def drop_nulls(payload):
        # Dict payloads lose their None-valued entries; other payloads are
        # returned unchanged.
        if not isinstance(payload, dict):
            return payload
        return {k: v for k, v in payload.items() if v is not None}

    p = avro.AvroProducer(producer_conf)

    prim_float = avro.load(os.path.join(schema_dir, "primitive_float.avsc"))
    prim_string = avro.load(os.path.join(schema_dir, "primitive_string.avsc"))
    basic = avro.load(os.path.join(schema_dir, "basic_schema.avsc"))

    text = 'abc'
    number = 32.0

    combinations = [
        dict(key=number, key_schema=prim_float),
        dict(value=number, value_schema=prim_float),
        dict(key={'name': 'abc'}, key_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic),
        dict(value={'name': 'abc'}, value_schema=basic, key=number, key_schema=prim_float),
        dict(value={'name': 'abc'}, value_schema=basic, key=text, key_schema=prim_string),
        dict(value=number, value_schema=prim_float, key={'name': 'abc'}, key_schema=basic),
        dict(value=number, value_schema=prim_float, key=text, key_schema=prim_string),
        dict(value=text, value_schema=prim_string, key={'name': 'abc'}, key_schema=basic),
        dict(value=text, value_schema=prim_string, key=number, key_schema=prim_float),
        # Falsy payloads (0, empty string) must still be handled (issue #342)
        dict(value='', value_schema=prim_string, key=0.0, key_schema=prim_float),
        dict(value=0.0, value_schema=prim_float, key='', key_schema=prim_string),
    ]

    for position, combo in enumerate(combinations):
        combo['topic'] = str(uuid.uuid4())
        combo['headers'] = [('index', str(position))]
        p.produce(**combo)
    p.flush()

    c = avro.AvroConsumer(consumer_conf)
    c.subscribe([combo['topic'] for combo in combinations])

    handled = 0
    while handled < len(combinations):
        msg = c.poll(1)
        if msg is None:
            continue

        failure = msg.error()
        if failure:
            print(failure)
            continue

        ts_type, ts_value = msg.timestamp()
        print('%s[%d]@%d: key=%s, value=%s, tstype=%d, timestamp=%s' %
              (msg.topic(), msg.partition(), msg.offset(),
               msg.key(), msg.value(), ts_type, ts_value))

        # The 'index' header identifies which combination this message is.
        expected = combinations[int(dict(msg.headers())['index'])]

        # Omit empty Avro fields from payload for comparison
        assert expected.get('key') == drop_nulls(msg.key())
        assert expected.get('value') == drop_nulls(msg.value())

        c.commit()
        handled += 1

    # Close consumer
    c.close()
def verify_avro_explicit_read_schema():
    """Verify that reading Avro with an explicit reader schema works.

    Two messages whose key and value are written with ``user_v1.avsc`` are
    produced to one fresh topic. They are consumed with ``user_v2.avsc`` set
    as both reader key schema and reader value schema, and every decoded key
    and value is expected to carry ``favorite_number == 42`` and
    ``favorite_color == "purple"``.

    Raises:
        AssertionError: If a decoded key or value lacks the expected fields.
    """
    from confluent_kafka import avro

    base_conf = {'bootstrap.servers': bootstrap_servers,
                 'error_cb': error_cb,
                 'schema.registry.url': schema_registry_url}

    # 'schema.registry.url' is inherited from base_conf, so it is not
    # repeated in the consumer-specific settings.
    consumer_conf = dict(base_conf)
    consumer_conf.update({
        'group.id': 'test.py',
        'session.timeout.ms': 6000,
        'enable.auto.commit': False,
        'on_commit': print_commit_result,
        'auto.offset.reset': 'earliest'})

    avsc_dir = os.path.join(os.path.dirname(__file__), os.pardir, 'avro')
    writer_schema = avro.load(os.path.join(avsc_dir, "user_v1.avsc"))
    reader_schema = avro.load(os.path.join(avsc_dir, "user_v2.avsc"))

    user_value1 = {
        "name": " Rogers Nelson"
    }

    user_value2 = {
        "name": "Kenny Loggins"
    }

    combinations = [
        dict(key=user_value1, key_schema=writer_schema, value=user_value2, value_schema=writer_schema),
        dict(key=user_value2, key_schema=writer_schema, value=user_value1, value_schema=writer_schema)
    ]
    avro_topic = topic + str(uuid.uuid4())

    p = avro.AvroProducer(base_conf)
    for combo in combinations:
        p.produce(topic=avro_topic, **combo)
    p.flush()

    c = avro.AvroConsumer(consumer_conf, reader_key_schema=reader_schema, reader_value_schema=reader_schema)
    try:
        c.subscribe([avro_topic])

        msgcount = 0
        while msgcount < len(combinations):
            msg = c.poll(1)

            if msg is None:
                continue
            if msg.error():
                print("Consumer error {}".format(msg.error()))
                continue

            msgcount += 1
            # Avro schema projection should return the two fields not present in the writer schema
            # NOTE(review): `.get()` never raises KeyError, so a failed
            # projection surfaces as AssertionError and the handler below is
            # not reached. Left as-is to keep the exception type callers see;
            # confirm whether SerializerError was the intended failure mode.
            try:
                assert(msg.key().get('favorite_number') == 42)
                assert(msg.key().get('favorite_color') == "purple")
                assert(msg.value().get('favorite_number') == 42)
                assert(msg.value().get('favorite_color') == "purple")
                print("success: schema projection worked for explicit reader schema")
            except KeyError:
                raise confluent_kafka.avro.SerializerError("Schema projection failed when setting reader schema.")
    finally:
        # Close the consumer on success and on failure alike
        c.close()