class AvroConsumerFacade:
    def __init__(self, name, emit_datum, broker, schema_registry_url, topic):
        self.name = name
        self.emit_datum = emit_datum
        self.consumer = AvroConsumer({
            'bootstrap.servers': broker,
            'group.id': name,
            'schema.registry.url': schema_registry_url,
            **get_sr_config_from_environment(),
            **get_kafka_config_from_environment(),
        })

        # Subscribe to topics/partitions, and seek to end. Following that we need
        # to poll until the topics have actually been assigned.
        def on_assign(consumer, partitions):
            for p in partitions:
                p.offset = OFFSET_END
            self.consumer.assign(partitions)

        self.consumer.subscribe([topic], on_assign=on_assign)
        self.consumer.poll(10)

    def consume_one(self, poll_wait=0):
        consumed_message = self.consumer.poll(poll_wait)
        if consumed_message is not None:
            self.emit_datum(Datum(good_count=1))
        else:
            self.emit_datum(Datum(bad_count=1))

    def close(self):
        self.consumer.commit()
        self.consumer.close()
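# A minimal sketch of driving the facade above. The broker address, registry
# URL, topic name, and the print-based emit callback are illustrative
# assumptions, not part of the original project (which supplies its own
# Datum sink via emit_datum).
facade = AvroConsumerFacade(
    name='probe-consumer',          # also used as the consumer group id
    emit_datum=print,               # stand-in for the project's Datum sink
    broker='localhost:9092',
    schema_registry_url='http://localhost:8081',
    topic='events',
)
for _ in range(10):                 # sample ten polls, then clean up
    facade.consume_one(poll_wait=1)
facade.close()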
def consume():
    c = AvroConsumer({
        'bootstrap.servers': 'kafka-1:19092,kafka-2:29092',
        'schema.registry.url': 'http://schema-registry:8081',
        'group.id': 'citibike_station_data',
    })
    c.subscribe(['station_status'])

    msg = None  # defined up front so the except clause can reference it
    while True:
        try:
            msg = c.poll(10)
        except SerializerError as e:
            print("Message deserialization failed for {}: {}".format(msg, e))
            break

        if msg is None:
            print("~~~~MESSAGE IS NONE")
            continue

        if msg.error():
            if msg.error().code() == KafkaError._PARTITION_EOF:
                continue
            else:
                print("~~A MESSAGE ERROR", msg.error())
                break

        # do something with the data
        make_request(msg.key()['id'], msg.value())

    c.close()
async def consume(topic_name):
    """Consumes data from the Kafka Topic"""
    #
    # Create a CachedSchemaRegistryClient
    #
    schema_registry = CachedSchemaRegistryClient({"url": SCHEMA_REGISTRY_URL})

    #
    # Use the Avro Consumer
    #
    c = AvroConsumer(
        {"bootstrap.servers": BROKER_URL, "group.id": "0"},
        schema_registry=schema_registry,
    )
    c.subscribe([topic_name])

    while True:
        message = c.poll(1.0)
        if message is None:
            print("no message received by consumer")
        elif message.error() is not None:
            print(f"error from consumer {message.error()}")
        else:
            try:
                # Print the deserialized message value to the console
                print(message.value())
            except KeyError as e:
                print(f"Failed to unpack message {e}")
        await asyncio.sleep(1.0)
def consume_test_messages(broker, schema_registry, topic):
    consumer = AvroConsumer({
        'bootstrap.servers': broker,
        'group.id': 'groupid',
        'schema.registry.url': schema_registry,
        'auto.offset.reset': 'earliest'
    })
    consumer.subscribe([topic])

    count = 0
    msg = None  # defined up front so the except clause can reference it
    while True:
        try:
            msg = consumer.poll(1)
        except SerializerError as e:
            print("Message deserialization failed for {}: {}".format(msg, e))
            break

        if msg is None:
            count += 1
            if count == 10:
                break
            continue

        if msg.error():
            print("AvroConsumer error: {}".format(msg.error()))
            continue

        print('Consumed', msg.value())

    consumer.close()
def consume(topic: str, brokers: str, schema_registry_url: str):
    avro_consumer_settings = {
        'bootstrap.servers': brokers,
        'group.id': 'raw_1',
        'client.id': 'client-1',
        'session.timeout.ms': 6000,
        'schema.registry.url': schema_registry_url,
        'default.topic.config': {'auto.offset.reset': 'smallest'},
        # 'debug': 'all'
    }
    # Reader schemas can be supplied explicitly from a file; otherwise
    # AvroConsumer fetches them from the Schema Registry.
    consumer = AvroConsumer(avro_consumer_settings)
    consumer.subscribe([topic])
    try:
        while True:
            msg = consumer.poll(10.0)
            if msg is None:
                continue
            elif msg.error():
                print("Consumer error: {}".format(msg.error()))
            else:
                print("Message Consumed: key = {} value = {}".format(
                    msg.key(), msg.value()))
    except Exception as e:
        print(e)
    finally:
        consumer.close()
def test_dnn():
    copyfile('research.zip', '/mnt/archives/research.zip')
    value_schema = avro.load('avro_sch/res_prod.json')
    value = {"command": "start", "path": "research.zip", "id": "1"}

    avroProducer = AvroProducer(
        {
            'bootstrap.servers': KAFKA_BROKER_URL,
            'schema.registry.url': 'http://schema_registry:8081'
        },
        default_value_schema=value_schema)
    avroProducer.produce(topic='dnn.data', value=value)
    print("msg produced")

    c = AvroConsumer({
        'bootstrap.servers': "broker:9092",
        'group.id': 'groupid',
        'schema.registry.url': 'http://schema_registry:8081'
    })
    c.subscribe(["dnn.results"])

    msg = None  # defined up front so the except clause can reference it
    while True:
        try:
            print("Start polling")
            msg = c.poll(10)
        except SerializerError as e:
            print("Message deserialization failed for {}: {}".format(msg, e))
            break
        if msg is None:
            continue
        if msg.error():
            print("AvroConsumer error: {}".format(msg.error()))
            continue

        msg = msg.value()
        # Check the type before indexing into the message
        assert type(msg) == dict, "Wrong type of msg variable"
        assert msg["code"] == "success", "Inference failed"
        assert 'path' in msg, "No path field in returned message"
        for res in os.listdir(os.path.join("/mnt/results/experiments", msg["path"])):
            if os.path.isdir(os.path.join("/mnt/results/experiments", msg["path"], res)):
                png_files = glob.glob(
                    os.path.join("/mnt/results/experiments", msg["path"], res, "*.png"))
                assert len(png_files) > 0, "No png output files found for {}".format(res)
        assert len(msg["nods"]) < 5, "Too many nodules found"
        break
class KafkaAvroConsumer:
    def __init__(self, brokers, group, schema_registry_url):
        self.avro_consumer = AvroConsumer({
            'bootstrap.servers': brokers,
            'group.id': group,
            'auto.offset.reset': 'earliest',
            'schema.registry.url': schema_registry_url})

    def subscribe(self, topics):
        self.avro_consumer.subscribe(topics=topics)

    def pull_message(self):
        msg = None  # defined up front so the except clause can reference it
        while True:
            try:
                msg = self.avro_consumer.poll(2)
            except SerializerError as e:
                print("Message deserialization failed for {}: {}".format(msg, e))
                break
            if msg is None:
                continue
            if msg.error():
                print("AvroConsumer error: {}".format(msg.error()))
                continue
            print(msg.key(), ": ", msg.value())

    def close(self):
        self.avro_consumer.close()
def __seek_from_to_offsets(self, partition, start_offset, end_offset, fft):
    self.log.info(
        f'Start : __seek_from_to_offsets({partition}, {start_offset}, {end_offset})')
    consumer = AvroConsumer({
        'bootstrap.servers': self.bootstrap_servers,
        'group.id': self.group_id,
        'schema.registry.url': self.schema_registry_url
    })
    topic_partition = TopicPartition(self.topic, partition)
    topic_partition.offset = start_offset
    consumer.assign([topic_partition])

    messages = []
    while True:
        message = consumer.poll(10)
        if message is None:  # poll timed out; keep waiting for the range
            continue
        if fft:
            dasfft = DasFft()
            message.value()['fft'] = dasfft.amplitudes_fft(
                message.value()['amplitudes'])
        messages.append(message)
        if message.offset() >= end_offset:
            consumer.close()
            self.log.info(
                f'End : __seek_from_to_offsets({partition}, {start_offset}, {end_offset})')
            return messages
def avro_consumer(urls, topics, uav_name, duration):
    c = AvroConsumer(urls)
    c.subscribe(topics)
    msges = []  # list of avro messages
    start_time = time.time()
    msg = None  # defined up front so the except clause can reference it
    while (time.time() - start_time) <= duration:
        try:
            msg = c.poll(10)
        except SerializerError as e:
            # print("Message deserialization failed for {}: {}".format(msg, e))
            break
        if msg is None:
            continue
        if msg.error():
            # print("AvroConsumer error: {}".format(msg.error()))
            continue
        m = msg.value()
        print(m)
        time.sleep(1)
        if m["header"]['sourceSystem'] == uav_name:
            msges.append(msg)
    c.close()
    return msges
async def consume(topic_name):
    """Consumes data from the Kafka Topic"""
    #
    # TODO: Create a CachedSchemaRegistryClient
    #
    schema_registry = CachedSchemaRegistryClient({"url": SCHEMA_REGISTRY_URL})

    #
    # TODO: Use the Avro Consumer
    # See: https://docs.confluent.io/current/clients/confluent-kafka-python/index.html?highlight=loads#confluent_kafka.avro.AvroConsumer
    #
    c = AvroConsumer(
        {"bootstrap.servers": BROKER_URL, "group.id": "0"},
        schema_registry=schema_registry,
    )
    c.subscribe([topic_name])

    while True:
        message = c.poll(1.0)
        if message is None:
            print("no message received by consumer")
        elif message.error() is not None:
            print(f"error from consumer {message.error()}")
        else:
            try:
                print(message.value())
            except KeyError as e:
                print(f"Failed to unpack message {e}")
        await asyncio.sleep(1.0)
def consume(conf, schema_record):
    """ Consume MetadataChangeEvent records """
    print("Consuming MetadataChangeEvent records from topic {} with group {}. ^c to exit."
          .format(topic, conf["group.id"]))

    c = AvroConsumer(conf, reader_value_schema=avro.load(schema_record))
    c.subscribe([topic])

    while True:
        try:
            msg = c.poll(1)
            # There were no messages on the queue, continue polling
            if msg is None:
                continue
            if msg.error():
                print("Consumer error: {}".format(msg.error()))
                continue

            record = MetadataChangeEvent(msg.value())
            print("avro_event: {}\n\t".format(record.value))
        except SerializerError as e:
            # Report malformed record, discard results, continue polling
            print("Message deserialization failed {}".format(e))
            continue
        except KeyboardInterrupt:
            break

    print("Shutting down consumer..")
    c.close()
def consume_messages():
    print("Start Consumer")
    settings = get_settings()
    consumer = AvroConsumer(settings)
    consumer.subscribe([TEST_TOPIC])

    while True:
        try:
            message = consumer.poll(timeout=5.0)
        except SerializerError as err:
            logger.error("Message deserialization failed {}".format(err))
            continue
        except Exception as err:
            logger.error("Caught exception while polling consumer: {}".format(err))
            continue

        if message is None:
            continue
        if message.error():
            if message.error().code() == KafkaError._PARTITION_EOF:
                continue
            else:
                logger.error(message.error())
                continue

        try:
            logger.debug(message.value())
            print(message.value())
        except Exception as err:
            logger.error("Caught exception while processing message: {}".format(err))
def consumer_message():
    print('consumer initialized')
    consumer = AvroConsumer(
        {'bootstrap.servers': broker, 'group.id': 'test_poc'},
        schema_registry=schema_registry)
    consumer.subscribe([topic])

    while True:
        msg = consumer.poll(0.5)
        if msg is None:
            continue
        if msg.error():
            if msg.error().code() == KafkaError._PARTITION_EOF:
                continue
            else:
                print(msg.error())
                break
        print('\nReceived message: {}\n'.format(msg.value()))
def test_transfer_avro_message_using_file(
    avro_producer: AvroProducer,
    target_topic_avro_consumer: AvroConsumer,
    source_topic: Tuple[str, int],
    target_topic: Tuple[str, int],
    non_interactive_cli_runner: CliRunner,
    tmpdir_factory,
):
    output_directory = tmpdir_factory.mktemp("output_directory")
    expected_messages = produce_avro_test_messages(topic_name=source_topic[0],
                                                   avro_producer=avro_producer)
    non_interactive_cli_runner.invoke(
        esque,
        args=["consume", "-d", str(output_directory), "--avro", "--number", "10", source_topic[0]],
        catch_exceptions=False,
    )
    non_interactive_cli_runner.invoke(
        esque,
        args=["produce", "-d", str(output_directory), "--avro", target_topic[0]],
        catch_exceptions=False,
    )

    actual_messages = set()
    start = time.monotonic()
    while len(actual_messages) < 10:
        msg = target_topic_avro_consumer.poll(timeout=2)
        if msg is not None:
            actual_messages.add((msg.key()["key"], msg.value()["value"], msg.partition()))
        elif time.monotonic() - start >= 20:
            raise TimeoutError("Timeout reading data from topic")

    expected_messages = {(msg.key["key"], msg.value["value"], msg.partition)
                         for msg in expected_messages}
    assert expected_messages == actual_messages
def test_transfer_avro_with_single_command(
    avro_producer: AvroProducer,
    target_topic_avro_consumer: AvroConsumer,
    source_topic: Tuple[str, int],
    target_topic: Tuple[str, int],
    non_interactive_cli_runner: CliRunner,
):
    expected_messages = produce_avro_test_messages(topic_name=source_topic[0],
                                                   avro_producer=avro_producer)
    non_interactive_cli_runner.invoke(
        esque,
        args=[
            "transfer",
            "--from-topic", source_topic[0],
            "--to-topic", target_topic[0],
            "--avro",
            "--number", "10",
            "--first",
        ],
        catch_exceptions=False,
    )

    actual_messages = set()
    start = time.monotonic()
    while len(actual_messages) < 10:
        msg = target_topic_avro_consumer.poll(timeout=2)
        if msg is not None:
            actual_messages.add((msg.key()["key"], msg.value()["value"], msg.partition()))
        elif time.monotonic() - start >= 20:
            raise TimeoutError("Timeout reading data from topic")

    expected_messages = {(msg.key["key"], msg.value["value"], msg.partition)
                         for msg in expected_messages}
    assert expected_messages == actual_messages
def consume_record(args):
    default_group_name = "default-consumer-group"
    consumer_config = {
        "bootstrap.servers": args.bootstrap_servers,
        "schema.registry.url": args.schema_registry,
        "group.id": default_group_name,
        "auto.offset.reset": "earliest"
    }
    consumer = AvroConsumer(consumer_config)
    consumer.subscribe([args.topic])

    try:
        message = consumer.poll(5)
    except Exception as e:
        print(f"Exception while trying to poll messages - {e}")
    else:
        if message:
            print(
                f"Successfully polled a record from "
                f"Kafka topic: {message.topic()}, partition: {message.partition()}, offset: {message.offset()}\n"
                f"message key: {message.key()} || message value: {message.value()}"
            )
            consumer.commit()
        else:
            print("No new messages at this point. Try again later.")

    consumer.close()
async def converter(CONSUME_TOPIC, PRODUCE_TOPIC, BROKER_URL, SCHEMA_REGISTRY_URL):
    """Consumes data from the Kafka Topic"""
    schema_registry = CachedSchemaRegistryClient({"url": SCHEMA_REGISTRY_URL})
    c = AvroConsumer(
        {
            "bootstrap.servers": BROKER_URL,
            "client.id": "project-insight",
            "group.id": "convertor-in-consumer",
            "auto.offset.reset": "earliest",
        },
        schema_registry=schema_registry,
    )
    c.subscribe([CONSUME_TOPIC])

    p = Producer({"bootstrap.servers": BROKER_URL})

    while True:
        message = c.poll(1.0)
        if message is None:
            logger.info("no message received by consumer")
        elif message.error() is not None:
            logger.error(f"error from consumer {message.error()}")
        else:
            try:
                print(message.value())
                p.produce(topic=PRODUCE_TOPIC,
                          key=str(uuid4()),
                          value=json.dumps(message.value()))
            except KeyError as e:
                logger.error(f"Failed to unpack message {e}")
        await asyncio.sleep(0.01)
async def consume(topic_name):
    """Consumes data from the Kafka Topic"""
    #
    # Create a CachedSchemaRegistryClient
    #
    schema_registry = CachedSchemaRegistryClient({"url": SCHEMA_REGISTRY_URL})

    #
    # Use the Avro Consumer
    # See: https://docs.confluent.io/current/clients/confluent-kafka-python/index.html?highlight=loads#confluent_kafka.avro.AvroConsumer
    #
    c = AvroConsumer(
        {"bootstrap.servers": BROKER_URL, "group.id": "0"},
        schema_registry=schema_registry,
    )
    c.subscribe([topic_name])

    while True:
        message = c.poll(1.0)
        if message is None:
            print("no message received by consumer")
        elif message.error() is not None:
            print(f"error from consumer {message.error()}")
        else:
            try:
                print(message.key(), message.value())
            except KeyError as e:
                print(f"Failed to unpack message {e}")
        await asyncio.sleep(1.0)
def __get_message(self, partition, offset, fft):
    self.log.info(f'Start : __get_message({partition},{offset})')
    consumer = AvroConsumer({
        'bootstrap.servers': self.bootstrap_servers,
        'group.id': self.group_id,
        'schema.registry.url': self.schema_registry_url
    })
    topic_partition = TopicPartition(self.topic, partition)
    topic_partition.offset = offset
    consumer.assign([topic_partition])
    message = consumer.poll(10)
    consumer.close()
    if message is not None and fft:  # poll() returns None on timeout
        dasfft = DasFft()
        message.value()['fft'] = dasfft.amplitudes_fft(
            message.value()['amplitudes'])
    self.log.info(f'End : __get_message({partition},{offset})')
    return message
def consumeToList(self):
    consConf = self.consumerConfig()
    consumer = AvroConsumer(consConf)
    consumer.subscribe([self.getTopic()])
    messageList = list()

    i = 0
    while i < 20:
        print(i)
        i += 1
        try:
            msg = consumer.poll(1)
            # There were no messages on the queue, continue polling
            if msg is None:
                continue
            if msg.error():
                print("Consumer error: {}".format(msg.error()))
                continue

            # Round-trip the Avro dict through JSON to rebuild it as a
            # namedtuple, giving attribute-style access to its fields
            record = json.loads(
                json.dumps(msg.value()),
                object_hook=lambda d: namedtuple(
                    self.consumedObjectName, d.keys())(*d.values()))
            messageList.append(record)
        except SerializerError as e:
            # Report malformed record, discard results, continue polling
            print("Message deserialization failed {}".format(e))
            continue
        except KeyboardInterrupt:
            break

    print("Shutting down consumer..")
    consumer.close()
    return messageList
class KafkaConsumer:
    def __init__(self, kafka_brokers="", kafka_apikey="", topic_name="",
                 schema_registry_url="", autocommit=True):
        self.kafka_brokers = kafka_brokers
        self.kafka_apikey = kafka_apikey
        self.topic_name = topic_name
        self.schema_registry_url = schema_registry_url
        self.kafka_auto_commit = autocommit

    # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
    def prepareConsumer(self, groupID="pythonconsumers"):
        options = {
            'bootstrap.servers': self.kafka_brokers,
            'group.id': groupID,
            'auto.offset.reset': 'earliest',
            'schema.registry.url': self.schema_registry_url,
            'enable.auto.commit': self.kafka_auto_commit,
            'security.protocol': 'SASL_SSL',
            'sasl.mechanisms': 'PLAIN',
            'sasl.username': '******',
            'sasl.password': self.kafka_apikey
        }
        # Print the configuration
        print("--- This is the configuration for the Avro consumer: ---")
        print(options)
        print("---------------------------------------------------")
        # Create the Avro consumer
        self.consumer = AvroConsumer(options)
        # Subscribe to the topic
        self.consumer.subscribe([self.topic_name])

    def traceResponse(self, msg):
        print('[Message] - Next message consumed from {} partition: [{}] at offset {} with key {}:\n\tvalue: {}'
              .format(msg.topic(), msg.partition(), msg.offset(), msg.key(), msg.value()))

    # Polls for the next event
    def pollNextEvent(self):
        # Poll for messages
        msg = self.consumer.poll(timeout=10.0)
        # Validate the returned message
        if msg is None:
            print("[INFO] - No new messages on the topic")
        elif msg.error():
            # msg.error() is a KafkaError; compare against its string form
            if "PARTITION_EOF" in str(msg.error()):
                print("[INFO] - End of partition")
            else:
                print("[ERROR] - Consumer error: {}".format(msg.error()))
        else:
            # Print the message
            self.traceResponse(msg)

    def close(self):
        self.consumer.close()
def __init__(
    self,
    topic_name_pattern,
    message_handler,
    is_avro=True,
    offset_earliest=False,
    sleep_secs=1.0,
    consume_timeout=0.1,
):
    """Creates a consumer object for asynchronous use"""
    self.topic_name_pattern = topic_name_pattern
    self.message_handler = message_handler
    self.sleep_secs = sleep_secs
    self.consume_timeout = consume_timeout
    self.offset_earliest = offset_earliest

    # Broker properties; the Host URLs for Kafka and the Schema Registry
    # come from the project README.
    BROKER_URL = "PLAINTEXT://localhost:9092"
    SCHEMA_REGISTRY_URL = "http://localhost:8081"
    self.broker_properties = {
        "bootstrap.servers": BROKER_URL,
        "group.id": "0",
        "enable.auto.commit": True,
        "default.topic.config": {
            "auto.offset.reset": "earliest" if offset_earliest else "latest"
        },
    }

    # Create the Consumer, using the appropriate type. AvroConsumer reads
    # the Schema Registry location from the `schema.registry.url` key.
    if is_avro is True:
        self.broker_properties["schema.registry.url"] = SCHEMA_REGISTRY_URL
        self.consumer = AvroConsumer(self.broker_properties)
    else:
        self.consumer = Consumer(self.broker_properties)

    # Seek assigned partitions back to the beginning when requested.
    def on_assign(consumer, partitions):
        if self.offset_earliest:
            for partition in partitions:
                partition.offset = OFFSET_BEGINNING
        consumer.assign(partitions)

    self.consumer.subscribe([topic_name_pattern], on_assign=on_assign)

    # poll() returns at most one message (or None) per call.
    message = self.consumer.poll(timeout=self.consume_timeout)
    if message is not None and message.error() is None:
        self.message_handler(message.key(), message.value())
class KafkaConsumer:
    def __init__(self, kafka_env='LOCAL', kafka_brokers="", kafka_apikey="",
                 topic_name="", schema_registry_url="", autocommit=True):
        self.kafka_env = kafka_env
        self.kafka_brokers = kafka_brokers
        self.kafka_apikey = kafka_apikey
        self.topic_name = topic_name
        self.schema_registry_url = schema_registry_url
        self.kafka_auto_commit = autocommit

    # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
    def prepareConsumer(self, groupID="pythonconsumers"):
        options = {
            'bootstrap.servers': self.kafka_brokers,
            'group.id': groupID,
            'auto.offset.reset': 'earliest',
            'schema.registry.url': self.schema_registry_url,
            'enable.auto.commit': self.kafka_auto_commit,
        }
        if self.kafka_env != 'LOCAL' and self.kafka_env != 'MINIKUBE':
            options['security.protocol'] = 'SASL_SSL'
            options['sasl.mechanisms'] = 'PLAIN'
            options['sasl.username'] = '******'
            options['sasl.password'] = self.kafka_apikey
        if self.kafka_env == 'ICP':
            options['ssl.ca.location'] = os.environ['PEM_CERT']
            options['schema.registry.ssl.ca.location'] = os.environ['PEM_CERT']
        print("This is the configuration for the consumer:")
        print(options)
        self.consumer = AvroConsumer(options)
        self.consumer.subscribe([self.topic_name])

    def traceResponse(self, msg):
        print('@@@ pollNextOrder {} partition: [{}] at offset {} with key {}:\n\tvalue: {}'
              .format(msg.topic(), msg.partition(), msg.offset(), msg.key(), msg.value()))

    def pollNextEvent(self, keyID, keyname):
        gotIt = False
        msg = None  # defined up front so the except clause can reference it
        while not gotIt:
            try:
                msg = self.consumer.poll(timeout=10.0)
            except SerializerError as e:
                print("Message deserialization failed for {}: {}".format(msg, e))
                break
            if msg is None:
                continue
            if msg.error():
                print("Consumer error: {}".format(msg.error()))
                # msg.error() is a KafkaError; compare its string form
                if "PARTITION_EOF" in str(msg.error()):
                    gotIt = True
                continue
            self.traceResponse(msg)
            if msg.key()[keyname] == keyID:
                gotIt = True

    def close(self):
        self.consumer.close()
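# A usage sketch for the environment-aware consumer above; the broker list,
# topic, group id, key field, and key value are illustrative assumptions.
consumer = KafkaConsumer(
    kafka_env='LOCAL',
    kafka_brokers='localhost:9092',
    topic_name='orders',
    schema_registry_url='http://localhost:8081',
)
consumer.prepareConsumer(groupID='order-trackers')
consumer.pollNextEvent(keyID='order-42', keyname='orderID')  # blocks until the key matches
consumer.close()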
def consumer(config, topic):
    # A pytest-style fixture: yields a ready consumer, then commits and
    # closes it during teardown.
    config = dict(config, **{
        'group.id': 'test_group_35',
    })
    consumer = AvroConsumer(config)

    # Subscribe to topics/partitions, and seek to end. Following that we need
    # to poll until the topics have actually been assigned.
    def on_assign(consumer, partitions):
        for p in partitions:
            p.offset = OFFSET_END
        consumer.assign(partitions)

    consumer.subscribe([topic], on_assign=on_assign)
    consumer.poll(10)

    yield consumer

    consumer.commit()
    consumer.close()
class ConsumerSubscribe:
    log = logging.getLogger('Kafka ConsumerSubscribe')
    log.setLevel(logging.INFO)
    handler = logging.FileHandler('./consumerSubscribe.log')
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    handler.setFormatter(formatter)
    log.addHandler(handler)

    def __init__(self, topic, group_id,
                 bootstrap_servers=os.environ["BOOTSTRAP_SERVER"],
                 schema_registry_url=os.environ["SCHEMA_REGISTRY_URL"],
                 client_id='pythonClient', auto_offset_reset='latest'):
        self.consumer = AvroConsumer({
            'bootstrap.servers': bootstrap_servers,
            'group.id': group_id,
            'client.id': client_id,
            'schema.registry.url': schema_registry_url,
            'auto.offset.reset': auto_offset_reset
        })
        self.consumer.subscribe([topic])

    def __str__(self):
        sb = []
        for key in self.__dict__:
            sb.append("{key}='{value}'".format(key=key, value=self.__dict__[key]))
        return ', '.join(sb)

    def __repr__(self):
        return self.__str__()

    def get_message(self, fft=False):
        self.log.info(f'Start : get_messages({fft})')
        message = self.consumer.poll(100)
        if message is not None and fft:  # poll() returns None on timeout
            dasfft = DasFft()
            message.value()['fft'] = dasfft.amplitudes_fft(message.value()['amplitudes'])
        self.log.info(f'End : get_messages({fft})')
        return message

    def close(self):
        self.log.info('Start : close()')
        self.consumer.close()
        self.log.info('End : close()')
class HttpCheckConsumer:
    """Consume Kafka messages"""

    def __init__(self, config: KafkaConfig):
        self.config = config
        self.consumer = AvroConsumer({
            "bootstrap.servers": KAFKA_BROKER,
            "group.id": "groupid",
            "schema.registry.url": KAFKA_SCHEMA_REGISTRY_URL,
            "auto.offset.reset": "smallest",
            "enable.auto.commit": False,
        })
        self.consumer.subscribe([KAFKA_TOPIC])

    @staticmethod
    def _process_message(message) -> HttpCheckResult:
        key = message.key()
        value = message.value()
        timestamp = datetime.datetime.fromisoformat(key["timestamp"])
        status_code = value["status_code"]
        matches_regex = value["matches_regex"]
        response_time_seconds = value["response_time_seconds"]
        return HttpCheckResult(
            status_code=status_code,
            timestamp=timestamp,
            matches_regex=matches_regex,
            response_time_seconds=response_time_seconds,
        )

    def consume(self) -> Generator[HttpCheckResult, None, None]:
        while True:
            try:
                msg = self.consumer.poll(1)
            except SerializerError as error:
                raise HttpCheckSerializerError(
                    f"Message deserialization failed: {error}")
            if msg is None:
                continue
            if msg.error():
                raise HttpCheckConsumerError(
                    "AvroConsumer error: {}".format(msg.error()))
            logger.debug(f"Offset: {msg.offset()}")
            yield self._process_message(msg)

    def commit(self):
        self.consumer.commit()
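# A usage sketch for HttpCheckConsumer: consume() is an infinite generator,
# so callers iterate over it and commit after handling each result (auto
# commit is disabled in the config above). The KafkaConfig construction and
# print body are assumptions for illustration.
checker = HttpCheckConsumer(config=KafkaConfig())
for result in checker.consume():
    print(result.status_code, result.response_time_seconds)
    checker.commit()  # mark the message as processed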
async def consume(broker_url, topic, schema_registry_url):
    schema_registry = CachedSchemaRegistryClient({"url": schema_registry_url})
    conf = {"bootstrap.servers": broker_url, "group.id": "A"}
    c = AvroConsumer(conf, schema_registry=schema_registry)
    c.subscribe([topic])

    while True:
        message = c.poll(1.0)
        if message is None:
            print("no message received by consumer")
        elif message.error() is not None:
            print(f"error from consumer {message.error()}")
        else:
            try:
                print(message.value())
            except KeyError as e:
                print(f"Failed to unpack message {e}")
        await asyncio.sleep(1.0)
def main(self):
    print("Creating Avro Consumer")
    c = AvroConsumer({
        'bootstrap.servers': KAFKA_BOOTSTRAP_SERVERS,
        'group.id': 'example',
        'default.topic.config': {'auto.offset.reset': 'smallest'},
        'schema.registry.url': SCHEMA_REGISTRY_URL,
        'enable.partition.eof': 'false'
    })
    try:
        c.subscribe([INPUT_TOPIC], lambda consumer, partitions: print(
            "subscribed to " + INPUT_TOPIC + " num. partitions=" + str(len(partitions))))

        print("Creating Avro Producer")
        p = AvroProducer({
            'bootstrap.servers': KAFKA_BOOTSTRAP_SERVERS,
            'schema.registry.url': SCHEMA_REGISTRY_URL
        })

        print("Running Mirror Loop")
        while self.running:
            msg = c.poll(1)
            try:
                if msg:
                    if not msg.error():
                        print(msg)
                        p.produce(topic=OUTPUT_TOPIC, key=None, value=msg.value(),
                                  key_schema=None, value_schema=SCHEMA)
                        p.flush()
                    else:
                        print(msg.error())
                        self.running = False
            except SerializerError as e:
                print("Message deserialization failed for %s: %s" % (msg, e))
                self.running = False
    finally:
        print("Closing consumer cleanly")
        c.close()
def consume(broker_url, schema_registry_url):
    schema_registry = CachedSchemaRegistryClient({"url": schema_registry_url})
    conf = {
        "bootstrap.servers": broker_url,
        "group.id": "0",
        "auto.offset.reset": "earliest"
    }
    consumer = AvroConsumer(conf, schema_registry=schema_registry)
    consumer.subscribe(["^tracking.*"])

    while True:
        msg = consumer.poll(1.0)
        if msg is None:
            print("No message received")
        else:
            print(msg.value())
def avro_consumer(urls, topics, uav_name):
    c = AvroConsumer(urls)
    c.subscribe(topics)
    check_time = 0
    msges = []
    c_topic = ""
    loop = len(topics)
    msg = None  # defined up front so the except clause can reference it
    while True:
        try:
            msg = c.poll(10)
        except SerializerError as e:
            # print("Message deserialization failed for {}: {}".format(msg, e))
            break
        if msg is None:
            continue
        if msg.error():
            # print("AvroConsumer error: {}".format(msg.error()))
            continue
        m = msg.value()
        if m["header"]['sourceSystem'] == uav_name:
            if check_time == 0 or check_time == m["header"]["time"]:
                c.unsubscribe()
                check_time = m["header"]["time"]
                c_topic = msg.topic()
                d = topics.index(c_topic)
                del topics[d]
                msges.append(msg)
                loop = loop - 1
                if loop == 0:
                    break
                c.subscribe(topics)
    c.close()
    # return the list of consumed avro messages (one for each topic, same timestamp)
    return msges
def consume(topic, conf):
    """ Consume User records """
    from confluent_kafka.avro import AvroConsumer
    from confluent_kafka.avro.serializer import SerializerError

    print("Consuming user records from topic {} with group {}. ^c to exit."
          .format(topic, conf["group.id"]))

    c = AvroConsumer(conf, reader_value_schema=record_schema)
    c.subscribe([topic])

    while True:
        try:
            msg = c.poll(1)
            # There were no messages on the queue, continue polling
            if msg is None:
                continue
            if msg.error():
                print("Consumer error: {}".format(msg.error()))
                continue

            record = User(msg.value())
            print("name: {}\n\tfavorite_number: {}\n\tfavorite_color: {}\n".format(
                record.name, record.favorite_number, record.favorite_color))
        except SerializerError as e:
            # Report malformed record, discard results, continue polling
            print("Message deserialization failed {}".format(e))
            continue
        except KeyboardInterrupt:
            break

    print("Shutting down consumer..")
    c.close()
class KafkaWorker(BaseWorker):
    topic_name = None
    consumer_name = None
    consumer_settings = {}
    commit_on_complete = False
    async_commit = True
    poll_timeout = 0
    auto_offset_reset = 'earliest'
    consumer = None
    last_message = None

    def setup(self):
        self.consumer = AvroConsumer(self.get_consumer_settings())
        self.consumer.subscribe([self.get_topic_name()])

    def teardown(self):
        if self.consumer:
            self.consumer.close()

    def get_topic_name(self):
        return self.topic_name or utils.config_missing('topic name')

    def get_consumer_name(self):
        return self.consumer_name or utils.generate_random_consumer_name()

    def get_consumer_settings(self):
        default_settings = {
            'group.id': self.get_consumer_name(),
            'default.topic.config': {'auto.offset.reset': self.auto_offset_reset},
            'enable.auto.commit': False,
            'bootstrap.servers': utils.get_broker_url(),
            'schema.registry.url': utils.get_schema_registry_url(),
            'session.timeout.ms': 10000,
            'heartbeat.interval.ms': 1000,
            'api.version.request': True,
        }
        return utils.generate_client_settings(default_settings, self.consumer_settings)

    def poll(self):
        message = self.consumer.poll(timeout=self.poll_timeout)
        if message is not None:
            self.last_message = message
        return message

    def get_partitions(self):
        partitions = self.consumer.assignment()
        if not partitions:
            self.poll()
            partitions = self.consumer.assignment()
        return partitions

    def get_current_offsets(self):
        return self.consumer.position(self.get_partitions())

    def reset_consumer_offsets(self, offset):
        self.consumer.assign([TopicPartition(tp.topic, tp.partition, offset)
                              for tp in self.get_partitions()])

    def seek_to_timestamp(self, timestamp):
        timestamp_ms = dt_to_unix_ms(timestamp)
        partitions = self.get_partitions()
        for tp in partitions:
            tp.offset = timestamp_ms
        partitions = self.consumer.offsets_for_times(partitions)
        self.consumer.assign(partitions)

    def handle(self):
        message = self.poll()
        if message is None:
            self.wait()
        elif message.error():
            if message.error().code() == KafkaError._PARTITION_EOF:
                self.partition_eof(message)
            else:
                raise KafkaException(message.error())
        else:
            self._consume(message)
            if self.commit_on_complete:
                self.commit()
        self.done()

    def commit(self):
        if not self.consumer_settings.get('enable.auto.commit'):
            # `async` is a reserved word in Python 3.7+; confluent-kafka
            # renamed the commit kwarg to `asynchronous`.
            self.consumer.commit(asynchronous=self.async_commit)

    def _consume(self, message):
        self.consume_message(MessageValue(message))

    def consume_message(self, message):
        pass

    def partition_eof(self, message):
        pass
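# A minimal sketch of how KafkaWorker is meant to be specialized: subclasses
# pin a topic and override consume_message(), which receives the MessageValue
# wrapper built in _consume(). The topic name and handler body here are
# illustrative assumptions.
class PrintingWorker(KafkaWorker):
    topic_name = 'example-topic'
    commit_on_complete = True       # commit after each handled message

    def consume_message(self, message):
        print(message)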