class KafkaBackend(BroadcastBackend):
    def __init__(self, url: str):
        self._servers = [urlparse(url).netloc]
        self._consumer_channels: typing.Set = set()

    async def connect(self) -> None:
        loop = asyncio.get_event_loop()
        self._producer = AIOKafkaProducer(loop=loop, bootstrap_servers=self._servers)
        self._consumer = AIOKafkaConsumer(loop=loop, bootstrap_servers=self._servers)
        await self._producer.start()
        await self._consumer.start()

    async def disconnect(self) -> None:
        await self._producer.stop()
        await self._consumer.stop()

    async def subscribe(self, channel: str) -> None:
        self._consumer_channels.add(channel)
        self._consumer.subscribe(topics=self._consumer_channels)

    async def unsubscribe(self, channel: str) -> None:
        # AIOKafkaConsumer.unsubscribe() is synchronous and clears *all*
        # subscriptions, so it must not be awaited or used on its own here.
        # Drop only the requested channel, then resubscribe to the rest.
        self._consumer_channels.discard(channel)
        if self._consumer_channels:
            self._consumer.subscribe(topics=self._consumer_channels)
        else:
            self._consumer.unsubscribe()

    async def publish(self, channel: str, message: typing.Any) -> None:
        await self._producer.send_and_wait(channel, message.encode("utf8"))

    async def next_published(self) -> Event:
        message = await self._consumer.getone()
        return Event(channel=message.topic, message=message.value.decode("utf8"))
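A minimal sketch of driving this backend, assuming the broadcaster-style Event type above and a local broker at kafka://localhost:9092 (names and URL are illustrative):

import asyncio

async def demo() -> None:
    backend = KafkaBackend("kafka://localhost:9092")
    await backend.connect()
    try:
        await backend.subscribe("chat")
        # NOTE: with the default auto_offset_reset="latest", the consumer may
        # need a moment to receive its partition assignment before messages
        # published here become visible to next_published().
        await backend.publish("chat", "hello")
        event = await backend.next_published()
        print(event.channel, event.message)
        await backend.unsubscribe("chat")
    finally:
        await backend.disconnect()

asyncio.run(demo())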
async def onJoin(self):
    loop = asyncio.get_event_loop()
    for handler in self.handlers:
        # initialize handler
        handler_instance = handler()
        handler_instance.set_session(self)

        if hasattr(handler_instance, 'init'):
            await handler_instance.init()

        if hasattr(handler_instance, 'on_event'):
            self.log.debug("subscribing to topic %s", handler_instance.subscribe_topic)
            # Used with base handler defined subscribe_topic
            if handler_instance.subscribe_topic is not None:
                consumer = AIOKafkaConsumer(
                    handler_instance.subscribe_topic,
                    bootstrap_servers=self.transport_host,
                    loop=loop)
                await consumer.start()
                self.log.debug("subscribed to topic: %s", handler_instance.subscribe_topic)
                try:
                    # NOTE: this loop runs until the consumer stops, so any
                    # handlers after this one are only reached once the
                    # stream ends.
                    async for msg in consumer:
                        await handler_instance.on_event(msg.value)
                finally:
                    await consumer.stop()
            else:
                # Used with config.json defined topics
                if self.subscribed_topics is not None:
                    consumer = AIOKafkaConsumer(
                        bootstrap_servers=self.transport_host,
                        loop=loop,
                        group_id='my-group')
                    await consumer.start()
                    # Subscribe to all configured topics in one call: each
                    # call to subscribe() *replaces* the previous
                    # subscription, so subscribing one topic at a time would
                    # keep only the last one.
                    consumer.subscribe(topics=self.subscribed_topics)
                    try:
                        async for msg in consumer:
                            value = msg.value.decode()
                            await handler_instance.on_event(value)
                    except Exception as error:
                        self.log.error("Consumer error. %s", error)
                    await asyncio.sleep(0)

        if hasattr(handler_instance, 'worker'):
            while True:
                try:
                    await handler_instance.worker()
                except Exception as error:
                    self.log.error("Operation failed. %s", error)
                    traceback.print_exc(file=sys.stdout)
                    continue
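Because each async for above runs until its consumer stops, later handlers are only initialized once earlier streams end. A sketch of one way around that, spawning each handler's consume loop as its own task (the helper name is illustrative):

import asyncio
from aiokafka import AIOKafkaConsumer

async def run_handler(handler_instance, topic, bootstrap_servers):
    # One consumer per handler, each running concurrently as a task.
    consumer = AIOKafkaConsumer(topic, bootstrap_servers=bootstrap_servers)
    await consumer.start()
    try:
        async for msg in consumer:
            await handler_instance.on_event(msg.value)
    finally:
        await consumer.stop()

# Inside onJoin, instead of awaiting each loop inline:
#     asyncio.create_task(run_handler(handler_instance,
#                                     handler_instance.subscribe_topic,
#                                     self.transport_host))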
async def consume():
    consumer = AIOKafkaConsumer(
        loop=loop, bootstrap_servers='localhost:9092',
        metadata_max_age_ms=5000, group_id="test2")
    consumer.subscribe(pattern="test*")
    # Get cluster layout and topic/partition allocation
    await consumer.start()
    try:
        async for msg in consumer:
            print(msg.value)
    finally:
        await consumer.stop()
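A minimal way to run this coroutine, assuming the module-level loop it references:

import asyncio

# The example references a module-level `loop`; one plausible setup:
loop = asyncio.get_event_loop()

# With pattern subscriptions, metadata_max_age_ms=5000 makes the consumer
# refresh cluster metadata every 5 s, so topics created later that match
# "test*" are picked up automatically.
loop.run_until_complete(consume())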
class Consumer(object):
    def __init__(self, topics, loop=None, worker=lambda data: print(data),
                 **kwargs):
        self.topics = topics
        self.worker = worker
        self._consumer = None
        conn_settings = {
            "bootstrap_servers": app_settings['kafka']['brokers'],
            'loop': loop or asyncio.get_event_loop(),
        }
        conn_settings.update(kwargs)
        conn_settings.update(app_settings['kafka'].get(
            'consumer_connection_settings', {}))
        self.config = conn_settings

    async def init(self):
        if self._consumer is None:
            self._consumer = AIOKafkaConsumer(**self.config)
            # A str topic is treated as a regex pattern, an iterable as a
            # plain topic list.
            if isinstance(self.topics, str):
                self._consumer.subscribe(pattern=self.topics)
            if isinstance(self.topics, (list, set, tuple)):
                self._consumer.subscribe(topics=self.topics)
            await self._consumer.start()
        return self._consumer

    @property
    def has_regex_topic(self):
        return isinstance(self.topics, str)

    @property
    def is_ready(self):
        return self._consumer is not None

    async def get(self, max_records=1, within=60 * 1000):
        return await self._consumer.getmany(timeout_ms=within,
                                            max_records=max_records)

    async def stop(self):
        return await self._consumer.stop()
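A usage sketch for this wrapper, assuming the app_settings structure the class reads (values here are illustrative, and the loop= connection setting implies an older aiokafka release that still accepts it):

import asyncio

# Illustrative only: app_settings is assumed by the class above.
app_settings = {"kafka": {"brokers": ["localhost:9092"]}}

async def demo():
    consumer = Consumer(["events"], group_id="demo-group")
    await consumer.init()
    try:
        # Wait up to 60 s (the default `within`) for a single record.
        batches = await consumer.get()
        for tp, messages in batches.items():
            for msg in messages:
                print(tp.topic, tp.partition, msg.offset, msg.value)
    finally:
        await consumer.stop()

asyncio.run(demo())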
async def _run(self, arguments, app):
    self.tasks = []
    worker_names = arguments.consumer_worker
    if isinstance(worker_names, str):
        # we could just specify one here
        worker_names = [worker_names]
    conn_settings = {
        "api_version": arguments.api_version,
        "bootstrap_servers": app_settings["kafka"]["brokers"],
        "loop": self.get_loop(),
    }
    conn_settings.update(app_settings["kafka"].get(
        "consumer_connection_settings", {}))

    for worker_name in worker_names:
        worker_conf = self.init_worker_conf(worker_name, arguments)
        topic_prefix = app_settings["kafka"].get("topic_prefix", "")
        worker_conn_settings = {
            **conn_settings,
            **(getattr(worker_conf["handler"], "connection_settings", {}) or {}),
            **(worker_conf.get("connection_settings") or {}),
        }
        if worker_conf.get("regex_topic"):
            consumer = AIOKafkaConsumer(
                group_id=worker_conf.get("group", "default"),
                **worker_conn_settings)
            self.tasks.append(
                self.run_consumer(self._get_worker(consumer, worker_conf),
                                  consumer, worker_conf))
        else:
            for topic in worker_conf["topics"]:
                topic_id = f"{topic_prefix}{topic}"
                group_id = worker_conf.get(
                    "group", "default").format(topic=topic_id)
                consumer = AIOKafkaConsumer(group_id=group_id,
                                            **worker_conn_settings)
                worker = self._get_worker(consumer, worker_conf)
                listener = ConsumerGroupeRebalancer(consumer=consumer,
                                                    worker=worker)
                consumer.subscribe(topics=[topic_id], listener=listener)
                self.tasks.append(
                    self.run_consumer(worker, consumer, worker_conf))

    asyncio.create_task(asyncio.wait(self.tasks))
async def _do_some_work(self, work, topics, group_id, offsets, listener,
                        bootstrap_servers, enable_commit, **kwargs):
    consumer = AIOKafkaConsumer(
        loop=self.loop,
        bootstrap_servers=bootstrap_servers,
        group_id=group_id,
        fetch_max_wait_ms=self.fetch_max_wait_ms,
        max_partition_fetch_bytes=self.max_partition_fetch_bytes,
        request_timeout_ms=self.request_timeout_ms,
        auto_offset_reset=self.auto_offset_reset,
        enable_auto_commit=self.enable_auto_commit,
        auto_commit_interval_ms=self.auto_commit_interval_ms,
        check_crcs=self.check_crcs,
        metadata_max_age_ms=self.metadata_max_age_ms,
        heartbeat_interval_ms=self.heartbeat_interval_ms,
        session_timeout_ms=self.session_timeout_ms,
        exclude_internal_topics=self.exclude_internal_topics,
        connections_max_idle_ms=self.connections_max_idle_ms,
        **kwargs)
    consumer.subscribe(topics=topics, listener=listener)
    await consumer.start()
    if offsets is not None:
        await self._seek_offsets(consumer, topics, offsets)
    try:
        async for msg in consumer:
            try:
                if msg is None:
                    continue
                await work(msg)
                if enable_commit:
                    meta = "Some utf-8 metadata"
                    tp = TopicPartition(msg.topic, msg.partition)
                    offsets = {tp: OffsetAndMetadata(msg.offset + 1, meta)}
                    await consumer.commit(offsets)
            except OffsetOutOfRangeError as err:
                tps = err.args[0].keys()
                await consumer.seek_to_beginning(*tps)
                continue
            except Exception:
                root_logger.error(f'{traceback.format_exc()}')
                continue
    finally:
        await consumer.stop()
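The commit dictionary above follows Kafka's convention that the committed offset is the *next* offset to consume. A self-contained sketch of the same per-message commit pattern, assuming a local broker and a "demo" topic:

import asyncio
from aiokafka import AIOKafkaConsumer
from aiokafka.structs import TopicPartition, OffsetAndMetadata

async def commit_per_message():
    consumer = AIOKafkaConsumer(
        "demo", bootstrap_servers="localhost:9092",
        group_id="demo-group", enable_auto_commit=False)
    await consumer.start()
    try:
        async for msg in consumer:
            # ... process msg ...
            tp = TopicPartition(msg.topic, msg.partition)
            # Commit msg.offset + 1: the offset of the next message to read.
            await consumer.commit({tp: OffsetAndMetadata(msg.offset + 1, "")})
    finally:
        await consumer.stop()

asyncio.run(commit_per_message())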
async def consume(loop):
    consumer = AIOKafkaConsumer(
        loop=loop,
        bootstrap_servers='localhost:9092',
        group_id="my_group",           # Consumer must be in a group to commit
        enable_auto_commit=False,      # Will disable autocommit
        auto_offset_reset="none",
        key_deserializer=lambda key: key.decode("utf-8") if key else "",
    )
    await consumer.start()

    local_state = LocalState()
    listener = RebalanceListener(consumer, local_state)
    consumer.subscribe(topics=["test"], listener=listener)

    save_task = loop.create_task(save_state_every_second(local_state))

    try:
        while True:
            try:
                msg_set = await consumer.getmany(timeout_ms=1000)
            except OffsetOutOfRangeError as err:
                # This means that saved file is outdated and should be
                # discarded
                tps = err.args[0].keys()
                local_state.discard_state(tps)
                await consumer.seek_to_beginning(*tps)
                continue

            for tp, msgs in msg_set.items():
                counts = Counter()
                for msg in msgs:
                    print("Process", tp, msg.key)
                    counts[msg.key] += 1
                local_state.add_counts(tp, counts, msg.offset)
    finally:
        await consumer.stop()
        save_task.cancel()
        await save_task
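LocalState, RebalanceListener, and save_state_every_second are defined elsewhere in this example. A plausible minimal listener, assuming LocalState exposes dump_local_state, load_local_state, and get_last_offset (those method names are assumptions):

from aiokafka.abc import ConsumerRebalanceListener

class RebalanceListener(ConsumerRebalanceListener):
    """Flush local counts before losing partitions; reload them after new
    partitions are assigned and seek to the first unprocessed offset."""

    def __init__(self, consumer, local_state):
        self.consumer = consumer
        self.local_state = local_state

    async def on_partitions_revoked(self, revoked):
        self.local_state.dump_local_state()

    async def on_partitions_assigned(self, assigned):
        self.local_state.load_local_state(assigned)
        for tp in assigned:
            last_offset = self.local_state.get_last_offset(tp)
            if last_offset < 0:
                await self.consumer.seek_to_beginning(tp)
            else:
                # seek() is synchronous; seek_to_beginning() is a coroutine.
                self.consumer.seek(tp, last_offset + 1)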
async def main():
    consumer = AIOKafkaConsumer(
        group_id="dev-group",
        loop=asyncio.get_event_loop(),
        enable_auto_commit=False,
        bootstrap_servers="localhost:9092",
    )
    # listener = ConsumerGroupeRebalancer(consumer=consumer)
    # consumer.subscribe(topics=["dev-topic"], listener=listener)
    consumer.subscribe(topics=["dev-topic"])
    await consumer.start()
    try:
        print("Ready !!!")
        # while True:
        #     msgs = await consumer.getmany(timeout_ms=200, max_records=5)
        #     print(f"Got {len(msgs)} messages ")
        async for msg in consumer:
            print(msg)
            tp = TopicPartition(msg.topic, msg.partition)
            await consumer.commit({tp: msg.offset + 1})
    finally:
        await consumer.stop()
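commit() accepts plain integer offsets as well as OffsetAndMetadata values. A compact sketch of committing a whole batch at once instead of per message (topic and group names are illustrative):

import asyncio
from aiokafka import AIOKafkaConsumer
from aiokafka.structs import TopicPartition

async def batch_commit():
    consumer = AIOKafkaConsumer(
        "dev-topic", bootstrap_servers="localhost:9092",
        group_id="dev-group", enable_auto_commit=False)
    await consumer.start()
    try:
        batch = await consumer.getmany(timeout_ms=1000, max_records=100)
        offsets = {
            # Commit the offset *after* the last processed message.
            tp: msgs[-1].offset + 1
            for tp, msgs in batch.items() if msgs
        }
        if offsets:
            await consumer.commit(offsets)
    finally:
        await consumer.stop()

asyncio.run(batch_commit())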
async def consume_events(app: web.Application) -> None:
    """The main Kafka consumer, which routes messages to processing
    functions or tasks.
    """
    logger = structlog.get_logger(app["safir/config"].logger_name)

    registry = RegistryApi(
        session=app["safir/http_session"],
        url=app["safir/config"].schema_registry_url,
    )
    deserializer = Deserializer(registry=registry)

    consumer_settings = {
        "bootstrap_servers": app["safir/config"].kafka_broker_url,
        "group_id": app["safir/config"].kafka_consumer_group_id,
        "auto_offset_reset": "latest",
        "security_protocol": app["safir/config"].kafka_protocol,
    }
    if consumer_settings["security_protocol"] == "SSL":
        consumer_settings["ssl_context"] = app["safir/kafka_ssl_context"]
    consumer = AIOKafkaConsumer(
        loop=asyncio.get_event_loop(), **consumer_settings
    )

    topic_names = get_configured_topics(app)

    scheduler = await aiojobs.create_scheduler()

    try:
        await consumer.start()
        logger.info("Started Kafka consumer")

        logger.info("Subscribing to Kafka topics", names=topic_names)
        consumer.subscribe(topic_names)

        partitions = consumer.assignment()
        while len(partitions) == 0:
            # Wait for the consumer to get partition assignment
            await asyncio.sleep(1.0)
            partitions = consumer.assignment()
        logger.info(
            "Got initial partition assignment for Kafka topics",
            partitions=[str(p) for p in partitions],
        )

        async for message in consumer:
            try:
                value_info = await deserializer.deserialize(
                    message.value, include_schema=True
                )
            except Exception:
                logger.exception(
                    "Failed to deserialize a Kafka message value",
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
                continue

            try:
                await route_message(
                    app=app,
                    scheduler=scheduler,
                    message=value_info["message"],
                    schema_id=value_info["id"],
                    schema=value_info["schema"],
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
            except Exception:
                logger.exception(
                    "Failed to route a Kafka message",
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
    except asyncio.CancelledError:
        logger.info("consume_events task got cancelled")
    finally:
        logger.info("consume_events task cancelling")
        await consumer.stop()
        await scheduler.close()
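One plausible way to tie a consumer like this to an aiohttp application's lifecycle (the task key and handler names are illustrative):

import asyncio
from aiohttp import web

async def start_events_listener(app: web.Application) -> None:
    # Run the consumer for the lifetime of the application.
    app["events_consumer_task"] = asyncio.create_task(consume_events(app))

async def stop_events_listener(app: web.Application) -> None:
    # consume_events catches CancelledError and shuts the consumer and
    # scheduler down in its finally block, so awaiting here is safe.
    app["events_consumer_task"].cancel()
    await app["events_consumer_task"]

app = web.Application()
app.on_startup.append(start_events_listener)
app.on_cleanup.append(stop_events_listener)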
async def _run_consumer(topic_queue):
    control = {}
    control_disposables = {}
    topics = {}  # context of each subscribed topic

    def on_next_control(obv, i):
        nonlocal control
        control[obv] = i

    def on_partition_subscribe(tp_context, observer, scheduler):
        tp_context.observer = observer
        if feed_mode is DataFeedMode.PULL:
            observer.on_next(
                functools.partial(on_partition_back, tp_context.tp))

    def on_revoked(tps):
        inactive_topics = {}
        for topic in topics:
            inactive_topics[topic] = False

        for tp in tps:
            # partitions are keyed by TopicPartition, consistent with
            # on_assigned and process_next_batch below.
            topics[tp.topic].partitions[tp].observer.on_completed()
            del topics[tp.topic].partitions[tp]
            if len(topics[tp.topic].partitions) == 0:
                inactive_topics[tp.topic] = True

        all_inactive = [inactive_topics[s] for s in inactive_topics]
        if all(all_inactive):
            topic_queue.put_nowait(RevokedCmd())

    def on_assigned(tps):
        for tp in tps:
            context = TopicPartitionContext()
            context.tp = tp
            topics[tp.topic].partitions[tp] = context
            topics[tp.topic].observer.on_next(
                rx.create(
                    functools.partial(on_partition_subscribe, context)))
        topic_queue.put_nowait(AssignedCmd())

    async def tp_is_completed(topic_partition):
        if source_type is DataSourceType.BATCH:
            highwater = client.highwater(topic_partition)
            if highwater:
                position = await client.position(topic_partition)
                if highwater == position:
                    print("no more lag on {}-{}".format(
                        topic_partition.topic, topic_partition.partition))
                    topics[topic_partition.topic].partitions[
                        topic_partition].completed = True
                    return True
        return False

    async def process_next_batch(topic_partition, count):
        tp = [topic_partition] if topic_partition else []
        read_count = 0
        if count == 1:
            msg = await client.getone(*tp)
            if topic_partition is None:
                topic_partition = TopicPartition(msg.topic, msg.partition)
            topic = topics[topic_partition.topic]
            decoded_msg = topic.decode(msg.value)
            topic.partitions[topic_partition].observer.on_next(decoded_msg)
            read_count += 1
        else:
            data = await client.getmany(*tp, timeout_ms=5000,
                                        max_records=count)
            if len(data) > 0:
                msgs = data[topic_partition]
                topic = topics[topic_partition.topic]
                for msg in msgs:
                    decoded_msg = topic.decode(msg.value)
                    topic.partitions[topic_partition].observer.on_next(
                        decoded_msg)
                    read_count += 1
        return read_count

    try:
        client = AIOKafkaConsumer(
            loop=loop,
            bootstrap_servers=server,
            group_id=group,
            auto_offset_reset='latest',
            enable_auto_commit=True,
            max_partition_fetch_bytes=max_partition_fetch_bytes,
        )
        print("start kafka consumer")
        await client.start()
        partition_assigned = False
        yield_countdown = 5000
        prev_partition = None
        pcount = 0
        while True:
            try:
                cmd = topic_queue.get_nowait()
            except asyncio.QueueEmpty:
                print("queue empty")
                cmd = await topic_queue.get()
            # if len(topics) == 0 or not topic_queue.empty():
            #     cmd = await topic_queue.get()

            if type(cmd) is AddConsumerCmd:
                print('run consumer: add {}'.format(cmd.consumer.topic))
                if cmd.consumer.topic in topics:
                    source_observer.on_error(
                        ValueError(
                            "topic already subscribed for this consumer: {}"
                            .format(cmd.consumer.decode)))
                    break

                if cmd.consumer.control is not None:
                    control_disposables[
                        cmd.observer] = cmd.consumer.control.subscribe(
                            on_next=functools.partial(
                                on_next_control, cmd.observer),
                            on_error=source_observer.on_error,
                        )

                topics[cmd.consumer.topic] = TopicContext(
                    observer=cmd.observer,
                    topic=cmd.consumer.topic,
                    decode=cmd.consumer.decode,
                    start_from=cmd.consumer.start_from,
                    partitions={})
                sub_start_positions = {}
                sub_topics = []
                for k, c in topics.items():
                    sub_topics.append(c.topic)
                    sub_start_positions[c.topic] = c.start_from
                sub_topics = set(sub_topics)
                client.subscribe(topics=sub_topics,
                                 listener=ConsumerRebalancer(
                                     client,
                                     sub_start_positions,
                                     on_revoked=on_revoked,
                                     on_assigned=on_assigned,
                                 ))
            elif type(cmd) is DelConsumerCmd:
                print('run consumer: del {}'.format(cmd))
                topic = topics[cmd.topic]
                disposable = control_disposables.pop(topic.observer, None)
                if disposable is not None:
                    disposable.dispose()

                topics.pop(cmd.topic)
                sub_start_positions = {}
                sub_topics = []
                for k, c in topics.items():
                    sub_topics.append(c.topic)
                    sub_start_positions[c.topic] = c.start_from
                sub_topics = set(sub_topics)
                if len(sub_topics) > 0:
                    client.subscribe(topics=sub_topics,
                                     listener=ConsumerRebalancer(
                                         client,
                                         sub_start_positions,
                                         on_revoked=on_revoked,
                                         on_assigned=on_assigned,
                                     ))
                topic.observer.on_completed()
            elif type(cmd) is PullTopicPartitionCmd:
                no_lag = await tp_is_completed(cmd.topic_partition)
                if source_type is DataSourceType.BATCH and no_lag is True:
                    topic = topics[cmd.topic_partition.topic]
                    topic.partitions[
                        cmd.topic_partition].observer.on_completed()
                    if all([i.completed for _, i in topic.partitions.items()]):
                        print("completed processing topic {}".format(
                            cmd.topic_partition.topic))
                        topic.observer.on_completed()
                else:
                    await process_next_batch(cmd.topic_partition, cmd.count)
            elif type(cmd) is PushRecordCmd:
                read_count = await process_next_batch(None, 1)
                if read_count > 0:
                    topic_queue.put_nowait(PushRecordCmd())
            elif type(cmd) is AssignedCmd:
                if partition_assigned is False:
                    partition_assigned = True
                    if feed_mode is DataFeedMode.PUSH:
                        topic_queue.put_nowait(PushRecordCmd())
            elif type(cmd) is RevokedCmd:
                partition_assigned = False
            else:
                source_observer.on_error(
                    TypeError(
                        "invalid type for queue command: {}".format(cmd)))

            if len(topics) == 0:
                print("no more topic subscribed, ending consumer task")
                break

            regulated = False
            for topic, consumer in topics.items():
                regulation_time = control.get(consumer.observer, None)
                if regulation_time is not None and regulation_time > 0:
                    await asyncio.sleep(regulation_time)
                    regulated = True
                    yield_countdown = 5000
                    control[consumer.observer] = None
                    break  # limitation: only one controllable topic for now

            yield_countdown -= 1
            if yield_countdown == 0 and regulated is False:
                await asyncio.sleep(0)
                yield_countdown = 5000

        await client.stop()
    except asyncio.CancelledError as e:
        print("cancelled {}".format(e))
    except Exception as e:
        print("consumer exception: {}:{}".format(type(e), e))
        print(traceback.format_list(traceback.extract_tb(e.__traceback__)))
        raise e
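The ConsumerRebalancer passed to subscribe() above is defined elsewhere in this project. A hypothetical sketch of its shape, assuming it forwards revoke/assign events to the callbacks used by _run_consumer and seeks newly assigned partitions to a configured start position (the 'beginning' marker is an assumption):

from aiokafka.abc import ConsumerRebalanceListener

class ConsumerRebalancer(ConsumerRebalanceListener):
    """Hypothetical sketch: forwards rebalance events and seeks newly
    assigned partitions to their configured start position."""

    def __init__(self, consumer, start_positions, on_revoked, on_assigned):
        self._consumer = consumer
        self._start_positions = start_positions
        self._on_revoked = on_revoked
        self._on_assigned = on_assigned

    async def on_partitions_revoked(self, revoked):
        self._on_revoked(revoked)

    async def on_partitions_assigned(self, assigned):
        for tp in assigned:
            # 'beginning' is an assumed sentinel for start_from.
            if self._start_positions.get(tp.topic) == 'beginning':
                await self._consumer.seek_to_beginning(tp)
        self._on_assigned(assigned)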
async def consume_kafka(app):
    """Consume Kafka messages directed to templatebot's functionality."""
    logger = structlog.get_logger(app["root"]["api.lsst.codes/loggerName"])

    registry = RegistryApi(
        session=app["root"]["api.lsst.codes/httpSession"],
        url=app["root"]["templatebot/registryUrl"],
    )
    deserializer = Deserializer(registry=registry)

    consumer_settings = {
        "bootstrap_servers": app["root"]["templatebot/brokerUrl"],
        "group_id": app["root"]["templatebot/slackGroupId"],
        "auto_offset_reset": "latest",
        "ssl_context": app["root"]["templatebot/kafkaSslContext"],
        "security_protocol": app["root"]["templatebot/kafkaProtocol"],
    }
    consumer = AIOKafkaConsumer(loop=asyncio.get_event_loop(),
                                **consumer_settings)

    try:
        await consumer.start()
        logger.info("Started Kafka consumer", **consumer_settings)

        topic_names = [
            app["root"]["templatebot/appMentionTopic"],
            app["root"]["templatebot/messageImTopic"],
            app["root"]["templatebot/interactionTopic"],
        ]
        logger.info("Subscribing to Kafka topics", names=topic_names)
        consumer.subscribe(topic_names)
        logger.info("Finished subscribing to Kafka topics", names=topic_names)

        partitions = consumer.assignment()
        logger.info("Waiting on partition assignment", names=topic_names)
        while len(partitions) == 0:
            # Wait for the consumer to get partition assignment
            await asyncio.sleep(1.0)
            partitions = consumer.assignment()
        logger.info(
            "Initial partition assignment",
            partitions=[str(p) for p in partitions],
        )

        async for message in consumer:
            logger.info(
                "Got Kafka message from sqrbot",
                topic=message.topic,
                partition=message.partition,
                offset=message.offset,
            )
            try:
                message_info = await deserializer.deserialize(message.value)
            except Exception:
                logger.exception(
                    "Failed to deserialize a message",
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
                continue

            event = message_info["message"]
            logger.debug(
                "New message",
                topic=message.topic,
                partition=message.partition,
                offset=message.offset,
                contents=event,
            )

            try:
                await route_event(
                    event=message_info["message"],
                    app=app,
                    schema_id=message_info["id"],
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
            except Exception:
                logger.exception(
                    "Failed to handle message",
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
    except asyncio.CancelledError:
        logger.info("consume_kafka task got cancelled")
    finally:
        logger.info("consume_kafka task cancelling")
        await consumer.stop()
class MsgKafka(MsgBase):
    def __init__(self, logger_name='msg', lock=False):
        super().__init__(logger_name, lock)
        self.host = None
        self.port = None
        self.consumer = None
        self.producer = None
        self.loop = None
        self.broker = None
        self.group_id = None

    def connect(self, config):
        try:
            if "logger_name" in config:
                self.logger = logging.getLogger(config["logger_name"])
            self.host = config["host"]
            self.port = config["port"]
            self.loop = config.get("loop") or asyncio.get_event_loop()
            self.broker = str(self.host) + ":" + str(self.port)
            self.group_id = config.get("group_id")
        except Exception as e:  # TODO refine
            raise MsgException(str(e))

    def disconnect(self):
        try:
            pass
            # self.loop.close()
        except Exception as e:  # TODO refine
            raise MsgException(str(e))

    def write(self, topic, key, msg):
        """
        Write a message at kafka bus
        :param topic: message topic, must be string
        :param key: message key, must be string
        :param msg: message content, can be string or dictionary
        :return: None or raises MsgException on failing
        """
        retry = 2  # Try two times
        while retry:
            try:
                self.loop.run_until_complete(
                    self.aiowrite(topic=topic, key=key, msg=msg))
                break
            except Exception as e:
                retry -= 1
                if retry == 0:
                    raise MsgException("Error writing {} topic: {}".format(
                        topic, str(e)))

    def read(self, topic):
        """
        Read from one or several topics.
        :param topic: can be str: single topic; or str list: several topics
        :return: topic, key, message; or None
        """
        try:
            return self.loop.run_until_complete(self.aioread(topic, self.loop))
        except MsgException:
            raise
        except Exception as e:
            raise MsgException("Error reading {} topic: {}".format(
                topic, str(e)))

    async def aiowrite(self, topic, key, msg, loop=None):
        """
        Asyncio write
        :param topic: str kafka topic
        :param key: str kafka key
        :param msg: str or dictionary kafka message
        :param loop: asyncio loop. To be DEPRECATED! in near future!!!
            loop must be provided inside config at connect
        :return: None
        """
        if not loop:
            loop = self.loop
        try:
            self.producer = AIOKafkaProducer(loop=loop,
                                             key_serializer=str.encode,
                                             value_serializer=str.encode,
                                             bootstrap_servers=self.broker)
            await self.producer.start()
            await self.producer.send(topic=topic, key=key,
                                     value=yaml.safe_dump(
                                         msg, default_flow_style=True))
        except Exception as e:
            raise MsgException(
                "Error publishing topic '{}', key '{}': {}".format(
                    topic, key, e))
        finally:
            await self.producer.stop()

    async def aioread(self, topic, loop=None, callback=None, aiocallback=None,
                      group_id=None, from_beginning=None, **kwargs):
        """
        Asyncio read from one or several topics.
        :param topic: can be str: single topic; or str list: several topics
        :param loop: asyncio loop. To be DEPRECATED! in near future!!!
            loop must be provided inside config at connect
        :param callback: synchronous callback function that will handle the
            message in kafka bus
        :param aiocallback: async callback function that will handle the
            message in kafka bus
        :param group_id: kafka group_id to use. Can be False (set group_id
            to None), None (use general group_id provided at connect inside
            config), or a group_id string
        :param from_beginning: if True, messages will be obtained from
            beginning instead of only new ones. If group_id is supplied,
            only the messages not yet processed by another worker are
            obtained. If group_id is None, all messages stored at kafka are
            obtained.
        :param kwargs: optional keyword arguments for callback function
        :return: If no callback defined, it returns (topic, key, message)
        """
        if not loop:
            loop = self.loop
        if group_id is False:
            group_id = None
        elif group_id is None:
            group_id = self.group_id
        try:
            if isinstance(topic, (list, tuple)):
                topic_list = topic
            else:
                topic_list = (topic, )
            self.consumer = AIOKafkaConsumer(
                loop=loop,
                bootstrap_servers=self.broker,
                group_id=group_id,
                auto_offset_reset="earliest" if from_beginning else "latest")
            await self.consumer.start()
            self.consumer.subscribe(topic_list)
            async for message in self.consumer:
                if callback:
                    callback(message.topic, yaml.safe_load(message.key),
                             yaml.safe_load(message.value), **kwargs)
                elif aiocallback:
                    await aiocallback(message.topic,
                                      yaml.safe_load(message.key),
                                      yaml.safe_load(message.value), **kwargs)
                else:
                    return (message.topic, yaml.safe_load(message.key),
                            yaml.safe_load(message.value))
        except KafkaError as e:
            raise MsgException(str(e))
        finally:
            await self.consumer.stop()
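A minimal usage sketch for this message-bus wrapper, assuming a local broker (config keys follow the connect method above; topic and payload are illustrative):

bus = MsgKafka()
bus.connect({"host": "localhost", "port": 9092, "group_id": "demo"})

# write() lets aiowrite() YAML-serialize the dict payload and retries once
# on failure.
bus.write("alarms", "create", {"name": "high_cpu", "threshold": 0.9})

# read() blocks until one message arrives and returns (topic, key, message)
# with key and message YAML-deserialized. With auto_offset_reset="latest",
# the reader is typically started before the writer so no message is missed.
topic, key, msg = bus.read("alarms")
print(topic, key, msg)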
class MsgKafka(MsgBase):
    def __init__(self, logger_name='msg'):
        self.logger = logging.getLogger(logger_name)
        self.host = None
        self.port = None
        self.consumer = None
        self.producer = None

    def connect(self, config):
        try:
            if "logger_name" in config:
                self.logger = logging.getLogger(config["logger_name"])
            self.host = config["host"]
            self.port = config["port"]
            self.loop = asyncio.get_event_loop()
            self.broker = str(self.host) + ":" + str(self.port)
        except Exception as e:  # TODO refine
            raise MsgException(str(e))

    def disconnect(self):
        try:
            self.loop.close()
        except Exception as e:  # TODO refine
            raise MsgException(str(e))

    def write(self, topic, key, msg):
        try:
            # aiowrite() already YAML-serializes the value, so pass msg
            # through unmodified instead of dumping it twice.
            self.loop.run_until_complete(
                self.aiowrite(topic=topic, key=key, msg=msg, loop=self.loop))
        except Exception as e:
            raise MsgException("Error writing {} topic: {}".format(
                topic, str(e)))

    def read(self, topic):
        """
        Read from one or several topics. It is non-blocking, returning None
        if nothing is available
        :param topic: can be str: single topic; or str list: several topics
        :return: topic, key, message; or None
        """
        try:
            return self.loop.run_until_complete(self.aioread(topic, self.loop))
        except MsgException:
            raise
        except Exception as e:
            raise MsgException("Error reading {} topic: {}".format(
                topic, str(e)))

    async def aiowrite(self, topic, key, msg, loop=None):
        if not loop:
            loop = self.loop
        try:
            self.producer = AIOKafkaProducer(loop=loop,
                                             key_serializer=str.encode,
                                             value_serializer=str.encode,
                                             bootstrap_servers=self.broker)
            await self.producer.start()
            await self.producer.send(topic=topic, key=key,
                                     value=yaml.safe_dump(
                                         msg, default_flow_style=True))
        except Exception as e:
            raise MsgException(
                "Error publishing topic '{}', key '{}': {}".format(
                    topic, key, e))
        finally:
            await self.producer.stop()

    async def aioread(self, topic, loop=None, callback=None, *args):
        """
        Asyncio read from one or several topics. It blocks
        :param topic: can be str: single topic; or str list: several topics
        :param loop: asyncio loop
        :param callback: callback function that will handle the message in
            kafka bus
        :param args: optional arguments for callback function
        :return: topic, key, message
        """
        if not loop:
            loop = self.loop
        try:
            if isinstance(topic, (list, tuple)):
                topic_list = topic
            else:
                topic_list = (topic, )
            self.consumer = AIOKafkaConsumer(loop=loop,
                                             bootstrap_servers=self.broker)
            await self.consumer.start()
            self.consumer.subscribe(topic_list)
            async for message in self.consumer:
                # safe_load avoids the arbitrary object construction that
                # bare yaml.load would allow on untrusted bus messages.
                if callback:
                    callback(message.topic, yaml.safe_load(message.key),
                             yaml.safe_load(message.value), *args)
                else:
                    return (message.topic, yaml.safe_load(message.key),
                            yaml.safe_load(message.value))
        except KafkaError as e:
            raise MsgException(str(e))
        finally:
            await self.consumer.stop()
async def consume_events(app):
    """Consume events from templatebot-related topics in SQuaRE Events
    (Kafka).

    Notes
    -----
    Templatebot has *two* Kafka consumers. This is one, and the other is in
    `templatebot.slack`. The Slack consumer only listens to topics from Slack
    (SQuaRE Bot), and is focused on responding to Slack-based workflows. This
    consumer is focused on backend-driven events, such as the
    ``templatebot-render_ready`` topic.
    """
    logger = structlog.get_logger(app["root"]["api.lsst.codes/loggerName"])

    registry = RegistryApi(
        session=app["root"]["api.lsst.codes/httpSession"],
        url=app["root"]["templatebot/registryUrl"],
    )
    deserializer = Deserializer(registry=registry)

    consumer_settings = {
        "bootstrap_servers": app["root"]["templatebot/brokerUrl"],
        "group_id": app["root"]["templatebot/eventsGroupId"],
        "auto_offset_reset": "latest",
        "ssl_context": app["root"]["templatebot/kafkaSslContext"],
        "security_protocol": app["root"]["templatebot/kafkaProtocol"],
    }
    consumer = AIOKafkaConsumer(loop=asyncio.get_event_loop(),
                                **consumer_settings)

    try:
        await consumer.start()
        logger.info("Started Kafka consumer for events", **consumer_settings)

        topic_names = [app["root"]["templatebot/renderreadyTopic"]]
        logger.info("Subscribing to Kafka topics", names=topic_names)
        consumer.subscribe(topic_names)

        partitions = consumer.assignment()
        while len(partitions) == 0:
            # Wait for the consumer to get partition assignment
            await asyncio.sleep(1.0)
            partitions = consumer.assignment()
        logger.info(
            "Initial partition assignment for event topics",
            partitions=[str(p) for p in partitions],
        )

        async for message in consumer:
            try:
                message_info = await deserializer.deserialize(
                    message.value, include_schema=True)
            except Exception:
                logger.exception(
                    "Failed to deserialize an event message",
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
                continue

            event = message_info["message"]
            logger.debug(
                "New event message",
                topic=message.topic,
                partition=message.partition,
                offset=message.offset,
                contents=event,
            )

            try:
                await route_event(
                    app=app,
                    event=message_info["message"],
                    schema_id=message_info["id"],
                    schema=message_info["schema"],
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
            except Exception:
                logger.exception(
                    "Failed to handle event message",
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
    except asyncio.CancelledError:
        logger.info("consume_events task got cancelled")
    finally:
        logger.info("consume_events task cancelling")
        await consumer.stop()
class MsgKafka(MsgBase):
    def __init__(self, logger_name='msg'):
        self.logger = logging.getLogger(logger_name)
        self.host = None
        self.port = None
        self.consumer = None
        self.producer = None
        # create a different file for each topic
        # self.files = {}

    def connect(self, config):
        try:
            if "logger_name" in config:
                self.logger = logging.getLogger(config["logger_name"])
            self.host = config["host"]
            self.port = config["port"]
            self.topic_lst = []
            self.loop = asyncio.get_event_loop()
            self.broker = str(self.host) + ":" + str(self.port)
        except Exception as e:  # TODO refine
            raise MsgException(str(e))

    def write(self, topic, key, msg):
        try:
            self.loop.run_until_complete(
                self.aiowrite(topic=topic, key=key,
                              msg=yaml.safe_dump(msg,
                                                 default_flow_style=True)))
        except Exception as e:
            raise MsgException("Error writing {} topic: {}".format(
                topic, str(e)))

    def read(self, topic):
        # self.topic_lst.append(topic)
        try:
            return self.loop.run_until_complete(self.aioread(topic))
        except Exception as e:
            raise MsgException("Error reading {} topic: {}".format(
                topic, str(e)))

    async def aiowrite(self, topic, key, msg, loop=None):
        try:
            if not loop:
                loop = self.loop
            self.producer = AIOKafkaProducer(loop=loop,
                                             key_serializer=str.encode,
                                             value_serializer=str.encode,
                                             bootstrap_servers=self.broker)
            await self.producer.start()
            await self.producer.send(topic=topic, key=key, value=msg)
        except Exception as e:
            raise MsgException("Error publishing to {} topic: {}".format(
                topic, str(e)))
        finally:
            await self.producer.stop()

    async def aioread(self, topic, loop=None):
        if not loop:
            loop = self.loop
        self.consumer = AIOKafkaConsumer(loop=loop,
                                         bootstrap_servers=self.broker)
        await self.consumer.start()
        self.consumer.subscribe([topic])
        try:
            async for message in self.consumer:
                # safe_load avoids arbitrary object construction on
                # untrusted bus messages.
                return (yaml.safe_load(message.key),
                        yaml.safe_load(message.value))
        except KafkaError as e:
            raise MsgException(str(e))
        finally:
            await self.consumer.stop()
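Like most examples on this page, the MsgKafka variants pass an explicit loop= to the aiokafka constructors; newer aiokafka releases deprecate and eventually remove that argument, picking up the running event loop automatically. A minimal modern-idiom sketch (broker address and topic are illustrative):

import asyncio
from aiokafka import AIOKafkaConsumer

async def consume_modern():
    # No loop argument: recent aiokafka uses the running event loop.
    consumer = AIOKafkaConsumer("demo", bootstrap_servers="localhost:9092")
    await consumer.start()
    try:
        async for msg in consumer:
            print(msg.topic, msg.offset, msg.value)
    finally:
        await consumer.stop()

asyncio.run(consume_modern())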