Example #1
class KafkaBackend(BroadcastBackend):
    def __init__(self, url: str):
        self._servers = [urlparse(url).netloc]
        self._consumer_channels: typing.Set = set()

    async def connect(self) -> None:
        loop = asyncio.get_event_loop()
        self._producer = AIOKafkaProducer(loop=loop,
                                          bootstrap_servers=self._servers)
        self._consumer = AIOKafkaConsumer(loop=loop,
                                          bootstrap_servers=self._servers)
        await self._producer.start()
        await self._consumer.start()

    async def disconnect(self) -> None:
        await self._producer.stop()
        await self._consumer.stop()

    async def subscribe(self, channel: str) -> None:
        self._consumer_channels.add(channel)
        self._consumer.subscribe(topics=self._consumer_channels)

    async def unsubscribe(self, channel: str) -> None:
        # AIOKafkaConsumer.unsubscribe() is a plain (non-awaitable) call and
        # drops the whole subscription, so re-subscribe to what remains.
        self._consumer_channels.discard(channel)
        if self._consumer_channels:
            self._consumer.subscribe(topics=self._consumer_channels)
        else:
            self._consumer.unsubscribe()

    async def publish(self, channel: str, message: typing.Any) -> None:
        await self._producer.send_and_wait(channel, message.encode("utf8"))

    async def next_published(self) -> Event:
        message = await self._consumer.getone()
        return Event(channel=message.topic,
                     message=message.value.decode("utf8"))
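A minimal usage sketch for the backend above (an assumption, not part of the original snippet), wiring connect, subscribe, publish and next_published together against a broker at localhost:9092:

import asyncio

async def demo() -> None:
    backend = KafkaBackend("kafka://localhost:9092")
    await backend.connect()
    try:
        await backend.subscribe("chatroom")
        # Note: the consumer may need a moment to receive its partition
        # assignment before a freshly published message becomes visible.
        await backend.publish("chatroom", "hello")
        event = await backend.next_published()
        print(event.channel, event.message)
        await backend.unsubscribe("chatroom")
    finally:
        await backend.disconnect()

asyncio.run(demo())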
Example #2
    async def onJoin(self):
        loop = asyncio.get_event_loop()

        for handler in self.handlers:
            # initialize handler
            handler_instance = handler()
            handler_instance.set_session(self)

            if hasattr(handler_instance, 'init'):
                await handler_instance.init()

            if hasattr(handler_instance, 'on_event'):
                self.log.debug("subscribing to topic %s",
                               handler_instance.subscribe_topic)
                # Used with base handler defined subscribe_topic
                if handler_instance.subscribe_topic is not None:
                    consumer = AIOKafkaConsumer(
                        handler_instance.subscribe_topic,
                        bootstrap_servers=self.transport_host,
                        loop=loop)
                    await consumer.start()
                    self.log.debug("subscribed to topic: %s",
                                   handler_instance.subscribe_topic)

                    try:
                        async for msg in consumer:
                            await handler_instance.on_event(msg.value)
                    finally:
                        await consumer.stop()
                else:
                    # Used with config.json defined topics
                    if self.subscribed_topics is not None:
                        consumer = AIOKafkaConsumer(
                            bootstrap_servers=self.transport_host,
                            loop=loop,
                            group_id='my-group')
                        await consumer.start()

                        # Subscribe to all topics at once: aiokafka's
                        # subscribe() replaces the current subscription, so
                        # calling it once per topic would keep only the last.
                        consumer.subscribe(topics=list(self.subscribed_topics))

                        try:
                            async for msg in consumer:
                                value = msg.value.decode()
                                await handler_instance.on_event(value)
                        except Exception as error:
                            self.log.error("Consumer error. %s", error)
                            await asyncio.sleep(0)
                        finally:
                            await consumer.stop()

            if hasattr(handler_instance, 'worker'):
                while True:
                    try:
                        await handler_instance.worker()
                    except Exception as error:
                        self.log.error("Operation failed. %s", error)
                        traceback.print_exc(file=sys.stdout)
                        continue
Example #3
async def consume():
    consumer = AIOKafkaConsumer(
        loop=loop, bootstrap_servers='localhost:9092',
        metadata_max_age_ms=5000, group_id="test2")
    consumer.subscribe(pattern="test*")
    # Get cluster layout and topic/partition allocation
    await consumer.start()
    try:
        async for msg in consumer:
            print(msg.value)
    finally:
        await consumer.stop()
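A short driver for the snippet above, a sketch assuming a broker at localhost:9092; the module-level loop the function refers to has to exist before it runs:

import asyncio

loop = asyncio.get_event_loop()
loop.run_until_complete(consume())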
Example #5
class Consumer(object):
    def __init__(self,
                 topics,
                 loop=None,
                 worker=lambda data: print(data),
                 **kwargs):

        self.topics = topics
        self.worker = worker
        self._consumer = None

        conn_settings = {
            "bootstrap_servers": app_settings['kafka']['brokers'],
            'loop': loop or asyncio.get_event_loop(),
        }
        conn_settings.update(kwargs)
        conn_settings.update(app_settings['kafka'].get(
            'consumer_connection_settings', {}))

        self.config = conn_settings

    async def init(self):
        if self._consumer is None:
            self._consumer = AIOKafkaConsumer(**self.config)
            if isinstance(self.topics, str):
                self._consumer.subscribe(pattern=self.topics)
            if isinstance(self.topics, (list, set, tuple)):
                self._consumer.subscribe(topics=self.topics)
            await self._consumer.start()
        return self._consumer

    @property
    def has_regex_topic(self):
        return isinstance(self.topics, str)

    @property
    def is_ready(self):
        return self._consumer is not None

    async def get(self, max_records=1, within=60 * 1000):
        return await self._consumer.getmany(timeout_ms=within,
                                            max_records=max_records)

    async def stop(self):
        return await self._consumer.stop()
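A hypothetical driver for the Consumer wrapper above, assuming app_settings already carries a populated 'kafka' section; the topic name is made up, and getmany() returns a dict mapping TopicPartition to a list of records:

async def run_consumer() -> None:
    consumer = Consumer(topics=["orders"])
    await consumer.init()
    try:
        while True:
            # Poll for up to 10 records, waiting at most one second
            batches = await consumer.get(max_records=10, within=1000)
            for tp, records in batches.items():
                for record in records:
                    consumer.worker(record.value)
    finally:
        await consumer.stop()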
Example #6
    async def _run(self, arguments, app):
        self.tasks = []
        worker_names = arguments.consumer_worker
        if isinstance(worker_names, str):
            # we could just specify one here
            worker_names = [worker_names]

        conn_settings = {
            "api_version": arguments.api_version,
            "bootstrap_servers": app_settings["kafka"]["brokers"],
            "loop": self.get_loop(),
        }
        conn_settings.update(app_settings["kafka"].get(
            "consumer_connection_settings", {}))

        for worker_name in worker_names:
            worker_conf = self.init_worker_conf(worker_name, arguments)
            topic_prefix = app_settings["kafka"].get("topic_prefix", "")
            worker_conn_settings = {
                **conn_settings,
                **(getattr(worker_conf["handler"], "connection_settings", {}) or {}),
                **(worker_conf.get("connection_settings") or {}),
            }
            if worker_conf.get("regex_topic"):
                consumer = AIOKafkaConsumer(group_id=worker_conf.get(
                    "group", "default"),
                                            **worker_conn_settings)
                self.tasks.append(
                    self.run_consumer(self._get_worker(consumer, worker_conf),
                                      consumer, worker_conf))
            else:
                for topic in worker_conf["topics"]:
                    topic_id = f"{topic_prefix}{topic}"
                    group_id = worker_conf.get(
                        "group", "default").format(topic=topic_id)
                    consumer = AIOKafkaConsumer(group_id=group_id,
                                                **worker_conn_settings)
                    worker = self._get_worker(consumer, worker_conf)
                    listener = ConsumerGroupeRebalancer(consumer=consumer,
                                                        worker=worker)
                    consumer.subscribe(topics=[topic_id], listener=listener)
                    self.tasks.append(
                        self.run_consumer(worker, consumer, worker_conf))
        asyncio.create_task(asyncio.wait(self.tasks))
Example #7
 async def _do_some_work(self, work, topics, group_id, offsets, listener, bootstrap_servers, enable_commit, **kwargs):
     consumer = AIOKafkaConsumer(loop=self.loop,
                                 bootstrap_servers=bootstrap_servers,
                                 group_id=group_id,
                                 fetch_max_wait_ms=self.fetch_max_wait_ms,
                                 max_partition_fetch_bytes=self.max_partition_fetch_bytes,
                                 request_timeout_ms=self.request_timeout_ms,
                                 auto_offset_reset=self.auto_offset_reset,
                                 enable_auto_commit=self.enable_auto_commit,
                                 auto_commit_interval_ms=self.auto_commit_interval_ms,
                                 check_crcs=self.check_crcs,
                                 metadata_max_age_ms=self.metadata_max_age_ms,
                                 heartbeat_interval_ms=self.heartbeat_interval_ms,
                                 session_timeout_ms=self.session_timeout_ms,
                                 exclude_internal_topics=self.exclude_internal_topics,
                                 connections_max_idle_ms=self.connections_max_idle_ms,
                                 **kwargs)
     consumer.subscribe(topics=topics, listener=listener)
     await consumer.start()
     if offsets is not None:
         await self._seek_offsets(consumer, topics, offsets)
     try:
         async for msg in consumer:
             try:
                 if msg is None:
                     continue
                 await work(msg)
                 if enable_commit:
                     meta = "Some utf-8 metadata"
                     tp = TopicPartition(msg.topic, msg.partition)
                     offsets = {tp: OffsetAndMetadata(msg.offset + 1, meta)}
                     await consumer.commit(offsets)
             except OffsetOutOfRangeError as err:
                 tps = err.args[0].keys()
                 await consumer.seek_to_beginning(*tps)
                 continue
             except Exception as e:
                 root_logger.error(f'{traceback.format_exc()}')
                 continue
     except Exception as e:
         raise e
     finally:
         await consumer.stop()
Example #8
async def consume(loop):
    consumer = AIOKafkaConsumer(
        loop=loop,
        bootstrap_servers='localhost:9092',
        group_id="my_group",  # Consumer must be in a group to commit
        enable_auto_commit=False,  # Will disable autocommit
        auto_offset_reset="none",
        key_deserializer=lambda key: key.decode("utf-8") if key else "",
    )
    await consumer.start()

    local_state = LocalState()
    listener = RebalanceListener(consumer, local_state)
    consumer.subscribe(topics=["test"], listener=listener)

    save_task = loop.create_task(save_state_every_second(local_state))

    try:

        while True:
            try:
                msg_set = await consumer.getmany(timeout_ms=1000)
            except OffsetOutOfRangeError as err:
                # This means that saved file is outdated and should be
                # discarded
                tps = err.args[0].keys()
                local_state.discard_state(tps)
                await consumer.seek_to_beginning(*tps)
                continue

            for tp, msgs in msg_set.items():
                counts = Counter()
                for msg in msgs:
                    print("Process", tp, msg.key)
                    counts[msg.key] += 1
                local_state.add_counts(tp, counts, msg.offset)

    finally:
        await consumer.stop()
        save_task.cancel()
        await save_task
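The RebalanceListener used above is not shown; a sketch of what it might look like, modeled on the aiokafka manual-commit example (dump_local_state, load_local_state and get_last_offset are assumed LocalState helpers, not defined here):

from aiokafka import ConsumerRebalanceListener

class RebalanceListener(ConsumerRebalanceListener):
    def __init__(self, consumer, local_state):
        self.consumer = consumer
        self.local_state = local_state

    async def on_partitions_revoked(self, revoked):
        # Persist the counts before the partitions are taken away
        self.local_state.dump_local_state()

    async def on_partitions_assigned(self, assigned):
        # Reload saved state and resume from the last processed offset
        self.local_state.load_local_state(assigned)
        for tp in assigned:
            last_offset = self.local_state.get_last_offset(tp)
            if last_offset < 0:
                await self.consumer.seek_to_beginning(tp)
            else:
                self.consumer.seek(tp, last_offset + 1)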
Example #10
async def main():
    consumer = AIOKafkaConsumer(
        group_id="dev-group",
        loop=asyncio.get_event_loop(),
        enable_auto_commit=False,
        bootstrap_servers="localhost:9092",
    )
    # listener = ConsumerGroupeRebalancer(consumer=consumer)
    # consumer.subscribe(topics=["dev-topic"], listener=listener)
    consumer.subscribe(topics=["dev-topic"])
    await consumer.start()
    try:
        print("Ready !!!")
        # while True:
        #     msgs = consumer.getmany(timeout_ms=200, max_records=5)
        #     print(f"Got {len(msgs)} messages ")
        async for msg in consumer:
            print(msg)
            tp = TopicPartition(msg.topic, msg.partition)
            await consumer.commit({tp: msg.offset + 1})
    finally:
        await consumer.stop()
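To give the consumer above something to read, a minimal producer sketch against the same assumed broker and topic:

from aiokafka import AIOKafkaProducer

async def produce_one() -> None:
    producer = AIOKafkaProducer(bootstrap_servers="localhost:9092")
    await producer.start()
    try:
        await producer.send_and_wait("dev-topic", b"hello from the producer")
    finally:
        await producer.stop()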
Example #11
async def consume_events(app: web.Application) -> None:
    """The main Kafka consumer, which routes messages to processing functions
    or tasks.
    """
    logger = structlog.get_logger(app["safir/config"].logger_name)

    registry = RegistryApi(
        session=app["safir/http_session"],
        url=app["safir/config"].schema_registry_url,
    )
    deserializer = Deserializer(registry=registry)

    consumer_settings = {
        "bootstrap_servers": app["safir/config"].kafka_broker_url,
        "group_id": app["safir/config"].kafka_consumer_group_id,
        "auto_offset_reset": "latest",
        "security_protocol": app["safir/config"].kafka_protocol,
    }
    if consumer_settings["security_protocol"] == "SSL":
        consumer_settings["ssl_context"] = app["safir/kafka_ssl_context"]
    consumer = AIOKafkaConsumer(
        loop=asyncio.get_event_loop(), **consumer_settings
    )

    topic_names = get_configured_topics(app)

    scheduler = await aiojobs.create_scheduler()

    try:
        await consumer.start()
        logger.info("Started Kafka consumer")

        logger.info("Subscribing to Kafka topics", names=topic_names)
        consumer.subscribe(topic_names)

        partitions = consumer.assignment()
        while len(partitions) == 0:
            # Wait for the consumer to get partition assignment
            await asyncio.sleep(1.0)
            partitions = consumer.assignment()
        logger.info(
            "Got initial partition assignment for Kafka topics",
            partitions=[str(p) for p in partitions],
        )

        async for message in consumer:
            try:
                value_info = await deserializer.deserialize(
                    message.value, include_schema=True
                )
            except Exception:
                logger.exception(
                    "Failed to deserialize a Kafka message value",
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
                continue

            try:
                await route_message(
                    app=app,
                    scheduler=scheduler,
                    message=value_info["message"],
                    schema_id=value_info["id"],
                    schema=value_info["schema"],
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
            except Exception:
                logger.exception(
                    "Failed to route a Kafka message",
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )

    except asyncio.CancelledError:
        logger.info("consume_events task got cancelled")
    finally:
        logger.info("consume_events task cancelling")
        await consumer.stop()
        await scheduler.close()
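One hypothetical way to keep consume_events running for the lifetime of the aiohttp application, using a cleanup context (the function name below is an assumption, not part of the original app):

from aiohttp import web

async def kafka_consumer_ctx(app: web.Application):
    # Start the consumer as a background task on startup ...
    task = asyncio.create_task(consume_events(app))
    yield
    # ... and cancel it cleanly on shutdown
    task.cancel()
    await asyncio.gather(task, return_exceptions=True)

# Registered on the application object, e.g.:
# app.cleanup_ctx.append(kafka_consumer_ctx)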
Example #12
    async def _run_consumer(topic_queue):
        control = {}
        control_disposables = {}
        topics = {}  # context of each subscribed topic

        def on_next_control(obv, i):
            nonlocal control
            control[obv] = i

        def on_partition_subscribe(tp_context, observer, scheduler):
            tp_context.observer = observer
            if feed_mode is DataFeedMode.PULL:
                observer.on_next(
                    functools.partial(on_partition_back, tp_context.tp))

        def on_revoked(tps):
            inactive_topics = {}
            for topic in topics:
                inactive_topics[topic] = False

            for tp in tps:
                # partitions are keyed by TopicPartition elsewhere in this
                # function, so use tp (not tp.partition) here as well
                topics[tp.topic].partitions[tp].observer.on_completed()
                del topics[tp.topic].partitions[tp]
                if len(topics[tp.topic].partitions) == 0:
                    inactive_topics[tp.topic] = True

            all_inactive = [inactive_topics[s] for s in inactive_topics]
            if all(all_inactive):
                topic_queue.put_nowait(RevokedCmd())

        def on_assigned(tps):
            for tp in tps:
                context = TopicPartitionContext()
                context.tp = tp
                topics[tp.topic].partitions[tp] = context
                topics[tp.topic].observer.on_next(
                    rx.create(
                        functools.partial(on_partition_subscribe, context)))

            topic_queue.put_nowait(AssignedCmd())

        async def tp_is_completed(topic_partition):
            if source_type is DataSourceType.BATCH:
                highwater = client.highwater(topic_partition)
                if highwater:
                    position = await client.position(topic_partition)
                    if highwater == position:
                        print("no more lag on {}-{}".format(
                            topic_partition.topic, topic_partition.partition))
                        topics[topic_partition.topic].partitions[
                            topic_partition].completed = True
                        return True
            return False

        async def process_next_batch(topic_partition, count):
            tp = [topic_partition] if topic_partition else []
            read_count = 0
            if count == 1:
                msg = await client.getone(*tp)
                if topic_partition is None:
                    topic_partition = TopicPartition(msg.topic, msg.partition)
                topic = topics[topic_partition.topic]

                decoded_msg = topic.decode(msg.value)
                topic.partitions[topic_partition].observer.on_next(decoded_msg)
                read_count += 1
            else:
                data = await client.getmany(*tp,
                                            timeout_ms=5000,
                                            max_records=count)
                if len(data) > 0:
                    msgs = data[topic_partition]
                    topic = topics[topic_partition.topic]
                    for msg in msgs:
                        decoded_msg = topic.decode(msg.value)
                        topic.partitions[topic_partition].observer.on_next(
                            decoded_msg)
                        read_count += 1

            return read_count

        try:
            client = AIOKafkaConsumer(
                loop=loop,
                bootstrap_servers=server,
                group_id=group,
                auto_offset_reset='latest',
                enable_auto_commit=True,
                max_partition_fetch_bytes=max_partition_fetch_bytes,
            )
            print("start kafka consumer")
            await client.start()

            partition_assigned = False
            yield_countdown = 5000
            prev_partition = None
            pcount = 0
            while True:
                try:
                    cmd = topic_queue.get_nowait()
                except asyncio.QueueEmpty as e:
                    print("queue empty")
                    cmd = await topic_queue.get()

                #if len(topics) == 0 or not topic_queue.empty():
                #cmd = await topic_queue.get()
                if type(cmd) is AddConsumerCmd:
                    print('run consumer: add {}'.format(cmd.consumer.topic))

                    if cmd.consumer.topic in topics:
                        source_observer.on_error(
                            ValueError(
                                "topic already subscribed for this consumer: {}"
                                .format(cmd.consumer.decode)))
                        break

                    if cmd.consumer.control is not None:
                        control_disposables[
                            cmd.observer] = cmd.consumer.control.subscribe(
                                on_next=functools.partial(
                                    on_next_control, cmd.observer),
                                on_error=source_observer.on_error,
                            )

                    topics[cmd.consumer.topic] = TopicContext(
                        observer=cmd.observer,
                        topic=cmd.consumer.topic,
                        decode=cmd.consumer.decode,
                        start_from=cmd.consumer.start_from,
                        partitions={})
                    sub_start_positions = {}
                    sub_topics = []
                    for k, c in topics.items():
                        sub_topics.append(c.topic)
                        sub_start_positions[c.topic] = c.start_from
                    sub_topics = set(sub_topics)
                    client.subscribe(topics=sub_topics,
                                     listener=ConsumerRebalancer(
                                         client,
                                         sub_start_positions,
                                         on_revoked=on_revoked,
                                         on_assigned=on_assigned,
                                     ))

                elif type(cmd) is DelConsumerCmd:
                    print('run consumer: del {}'.format(cmd))
                    topic = topics[cmd.topic]
                    disposable = control_disposables.pop(topic.observer, None)
                    if disposable is not None:
                        disposable.dispose()

                    topics.pop(cmd.topic)
                    sub_start_positions = {}
                    sub_topics = []
                    for k, c in topics.items():
                        sub_topics.append(c.topic)
                        sub_start_positions[c.topic] = c.start_from
                    sub_topics = set(sub_topics)
                    if len(sub_topics) > 0:
                        client.subscribe(topics=sub_topics,
                                         listener=ConsumerRebalancer(
                                             client,
                                             sub_start_positions,
                                             on_revoked=on_revoked,
                                             on_assigned=on_assigned,
                                         ))
                    topic.observer.on_completed()
                elif type(cmd) is PullTopicPartitionCmd:
                    no_lag = await tp_is_completed(cmd.topic_partition)
                    if source_type is DataSourceType.BATCH and no_lag == True:
                        topic = topics[cmd.topic_partition.topic]
                        topic.partitions[
                            cmd.topic_partition].observer.on_completed()
                        if all(
                            [i.completed
                             for _, i in topic.partitions.items()]):
                            print("completed processing topic {}".format(
                                cmd.topic_partition.topic))
                            topic.observer.on_completed()
                    else:
                        await process_next_batch(cmd.topic_partition,
                                                 cmd.count)
                elif type(cmd) is PushRecordCmd:
                    read_count = await process_next_batch(None, 1)
                    if read_count > 0:
                        topic_queue.put_nowait(PushRecordCmd())
                elif type(cmd) is AssignedCmd:
                    if partition_assigned is False:
                        partition_assigned = True
                        if feed_mode is DataFeedMode.PUSH:
                            topic_queue.put_nowait(PushRecordCmd())
                elif type(cmd) is RevokedCmd:
                    partition_assigned = False
                else:
                    source_observer.on_error(
                        TypeError(
                            "invalid type for queue command: {}".format(cmd)))

                if len(topics) == 0:
                    print("no more topic subscribed, ending consumer task")
                    break

                regulated = False
                for topic, consumer in topics.items():
                    regulation_time = control.get(consumer.observer, None)
                    if regulation_time is not None and regulation_time > 0:
                        await asyncio.sleep(regulation_time)
                        regulated = True
                        yield_countdown = 5000
                        control[consumer.observer] = None
                        break  # limitation only one controllable topic for now

                yield_countdown -= 1
                if yield_countdown == 0 and regulated is False:
                    await asyncio.sleep(0)
                    yield_countdown = 5000

            await client.stop()

        except asyncio.CancelledError as e:
            print("cancelled {}".format(e))
        except Exception as e:
            print("consummer exception: {}:{}".format(type(e), e))
            print(traceback.format_list(traceback.extract_tb(e.__traceback__)))
            raise e
Example #13
async def consume_kafka(app):
    """Consume Kafka messages directed to templatebot's functionality."""
    logger = structlog.get_logger(app["root"]["api.lsst.codes/loggerName"])

    registry = RegistryApi(
        session=app["root"]["api.lsst.codes/httpSession"],
        url=app["root"]["templatebot/registryUrl"],
    )
    deserializer = Deserializer(registry=registry)

    consumer_settings = {
        "bootstrap_servers": app["root"]["templatebot/brokerUrl"],
        "group_id": app["root"]["templatebot/slackGroupId"],
        "auto_offset_reset": "latest",
        "ssl_context": app["root"]["templatebot/kafkaSslContext"],
        "security_protocol": app["root"]["templatebot/kafkaProtocol"],
    }
    consumer = AIOKafkaConsumer(loop=asyncio.get_event_loop(),
                                **consumer_settings)

    try:
        await consumer.start()
        logger.info("Started Kafka consumer", **consumer_settings)

        topic_names = [
            app["root"]["templatebot/appMentionTopic"],
            app["root"]["templatebot/messageImTopic"],
            app["root"]["templatebot/interactionTopic"],
        ]
        logger.info("Subscribing to Kafka topics", names=topic_names)
        consumer.subscribe(topic_names)

        logger.info("Finished subscribing ot Kafka topics", names=topic_names)

        partitions = consumer.assignment()
        logger.info("Waiting on partition assignment", names=topic_names)
        while len(partitions) == 0:
            # Wait for the consumer to get partition assignment
            await asyncio.sleep(1.0)
            partitions = consumer.assignment()
        logger.info(
            "Initial partition assignment",
            partitions=[str(p) for p in partitions],
        )

        async for message in consumer:
            logger.info(
                "Got Kafka message from sqrbot",
                topic=message.topic,
                partition=message.partition,
                offset=message.offset,
            )
            try:
                message_info = await deserializer.deserialize(message.value)
            except Exception:
                logger.exception(
                    "Failed to deserialize a message",
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
                continue

            event = message_info["message"]
            logger.debug(
                "New message",
                topic=message.topic,
                partition=message.partition,
                offset=message.offset,
                contents=event,
            )

            try:
                await route_event(
                    event=message_info["message"],
                    app=app,
                    schema_id=message_info["id"],
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
            except Exception:
                logger.exception(
                    "Failed to handle message",
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )

    except asyncio.CancelledError:
        logger.info("consume_kafka task got cancelled")
    finally:
        logger.info("consume_kafka task cancelling")
        await consumer.stop()
Example #14
class MsgKafka(MsgBase):
    def __init__(self, logger_name='msg', lock=False):
        super().__init__(logger_name, lock)
        self.host = None
        self.port = None
        self.consumer = None
        self.producer = None
        self.loop = None
        self.broker = None
        self.group_id = None

    def connect(self, config):
        try:
            if "logger_name" in config:
                self.logger = logging.getLogger(config["logger_name"])
            self.host = config["host"]
            self.port = config["port"]
            self.loop = config.get("loop") or asyncio.get_event_loop()
            self.broker = str(self.host) + ":" + str(self.port)
            self.group_id = config.get("group_id")

        except Exception as e:  # TODO refine
            raise MsgException(str(e))

    def disconnect(self):
        try:
            pass
            # self.loop.close()
        except Exception as e:  # TODO refine
            raise MsgException(str(e))

    def write(self, topic, key, msg):
        """
        Write a message at kafka bus
        :param topic: message topic, must be string
        :param key: message key, must be string
        :param msg: message content, can be string or dictionary
        :return: None or raises MsgException on failing
        """
        retry = 2  # Try two times
        while retry:
            try:
                self.loop.run_until_complete(
                    self.aiowrite(topic=topic, key=key, msg=msg))
                break
            except Exception as e:
                retry -= 1
                if retry == 0:
                    raise MsgException("Error writing {} topic: {}".format(
                        topic, str(e)))

    def read(self, topic):
        """
        Read from one or several topics.
        :param topic: can be str: single topic; or str list: several topics
        :return: topic, key, message; or None
        """
        try:
            return self.loop.run_until_complete(self.aioread(topic, self.loop))
        except MsgException:
            raise
        except Exception as e:
            raise MsgException("Error reading {} topic: {}".format(
                topic, str(e)))

    async def aiowrite(self, topic, key, msg, loop=None):
        """
        Asyncio write
        :param topic: str kafka topic
        :param key: str kafka key
        :param msg: str or dictionary  kafka message
        :param loop: asyncio loop. To be DEPRECATED! in near future!!!  loop must be provided inside config at connect
        :return: None
        """

        if not loop:
            loop = self.loop
        try:
            self.producer = AIOKafkaProducer(loop=loop,
                                             key_serializer=str.encode,
                                             value_serializer=str.encode,
                                             bootstrap_servers=self.broker)
            await self.producer.start()
            await self.producer.send(topic=topic,
                                     key=key,
                                     value=yaml.safe_dump(
                                         msg, default_flow_style=True))
        except Exception as e:
            raise MsgException(
                "Error publishing topic '{}', key '{}': {}".format(
                    topic, key, e))
        finally:
            await self.producer.stop()

    async def aioread(self,
                      topic,
                      loop=None,
                      callback=None,
                      aiocallback=None,
                      group_id=None,
                      from_beginning=None,
                      **kwargs):
        """
        Asyncio read from one or several topics.
        :param topic: can be str: single topic; or str list: several topics
        :param loop: asyncio loop. To be DEPRECATED! in near future!!!  loop must be provided inside config at connect
        :param callback: synchronous callback function that will handle the message in kafka bus
        :param aiocallback: async callback function that will handle the message in kafka bus
        :param group_id: kafka group_id to use. Can be False (set group_id to None), None (use general group_id provided
                         at connect inside config), or a group_id string
        :param from_beginning: if True, messages will be obtained from beginning instead of only new ones.
                               If group_id is supplied, only the not processed messages by other worker are obtained.
                               If group_id is None, all messages stored at kafka are obtained.
        :param kwargs: optional keyword arguments for callback function
        :return: If no callback defined, it returns (topic, key, message)
        """

        if not loop:
            loop = self.loop
        if group_id is False:
            group_id = None
        elif group_id is None:
            group_id = self.group_id
        try:
            if isinstance(topic, (list, tuple)):
                topic_list = topic
            else:
                topic_list = (topic, )
            self.consumer = AIOKafkaConsumer(
                loop=loop,
                bootstrap_servers=self.broker,
                group_id=group_id,
                auto_offset_reset="earliest" if from_beginning else "latest")
            await self.consumer.start()
            self.consumer.subscribe(topic_list)

            async for message in self.consumer:
                if callback:
                    callback(message.topic, yaml.safe_load(message.key),
                             yaml.safe_load(message.value), **kwargs)
                elif aiocallback:
                    await aiocallback(message.topic,
                                      yaml.safe_load(message.key),
                                      yaml.safe_load(message.value), **kwargs)
                else:
                    return message.topic, yaml.safe_load(
                        message.key), yaml.safe_load(message.value)
        except KafkaError as e:
            raise MsgException(str(e))
        finally:
            await self.consumer.stop()
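A rough usage sketch for the message bus above, assuming a broker at localhost:9092 (the config keys mirror what connect() reads; topic, key and payload are made up):

bus = MsgKafka()
bus.connect({"host": "localhost", "port": 9092, "group_id": "demo-group"})
bus.write("alarms", "alarm_created", {"severity": "warning"})
# read() blocks until a message on the topic arrives
topic, key, msg = bus.read("alarms")
bus.disconnect()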
Example #15
class MsgKafka(MsgBase):
    def __init__(self, logger_name='msg'):
        self.logger = logging.getLogger(logger_name)
        self.host = None
        self.port = None
        self.consumer = None
        self.producer = None

    def connect(self, config):
        try:
            if "logger_name" in config:
                self.logger = logging.getLogger(config["logger_name"])
            self.host = config["host"]
            self.port = config["port"]
            self.loop = asyncio.get_event_loop()
            self.broker = str(self.host) + ":" + str(self.port)

        except Exception as e:  # TODO refine
            raise MsgException(str(e))

    def disconnect(self):
        try:
            self.loop.close()
        except Exception as e:  # TODO refine
            raise MsgException(str(e))

    def write(self, topic, key, msg):
        try:
            self.loop.run_until_complete(
                self.aiowrite(topic=topic,
                              key=key,
                              msg=yaml.safe_dump(msg, default_flow_style=True),
                              loop=self.loop))

        except Exception as e:
            raise MsgException("Error writing {} topic: {}".format(
                topic, str(e)))

    def read(self, topic):
        """
        Read from one or several topics. It is non-blocking, returning None if nothing is available.
        :param topic: can be str: single topic; or str list: several topics
        :return: topic, key, message; or None
        """
        try:
            return self.loop.run_until_complete(self.aioread(topic, self.loop))
        except MsgException:
            raise
        except Exception as e:
            raise MsgException("Error reading {} topic: {}".format(
                topic, str(e)))

    async def aiowrite(self, topic, key, msg, loop=None):

        if not loop:
            loop = self.loop
        try:
            self.producer = AIOKafkaProducer(loop=loop,
                                             key_serializer=str.encode,
                                             value_serializer=str.encode,
                                             bootstrap_servers=self.broker)
            await self.producer.start()
            await self.producer.send(topic=topic,
                                     key=key,
                                     value=yaml.safe_dump(
                                         msg, default_flow_style=True))
        except Exception as e:
            raise MsgException(
                "Error publishing topic '{}', key '{}': {}".format(
                    topic, key, e))
        finally:
            await self.producer.stop()

    async def aioread(self, topic, loop=None, callback=None, *args):
        """
        Asyncio read from one or several topics. It blocks.
        :param topic: can be str: single topic; or str list: several topics
        :param loop: asyncio loop
        :param callback: callback function that will handle the message in kafka bus
        :param args: optional positional arguments for the callback function
        :return: topic, key, message
        """

        if not loop:
            loop = self.loop
        try:
            if isinstance(topic, (list, tuple)):
                topic_list = topic
            else:
                topic_list = (topic, )

            self.consumer = AIOKafkaConsumer(loop=loop,
                                             bootstrap_servers=self.broker)
            await self.consumer.start()
            self.consumer.subscribe(topic_list)

            async for message in self.consumer:
                if callback:
                    callback(message.topic, yaml.safe_load(message.key),
                             yaml.safe_load(message.value), *args)
                else:
                    return message.topic, yaml.safe_load(
                        message.key), yaml.safe_load(message.value)
        except KafkaError as e:
            raise MsgException(str(e))
        finally:
            await self.consumer.stop()
Example #16
async def consume_events(app):
    """Consume events from templatebot-related topics in SQuaRE Events (Kafka).

    Notes
    -----
    Templatebot has *two* Kafka consumers. This is one, and the other is
    in `templatebot.slack`. The Slack consumer only listens to topics from
    Slack (SQuaRE Bot), and is focused on responding to Slack-based workflows.
    This consumer is focused on backend-driven events, such as the
    ``templatebot-render_ready`` topic.
    """
    logger = structlog.get_logger(app["root"]["api.lsst.codes/loggerName"])

    registry = RegistryApi(
        session=app["root"]["api.lsst.codes/httpSession"],
        url=app["root"]["templatebot/registryUrl"],
    )
    deserializer = Deserializer(registry=registry)

    consumer_settings = {
        "bootstrap_servers": app["root"]["templatebot/brokerUrl"],
        "group_id": app["root"]["templatebot/eventsGroupId"],
        "auto_offset_reset": "latest",
        "ssl_context": app["root"]["templatebot/kafkaSslContext"],
        "security_protocol": app["root"]["templatebot/kafkaProtocol"],
    }
    consumer = AIOKafkaConsumer(loop=asyncio.get_event_loop(),
                                **consumer_settings)

    try:
        await consumer.start()
        logger.info("Started Kafka consumer for events", **consumer_settings)

        topic_names = [app["root"]["templatebot/renderreadyTopic"]]
        logger.info("Subscribing to Kafka topics", names=topic_names)
        consumer.subscribe(topic_names)

        partitions = consumer.assignment()
        while len(partitions) == 0:
            # Wait for the consumer to get partition assignment
            await asyncio.sleep(1.0)
            partitions = consumer.assignment()
        logger.info(
            "Initial partition assignment for event topics",
            partitions=[str(p) for p in partitions],
        )

        async for message in consumer:
            try:
                message_info = await deserializer.deserialize(
                    message.value, include_schema=True)
            except Exception:
                logger.exception(
                    "Failed to deserialize an event message",
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
                continue

            event = message_info["message"]
            logger.debug(
                "New event message",
                topic=message.topic,
                partition=message.partition,
                offset=message.offset,
                contents=event,
            )

            try:
                await route_event(
                    app=app,
                    event=message_info["message"],
                    schema_id=message_info["id"],
                    schema=message_info["schema"],
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
            except Exception:
                logger.exception(
                    "Failed to handle event message",
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )

    except asyncio.CancelledError:
        logger.info("consume_events task got cancelled")
    finally:
        logger.info("consume_events task cancelling")
        await consumer.stop()
Example #17
class MsgKafka(MsgBase):
    def __init__(self, logger_name='msg'):
        self.logger = logging.getLogger(logger_name)
        self.host = None
        self.port = None
        self.consumer = None
        self.producer = None
        # create a different file for each topic
        #self.files = {}

    def connect(self, config):
        try:
            if "logger_name" in config:
                self.logger = logging.getLogger(config["logger_name"])
            self.host = config["host"]
            self.port = config["port"]
            self.topic_lst = []
            self.loop = asyncio.get_event_loop()
            self.broker = str(self.host) + ":" + str(self.port)

        except Exception as e:  # TODO refine
            raise MsgException(str(e))

    def write(self, topic, key, msg):
        try:
            self.loop.run_until_complete(
                self.aiowrite(topic=topic,
                              key=key,
                              msg=yaml.safe_dump(msg,
                                                 default_flow_style=True)))

        except Exception as e:
            raise MsgException("Error writing {} topic: {}".format(
                topic, str(e)))

    def read(self, topic):
        #self.topic_lst.append(topic)
        try:
            return self.loop.run_until_complete(self.aioread(topic))
        except Exception as e:
            raise MsgException("Error reading {} topic: {}".format(
                topic, str(e)))

    async def aiowrite(self, topic, key, msg, loop=None):
        try:
            if not loop:
                loop = self.loop
            self.producer = AIOKafkaProducer(loop=loop,
                                             key_serializer=str.encode,
                                             value_serializer=str.encode,
                                             bootstrap_servers=self.broker)
            await self.producer.start()
            await self.producer.send(topic=topic, key=key, value=msg)
        except Exception as e:
            raise MsgException("Error publishing to {} topic: {}".format(
                topic, str(e)))
        finally:
            await self.producer.stop()

    async def aioread(self, topic, loop=None):
        if not loop:
            loop = self.loop
        self.consumer = AIOKafkaConsumer(loop=loop,
                                         bootstrap_servers=self.broker)
        await self.consumer.start()
        self.consumer.subscribe([topic])
        try:
            async for message in self.consumer:
                return yaml.safe_load(message.key), yaml.safe_load(message.value)
        except KafkaError as e:
            raise MsgException(str(e))
        finally:
            await self.consumer.stop()