Example 1
# Imports assumed from the surrounding Sentry module (not shown in the
# original snippet); the paths are best guesses at the Sentry code layout.
import signal

from django.conf import settings

from sentry.utils import metrics
from sentry.utils.batching_kafka_consumer import BatchingKafkaConsumer


def create_batching_kafka_consumer(topic_names, worker, **options):
    cluster_names = set(settings.KAFKA_TOPICS[topic_name]["cluster"] for topic_name in topic_names)
    if len(cluster_names) > 1:
        raise ValueError(
            "Cannot launch Kafka consumer listening to multiple topics ({}) on different clusters ({})".format(
                topic_names, cluster_names
            )
        )

    (cluster_name,) = cluster_names

    consumer = BatchingKafkaConsumer(
        topics=topic_names,
        cluster_name=cluster_name,
        worker=worker,
        metrics=metrics,
        metrics_default_tags={
            "topics": ",".join(sorted(topic_names)),
            "group_id": options.get("group_id"),
        },
        **options
    )

    def handler(signum, frame):
        consumer.signal_shutdown()

    signal.signal(signal.SIGINT, handler)
    signal.signal(signal.SIGTERM, handler)

    return consumer
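
For context, a minimal sketch of how this helper might be used. The worker interface (process_message/flush_batch/shutdown) follows Sentry's AbstractBatchWorker, and the topic and group names are made up for illustration; treat the import path and the run() loop as assumptions, not the definitive API.

from sentry.utils.batching_kafka_consumer import AbstractBatchWorker  # assumed path


class PrintingWorker(AbstractBatchWorker):
    def process_message(self, message):
        # Turn a raw Kafka message into whatever flush_batch should receive.
        return message.value()

    def flush_batch(self, batch):
        # Called once per accumulated batch, before offsets are committed.
        print(f"flushing {len(batch)} messages")

    def shutdown(self):
        pass


consumer = create_batching_kafka_consumer(
    topic_names={"ingest-events"},  # hypothetical topic
    worker=PrintingWorker(),
    group_id="example-group",       # hypothetical consumer group
    max_batch_size=100,
    max_batch_time=1000,
)
consumer.run()  # blocks; SIGINT/SIGTERM invoke signal_shutdown() via the handlers above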
Example 2
    def _build_consumer(
        self,
        consumer_group,
        commit_log_topic,
        synchronize_commit_group,
        commit_batch_size=100,
        commit_batch_timeout_ms=5000,
        initial_offset_reset="latest",
    ):
        cluster_name = settings.KAFKA_TOPICS[settings.KAFKA_EVENTS]["cluster"]

        synchronized_consumer = SynchronizedConsumer(
            cluster_name=cluster_name,
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset=initial_offset_reset,
        )

        # `options` here presumably refers to Sentry's module-level options
        # store rather than a local variable; _CONCURRENCY_OPTION is a module
        # constant naming the concurrency setting.
        concurrency = options.get(_CONCURRENCY_OPTION)
        worker = PostProcessForwarderWorker(concurrency=concurrency)

        consumer = BatchingKafkaConsumer(
            topics=self.topic,
            worker=worker,
            max_batch_size=commit_batch_size,
            max_batch_time=commit_batch_timeout_ms,
            consumer=synchronized_consumer,
            commit_on_shutdown=True,
        )
        return consumer
Example 3
def create_batching_kafka_consumer(topic_names, worker, **options):
    cluster_names = set(settings.KAFKA_TOPICS[topic_name]["cluster"]
                        for topic_name in topic_names)
    if len(cluster_names) > 1:
        raise ValueError(
            "Cannot launch Kafka consumer listening to multiple topics ({}) on different clusters ({})"
            .format(topic_names, cluster_names))

    (cluster_name,) = cluster_names

    bootstrap_servers = settings.KAFKA_CLUSTERS[cluster_name][
        "bootstrap.servers"]
    if not isinstance(bootstrap_servers, (list, tuple)):
        bootstrap_servers = bootstrap_servers.split(",")

    consumer = BatchingKafkaConsumer(topics=topic_names,
                                     bootstrap_servers=bootstrap_servers,
                                     worker=worker,
                                     metrics=metrics,
                                     metrics_default_tags={
                                         "topics":
                                         ",".join(sorted(topic_names)),
                                         "group_id": options.get("group_id"),
                                     },
                                     **options)

    def handler(signum, frame):
        consumer.signal_shutdown()

    signal.signal(signal.SIGINT, handler)
    signal.signal(signal.SIGTERM, handler)

    return consumer
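
For reference, a hedged sketch of the settings structures these snippets read from. The key paths mirror the lookups above (KAFKA_TOPICS[...]["cluster"] and KAFKA_CLUSTERS[...]["bootstrap.servers"]); the concrete topic names, cluster names, and hosts are invented.

KAFKA_TOPICS = {
    "ingest-events": {"cluster": "default"},  # hypothetical topic -> cluster mapping
}

KAFKA_CLUSTERS = {
    "default": {
        # May be a comma-separated string or a list; the snippet above
        # normalizes the string form with .split(",").
        "bootstrap.servers": "kafka-1:9092,kafka-2:9092",
    },
}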
Example 4
def create_batching_kafka_consumer(topic_names, worker, **options):
    # In some cases we want to override the configuration stored in settings from the command line
    force_topic = options.pop("force_topic", None)
    force_cluster = options.pop("force_cluster", None)

    if force_topic and force_cluster:
        topic_names = {force_topic}
        cluster_names = {force_cluster}
    elif force_topic or force_cluster:
        raise ValueError(
            "Both 'force_topic' and 'force_cluster' have to be provided to override the configuration"
        )
    else:
        cluster_names = {
            settings.KAFKA_TOPICS[topic_name]["cluster"]
            for topic_name in topic_names
        }

    if len(cluster_names) > 1:
        raise ValueError(
            f"Cannot launch Kafka consumer listening to multiple topics ({topic_names}) on different clusters ({cluster_names})"
        )

    (cluster_name,) = cluster_names

    consumer = BatchingKafkaConsumer(
        topics=topic_names,
        cluster_name=cluster_name,
        worker=worker,
        metrics=metrics,
        metrics_default_tags={
            "topics": ",".join(sorted(topic_names)),
            "group_id": options.get("group_id"),
        },
        **options,
    )

    def handler(signum, frame):
        consumer.signal_shutdown()

    signal.signal(signal.SIGINT, handler)
    signal.signal(signal.SIGTERM, handler)

    return consumer
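
A hedged sketch of exercising the override path, e.g. when wiring command-line flags through to this helper; the topic, cluster, and group names are hypothetical. Note that passing only one of the two force options raises ValueError.

consumer = create_batching_kafka_consumer(
    topic_names={"ingest-events"},       # ignored once force_topic is set
    worker=worker,
    force_topic="ingest-events-shadow",  # hypothetical override topic
    force_cluster="kafka-secondary",     # hypothetical override cluster
    group_id="shadow-consumer",
)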
Example 5
    def _build_consumer(
        self,
        entity,
        consumer_group,
        commit_log_topic,
        synchronize_commit_group,
        commit_batch_size=100,
        commit_batch_timeout_ms=5000,
        initial_offset_reset="latest",
    ):
        cluster_name = settings.KAFKA_TOPICS[settings.KAFKA_EVENTS]["cluster"]

        synchronized_consumer = SynchronizedConsumer(
            cluster_name=cluster_name,
            consumer_group=consumer_group,
            commit_log_topic=commit_log_topic,
            synchronize_commit_group=synchronize_commit_group,
            initial_offset_reset=initial_offset_reset,
        )

        concurrency = options.get(_CONCURRENCY_OPTION)
        logger.info(
            f"Starting post process forwarder to consume {entity} messages")
        if entity == PostProcessForwarderType.TRANSACTIONS:
            worker = TransactionsPostProcessForwarderWorker(
                concurrency=concurrency)
        elif entity == PostProcessForwarderType.ERRORS:
            worker = ErrorsPostProcessForwarderWorker(concurrency=concurrency)
        else:
            # Default implementation which processes both errors and transactions
            # irrespective of values in the header. This would most likely be the case
            # for development environments.
            worker = PostProcessForwarderWorker(concurrency=concurrency)

        consumer = BatchingKafkaConsumer(
            topics=self.topic,
            worker=worker,
            max_batch_size=commit_batch_size,
            max_batch_time=commit_batch_timeout_ms,
            consumer=synchronized_consumer,
            commit_on_shutdown=True,
        )
        return consumer
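
And a hedged sketch of calling this builder, assuming `forwarder` is an instance of the enclosing class. The signature comes from the snippet above; the group and topic values are invented. PostProcessForwarderType.ERRORS selects the errors-only worker branch.

consumer = forwarder._build_consumer(
    entity=PostProcessForwarderType.ERRORS,
    consumer_group="post-process-forwarder",     # hypothetical consumer group
    commit_log_topic="snuba-commit-log",         # hypothetical commit log topic
    synchronize_commit_group="snuba-consumers",  # group whose commits gate consumption
    commit_batch_size=500,
    initial_offset_reset="earliest",
)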
Example 6
def create_batching_kafka_consumer(topic_name, worker, **options):
    cluster_name = settings.KAFKA_TOPICS[topic_name]["cluster"]
    bootstrap_servers = settings.KAFKA_CLUSTERS[cluster_name][
        "bootstrap.servers"]
    if not isinstance(bootstrap_servers, (list, tuple)):
        bootstrap_servers = bootstrap_servers.split(",")

    consumer = BatchingKafkaConsumer(topics=[topic_name],
                                     bootstrap_servers=bootstrap_servers,
                                     worker=worker,
                                     metrics=metrics,
                                     metrics_default_tags={
                                         "topic": topic_name,
                                         "group_id": options.get("group_id")
                                     },
                                     **options)

    def handler(signum, frame):
        consumer.signal_shutdown()

    signal.signal(signal.SIGINT, handler)
    signal.signal(signal.SIGTERM, handler)

    return consumer