    build_kafka_consumer_configuration,
    build_kafka_producer_configuration,
)
from snuba.utils.streams.kafka_consumer_with_commit_log import (
    KafkaConsumerWithCommitLog,
)
from snuba.utils.streams.metrics_adapter import StreamMetricsAdapter

logger = logging.getLogger(__name__)


@click.command(hidden=True)
@click.option(
    "--storage",
    "storage_names",
    type=click.Choice([storage_key.value for storage_key in WRITABLE_STORAGES.keys()]),
    multiple=True,
    required=True,
)
@click.option(
    "--consumer-group",
    default="snuba-consumers",
)
@click.option(
    "--max-batch-size",
    default=settings.DEFAULT_MAX_BATCH_SIZE,
    type=int,
    help="Max number of messages to batch in memory before writing to Kafka.",
)
@click.option(
    "--max-batch-time-ms",
    default=settings.DEFAULT_MAX_BATCH_TIME_MS,
    type=int,
    help="Max length of time to buffer messages in memory before writing to Kafka.",
)
def consumer(
    *,
    raw_events_topic: Optional[str],
    replacements_topic: Optional[str],
    commit_log_topic: Optional[str],
    control_topic: Optional[str],
    consumer_group: str,
    bootstrap_server: Sequence[str],
    dataset_name: Optional[str],
    storage_name: str,
    max_batch_size: int,
    max_batch_time_ms: int,
    auto_offset_reset: str,
    queued_max_messages_kbytes: int,
    queued_min_messages: int,
    stateful_consumer: bool,
    rapidjson_deserialize: bool,
    rapidjson_serialize: bool,
    log_level: Optional[str] = None,
) -> None:
    # Fall back to the configured default brokers when none were passed on
    # the command line.
    if not bootstrap_server:
        if dataset_name:
            bootstrap_server = settings.DEFAULT_DATASET_BROKERS.get(
                dataset_name, settings.DEFAULT_BROKERS,
            )
        else:
            bootstrap_server = settings.DEFAULT_STORAGE_BROKERS.get(
                storage_name, settings.DEFAULT_BROKERS,
            )

    setup_logging(log_level)
    setup_sentry()

    # TODO: Remove this once dataset_name is no longer being passed.
    if dataset_name:
        dataset_writable_storage = get_dataset(dataset_name).get_writable_storage()
        if not dataset_writable_storage:
            raise click.ClickException(
                f"Dataset {dataset_name} has no writable storage"
            )
        # Invert the mapping to recover the storage name backing this dataset.
        storage_name = {v: k for k, v in WRITABLE_STORAGES.items()}[
            dataset_writable_storage
        ]

    consumer_builder = ConsumerBuilder(
        storage_name=storage_name,
        raw_topic=raw_events_topic,
        replacements_topic=replacements_topic,
        max_batch_size=max_batch_size,
        max_batch_time_ms=max_batch_time_ms,
        bootstrap_servers=bootstrap_server,
        group_id=consumer_group,
        commit_log_topic=commit_log_topic,
        auto_offset_reset=auto_offset_reset,
        queued_max_messages_kbytes=queued_max_messages_kbytes,
        queued_min_messages=queued_min_messages,
        rapidjson_deserialize=rapidjson_deserialize,
        rapidjson_serialize=rapidjson_serialize,
    )

    if stateful_consumer:
        # Stateful consumers are coordinated through a CDC control topic.
        storage = get_cdc_storage(storage_name)
        assert (
            storage is not None
        ), "Only CDC storages have a control topic and are thus supported."
        context = ConsumerStateMachine(
            consumer_builder=consumer_builder,
            topic=control_topic or storage.get_default_control_topic(),
            group_id=consumer_group,
            storage=storage,
        )

        def handler(signum, frame) -> None:
            context.signal_shutdown()

        signal.signal(signal.SIGINT, handler)
        signal.signal(signal.SIGTERM, handler)

        context.run()
    else:
        consumer = consumer_builder.build_base_consumer()

        def handler(signum, frame) -> None:
            consumer.signal_shutdown()

        signal.signal(signal.SIGINT, handler)
        signal.signal(signal.SIGTERM, handler)

        consumer.run()
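# A hypothetical invocation of this command, for illustration only. It assumes
# the command is registered on the main `snuba` CLI group and that "events" is
# a valid writable storage key (it appears as a default in one option variant
# below); the broker address and batch values here are arbitrary assumptions,
# not values taken from this file:
#
#   snuba consumer --storage events \
#       --consumer-group snuba-consumers \
#       --bootstrap-server localhost:9092 \
#       --max-batch-size 500 \
#       --max-batch-time-ms 1000
#
# Shutdown is cooperative: the SIGINT/SIGTERM handlers only call
# signal_shutdown(), and run() is expected to notice the flag and exit.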
help="Topic for committed offsets to be written to, triggering post-processing task(s)", ) @click.option( "--consumer-group", default="snuba-consumers", help="Consumer group use for consuming the raw events topic.", ) @click.option( "--bootstrap-server", multiple=True, help="Kafka bootstrap server to use.", ) @click.option( "--storage", "storage_name", type=click.Choice([storage_key.value for storage_key in WRITABLE_STORAGES.keys()]), help="The storage to target", required=True, ) @click.option( "--max-batch-size", default=settings.DEFAULT_MAX_BATCH_SIZE, type=int, help="Max number of messages to batch in memory before writing to Kafka.", ) @click.option( "--max-batch-time-ms", default=settings.DEFAULT_MAX_BATCH_TIME_MS, type=int, help="Max length of time to buffer messages in memory before writing to Kafka.", )
@click.option(
    "--bootstrap-server",
    multiple=True,
    help="Kafka bootstrap server to use.",
)
@click.option(
    "--dataset",
    "dataset_name",
    type=click.Choice(DATASET_NAMES),
    help="The dataset to target.",
)
@click.option(
    "--storage",
    "storage_name",
    default="events",
    type=click.Choice(WRITABLE_STORAGES.keys()),
    help="The storage to target.",
)
@click.option(
    "--max-batch-size",
    default=settings.DEFAULT_MAX_BATCH_SIZE,
    type=int,
    help="Max number of messages to batch in memory before writing to Kafka.",
)
@click.option(
    "--max-batch-time-ms",
    default=settings.DEFAULT_MAX_BATCH_TIME_MS,
    type=int,
    help="Max length of time to buffer messages in memory before writing to Kafka.",
)