Example 1
def test_default_config_cli_bootstrap_servers() -> None:
    broker_config = get_default_kafka_configuration(
        bootstrap_servers=["cli.server:9092"]
    )
    assert broker_config["bootstrap.servers"] == "cli.server:9092"
    broker_config = get_default_kafka_configuration(
        bootstrap_servers=["cli.server:9092", "cli2.server:9092"]
    )
    assert broker_config["bootstrap.servers"] == "cli.server:9092,cli2.server:9092"
Example 2
def test_kafka_broker_config() -> None:
    default_broker = "my.broker:9092"
    events_broker = "my.other.broker:9092"
    settings.BROKER_CONFIG = {
        "bootstrap.servers": default_broker,
    }

    settings.KAFKA_BROKER_CONFIG = {
        Topic.EVENTS.value: {"bootstrap.servers": events_broker}
    }

    events_broker_config = get_default_kafka_configuration(Topic.EVENTS)
    assert events_broker_config["bootstrap.servers"] == events_broker

    other_broker_config = get_default_kafka_configuration(Topic.EVENT_REPLACEMENTS)
    assert other_broker_config["bootstrap.servers"] == default_broker
Example 3
def test_default_config_override_new_config() -> None:
    default_broker = "my.broker:9092"
    default_broker_config = {
        "bootstrap.servers": default_broker,
    }
    settings.BROKER_CONFIG = default_broker_config
    broker_config = get_default_kafka_configuration()
    assert broker_config["bootstrap.servers"] == default_broker
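
The helper exercised here, get_default_kafka_configuration, is never shown in these examples. A minimal sketch consistent with the behaviour asserted in Examples 1 to 3 (and with the override_params usage in the bootstrap examples further below) could look as follows; the settings names are taken from the tests, while the function body, the string-keyed topic lookup, and the _sketch naming are assumptions for illustration only.

from typing import Any, Mapping, MutableMapping, Optional, Sequence

# Hypothetical stand-ins for snuba's settings module.
BROKER_CONFIG: Mapping[str, Any] = {"bootstrap.servers": "my.broker:9092"}
KAFKA_BROKER_CONFIG: Mapping[str, Mapping[str, Any]] = {}


def get_default_kafka_configuration_sketch(
    topic: Optional[str] = None,
    bootstrap_servers: Optional[Sequence[str]] = None,
    override_params: Optional[Mapping[str, Any]] = None,
) -> MutableMapping[str, Any]:
    # Start from the global default broker configuration (Examples 3 and 8).
    config: MutableMapping[str, Any] = dict(BROKER_CONFIG)
    # Apply a per-topic override when one is configured (Example 2).
    if topic is not None:
        config.update(KAFKA_BROKER_CONFIG.get(topic, {}))
    # An explicit server list wins and is joined with commas (Example 1).
    if bootstrap_servers:
        config["bootstrap.servers"] = ",".join(bootstrap_servers)
    # Finally apply any caller-supplied overrides (Examples 9 and 13).
    if override_params is not None:
        config.update(override_params)
    return config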
Example 4
def test_commit_log_consumer() -> None:
    # XXX: This would be better as an integration test (or at least a test
    # against an abstract Producer interface) instead of a test against a
    # mock.
    commit_log_producer = FakeConfluentKafkaProducer()

    configuration = get_default_kafka_configuration()

    consumer: KafkaConsumer = KafkaConsumerWithCommitLog(
        {
            **configuration,
            "auto.offset.reset": "earliest",
            "enable.auto.commit": "false",
            "enable.auto.offset.store": "false",
            "enable.partition.eof": "true",
            "group.id": "test",
            "session.timeout.ms": 10000,
        },
        producer=commit_log_producer,
        commit_log_topic=Topic("commit-log"),
    )

    producer = KafkaProducer(configuration)

    topic = Topic("topic")

    with closing(consumer) as consumer:
        with closing(producer) as producer:
            producer.produce(topic, next(get_payloads())).result(5.0)

        consumer.subscribe([topic])

        message = consumer.poll(10.0)  # XXX: getting the subscription is slow
        assert isinstance(message, Message)

        now = datetime.now()

        position = Position(message.next_offset, now)

        consumer.stage_positions({message.partition: position})

        assert consumer.commit_positions() == {Partition(topic, 0): position}

        assert len(commit_log_producer.messages) == 1
        commit_message = commit_log_producer.messages[0]
        assert commit_message.topic() == "commit-log"

        assert commit_codec.decode(
            KafkaPayload(
                commit_message.key(),
                commit_message.value(),
                commit_message.headers(),
            )) == Commit("test", Partition(topic, 0), message.next_offset, now)
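
FakeConfluentKafkaProducer is a test helper whose implementation is not shown here. Judging from how the test inspects commit_log_producer.messages[0], it records produced messages in memory and exposes them through confluent-kafka-style accessors. A rough, hypothetical sketch of such a fake (every name other than messages is invented):

from typing import Any, List, Optional, Sequence, Tuple


class _RecordedMessage:
    # Minimal message stand-in exposing the accessors the assertions above call.
    def __init__(
        self,
        topic: str,
        key: Optional[bytes],
        value: Optional[bytes],
        headers: Sequence[Tuple[str, bytes]],
    ) -> None:
        self._topic, self._key, self._value, self._headers = topic, key, value, headers

    def topic(self) -> str:
        return self._topic

    def key(self) -> Optional[bytes]:
        return self._key

    def value(self) -> Optional[bytes]:
        return self._value

    def headers(self) -> Sequence[Tuple[str, bytes]]:
        return self._headers


class FakeProducerSketch:
    # Records produced messages in memory instead of sending them to a broker.
    def __init__(self) -> None:
        self.messages: List[_RecordedMessage] = []

    def produce(
        self,
        topic: str,
        value: Optional[bytes] = None,
        key: Optional[bytes] = None,
        headers: Sequence[Tuple[str, bytes]] = (),
        **kwargs: Any,
    ) -> None:
        self.messages.append(_RecordedMessage(topic, key, value, headers))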
Example 5
class TestStrictConsumer:
    broker_config = get_default_kafka_configuration(bootstrap_servers=["somewhere"])

    def __consumer(self, on_message) -> StrictConsumer:
        return StrictConsumer(
            topic="my_topic",
            group_id="something",
            broker_config=self.broker_config,
            initial_auto_offset_reset="earliest",
            partition_assignment_timeout=1,
            on_partitions_assigned=None,
            on_partitions_revoked=None,
            on_message=on_message,
        )

    @patch("snuba.consumers.strict_consumer.StrictConsumer._create_consumer")
    def test_empty_topic(self, create_consumer) -> None:
        kafka_consumer = FakeConfluentKafkaConsumer()
        kafka_consumer.items = [
            build_confluent_kafka_message(0, 0, None, True),
        ]
        create_consumer.return_value = kafka_consumer

        on_message = MagicMock()
        consumer = self.__consumer(on_message)

        consumer.run()
        on_message.assert_not_called()

    @patch("snuba.consumers.strict_consumer.StrictConsumer._create_consumer")
    def test_failure(self, create_consumer) -> None:
        kafka_consumer = FakeConfluentKafkaConsumer()
        create_consumer.return_value = kafka_consumer

        on_message = MagicMock()
        consumer = self.__consumer(on_message)

        with pytest.raises(NoPartitionAssigned):
            consumer.run()

        on_message.assert_not_called()

    @patch("snuba.consumers.strict_consumer.StrictConsumer._create_consumer")
    def test_one_message(self, create_consumer) -> None:
        kafka_consumer = FakeConfluentKafkaConsumer()
        create_consumer.return_value = kafka_consumer

        msg = build_confluent_kafka_message(0, 0, b"ABCABC", False)
        kafka_consumer.items = [
            msg,
            build_confluent_kafka_message(0, 0, None, True),
        ]

        on_message = MagicMock()
        on_message.return_value = CommitDecision.DO_NOT_COMMIT
        consumer = self.__consumer(on_message)

        consumer.run()
        on_message.assert_called_once_with(msg)
        assert kafka_consumer.commit_calls == 0

    @patch("snuba.consumers.strict_consumer.StrictConsumer._create_consumer")
    def test_commits(self, create_consumer) -> None:
        kafka_consumer = FakeConfluentKafkaConsumer()
        create_consumer.return_value = kafka_consumer
        error = MagicMock()
        error.code.return_value = KafkaError._PARTITION_EOF
        kafka_consumer.items = [
            build_confluent_kafka_message(0, 0, b"ABCABC", False),
            build_confluent_kafka_message(1, 0, b"ABCABC", False),
            build_confluent_kafka_message(2, 0, b"ABCABC", False),
            build_confluent_kafka_message(0, 0, None, True),
        ]

        on_message = MagicMock()
        on_message.return_value = CommitDecision.COMMIT_PREV
        consumer = self.__consumer(on_message)

        consumer.run()
        on_message.assert_called()
        assert kafka_consumer.commit_calls == 2
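
The commit counts asserted above follow from how COMMIT_PREV behaves: it commits the offsets accumulated before the current message, so the first message of a run never triggers a commit by itself. A small, self-contained model of that rule (the enum and function below are illustrative, not the real StrictConsumer):

from enum import Enum
from typing import Callable, Iterable


class Decision(Enum):
    # Only the two decisions exercised by the tests above are modelled.
    DO_NOT_COMMIT = 0
    COMMIT_PREV = 1


def count_commits(messages: Iterable[str], on_message: Callable[[str], Decision]) -> int:
    # COMMIT_PREV commits what was seen *before* the current message, so the
    # first message never produces a commit on its own.
    commits = 0
    have_previous = False
    for message in messages:
        if on_message(message) is Decision.COMMIT_PREV and have_previous:
            commits += 1
        have_previous = True
    return commits


# Mirrors test_commits: three messages handled with COMMIT_PREV -> two commits.
assert count_commits(["a", "b", "c"], lambda _: Decision.COMMIT_PREV) == 2
# Mirrors test_one_message: DO_NOT_COMMIT never commits.
assert count_commits(["a"], lambda _: Decision.DO_NOT_COMMIT) == 0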
Example 6
def test_scheduler_consumer() -> None:
    settings.TOPIC_PARTITION_COUNTS = {"events": 2}
    importlib.reload(scheduler_consumer)

    admin_client = AdminClient(get_default_kafka_configuration())
    create_topics(admin_client, [SnubaTopic.COMMIT_LOG])

    metrics_backend = TestingMetricsBackend()
    entity_name = "events"
    entity = get_entity(EntityKey(entity_name))
    storage = entity.get_writable_storage()
    assert storage is not None
    stream_loader = storage.get_table_writer().get_stream_loader()

    commit_log_topic = Topic("snuba-commit-log")

    mock_scheduler_producer = mock.Mock()

    from snuba.redis import redis_client
    from snuba.subscriptions.data import PartitionId, SubscriptionData
    from snuba.subscriptions.entity_subscription import EventsSubscription
    from snuba.subscriptions.store import RedisSubscriptionDataStore

    entity_key = EntityKey(entity_name)
    partition_index = 0

    store = RedisSubscriptionDataStore(redis_client, entity_key,
                                       PartitionId(partition_index))
    store.create(
        uuid.uuid4(),
        SubscriptionData(
            project_id=1,
            time_window_sec=60,
            resolution_sec=60,
            query="MATCH events SELECT count()",
            entity_subscription=EventsSubscription(data_dict={}),
        ),
    )

    builder = scheduler_consumer.SchedulerBuilder(
        entity_name,
        str(uuid.uuid1().hex),
        "events",
        mock_scheduler_producer,
        "latest",
        False,
        60 * 5,
        None,
        None,
        metrics_backend,
    )
    scheduler = builder.build_consumer()
    time.sleep(2)
    scheduler._run_once()
    scheduler._run_once()
    scheduler._run_once()

    epoch = datetime(1970, 1, 1)

    producer = KafkaProducer(
        build_kafka_producer_configuration(
            stream_loader.get_default_topic_spec().topic,
        )
    )

    for (partition, offset, orig_message_ts) in [
        (0, 0, epoch),
        (1, 0, epoch + timedelta(minutes=1)),
        (0, 1, epoch + timedelta(minutes=2)),
        (1, 1, epoch + timedelta(minutes=3)),
    ]:
        fut = producer.produce(
            commit_log_topic,
            payload=commit_codec.encode(
                Commit(
                    "events",
                    Partition(commit_log_topic, partition),
                    offset,
                    orig_message_ts,
                )),
        )
        fut.result()

    producer.close()

    for _ in range(5):
        scheduler._run_once()

    scheduler._shutdown()

    assert mock_scheduler_producer.produce.call_count == 2

    settings.TOPIC_PARTITION_COUNTS = {}
Example 7
    def __init__(
        self,
        storage_key: StorageKey,
        raw_topic: Optional[str],
        replacements_topic: Optional[str],
        max_batch_size: int,
        max_batch_time_ms: int,
        bootstrap_servers: Sequence[str],
        group_id: str,
        commit_log_topic: Optional[str],
        auto_offset_reset: str,
        queued_max_messages_kbytes: int,
        queued_min_messages: int,
        metrics: MetricsBackend,
        processes: Optional[int],
        input_block_size: Optional[int],
        output_block_size: Optional[int],
        commit_retry_policy: Optional[RetryPolicy] = None,
        profile_path: Optional[str] = None,
    ) -> None:
        self.storage = get_writable_storage(storage_key)
        self.bootstrap_servers = bootstrap_servers
        topic = (self.storage.get_table_writer().get_stream_loader().
                 get_default_topic_spec().topic)

        self.broker_config = get_default_kafka_configuration(
            topic, bootstrap_servers=bootstrap_servers)
        self.producer_broker_config = build_kafka_producer_configuration(
            topic,
            bootstrap_servers=bootstrap_servers,
            override_params={
                "partitioner": "consistent",
                "message.max.bytes": 50000000,  # 50MB, default is 1MB
            },
        )

        stream_loader = self.storage.get_table_writer().get_stream_loader()

        self.raw_topic: Topic
        if raw_topic is not None:
            self.raw_topic = Topic(raw_topic)
        else:
            self.raw_topic = Topic(
                stream_loader.get_default_topic_spec().topic_name)

        self.replacements_topic: Optional[Topic]
        if replacements_topic is not None:
            self.replacements_topic = Topic(replacements_topic)
        else:
            replacement_topic_spec = stream_loader.get_replacement_topic_spec()
            if replacement_topic_spec is not None:
                self.replacements_topic = Topic(
                    replacement_topic_spec.topic_name)
            else:
                self.replacements_topic = None

        self.commit_log_topic: Optional[Topic]
        if commit_log_topic is not None:
            self.commit_log_topic = Topic(commit_log_topic)
        else:
            commit_log_topic_spec = stream_loader.get_commit_log_topic_spec()
            if commit_log_topic_spec is not None:
                self.commit_log_topic = Topic(commit_log_topic_spec.topic_name)
            else:
                self.commit_log_topic = None

        # XXX: This can result in a producer being built in cases where it's
        # not actually required.
        self.producer = Producer(self.producer_broker_config)

        self.metrics = metrics

        self.max_batch_size = max_batch_size
        self.max_batch_time_ms = max_batch_time_ms
        self.group_id = group_id
        self.auto_offset_reset = auto_offset_reset
        self.queued_max_messages_kbytes = queued_max_messages_kbytes
        self.queued_min_messages = queued_min_messages
        self.processes = processes
        self.input_block_size = input_block_size
        self.output_block_size = output_block_size
        self.__profile_path = profile_path

        if commit_retry_policy is None:
            commit_retry_policy = BasicRetryPolicy(
                3,
                1,
                lambda e: isinstance(e, KafkaException) and e.args[0].code() in
                (
                    KafkaError.REQUEST_TIMED_OUT,
                    KafkaError.NOT_COORDINATOR,
                    KafkaError._WAIT_COORD,
                ),
            )

        self.__commit_retry_policy = commit_retry_policy
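
BasicRetryPolicy is constructed positionally above. Assuming its arguments are an attempt count, a delay in seconds, and a predicate deciding whether an exception is retryable (an assumption read off the call site, not taken from the class itself), its behaviour can be pictured with a generic retry loop:

import time
from typing import Callable, TypeVar

T = TypeVar("T")


def retry_sketch(
    func: Callable[[], T],
    attempts: int = 3,
    delay_seconds: float = 1.0,
    is_retryable: Callable[[Exception], bool] = lambda exc: False,
) -> T:
    # Retry func up to `attempts` times, sleeping between attempts, but only
    # when the predicate says the raised exception is worth retrying.
    for attempt in range(1, attempts + 1):
        try:
            return func()
        except Exception as exc:
            if attempt == attempts or not is_retryable(exc):
                raise
            time.sleep(delay_seconds)
    raise AssertionError("unreachable")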
Example 8
def test_default_config() -> None:
    broker_config = get_default_kafka_configuration()
    assert (
        broker_config["bootstrap.servers"]
        == settings.BROKER_CONFIG["bootstrap.servers"]
    )
Example 9
def bootstrap(
    *,
    bootstrap_server: Sequence[str],
    kafka: bool,
    migrate: bool,
    force: bool,
    log_level: Optional[str] = None,
) -> None:
    """
    Warning: Not intended to be used in production yet.
    """
    if not force:
        raise click.ClickException("Must use --force to run")

    setup_logging(log_level)

    logger = logging.getLogger("snuba.bootstrap")

    import time

    if kafka:
        logger.debug("Using Kafka with %r", bootstrap_server)
        from confluent_kafka.admin import AdminClient

        override_params = {
            # Same as above: override the socket timeout as we expect Kafka
            # not to be ready for a while
            "socket.timeout.ms": 1000,
        }
        if logger.getEffectiveLevel() != logging.DEBUG:
            # Override the rdkafka log level to be critical unless we are
            # debugging, since we expect failures when trying to connect
            # (Kafka may not be up yet)
            override_params["log_level"] = LOG_CRIT

        attempts = 0
        while True:
            try:
                logger.info("Attempting to connect to Kafka (attempt %d)...",
                            attempts)
                client = AdminClient(
                    get_default_kafka_configuration(
                        bootstrap_servers=bootstrap_server,
                        override_params=override_params,
                    ))
                client.list_topics(timeout=1)
                break
            except KafkaException as err:
                logger.debug("Connection to Kafka failed (attempt %d)",
                             attempts,
                             exc_info=err)
                attempts += 1
                if attempts == 60:
                    raise
                time.sleep(1)

        logger.info("Connected to Kafka on attempt %d", attempts)

        create_topics(client, [t for t in Topic])

    if migrate:
        check_clickhouse_connections()
        Runner().run_all(force=True)
Example 10
class TestBootstrapState:
    broker_config = get_default_kafka_configuration(bootstrap_servers=["somewhere"])

    @patch("snuba.consumers.strict_consumer.StrictConsumer._create_consumer")
    def test_empty_topic(self, create_consumer: Mock) -> None:
        kafka_consumer = FakeConfluentKafkaConsumer()
        kafka_consumer.items = [
            build_confluent_kafka_message(0, 0, None, True),
        ]
        create_consumer.return_value = kafka_consumer

        bootstrap = BootstrapState(
            "cdc_control",
            self.broker_config,
            "something",
            get_cdc_storage(StorageKey.GROUPEDMESSAGES),
        )

        ret = bootstrap.handle(None)
        assert ret[0] == ConsumerStateCompletionEvent.NO_SNAPSHOT
        assert kafka_consumer.commit_calls == 0

    @patch("snuba.consumers.strict_consumer.StrictConsumer._create_consumer")
    def test_snapshot_for_other_table(self, create_consumer: Mock) -> None:
        kafka_consumer = FakeConfluentKafkaConsumer()
        kafka_consumer.items = [
            build_confluent_kafka_message(
                0,
                0,
                b'{"snapshot-id":"abc123", "tables": ["someone_else"], "product":"snuba", "event":"snapshot-init"}',
                False,
            ),
            build_confluent_kafka_message(0, 0, None, True),
        ]
        create_consumer.return_value = kafka_consumer

        bootstrap = BootstrapState(
            "cdc_control",
            self.broker_config,
            "something",
            get_cdc_storage(StorageKey.GROUPEDMESSAGES),
        )

        ret = bootstrap.handle(None)
        assert ret[0] == ConsumerStateCompletionEvent.NO_SNAPSHOT
        assert kafka_consumer.commit_calls == 1

    @patch("snuba.consumers.strict_consumer.StrictConsumer._create_consumer")
    def test_init_snapshot(self, create_consumer: Mock) -> None:
        kafka_consumer = FakeConfluentKafkaConsumer()
        kafka_consumer.items = [
            build_confluent_kafka_message(
                0,
                0,
                b'{"snapshot-id":"abc123", "tables": ["sentry_groupedmessage"], "product":"snuba", "event":"snapshot-init"}',
                False,
            ),
            build_confluent_kafka_message(0, 0, None, True),
        ]
        create_consumer.return_value = kafka_consumer

        bootstrap = BootstrapState(
            "cdc_control",
            self.broker_config,
            "something",
            get_cdc_storage(StorageKey.GROUPEDMESSAGES),
        )

        ret = bootstrap.handle(None)
        assert ret[0] == ConsumerStateCompletionEvent.SNAPSHOT_INIT_RECEIVED
        assert kafka_consumer.commit_calls == 0

    @patch("snuba.consumers.strict_consumer.StrictConsumer._create_consumer")
    def test_snapshot_loaded(self, create_consumer: Mock) -> None:
        kafka_consumer = FakeConfluentKafkaConsumer()
        kafka_consumer.items = [
            build_confluent_kafka_message(
                0,
                0,
                b'{"snapshot-id":"abc123", "product":"somewhere-else", "tables": [], "event":"snapshot-init"}',
                False,
            ),
            build_confluent_kafka_message(
                1,
                0,
                b'{"snapshot-id":"abc123", "product":"snuba", "tables": ["sentry_groupedmessage"], "event":"snapshot-init"}',
                False,
            ),
            build_confluent_kafka_message(
                2,
                0,
                (
                    b'{"snapshot-id":"abc123", "event":"snapshot-loaded",'
                    b'"transaction-info": {"xmin":123, "xmax":124, "xip-list": []}'
                    b"}"
                ),
                False,
            ),
            build_confluent_kafka_message(0, 0, None, True),
        ]
        create_consumer.return_value = kafka_consumer

        bootstrap = BootstrapState(
            "cdc_control",
            self.broker_config,
            "something",
            get_cdc_storage(StorageKey.GROUPEDMESSAGES),
        )

        ret = bootstrap.handle(None)
        assert ret[0] == ConsumerStateCompletionEvent.SNAPSHOT_READY_RECEIVED
        assert kafka_consumer.commit_calls == 2
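
The control-topic payloads in these tests are plain JSON byte strings. The snapshot-init message for the sentry_groupedmessage table used above can equivalently be built programmatically; this is only a restatement of the literal in the test:

import json

snapshot_init = json.dumps(
    {
        "snapshot-id": "abc123",
        "tables": ["sentry_groupedmessage"],
        "product": "snuba",
        "event": "snapshot-init",
    }
).encode("utf-8")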
Example 11
    def __init__(
        self,
        storage_key: StorageKey,
        kafka_params: KafkaParameters,
        processing_params: ProcessingParameters,
        max_batch_size: int,
        max_batch_time_ms: int,
        metrics: MetricsBackend,
        parallel_collect: bool,
        stats_callback: Optional[Callable[[str], None]] = None,
        commit_retry_policy: Optional[RetryPolicy] = None,
        profile_path: Optional[str] = None,
        mock_parameters: Optional[MockParameters] = None,
        cooperative_rebalancing: bool = False,
    ) -> None:
        self.storage = get_writable_storage(storage_key)
        self.bootstrap_servers = kafka_params.bootstrap_servers
        self.consumer_group = kafka_params.group_id
        topic = (self.storage.get_table_writer().get_stream_loader().
                 get_default_topic_spec().topic)

        self.broker_config = get_default_kafka_configuration(
            topic, bootstrap_servers=kafka_params.bootstrap_servers)
        logger.info(
            f"librdkafka log level: {self.broker_config.get('log_level', 6)}")
        self.producer_broker_config = build_kafka_producer_configuration(
            topic,
            bootstrap_servers=kafka_params.bootstrap_servers,
            override_params={
                "partitioner": "consistent",
                "message.max.bytes": 50000000,  # 50MB, default is 1MB
            },
        )

        stream_loader = self.storage.get_table_writer().get_stream_loader()

        self.raw_topic: Topic
        if kafka_params.raw_topic is not None:
            self.raw_topic = Topic(kafka_params.raw_topic)
        else:
            self.raw_topic = Topic(
                stream_loader.get_default_topic_spec().topic_name)

        self.replacements_topic: Optional[Topic]
        if kafka_params.replacements_topic is not None:
            self.replacements_topic = Topic(kafka_params.replacements_topic)
        else:
            replacement_topic_spec = stream_loader.get_replacement_topic_spec()
            if replacement_topic_spec is not None:
                self.replacements_topic = Topic(
                    replacement_topic_spec.topic_name)
            else:
                self.replacements_topic = None

        self.commit_log_topic: Optional[Topic]
        if kafka_params.commit_log_topic is not None:
            self.commit_log_topic = Topic(kafka_params.commit_log_topic)
        else:
            commit_log_topic_spec = stream_loader.get_commit_log_topic_spec()
            if commit_log_topic_spec is not None:
                self.commit_log_topic = Topic(commit_log_topic_spec.topic_name)
            else:
                self.commit_log_topic = None

        self.stats_callback = stats_callback

        # XXX: This can result in a producer being built in cases where it's
        # not actually required.
        self.producer = Producer(self.producer_broker_config)

        self.metrics = metrics
        self.max_batch_size = max_batch_size
        self.max_batch_time_ms = max_batch_time_ms
        self.group_id = kafka_params.group_id
        self.auto_offset_reset = kafka_params.auto_offset_reset
        self.strict_offset_reset = kafka_params.strict_offset_reset
        self.queued_max_messages_kbytes = kafka_params.queued_max_messages_kbytes
        self.queued_min_messages = kafka_params.queued_min_messages
        self.processes = processing_params.processes
        self.input_block_size = processing_params.input_block_size
        self.output_block_size = processing_params.output_block_size
        self.__profile_path = profile_path
        self.__mock_parameters = mock_parameters
        self.__parallel_collect = parallel_collect
        self.__cooperative_rebalancing = cooperative_rebalancing

        if commit_retry_policy is None:
            commit_retry_policy = BasicRetryPolicy(
                3,
                1,
                lambda e: isinstance(e, KafkaException) and e.args[0].code() in
                (
                    KafkaError.REQUEST_TIMED_OUT,
                    KafkaError.NOT_COORDINATOR,
                    KafkaError._WAIT_COORD,
                ),
            )

        self.__commit_retry_policy = commit_retry_policy
Example 12
def test_executor_consumer() -> None:
    """
    End-to-end integration test.
    """
    state.set_config("subscription_mode_events", "new")
    admin_client = AdminClient(get_default_kafka_configuration())
    create_topics(admin_client, [SnubaTopic.SUBSCRIPTION_SCHEDULED_EVENTS])
    create_topics(admin_client, [SnubaTopic.SUBSCRIPTION_RESULTS_EVENTS])

    dataset_name = "events"
    entity_name = "events"
    entity_key = EntityKey(entity_name)
    entity = get_entity(entity_key)
    storage = entity.get_writable_storage()
    assert storage is not None
    stream_loader = storage.get_table_writer().get_stream_loader()

    scheduled_result_topic_spec = stream_loader.get_subscription_result_topic_spec()
    assert scheduled_result_topic_spec is not None
    result_producer = KafkaProducer(
        build_kafka_producer_configuration(scheduled_result_topic_spec.topic))

    result_consumer = KafkaConsumer(
        build_kafka_consumer_configuration(
            scheduled_result_topic_spec.topic,
            str(uuid.uuid1().hex),
            auto_offset_reset="latest",
            strict_offset_reset=False,
        ))
    assigned = False

    def on_partitions_assigned(partitions: Mapping[Partition, int]) -> None:
        nonlocal assigned
        assigned = True

    result_consumer.subscribe(
        [Topic(scheduled_result_topic_spec.topic_name)],
        on_assign=on_partitions_assigned,
    )

    attempts = 10
    while attempts > 0 and not assigned:
        result_consumer.poll(1.0)
        attempts -= 1

    # We need to wait for the consumer to be assigned partitions; otherwise,
    # when we try to consume messages, we will not find anything, since
    # subscribing is an asynchronous process.
    assert assigned, "Did not receive assignment within 10 attempts"

    consumer_group = str(uuid.uuid1().hex)
    auto_offset_reset = "latest"
    strict_offset_reset = False
    executor = build_executor_consumer(
        dataset_name,
        [entity_name],
        consumer_group,
        result_producer,
        2,
        2,
        auto_offset_reset,
        strict_offset_reset,
        TestingMetricsBackend(),
        None,
    )
    for i in range(1, 5):
        # Give the executor time to subscribe
        time.sleep(1)
        executor._run_once()

    # Produce a scheduled task to the scheduled subscriptions topic
    subscription_data = SubscriptionData(
        project_id=1,
        query="MATCH (events) SELECT count()",
        time_window_sec=60,
        resolution_sec=60,
        entity_subscription=EventsSubscription(data_dict={}),
    )

    task = ScheduledSubscriptionTask(
        timestamp=datetime(1970, 1, 1),
        task=SubscriptionWithMetadata(
            entity_key,
            Subscription(
                SubscriptionIdentifier(
                    PartitionId(1),
                    uuid.UUID("91b46cb6224f11ecb2ddacde48001122")),
                subscription_data,
            ),
            1,
        ),
    )

    encoder = SubscriptionScheduledTaskEncoder()
    encoded_task = encoder.encode(task)

    scheduled_topic_spec = stream_loader.get_subscription_scheduled_topic_spec()
    assert scheduled_topic_spec is not None
    tasks_producer = KafkaProducer(
        build_kafka_producer_configuration(scheduled_topic_spec.topic))

    scheduled_topic = Topic(scheduled_topic_spec.topic_name)
    tasks_producer.produce(scheduled_topic, payload=encoded_task).result()
    tasks_producer.close()

    executor._run_once()
    executor.signal_shutdown()
    # Call run here so that the executor shuts itself down cleanly.
    executor.run()
    result = result_consumer.poll(5)
    assert result is not None, "Did not receive a result message"
    data = json.loads(result.payload.value)
    assert (data["payload"]["subscription_id"] ==
            "1/91b46cb6224f11ecb2ddacde48001122"), "Invalid subscription id"

    result_producer.close()
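
The final assertion implies that a subscription identifier renders as "<partition index>/<uuid hex>". The snippet below only restates what that assertion implies; it is inferred from the test, not from the SubscriptionIdentifier implementation:

import uuid

partition_index = 1
subscription_uuid = uuid.UUID("91b46cb6224f11ecb2ddacde48001122")
# uuid.hex is the 32-character hex string without dashes.
assert f"{partition_index}/{subscription_uuid.hex}" == "1/91b46cb6224f11ecb2ddacde48001122"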
Example 13
def bootstrap(
    *,
    bootstrap_server: Sequence[str],
    kafka: bool,
    migrate: bool,
    force: bool,
    log_level: Optional[str] = None,
) -> None:
    """
    Warning: Not intended to be used in production yet.
    """
    if not force:
        raise click.ClickException("Must use --force to run")

    setup_logging(log_level)

    logger = logging.getLogger("snuba.bootstrap")

    import time

    if kafka:
        logger.debug("Using Kafka with %r", bootstrap_server)
        from confluent_kafka.admin import AdminClient, NewTopic

        override_params = {
            # Same as above: override the socket timeout as we expect Kafka
            # not to be ready for a while
            "socket.timeout.ms": 1000,
        }
        if logger.getEffectiveLevel() != logging.DEBUG:
            # Override the rdkafka log level to be critical unless we are
            # debugging, since we expect failures when trying to connect
            # (Kafka may not be up yet)
            override_params["log_level"] = LOG_CRIT

        attempts = 0
        while True:
            try:
                logger.info("Attempting to connect to Kafka (attempt %d)...", attempts)
                client = AdminClient(
                    get_default_kafka_configuration(
                        bootstrap_servers=bootstrap_server,
                        override_params=override_params,
                    )
                )
                client.list_topics(timeout=1)
                break
            except KafkaException as err:
                logger.debug(
                    "Connection to Kafka failed (attempt %d)", attempts, exc_info=err
                )
                attempts += 1
                if attempts == 60:
                    raise
                time.sleep(1)

        logger.info("Connected to Kafka on attempt %d", attempts)

        topics = {}

        for topic in Topic:
            topic_spec = KafkaTopicSpec(topic)
            logger.debug("Adding topic %s to creation list", topic_spec.topic_name)
            topics[topic_spec.topic_name] = NewTopic(
                topic_spec.topic_name,
                num_partitions=topic_spec.partitions_number,
                replication_factor=topic_spec.replication_factor,
                config=topic_spec.topic_creation_config,
            )

        logger.info("Creating Kafka topics...")
        for topic, future in client.create_topics(
            list(topics.values()), operation_timeout=1
        ).items():
            try:
                future.result()
                logger.info("Topic %s created", topic)
            except KafkaException as err:
                if err.args[0].code() != KafkaError.TOPIC_ALREADY_EXISTS:
                    logger.error("Failed to create topic %s", topic, exc_info=err)

    if migrate:
        check_clickhouse_connections()
        Runner().run_all(force=True)