Example #1
def test_default_config_cli_bootstrap_servers():
    broker_config = get_default_kafka_configuration(
        bootstrap_servers=["cli.server:9092"])
    assert broker_config["bootstrap.servers"] == "cli.server:9092"
    broker_config = get_default_kafka_configuration(
        bootstrap_servers=["cli.server:9092", "cli2.server:9092"])
    assert broker_config[
        "bootstrap.servers"] == "cli.server:9092,cli2.server:9092"
Example #2
def test_default_config_legacy_override_default_servers():
    default_broker = "my.broker:9092"
    settings.DEFAULT_BROKERS = [default_broker]
    broker_config = get_default_kafka_configuration()
    assert broker_config["bootstrap.servers"] == default_broker

    default_brokers = ["my.broker:9092", "my.second.broker:9092"]
    settings.DEFAULT_BROKERS = default_brokers
    broker_config = get_default_kafka_configuration()
    assert broker_config["bootstrap.servers"] == ",".join(default_brokers)
Example #3
def test_default_config_legacy_override_storage_servers():
    storage_name = StorageKey.EVENTS.value
    storage_key = StorageKey(storage_name)
    default_broker = "my.broker:9092"
    settings.DEFAULT_STORAGE_BROKERS = {storage_name: [default_broker]}
    broker_config = get_default_kafka_configuration(storage_key=storage_key)
    assert broker_config["bootstrap.servers"] == default_broker

    default_brokers = ["my.broker:9092", "my.second.broker:9092"]
    settings.DEFAULT_STORAGE_BROKERS = {storage_name: default_brokers}
    broker_config = get_default_kafka_configuration(storage_key=storage_key)
    assert broker_config["bootstrap.servers"] == ",".join(default_brokers)
Example #4
def test_default_config_override_new_config_storage():
    default_broker = "my.other.broker:9092"
    default_broker_config = {
        "bootstrap.servers": default_broker,
    }
    settings.STORAGE_BROKER_CONFIG = {
        StorageKey.EVENTS.value: default_broker_config,
    }
    broker_config = get_default_kafka_configuration(StorageKey.EVENTS)
    assert broker_config["bootstrap.servers"] == default_broker

    other_broker = "another.broker:9092"
    settings.BROKER_CONFIG = {
        "bootstrap.servers": other_broker,
    }
    broker_config = get_default_kafka_configuration(StorageKey.ERRORS)
    assert broker_config["bootstrap.servers"] == other_broker
Example #5
def test_default_config_override_new_config():
    default_broker = "my.broker:9092"
    default_broker_config = {
        "bootstrap.servers": default_broker,
    }
    settings.BROKER_CONFIG = default_broker_config
    broker_config = get_default_kafka_configuration()
    assert broker_config["bootstrap.servers"] == default_broker
Example #6
def test_default_config_legacy_override_storage_servers_fallback():
    default_broker = "my.other.broker:9092"
    default_brokers = ["my.broker:9092", "my.second.broker:9092"]
    settings.DEFAULT_BROKERS = [default_broker]
    settings.DEFAULT_STORAGE_BROKERS = {
        StorageKey.EVENTS.value: default_brokers,
    }
    storage_key = StorageKey(StorageKey.ERRORS)
    broker_config = get_default_kafka_configuration(storage_key=storage_key)
    assert broker_config["bootstrap.servers"] == default_broker
Example #7
def test_default_config_new_fallback_old():
    old_default_broker = "my.broker:9092"
    default_broker = "my.other.broker:9092"
    default_broker_config = {
        "bootstrap.servers": default_broker,
    }
    settings.DEFAULT_BROKERS = [old_default_broker]
    settings.STORAGE_BROKER_CONFIG = {
        StorageKey.EVENTS.value: default_broker_config,
    }
    broker_config = get_default_kafka_configuration(StorageKey.ERRORS)
    assert broker_config["bootstrap.servers"] == old_default_broker
Example #8
    def __init__(
        self,
        storage_key: StorageKey,
        raw_topic: Optional[str],
        replacements_topic: Optional[str],
        max_batch_size: int,
        max_batch_time_ms: int,
        bootstrap_servers: Sequence[str],
        group_id: str,
        commit_log_topic: Optional[str],
        auto_offset_reset: str,
        queued_max_messages_kbytes: int,
        queued_min_messages: int,
        processes: Optional[int],
        input_block_size: Optional[int],
        output_block_size: Optional[int],
        commit_retry_policy: Optional[RetryPolicy] = None,
        profile_path: Optional[str] = None,
    ) -> None:
        self.storage = get_writable_storage(storage_key)
        self.bootstrap_servers = bootstrap_servers
        self.broker_config = get_default_kafka_configuration(
            storage_key, bootstrap_servers=bootstrap_servers
        )
        self.producer_broker_config = build_kafka_producer_configuration(
            storage_key,
            bootstrap_servers=bootstrap_servers,
            override_params={
                "partitioner": "consistent",
                "message.max.bytes": 50000000,  # 50MB, default is 1MB
            },
        )

        stream_loader = self.storage.get_table_writer().get_stream_loader()

        self.raw_topic: Topic
        if raw_topic is not None:
            self.raw_topic = Topic(raw_topic)
        else:
            self.raw_topic = Topic(stream_loader.get_default_topic_spec().topic_name)

        self.replacements_topic: Optional[Topic]
        if replacements_topic is not None:
            self.replacements_topic = Topic(replacements_topic)
        else:
            replacement_topic_spec = stream_loader.get_replacement_topic_spec()
            if replacement_topic_spec is not None:
                self.replacements_topic = Topic(replacement_topic_spec.topic_name)
            else:
                self.replacements_topic = None

        self.commit_log_topic: Optional[Topic]
        if commit_log_topic is not None:
            self.commit_log_topic = Topic(commit_log_topic)
        else:
            commit_log_topic_spec = stream_loader.get_commit_log_topic_spec()
            if commit_log_topic_spec is not None:
                self.commit_log_topic = Topic(commit_log_topic_spec.topic_name)
            else:
                self.commit_log_topic = None

        # XXX: This can result in a producer being built in cases where it's
        # not actually required.
        self.producer = Producer(self.producer_broker_config)

        self.metrics = MetricsWrapper(
            environment.metrics,
            "consumer",
            tags={"group": group_id, "storage": storage_key.value},
        )

        self.max_batch_size = max_batch_size
        self.max_batch_time_ms = max_batch_time_ms
        self.group_id = group_id
        self.auto_offset_reset = auto_offset_reset
        self.queued_max_messages_kbytes = queued_max_messages_kbytes
        self.queued_min_messages = queued_min_messages
        self.processes = processes
        self.input_block_size = input_block_size
        self.output_block_size = output_block_size
        self.__profile_path = profile_path

        if commit_retry_policy is None:
            commit_retry_policy = BasicRetryPolicy(
                3,
                constant_delay(1),
                lambda e: isinstance(e, KafkaException)
                and e.args[0].code()
                in (
                    KafkaError.REQUEST_TIMED_OUT,
                    KafkaError.NOT_COORDINATOR,
                    KafkaError._WAIT_COORD,
                ),
            )

        self.__commit_retry_policy = commit_retry_policy
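
The constructor above mostly wires topic names and Kafka settings together: every topic argument may be None, in which case the storage's stream loader supplies the default spec, and a producer plus a commit retry policy are always built. A hedged instantiation sketch, assuming this __init__ belongs to a class named ConsumerBuilder and using purely illustrative values:

consumer_builder = ConsumerBuilder(
    storage_key=StorageKey.EVENTS,
    raw_topic=None,              # use the stream loader's default topic
    replacements_topic=None,     # use the replacement topic spec, if any
    max_batch_size=1000,
    max_batch_time_ms=2000,
    bootstrap_servers=["localhost:9092"],
    group_id="snuba-consumers",
    commit_log_topic=None,       # use the commit log topic spec, if any
    auto_offset_reset="earliest",
    queued_max_messages_kbytes=10000,
    queued_min_messages=1000,
    processes=None,
    input_block_size=None,
    output_block_size=None,
)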
Example #9
class TestStrictConsumer:
    broker_config = get_default_kafka_configuration(
        bootstrap_servers=["somewhere"])

    def __consumer(self, on_message) -> StrictConsumer:
        return StrictConsumer(
            topic="my_topic",
            group_id="something",
            broker_config=self.broker_config,
            initial_auto_offset_reset="earliest",
            partition_assignment_timeout=1,
            on_partitions_assigned=None,
            on_partitions_revoked=None,
            on_message=on_message,
        )

    @patch("snuba.consumers.strict_consumer.StrictConsumer._create_consumer")
    def test_empty_topic(self, create_consumer) -> None:
        kafka_consumer = FakeConfluentKafkaConsumer()
        kafka_consumer.items = [
            build_confluent_kafka_message(0, 0, None, True),
        ]
        create_consumer.return_value = kafka_consumer

        on_message = MagicMock()
        consumer = self.__consumer(on_message)

        consumer.run()
        on_message.assert_not_called()

    @patch("snuba.consumers.strict_consumer.StrictConsumer._create_consumer")
    def test_failure(self, create_consumer) -> None:
        kafka_consumer = FakeConfluentKafkaConsumer()
        create_consumer.return_value = kafka_consumer

        on_message = MagicMock()
        consumer = self.__consumer(on_message)

        with pytest.raises(NoPartitionAssigned):
            consumer.run()

        on_message.assert_not_called()

    @patch("snuba.consumers.strict_consumer.StrictConsumer._create_consumer")
    def test_one_message(self, create_consumer) -> None:
        kafka_consumer = FakeConfluentKafkaConsumer()
        create_consumer.return_value = kafka_consumer

        msg = build_confluent_kafka_message(0, 0, b"ABCABC", False)
        kafka_consumer.items = [
            msg,
            build_confluent_kafka_message(0, 0, None, True),
        ]

        on_message = MagicMock()
        on_message.return_value = CommitDecision.DO_NOT_COMMIT
        consumer = self.__consumer(on_message)

        consumer.run()
        on_message.assert_called_once_with(msg)
        assert kafka_consumer.commit_calls == 0

    @patch("snuba.consumers.strict_consumer.StrictConsumer._create_consumer")
    def test_commits(self, create_consumer) -> None:
        kafka_consumer = FakeConfluentKafkaConsumer()
        create_consumer.return_value = kafka_consumer
        error = MagicMock()
        error.code.return_value = KafkaError._PARTITION_EOF
        kafka_consumer.items = [
            build_confluent_kafka_message(0, 0, b"ABCABC", False),
            build_confluent_kafka_message(1, 0, b"ABCABC", False),
            build_confluent_kafka_message(2, 0, b"ABCABC", False),
            build_confluent_kafka_message(0, 0, None, True),
        ]

        on_message = MagicMock()
        on_message.return_value = CommitDecision.COMMIT_PREV
        consumer = self.__consumer(on_message)

        consumer.run()
        on_message.assert_called()
        assert kafka_consumer.commit_calls == 2
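
The commit behaviour of StrictConsumer is driven entirely by the CommitDecision returned from on_message: DO_NOT_COMMIT leaves offsets untouched, while COMMIT_PREV appears to commit the position before the current message, which would explain why three data messages above produce two commit calls. A hedged example of a concrete callback in place of the MagicMock (the payload check is illustrative only):

def handle_message(message) -> CommitDecision:
    # Commit everything before this message once we have seen a payload;
    # otherwise keep reading without committing.
    if message.value():
        return CommitDecision.COMMIT_PREV
    return CommitDecision.DO_NOT_COMMIT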
Example #10
def bootstrap(
    *,
    bootstrap_server: Sequence[str],
    kafka: bool,
    migrate: bool,
    force: bool,
    log_level: Optional[str] = None,
) -> None:
    """
    Warning: Not intended to be used in production yet.
    """
    if not force:
        raise click.ClickException("Must use --force to run")

    setup_logging(log_level)

    logger = logging.getLogger("snuba.bootstrap")

    import time

    if kafka:
        logger.debug("Using Kafka with %r", bootstrap_server)
        from confluent_kafka.admin import AdminClient, NewTopic

        attempts = 0
        while True:
            try:
                logger.debug("Attempting to connect to Kafka (attempt %d)", attempts)
                client = AdminClient(
                    get_default_kafka_configuration(
                        bootstrap_servers=bootstrap_server,
                        override_params={"socket.timeout.ms": 1000},
                    )
                )
                client.list_topics(timeout=1)
                break
            except Exception as e:
                logger.error(
                    "Connection to Kafka failed (attempt %d)", attempts, exc_info=e
                )
                attempts += 1
                if attempts == 60:
                    raise
                time.sleep(1)

        topics = {}
        for name in ACTIVE_DATASET_NAMES:
            dataset = get_dataset(name)
            for entity in dataset.get_all_entities():
                writable_storage = entity.get_writable_storage()
                if writable_storage:
                    table_writer = writable_storage.get_table_writer()
                    stream_loader = table_writer.get_stream_loader()
                    for topic_spec in stream_loader.get_all_topic_specs():
                        if topic_spec.topic_name in topics:
                            continue
                        logger.debug(
                            "Adding topic %s to creation list", topic_spec.topic_name
                        )
                        topics[topic_spec.topic_name] = NewTopic(
                            topic_spec.topic_name,
                            num_partitions=topic_spec.partitions_number,
                            replication_factor=topic_spec.replication_factor,
                        )

        logger.debug("Initiating topic creation")
        for topic, future in client.create_topics(
            list(topics.values()), operation_timeout=1
        ).items():
            try:
                future.result()
                logger.info("Topic %s created", topic)
            except Exception as e:
                logger.error("Failed to create topic %s", topic, exc_info=e)

    if migrate:
        check_clickhouse_connections()
        Runner().run_all(force=True)
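
The bootstrap helper refuses to run without force, retries the Kafka connection once per second for up to 60 attempts, creates any missing topics derived from the active datasets, and finally runs all migrations when migrate is set. It appears to back a CLI command (it raises click.ClickException), but it can also be driven directly; a hedged call sketch with illustrative values:

bootstrap(
    bootstrap_server=["localhost:9092"],
    kafka=True,      # create the topics required by the active datasets
    migrate=True,    # run migrations via Runner().run_all(force=True)
    force=True,      # mandatory; the function raises without it
    log_level="debug",
)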
Example #11
def test_default_config():
    broker_config = get_default_kafka_configuration()
    assert (broker_config["bootstrap.servers"] ==
            settings.BROKER_CONFIG["bootstrap.servers"])
Example #12
class TestBootstrapState:
    broker_config = get_default_kafka_configuration(
        bootstrap_servers=["somewhere"])

    def __consumer(self, on_message) -> StrictConsumer:
        return StrictConsumer(
            topic="topic",
            group_id="something",
            broker_config=self.broker_config,
            auto_offset_reset="earliest",
            partition_assignment_timeout=1,
            on_partitions_assigned=None,
            on_partitions_revoked=None,
            on_message=on_message,
        )

    @patch("snuba.consumers.strict_consumer.StrictConsumer._create_consumer")
    def test_empty_topic(self, create_consumer) -> None:
        kafka_consumer = FakeConfluentKafkaConsumer()
        kafka_consumer.items = [
            build_confluent_kafka_message(0, 0, None, True),
        ]
        create_consumer.return_value = kafka_consumer

        bootstrap = BootstrapState(
            "cdc_control",
            self.broker_config,
            "something",
            get_cdc_storage(StorageKey.GROUPEDMESSAGES),
        )

        ret = bootstrap.handle(None)
        assert ret[0] == ConsumerStateCompletionEvent.NO_SNAPSHOT
        assert kafka_consumer.commit_calls == 0

    @patch("snuba.consumers.strict_consumer.StrictConsumer._create_consumer")
    def test_snapshot_for_other_table(self, create_consumer) -> None:
        kafka_consumer = FakeConfluentKafkaConsumer()
        kafka_consumer.items = [
            build_confluent_kafka_message(
                0,
                0,
                b'{"snapshot-id":"abc123", "tables": ["someone_else"], "product":"snuba", "event":"snapshot-init"}',
                False,
            ),
            build_confluent_kafka_message(0, 0, None, True),
        ]
        create_consumer.return_value = kafka_consumer

        bootstrap = BootstrapState(
            "cdc_control",
            self.broker_config,
            "something",
            get_cdc_storage(StorageKey.GROUPEDMESSAGES),
        )

        ret = bootstrap.handle(None)
        assert ret[0] == ConsumerStateCompletionEvent.NO_SNAPSHOT
        assert kafka_consumer.commit_calls == 1

    @patch("snuba.consumers.strict_consumer.StrictConsumer._create_consumer")
    def test_init_snapshot(self, create_consumer) -> None:
        kafka_consumer = FakeConfluentKafkaConsumer()
        kafka_consumer.items = [
            build_confluent_kafka_message(
                0,
                0,
                b'{"snapshot-id":"abc123", "tables": ["sentry_groupedmessage"], "product":"snuba", "event":"snapshot-init"}',
                False,
            ),
            build_confluent_kafka_message(0, 0, None, True),
        ]
        create_consumer.return_value = kafka_consumer

        bootstrap = BootstrapState(
            "cdc_control",
            self.broker_config,
            "something",
            get_cdc_storage(StorageKey.GROUPEDMESSAGES),
        )

        ret = bootstrap.handle(None)
        assert ret[0] == ConsumerStateCompletionEvent.SNAPSHOT_INIT_RECEIVED
        assert kafka_consumer.commit_calls == 0

    @patch("snuba.consumers.strict_consumer.StrictConsumer._create_consumer")
    def test_snapshot_loaded(self, create_consumer) -> None:
        kafka_consumer = FakeConfluentKafkaConsumer()
        kafka_consumer.items = [
            build_confluent_kafka_message(
                0,
                0,
                b'{"snapshot-id":"abc123", "product":"somewhere-else", "tables": [], "event":"snapshot-init"}',
                False,
            ),
            build_confluent_kafka_message(
                1,
                0,
                b'{"snapshot-id":"abc123", "product":"snuba", "tables": ["sentry_groupedmessage"], "event":"snapshot-init"}',
                False,
            ),
            build_confluent_kafka_message(
                2,
                0,
                (b'{"snapshot-id":"abc123", "event":"snapshot-loaded",'
                 b'"transaction-info": {"xmin":123, "xmax":124, "xip-list": []}'
                 b"}"),
                False,
            ),
            build_confluent_kafka_message(0, 0, None, True),
        ]
        create_consumer.return_value = kafka_consumer

        bootstrap = BootstrapState(
            "cdc_control",
            self.broker_config,
            "something",
            get_cdc_storage(StorageKey.GROUPEDMESSAGES),
        )

        ret = bootstrap.handle(None)
        assert ret[0] == ConsumerStateCompletionEvent.SNAPSHOT_READY_RECEIVED
        assert kafka_consumer.commit_calls == 2
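
The control messages consumed by BootstrapState are JSON payloads on the "cdc_control" topic passed to it above; whether a snapshot is picked up depends on the product and tables fields. The snapshot-init event that triggers SNAPSHOT_INIT_RECEIVED, pretty-printed for readability (content taken verbatim from the test):

{
    "snapshot-id": "abc123",
    "tables": ["sentry_groupedmessage"],
    "product": "snuba",
    "event": "snapshot-init"
}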
Example #13
class KafkaStreamsTestCase(StreamsTestMixin[KafkaPayload], TestCase):

    configuration = get_default_kafka_configuration()

    @contextlib.contextmanager
    def get_topic(self, partitions: int = 1) -> Iterator[Topic]:
        name = f"test-{uuid.uuid1().hex}"
        client = AdminClient(self.configuration)
        [[key, future]] = client.create_topics(
            [NewTopic(name, num_partitions=partitions,
                      replication_factor=1)]).items()
        assert key == name
        assert future.result() is None
        try:
            yield Topic(name)
        finally:
            [[key, future]] = client.delete_topics([name]).items()
            assert key == name
            assert future.result() is None

    def get_consumer(
        self,
        group: Optional[str] = None,
        enable_end_of_partition: bool = True,
        auto_offset_reset: str = "earliest",
    ) -> KafkaConsumer:
        return KafkaConsumer(
            {
                **self.configuration,
                "auto.offset.reset": auto_offset_reset,
                "enable.auto.commit": "false",
                "enable.auto.offset.store": "false",
                "enable.partition.eof": enable_end_of_partition,
                "group.id": group if group is not None else uuid.uuid1().hex,
                "session.timeout.ms": 10000,
            }
        )

    def get_producer(self) -> KafkaProducer:
        return KafkaProducer(self.configuration)

    def get_payloads(self) -> Iterator[KafkaPayload]:
        for i in itertools.count():
            yield KafkaPayload(None, f"{i}".encode("utf8"), [])

    def test_auto_offset_reset_earliest(self) -> None:
        with self.get_topic() as topic:
            with closing(self.get_producer()) as producer:
                producer.produce(topic, next(self.get_payloads())).result(5.0)

            with closing(self.get_consumer(
                    auto_offset_reset="earliest")) as consumer:
                consumer.subscribe([topic])

                message = consumer.poll(10.0)
                assert isinstance(message, Message)
                assert message.offset == 0

    def test_auto_offset_reset_latest(self) -> None:
        with self.get_topic() as topic:
            with closing(self.get_producer()) as producer:
                producer.produce(topic, next(self.get_payloads())).result(5.0)

            with closing(
                    self.get_consumer(auto_offset_reset="latest")) as consumer:
                consumer.subscribe([topic])

                try:
                    consumer.poll(10.0)  # XXX: getting the subscription is slow
                except EndOfPartition as error:
                    assert error.partition == Partition(topic, 0)
                    assert error.offset == 1
                else:
                    raise AssertionError("expected EndOfPartition error")

    def test_auto_offset_reset_error(self) -> None:
        with self.get_topic() as topic:
            with closing(self.get_producer()) as producer:
                producer.produce(topic, next(self.get_payloads())).result(5.0)

            with closing(
                    self.get_consumer(auto_offset_reset="error")) as consumer:
                consumer.subscribe([topic])

                with pytest.raises(ConsumerError):
                    consumer.poll(10.0)  # XXX: getting the subscription is slow

    def test_commit_log_consumer(self) -> None:
        # XXX: This would be better as an integration test (or at least a test
        # against an abstract Producer interface) instead of a test against a
        # mock.
        commit_log_producer = FakeConfluentKafkaProducer()

        consumer: KafkaConsumer = KafkaConsumerWithCommitLog(
            {
                **self.configuration,
                "auto.offset.reset": "earliest",
                "enable.auto.commit": "false",
                "enable.auto.offset.store": "false",
                "enable.partition.eof": "true",
                "group.id": "test",
                "session.timeout.ms": 10000,
            },
            producer=commit_log_producer,
            commit_log_topic=Topic("commit-log"),
        )

        with self.get_topic() as topic, closing(consumer) as consumer:
            with closing(self.get_producer()) as producer:
                producer.produce(topic, next(self.get_payloads())).result(5.0)

            consumer.subscribe([topic])

            message = consumer.poll(
                10.0)  # XXX: getting the subscription is slow
            assert isinstance(message, Message)

            consumer.stage_offsets({message.partition: message.next_offset})

            assert consumer.commit_offsets() == {
                Partition(topic, 0): message.next_offset
            }

            assert len(commit_log_producer.messages) == 1
            commit_message = commit_log_producer.messages[0]
            assert commit_message.topic() == "commit-log"

            assert commit_codec.decode(
                KafkaPayload(
                    commit_message.key(),
                    commit_message.value(),
                    commit_message.headers(),
                )) == Commit("test", Partition(topic, 0), message.next_offset)