Example 1
def test_collect() -> None:
    step_factory = Mock()
    step_factory.return_value = inner_step = Mock()

    commit_function = Mock()
    partition = Partition(Topic("topic"), 0)
    messages = message_generator(partition, 0)

    collect_step = CollectStep(step_factory, commit_function, 2, 60)

    # A batch should be started the first time the step receives a message.
    with assert_changes(lambda: step_factory.call_count, 0, 1):
        collect_step.poll()
        collect_step.submit(next(messages))  # offset 0

    # Subsequent messages should reuse the existing batch, ...
    with assert_does_not_change(lambda: step_factory.call_count, 1):
        collect_step.poll()
        collect_step.submit(next(messages))  # offset 1

    # ...until we hit the batch size limit.
    with assert_changes(lambda: inner_step.close.call_count,
                        0, 1), assert_changes(
                            lambda: inner_step.join.call_count, 0,
                            1), assert_changes(
                                lambda: commit_function.call_count, 0, 1):
        collect_step.poll()
        assert commit_function.call_args == call({partition: 2})

    step_factory.return_value = inner_step = Mock()

    # The next message should create a new batch.
    with assert_changes(lambda: step_factory.call_count, 1, 2):
        collect_step.submit(next(messages))

    with assert_changes(lambda: inner_step.close.call_count, 0, 1):
        collect_step.close()

    with assert_changes(lambda: inner_step.join.call_count, 0,
                        1), assert_changes(lambda: commit_function.call_count,
                                           1, 2):
        collect_step.join()
Example 2
def test_message_batch() -> None:
    partition = Partition(Topic("test"), 0)

    with SharedMemoryManager() as smm:
        block = smm.SharedMemory(4096)
        assert block.size == 4096

        message = Message(partition, 0, KafkaPayload(None, b"\x00" * 4000,
                                                     None), datetime.now())

        batch: MessageBatch[KafkaPayload] = MessageBatch(block)
        with assert_changes(lambda: len(batch), 0, 1):
            batch.append(message)

        assert batch[0] == message
        assert list(batch) == [message]

        with assert_does_not_change(lambda: len(batch),
                                    1), pytest.raises(ValueTooLarge):
            batch.append(message)
Example 3
    def __commit(self) -> Mapping[Partition, int]:
        if self.__state in {
                KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR
        }:
            raise InvalidState(self.__state)

        result: Optional[Sequence[ConfluentTopicPartition]]

        if self.__staged_offsets:
            result = self.__consumer.commit(
                offsets=[
                    ConfluentTopicPartition(partition.topic.name,
                                            partition.index, offset)
                    for partition, offset in self.__staged_offsets.items()
                ],
                asynchronous=False,
            )
        else:
            result = []

        assert result is not None  # synchronous commit should return result immediately

        self.__staged_offsets.clear()

        offsets: MutableMapping[Partition, int] = {}

        for value in result:
            # The Confluent Kafka Consumer will include logical offsets in the
            # sequence of ``Partition`` objects returned by ``commit``. These
            # are an implementation detail of the Kafka Consumer, so we don't
            # expose them here.
            # NOTE: These should no longer be seen now that we are forcing
            # offsets to be set as part of the assignment callback.
            if value.offset in self.LOGICAL_OFFSETS:
                continue

            assert value.offset >= 0, "expected non-negative offset"
            offsets[Partition(Topic(value.topic),
                              value.partition)] = value.offset

        return offsets
Example 4
    def produce(self, destination: Union[Topic, Partition],
                payload: TPayload) -> Future[Message[TPayload]]:
        with self.__lock:
            assert not self.__closed

            partition: Partition
            if isinstance(destination, Topic):
                partition = Partition(destination, 0)  # TODO: Randomize?
            elif isinstance(destination, Partition):
                partition = destination
            else:
                raise TypeError("invalid destination type")

            future: Future[Message[TPayload]] = Future()
            future.set_running_or_notify_cancel()
            try:
                message = self.__broker.produce(partition, payload)
                future.set_result(message)
            except Exception as e:
                future.set_exception(e)
            return future
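
The future returned by this local ``produce`` is already resolved before it is returned, so callers can invoke ``result()`` on it immediately, as the other snippets on this page do. A minimal usage sketch, assuming the in-memory ``DummyBroker``/``DummyProducer`` classes used elsewhere in these examples (import paths are omitted, as in the surrounding fragments, and constructor arguments may differ between versions):

# Usage sketch (illustrative): produce to a Topic and wait on the returned
# future. Producing to a Topic targets partition 0 in this local broker.
broker: DummyBroker[int] = DummyBroker()
topic = Topic("example")
broker.create_topic(topic, partitions=1)

producer: DummyProducer[int] = DummyProducer(broker)

message = producer.produce(topic, 1).result(timeout=1.0)
assert message.partition == Partition(topic, 0)
assert message.payload == 1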
Example 5
        def revocation_callback(
                consumer: ConfluentConsumer,
                partitions: Sequence[ConfluentTopicPartition]) -> None:
            self.__state = KafkaConsumerState.REVOKING

            partitions = [
                Partition(Topic(i.topic), i.partition) for i in partitions
            ]

            try:
                if on_revoke is not None:
                    on_revoke(partitions)
            finally:
                for partition in partitions:
                    # Staged offsets are deleted during partition revocation to
                    # prevent later committing offsets for partitions that are
                    # no longer owned by this consumer.
                    if partition in self.__staged_offsets:
                        logger.warning(
                            "Dropping staged offset for revoked partition (%r)!",
                            partition,
                        )
                        del self.__staged_offsets[partition]

                    try:
                        self.__offsets.pop(partition)
                    except KeyError:
                        # If there was an error during assignment, this
                        # partition may have never been added to the offsets
                        # mapping.
                        logger.warning(
                            "failed to delete offset for unknown partition: %r",
                            partition,
                        )

                    self.__paused.discard(partition)

                self.__state = KafkaConsumerState.CONSUMING
Example 6
    def test_merge_insert(self):
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.write_raw_events(self.event)

        assert self._issue_count(self.project_id) == [{
            "count": 1,
            "group_id": 1
        }]

        timestamp = datetime.now(tz=pytz.utc)

        project_id = self.project_id

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    "end_merge",
                    {
                        "project_id": project_id,
                        "new_group_id": 2,
                        "previous_group_ids": [1],
                        "datetime":
                        timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )).encode("utf-8"),
            ),
            datetime.now(),
        )

        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        assert self._issue_count(1) == [{"count": 1, "group_id": 2}]
Example 7
    def subscribe(self, consumer: DummyConsumer[TPayload],
                  topics: Sequence[Topic]) -> Mapping[Partition, int]:
        with self.__lock:
            if self.__subscriptions[consumer.group]:
                # XXX: Consumer group balancing is not currently implemented.
                if consumer not in self.__subscriptions[consumer.group]:
                    raise NotImplementedError

                # XXX: Updating an existing subscription is currently not implemented.
                if self.__subscriptions[consumer.group][consumer] != topics:
                    raise NotImplementedError

            self.__subscriptions[consumer.group][consumer] = topics

            assignment: MutableMapping[Partition, int] = {}

            for topic in self.__topics.keys() & set(topics):
                for index in range(len(self.__topics[topic])):
                    partition = Partition(topic, index)
                    # TODO: Handle offset reset more realistically.
                    assignment[partition] = self.__offsets[consumer.group].get(
                        partition, 0)

        return assignment
Example 8
    def eventstream(*, dataset: Dataset):
        ensure_table_exists(dataset)
        record = json.loads(http_request.data)

        version = record[0]
        if version != 2:
            raise RuntimeError("Unsupported protocol version: %s" % record)

        message: Message[KafkaPayload] = Message(
            Partition(Topic("topic"), 0),
            0,
            KafkaPayload(None, http_request.data),
            datetime.now(),
        )

        type_ = record[1]
        if type_ == "insert":
            from snuba.consumer import ConsumerWorker

            storage = dataset.get_writable_storage()

            worker = ConsumerWorker(storage, metrics=metrics)
        else:
            from snuba.replacer import ReplacerWorker

            storage = dataset.get_writable_storage()
            assert storage is not None

            worker = ReplacerWorker(clickhouse_rw, storage, metrics=metrics)

        processed = worker.process_message(message)
        if processed is not None:
            batch = [processed]
            worker.flush_batch(batch)

        return ("ok", 200, {"Content-Type": "text/plain"})
Example 9
    def __delivery_callback(
        self,
        future: Future[Message[TPayload]],
        payload: TPayload,
        error: KafkaError,
        message: ConfluentMessage,
    ) -> None:
        if error is not None:
            future.set_exception(TransportError(error))
        else:
            try:
                timestamp_type, timestamp_value = message.timestamp()
                if timestamp_type is TIMESTAMP_NOT_AVAILABLE:
                    raise ValueError("timestamp not available")

                future.set_result(
                    Message(
                        Partition(Topic(message.topic()), message.partition()),
                        message.offset(),
                        payload,
                        datetime.utcfromtimestamp(timestamp_value / 1000.0),
                    ))
            except Exception as error:
                future.set_exception(error)
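
A callback with this signature is normally bound to a pending future at produce time. The sketch below shows one way to wire it up with ``functools.partial`` and confluent-kafka's ``on_delivery`` argument; the ``__producer`` and ``__codec`` attribute names are assumptions for illustration, not necessarily the library's actual implementation.

from functools import partial

    def produce(self, destination: Topic,
                payload: TPayload) -> Future[Message[TPayload]]:
        # Illustrative sketch only: create a pending future and let the
        # delivery callback above resolve it once the broker acknowledges
        # (or rejects) the write.
        future: Future[Message[TPayload]] = Future()
        future.set_running_or_notify_cancel()
        encoded = self.__codec.encode(payload)  # assumed codec attribute
        self.__producer.produce(
            destination.name,
            value=encoded.value,
            key=encoded.key,
            # partial() binds ``future`` and ``payload`` so the remaining two
            # arguments match confluent-kafka's on_delivery(error, message).
            on_delivery=partial(self.__delivery_callback, future, payload),
        )
        return future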
Example 10
    def test_storage(self) -> None:
        topic = Topic(uuid.uuid1().hex)
        partitions = 3

        self.storage.create_topic(topic, partitions)

        assert [*self.storage.list_topics()] == [topic]

        assert self.storage.get_partition_count(topic) == partitions

        with pytest.raises(TopicExists):
            self.storage.create_topic(topic, partitions)

        with pytest.raises(TopicDoesNotExist):
            self.storage.get_partition_count(Topic("invalid"))

        with pytest.raises(TopicDoesNotExist):
            self.storage.consume(Partition(Topic("invalid"), 0), 0)

        with pytest.raises(TopicDoesNotExist):
            self.storage.produce(Partition(Topic("invalid"), 0), 0,
                                 datetime.now())

        with pytest.raises(PartitionDoesNotExist):
            self.storage.consume(Partition(topic, -1), 0)

        with pytest.raises(PartitionDoesNotExist):
            self.storage.consume(Partition(topic, partitions + 1), 0)

        with pytest.raises(PartitionDoesNotExist):
            self.storage.produce(Partition(topic, -1), 0, datetime.now())

        with pytest.raises(PartitionDoesNotExist):
            self.storage.produce(Partition(topic, partitions + 1), 0,
                                 datetime.now())

        self.storage.delete_topic(topic)

        with pytest.raises(TopicDoesNotExist):
            self.storage.delete_topic(topic)
Example 11
def assignment_callback(offsets: Mapping[Partition, int]) -> None:
    synchronized_consumer.pause([Partition(topic, 0)])
Example 12
def test_tick_consumer_non_monotonic() -> None:
    topic = Topic("messages")
    partition = Partition(topic, 0)

    clock = TestingClock(epoch.timestamp())
    broker: DummyBroker[int] = DummyBroker(clock)
    broker.create_topic(topic, partitions=1)

    producer: DummyProducer[int] = DummyProducer(broker)

    inner_consumer: Consumer[int] = DummyConsumer(broker, "group")

    consumer = TickConsumer(inner_consumer)

    consumer.subscribe([topic])

    producer.produce(partition, 0)

    clock.sleep(1)

    producer.produce(partition, 1)

    with assert_changes(inner_consumer.tell, {partition: 0},
                        {partition: 1}), assert_does_not_change(
                            consumer.tell, {partition: 0}):
        assert consumer.poll() is None

    with assert_changes(inner_consumer.tell, {partition: 1},
                        {partition: 2}), assert_changes(
                            consumer.tell, {partition: 0}, {partition: 1}):
        assert consumer.poll() == Message(
            partition,
            0,
            Tick(
                offsets=Interval(0, 1),
                timestamps=Interval(epoch, epoch + timedelta(seconds=1)),
            ),
            epoch + timedelta(seconds=1),
        )

    clock.sleep(-1)

    producer.produce(partition, 2)

    with assert_changes(inner_consumer.tell, {partition: 2},
                        {partition: 3}), assert_does_not_change(
                            consumer.tell, {partition: 1}):
        assert consumer.poll() is None

    clock.sleep(2)

    producer.produce(partition, 3)

    with assert_changes(inner_consumer.tell, {partition: 3},
                        {partition: 4}), assert_changes(
                            consumer.tell, {partition: 1}, {partition: 3}):
        assert consumer.poll() == Message(
            partition,
            1,
            Tick(
                offsets=Interval(1, 3),
                timestamps=Interval(epoch + timedelta(seconds=1),
                                    epoch + timedelta(seconds=2)),
            ),
            epoch + timedelta(seconds=2),
        )
Example 13
def test_synchronized_consumer_pause_resume(
        broker: Broker[KafkaPayload]) -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker.create_topic(topic, partitions=1)
    broker.create_topic(commit_log_topic, partitions=1)

    consumer = broker.get_consumer("consumer")
    producer = broker.get_producer()
    commit_log_consumer = broker.get_consumer("commit-log-consumer")

    messages = [
        producer.produce(topic, KafkaPayload(None, f"{i}".encode("utf8"),
                                             [])).result(1.0) for i in range(2)
    ]

    synchronized_consumer: Consumer[KafkaPayload] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    with closing(synchronized_consumer):

        def assignment_callback(offsets: Mapping[Partition, int]) -> None:
            synchronized_consumer.pause([Partition(topic, 0)])

        synchronized_consumer.subscribe([topic], on_assign=assignment_callback)

        with assert_changes(synchronized_consumer.paused, [],
                            [Partition(topic, 0)]), assert_changes(
                                consumer.paused, [], [Partition(topic, 0)]):
            assert synchronized_consumer.poll(0.0) is None

        # Advancing the commit log offset should not cause the consumer to
        # resume, since it has been explicitly paused.
        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader", Partition(topic, 0),
                           messages[0].next_offset)),
            ).result(),
        )

        with assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            assert synchronized_consumer.poll(0) is None

        # Resuming the partition does not immediately cause the partition to
        # resume, but it should look as if it is resumed to the caller.
        with assert_changes(synchronized_consumer.paused,
                            [Partition(topic, 0)], []), assert_does_not_change(
                                consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.resume([Partition(topic, 0)])

        # The partition should be resumed on the next poll call, however.
        with assert_changes(consumer.paused, [Partition(topic, 0)], []):
            assert synchronized_consumer.poll(0) == messages[0]

        # Pausing due to hitting the offset fence should not appear as a paused
        # partition to the caller.
        with assert_does_not_change(synchronized_consumer.paused,
                                    []), assert_changes(
                                        consumer.paused, [],
                                        [Partition(topic, 0)]):
            assert synchronized_consumer.poll(0) is None

        # Other pause and resume actions should not cause the inner consumer to
        # change its state while up against the fence.
        with assert_changes(synchronized_consumer.paused, [],
                            [Partition(topic, 0)]), assert_does_not_change(
                                consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.pause([Partition(topic, 0)])

        with assert_changes(synchronized_consumer.paused,
                            [Partition(topic, 0)], []), assert_does_not_change(
                                consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.resume([Partition(topic, 0)])
Example 14
    def poll(self,
             timeout: Optional[float] = None) -> Optional[Message[TPayload]]:
        """
        Return the next message available to be consumed, if one is
        available. If no message is available, this method will block up to
        the ``timeout`` value before returning ``None``. A timeout of
        ``0.0`` represents "do not block", while a timeout of ``None``
        represents "block until a message is available (or forever)".

        Calling this method may also invoke subscription state change
        callbacks.

        This method may also raise an ``EndOfPartition`` error (a subtype of
        ``ConsumerError``) when the consumer has reached the end of a
        partition that it is subscribed to and no additional messages are
        available. The ``partition`` attribute of the raised exception
        specifies which partition has reached its end. (Since this
        consumer is multiplexing a set of partitions, this exception does not
        mean that *all* of the partitions that the consumer is subscribed to
        do not have any messages, just that it has reached the end of one of
        them. This also does not mean that additional messages won't be
        available in future poll calls.) Not every backend implementation
        supports this feature or is configured to raise in this scenario.

        Raises an ``InvalidState`` exception if called on a closed consumer.

        Raises a ``TransportError`` for various other consumption-related
        errors.
        """
        if self.__state is not KafkaConsumerState.CONSUMING:
            raise InvalidState(self.__state)

        message: Optional[ConfluentMessage] = self.__consumer.poll(
            *([timeout] if timeout is not None else []))
        if message is None:
            return None

        error: Optional[KafkaError] = message.error()
        if error is not None:
            code = error.code()
            if code == KafkaError._PARTITION_EOF:
                raise EndOfPartition(
                    Partition(Topic(message.topic()), message.partition()),
                    message.offset(),
                )
            elif code == KafkaError._TRANSPORT:
                raise TransportError(str(error))
            else:
                raise ConsumerError(str(error))

        headers: Optional[Headers] = message.headers()
        result = Message(
            Partition(Topic(message.topic()), message.partition()),
            message.offset(),
            self.__codec.decode(
                KafkaPayload(
                    message.key(),
                    message.value(),
                    headers if headers is not None else [],
                )),
            datetime.utcfromtimestamp(message.timestamp()[1] / 1000.0),
        )

        self.__offsets[result.partition] = result.get_next_offset()

        return result
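
The docstring above spells out the error contract of ``poll``. A short consumption-loop sketch built only on that contract; the surrounding ``consumer`` object and the error-handling choices are assumptions for illustration:

def consume_forever(consumer) -> None:
    # Illustrative loop based on the contract documented above; retrying on
    # transport errors is an example policy, not a requirement.
    while True:
        try:
            message = consumer.poll(1.0)  # block for at most one second
        except EndOfPartition as error:
            # Caught up on one partition; other partitions (and later polls)
            # may still yield messages.
            print(f"reached end of {error.partition} at offset {error.offset}")
            continue
        except TransportError:
            # Transient transport failure; simply retry.
            continue

        if message is None:
            continue  # timed out without receiving a message

        print(f"{message.partition} offset={message.offset}: {message.payload!r}")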
Example 15
def test_synchronized_consumer_pause_resume() -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker: DummyBroker[int] = DummyBroker()
    broker.create_topic(topic, partitions=1)
    consumer: Consumer[int] = DummyConsumer(broker, "consumer")
    producer: Producer[int] = DummyProducer(broker)
    messages = [producer.produce(topic, i).result(1.0) for i in range(2)]

    commit_log_broker: DummyBroker[Commit] = DummyBroker()
    commit_log_broker.create_topic(commit_log_topic, partitions=1)
    commit_log_consumer: Consumer[Commit] = DummyConsumer(
        commit_log_broker, "commit-log-consumer")
    commit_log_producer: Producer[Commit] = DummyProducer(commit_log_broker)

    synchronized_consumer: Consumer[int] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        # TODO: This test is not ideal -- there are no guarantees that the
        # commit log worker has subscribed and started polling yet.
        with assert_changes(synchronized_consumer.paused, [],
                            [Partition(topic, 0)]), assert_changes(
                                consumer.paused, [], [Partition(topic, 0)]):
            synchronized_consumer.pause([Partition(topic, 0)])

        # Advancing the commit log offset should not cause the consumer to
        # resume, since it has been explicitly paused.
        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader", Partition(topic, 0),
                       messages[0].get_next_offset()),
            ).result(),
        )

        with assert_does_not_change(consumer.paused, [Partition(topic, 0)]):
            assert synchronized_consumer.poll(0) is None

        # Resuming the partition does not immediately cause the partition to
        # resume, but it should look as if it is resumed to the caller.
        with assert_changes(synchronized_consumer.paused,
                            [Partition(topic, 0)], []), assert_does_not_change(
                                consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.resume([Partition(topic, 0)])

        # The partition should be resumed on the next poll call, however.
        with assert_changes(consumer.paused, [Partition(topic, 0)], []):
            assert synchronized_consumer.poll(0) == messages[0]

        # Pausing due to hitting the offset fence should not appear as a paused
        # partition to the caller.
        with assert_does_not_change(synchronized_consumer.paused,
                                    []), assert_changes(
                                        consumer.paused, [],
                                        [Partition(topic, 0)]):
            assert synchronized_consumer.poll(0) is None

        # Other pause and resume actions should not cause the inner consumer to
        # change its state while up against the fence.
        with assert_changes(synchronized_consumer.paused, [],
                            [Partition(topic, 0)]), assert_does_not_change(
                                consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.pause([Partition(topic, 0)])

        with assert_changes(synchronized_consumer.paused,
                            [Partition(topic, 0)], []), assert_does_not_change(
                                consumer.paused, [Partition(topic, 0)]):
            synchronized_consumer.resume([Partition(topic, 0)])
Example 16
def test_parallel_transform_step() -> None:
    next_step = Mock()

    messages = [
        Message(
            Partition(Topic("test"), 0),
            i,
            KafkaPayload(None, b"\x00" * size, None),
            datetime.now(),
        ) for i, size in enumerate([1000, 1000, 2000, 2000])
    ]

    starting_processes = get_subprocess_count()
    worker_processes = 2
    manager_processes = 1
    metrics = TestingMetricsBackend()

    with assert_changes(
            get_subprocess_count,
            starting_processes,
            starting_processes + worker_processes + manager_processes,
    ), assert_changes(
            lambda: metrics.calls,
        [],
        [
            GaugeCall("batches_in_progress", value, tags=None)
            for value in [0.0, 1.0, 2.0]
        ],
    ):
        transform_step = ParallelTransformStep(
            transform_payload_expand,
            next_step,
            processes=worker_processes,
            max_batch_size=5,
            max_batch_time=60,
            input_block_size=4096,
            output_block_size=4096,
            metrics=metrics,
        )

        for message in messages:
            transform_step.poll()
            transform_step.submit(message)

        transform_step.close()

    metrics.calls.clear()

    with assert_changes(
            get_subprocess_count,
            starting_processes + worker_processes + manager_processes,
            starting_processes,
    ), assert_changes(
            lambda: metrics.calls,
        [],
        [
            GaugeCall("batches_in_progress", value, tags=None)
            for value in [1.0, 0.0]
        ],
    ):
        transform_step.join()

    assert next_step.submit.call_count == len(messages)
Example 17
def test_tick_consumer() -> None:
    topic = Topic("messages")

    broker: DummyBroker[int] = DummyBroker()
    broker.create_topic(topic, partitions=2)

    producer: DummyProducer[int] = DummyProducer(broker)
    for partition, payloads in enumerate([[0, 1, 2], [0]]):
        for payload in payloads:
            producer.produce(Partition(topic, partition), payload).result()

    inner_consumer: Consumer[int] = DummyConsumer(broker, "group")

    consumer = TickConsumer(inner_consumer)

    consumer.subscribe([topic])

    assert consumer.tell() == {
        Partition(topic, 0): 0,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 0,
        Partition(topic, 1): 0,
    }

    # consume 0, 0
    assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 0,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 1,
        Partition(topic, 1): 0,
    }

    # consume 0, 1
    assert consumer.poll() == Message(
        Partition(topic, 0),
        0,
        Tick(offsets=Interval(0, 1), timestamps=Interval(epoch, epoch)),
        epoch,
    )

    assert consumer.tell() == {
        Partition(topic, 0): 1,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 0,
    }

    # consume 0, 2
    assert consumer.poll() == Message(
        Partition(topic, 0),
        1,
        Tick(offsets=Interval(1, 2), timestamps=Interval(epoch, epoch)),
        epoch,
    )

    assert consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 3,
        Partition(topic, 1): 0,
    }

    # consume 1, 0
    assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 3,
        Partition(topic, 1): 1,
    }

    # consume no message
    assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 3,
        Partition(topic, 1): 1,
    }

    consumer.seek({Partition(topic, 0): 1})

    assert consumer.tell() == {
        Partition(topic, 0): 1,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 1,
        Partition(topic, 1): 1,
    }

    # consume 0, 1
    assert consumer.poll() is None

    assert consumer.tell() == {
        Partition(topic, 0): 1,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 1,
    }

    # consume 0, 2
    assert consumer.poll() == Message(
        Partition(topic, 0),
        1,
        Tick(offsets=Interval(1, 2), timestamps=Interval(epoch, epoch)),
        epoch,
    )

    assert consumer.tell() == {
        Partition(topic, 0): 2,
        Partition(topic, 1): 0,
    }

    assert inner_consumer.tell() == {
        Partition(topic, 0): 3,
        Partition(topic, 1): 1,
    }

    with pytest.raises(ConsumerError):
        consumer.seek({Partition(topic, -1): 0})
Example 18
def test_commit_codec() -> None:
    commit = Commit("group", Partition(Topic("topic"), 0), 0)
    assert commit_codec.decode(commit_codec.encode(commit)) == commit
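
The test above only exercises the round-trip property ``decode(encode(commit)) == commit``. The sketch below is one hypothetical codec satisfying that property over ``KafkaPayload``; the key/value layout is an assumption for illustration and is not necessarily the wire format used by the real ``commit_codec``, and ``Commit`` is assumed to expose ``group``, ``partition``, and ``offset`` attributes.

class IllustrativeCommitCodec:
    # Hypothetical layout: key = "topic:partition:group", value = offset.
    def encode(self, commit: Commit) -> KafkaPayload:
        key = f"{commit.partition.topic.name}:{commit.partition.index}:{commit.group}"
        return KafkaPayload(key.encode("utf-8"), str(commit.offset).encode("utf-8"), [])

    def decode(self, payload: KafkaPayload) -> Commit:
        assert payload.key is not None and payload.value is not None
        topic_name, index, group = payload.key.decode("utf-8").split(":", 2)
        return Commit(group, Partition(Topic(topic_name), int(index)),
                      int(payload.value.decode("utf-8")))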
Example 19
    def test_working_offsets(self) -> None:
        payloads = self.get_payloads()

        with self.get_topic() as topic:
            with closing(self.get_producer()) as producer:
                messages = [
                    producer.produce(topic, next(payloads)).result(5.0)
                ]

            def on_assign(partitions: Mapping[Partition, int]) -> None:
                # NOTE: This will eventually need to be controlled by a generalized
                # consumer auto offset reset setting.
                assert (partitions == consumer.tell() == {
                    messages[0].partition: messages[0].offset
                })

            consumer = self.get_consumer()
            consumer.subscribe([topic], on_assign=on_assign)

            for i in range(5):
                message = consumer.poll(1.0)
                if message is not None:
                    break
                else:
                    time.sleep(1.0)
            else:
                raise Exception("assignment never received")

            assert message == messages[0]

            # The first call to ``poll`` should raise ``EndOfPartition``.
            with assert_does_not_change(
                    consumer.tell, {message.partition: message.next_offset
                                    }), pytest.raises(EndOfPartition):
                consumer.poll(1.0)

            # It should otherwise be safe to try to read the first missing
            # offset (index) in the partition.
            with assert_does_not_change(
                    consumer.tell, {message.partition: message.next_offset}):
                assert consumer.poll(1.0) is None

            with assert_changes(
                    consumer.tell,
                {message.partition: message.next_offset},
                {message.partition: message.offset},
            ):
                consumer.seek({message.partition: message.offset})

            with assert_changes(
                    consumer.tell,
                {message.partition: message.offset},
                {message.partition: message.next_offset},
            ):
                assert consumer.poll(1.0) == messages[0]

            # Seeking beyond the first missing index should work but subsequent
            # reads should error. (We don't know if this offset is valid or not
            # until we try to fetch a message.)
            with assert_changes(
                    consumer.tell,
                {message.partition: message.next_offset},
                {message.partition: message.next_offset + 1},
            ):
                consumer.seek({message.partition: message.next_offset + 1})

            # Offsets should not be advanced after a failed poll.
            with assert_does_not_change(
                    consumer.tell,
                {message.partition: message.next_offset + 1
                 }), pytest.raises(ConsumerError):
                consumer.poll(1.0)

            # Trying to seek on an unassigned partition should error.
            with assert_does_not_change(
                    consumer.tell,
                {message.partition: message.next_offset + 1
                 }), pytest.raises(ConsumerError):
                consumer.seek({message.partition: 0, Partition(topic, -1): 0})

            # Trying to seek to a negative offset should error.
            with assert_does_not_change(
                    consumer.tell,
                {message.partition: message.next_offset + 1
                 }), pytest.raises(ConsumerError):
                consumer.seek({message.partition: -1})
Example 20
    def test_pause_resume(self) -> None:
        payloads = self.get_payloads()

        with self.get_topic() as topic, closing(
                self.get_consumer()) as consumer, closing(
                    self.get_producer()) as producer:
            messages = [
                producer.produce(topic, next(payloads)).result(timeout=5.0)
                for i in range(5)
            ]

            consumer.subscribe([topic])

            assert consumer.poll(10.0) == messages[0]
            assert consumer.paused() == []

            # XXX: Unfortunately, there is really no way to prove that this
            # consumer would return the message other than by waiting a while.
            with assert_changes(consumer.paused, [], [Partition(topic, 0)]):
                consumer.pause([Partition(topic, 0)])

            assert consumer.poll(1.0) is None

            # We should pick up where we left off when we resume the partition.
            with assert_changes(consumer.paused, [Partition(topic, 0)], []):
                consumer.resume([Partition(topic, 0)])

            assert consumer.poll(5.0) == messages[1]

            # Calling ``seek`` should have a side effect, even if no messages
            # are consumed before calling ``pause``.
            with assert_changes(
                    consumer.tell,
                {Partition(topic, 0): messages[1].next_offset},
                {Partition(topic, 0): messages[3].offset},
            ):
                consumer.seek({Partition(topic, 0): messages[3].offset})
                consumer.pause([Partition(topic, 0)])
                assert consumer.poll(1.0) is None
                consumer.resume([Partition(topic, 0)])

            assert consumer.poll(5.0) == messages[3]

            # It is still allowable to call ``seek`` on a paused partition.
            # When consumption resumes, we would expect to see the side effect
            # of that seek.
            consumer.pause([Partition(topic, 0)])
            with assert_changes(
                    consumer.tell,
                {Partition(topic, 0): messages[3].next_offset},
                {Partition(topic, 0): messages[0].offset},
            ):
                consumer.seek({Partition(topic, 0): messages[0].offset})
                assert consumer.poll(1.0) is None
                consumer.resume([Partition(topic, 0)])

            assert consumer.poll(5.0) == messages[0]

            with assert_does_not_change(consumer.paused,
                                        []), pytest.raises(ConsumerError):
                consumer.pause([Partition(topic, 0), Partition(topic, 1)])

            with assert_changes(consumer.paused, [], [Partition(topic, 0)]):
                consumer.pause([Partition(topic, 0)])

            with assert_does_not_change(
                    consumer.paused,
                [Partition(topic, 0)]), pytest.raises(ConsumerError):
                consumer.resume([Partition(topic, 0), Partition(topic, 1)])
Example 21
def test_topic_contains_partition() -> None:
    assert Partition(Topic("topic"), 0) in Topic("topic")
    assert Partition(Topic("topic"), 0) not in Topic("other-topic")
    assert Partition(Topic("other-topic"), 0) not in Topic("topic")
Example 22
def test_synchronized_consumer(broker: Broker[KafkaPayload]) -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker.create_topic(topic, partitions=1)
    broker.create_topic(commit_log_topic, partitions=1)

    consumer = broker.get_consumer("consumer")
    producer = broker.get_producer()
    commit_log_consumer = broker.get_consumer("commit-log-consumer")

    messages = [
        producer.produce(topic, KafkaPayload(None, f"{i}".encode("utf8"),
                                             [])).result(1.0) for i in range(6)
    ]

    synchronized_consumer: Consumer[KafkaPayload] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader-a", "leader-b"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        # The consumer should not consume any messages until it receives a
        # commit from both groups that are being followed.
        with assert_changes(consumer.paused, [],
                            [Partition(topic, 0)]), assert_changes(
                                consumer.tell, {},
                                {Partition(topic, 0): messages[0].offset}):
            assert synchronized_consumer.poll(0.0) is None

        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader-a", Partition(topic, 0),
                           messages[0].next_offset)),
            ).result(),
        )

        # The consumer should remain paused, since it needs both groups to
        # advance before it may continue.
        with assert_does_not_change(
                consumer.paused,
            [Partition(topic, 0)]), assert_does_not_change(
                consumer.tell, {Partition(topic, 0): messages[0].offset}):
            assert synchronized_consumer.poll(0.0) is None

        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader-b", Partition(topic, 0),
                           messages[0].next_offset)),
            ).result(),
        )

        # The consumer should be able to resume consuming, since both consumers
        # have processed the first message.
        with assert_changes(consumer.paused, [Partition(topic, 0)],
                            []), assert_changes(
                                consumer.tell,
                                {Partition(topic, 0): messages[0].offset},
                                {Partition(topic, 0): messages[0].next_offset},
                            ):
            assert synchronized_consumer.poll(0.0) == messages[0]

        # After consuming the one available message, the consumer should be
        # paused again until the remote offsets advance.
        with assert_changes(consumer.paused, [],
                            [Partition(topic, 0)]), assert_does_not_change(
                                consumer.tell,
                                {Partition(topic, 0): messages[1].offset}):
            assert synchronized_consumer.poll(0.0) is None

        # Emulate the unlikely (but possible) scenario of the leader offsets
        # being within a series of compacted (deleted) messages by:
        # 1. moving the remote offsets forward, so that the partition is resumed
        # 2. seeking the consumer beyond the remote offsets

        producer.produce(
            commit_log_topic,
            commit_codec.encode(
                Commit("leader-a", Partition(topic, 0), messages[3].offset)),
        ).result()

        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader-b", Partition(topic, 0),
                           messages[5].offset)),
            ).result(),
        )

        # The consumer should be able to resume consuming, since both consumers
        # have processed the first message.
        with assert_changes(consumer.paused, [Partition(topic, 0)],
                            []), assert_changes(
                                consumer.tell,
                                {Partition(topic, 0): messages[1].offset},
                                {Partition(topic, 0): messages[1].next_offset},
                            ):
            assert synchronized_consumer.poll(0.0) == messages[1]

        # At this point, we manually seek the consumer offset, to emulate messages being skipped.
        with assert_changes(
                consumer.tell,
            {Partition(topic, 0): messages[2].offset},
            {Partition(topic, 0): messages[4].offset},
        ):
            consumer.seek({Partition(topic, 0): messages[4].offset})

        # Since the (effective) remote offset is the offset for message #3 (via
        # ``leader-a``), and the local offset is the offset of message #4, when
        # message #4 is consumed, it should be discarded and the offset should
        # be rolled back to wait for the commit log to advance.
        with assert_changes(consumer.paused, [],
                            [Partition(topic, 0)]), assert_does_not_change(
                                consumer.tell,
                                {Partition(topic, 0): messages[4].offset}):
            assert synchronized_consumer.poll(0.0) is None

        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader-a", Partition(topic, 0),
                           messages[5].offset)),
            ).result(),
        )

        # The consumer should be able to resume consuming.
        with assert_changes(consumer.paused, [Partition(topic, 0)],
                            []), assert_changes(
                                consumer.tell,
                                {Partition(topic, 0): messages[4].offset},
                                {Partition(topic, 0): messages[4].next_offset},
                            ):
            assert synchronized_consumer.poll(0.0) == messages[4]
Example 23
    def test_flattened_tags(self):
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        # | and = are intentional to test the escaping logic when computing the
        # flattened_tags on tag deletions
        self.event["data"]["tags"] = []
        self.event["data"]["tags"].append(["browser|name", "foo=1"])
        self.event["data"]["tags"].append(["browser|to_delete", "foo=2"])
        self.event["data"]["tags"].append(["notbrowser", "foo\\3"])
        self.event["data"]["tags"].append(["notbrowser2", "foo4"])
        self.write_raw_events(self.event)

        project_id = self.project_id

        def _fetch_flattened_tags():
            return json.loads(
                self.app.post(
                    "/query",
                    data=json.dumps({
                        "project": [project_id],
                        "selected_columns": [
                            "_tags_flattened",
                            "tags.key",
                            "tags.value",
                        ],
                    }),
                ).data)["data"]

        assert _fetch_flattened_tags() == [{
            "tags.key": [
                "browser|name",
                "browser|to_delete",
                "notbrowser",
                "notbrowser2",
            ],
            "tags.value": ["foo=1", "foo=2", "foo\\3", "foo4"],
            "_tags_flattened":
            "|browser\\|name=foo\\=1||browser\\|to_delete=foo\\=2||notbrowser=foo\\\\3||notbrowser2=foo4|",
        }]

        timestamp = datetime.now(tz=pytz.utc)

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    "end_delete_tag",
                    {
                        "project_id": project_id,
                        "tag": "browser|to_delete",
                        "datetime":
                        timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )).encode("utf-8"),
            ),
            datetime.now(),
        )

        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        assert _fetch_flattened_tags() == [{
            "tags.key": ["browser|name", "notbrowser", "notbrowser2"],
            "tags.value": ["foo=1", "foo\\3", "foo4"],
            "_tags_flattened":
            "|browser\\|name=foo\\=1||notbrowser=foo\\\\3||notbrowser2=foo4|",
        }]
Example 24
    def test_consumer(self) -> None:
        group = uuid.uuid1().hex
        payloads = self.get_payloads()

        with self.get_topic() as topic:
            with closing(self.get_producer()) as producer:
                messages = [
                    future.result(timeout=5.0) for future in [
                        producer.produce(topic, next(payloads))
                        for i in range(2)
                    ]
                ]

            consumer = self.get_consumer(group)

            def assignment_callback(
                    partitions: Mapping[Partition, int]) -> None:
                assignment_callback.called = True
                assert partitions == {Partition(topic, 0): messages[0].offset}

                consumer.seek({Partition(topic, 0): messages[1].offset})

                with pytest.raises(ConsumerError):
                    consumer.seek({Partition(topic, 1): 0})

            assignment_callback.called = False

            def revocation_callback(partitions: Sequence[Partition]) -> None:
                revocation_callback.called = True
                assert partitions == [Partition(topic, 0)]
                assert consumer.tell() == {
                    Partition(topic, 0): messages[1].offset
                }

                # Not sure why you'd want to do this, but it shouldn't error.
                consumer.seek({Partition(topic, 0): messages[0].offset})

            revocation_callback.called = False

            # TODO: It'd be much nicer if ``subscribe`` returned a future that we could
            # use to wait for assignment, but we'd need to be very careful to avoid
            # edge cases here. It's probably not worth the complexity for now.
            consumer.subscribe([topic],
                               on_assign=assignment_callback,
                               on_revoke=revocation_callback)

            with assert_changes(
                    lambda: assignment_callback.called, False,
                    True), assert_changes(
                        consumer.tell, {},
                        {Partition(topic, 0): messages[1].next_offset}):
                message = consumer.poll(10.0)  # XXX: getting the subscription is slow

            assert isinstance(message, Message)
            assert message.partition == Partition(topic, 0)
            assert message.offset == messages[1].offset
            assert message.payload == messages[1].payload

            consumer.seek({Partition(topic, 0): messages[0].offset})
            assert consumer.tell() == {Partition(topic, 0): messages[0].offset}

            with pytest.raises(ConsumerError):
                consumer.seek({Partition(topic, 1): 0})

            with assert_changes(consumer.paused, [], [Partition(topic, 0)]):
                consumer.pause([Partition(topic, 0)])

            # Even if there is another message available, ``poll`` should
            # return ``None`` if the consumer is paused.
            assert consumer.poll(1.0) is None

            with assert_changes(consumer.paused, [Partition(topic, 0)], []):
                consumer.resume([Partition(topic, 0)])

            message = consumer.poll(1.0)
            assert isinstance(message, Message)
            assert message.partition == Partition(topic, 0)
            assert message.offset == messages[0].offset
            assert message.payload == messages[0].payload

            assert consumer.commit_offsets() == {}

            consumer.stage_offsets({message.partition: message.next_offset})

            with pytest.raises(ConsumerError):
                consumer.stage_offsets({Partition(Topic("invalid"), 0): 0})

            assert consumer.commit_offsets() == {
                Partition(topic, 0): message.next_offset
            }

            assert consumer.tell() == {Partition(topic, 0): messages[1].offset}

            consumer.unsubscribe()

            with assert_changes(lambda: revocation_callback.called, False,
                                True):
                assert consumer.poll(1.0) is None

            assert consumer.tell() == {}

            with pytest.raises(ConsumerError):
                consumer.seek({Partition(topic, 0): messages[0].offset})

            revocation_callback.called = False

            with assert_changes(lambda: consumer.closed, False,
                                True), assert_does_not_change(
                                    lambda: revocation_callback.called, False):
                consumer.close()

            # Make sure all public methods (except ``close``) error if called
            # after the consumer has been closed.

            with pytest.raises(RuntimeError):
                consumer.subscribe([topic])

            with pytest.raises(RuntimeError):
                consumer.unsubscribe()

            with pytest.raises(RuntimeError):
                consumer.poll()

            with pytest.raises(RuntimeError):
                consumer.tell()

            with pytest.raises(RuntimeError):
                consumer.seek({Partition(topic, 0): messages[0].offset})

            with pytest.raises(RuntimeError):
                consumer.pause([Partition(topic, 0)])

            with pytest.raises(RuntimeError):
                consumer.resume([Partition(topic, 0)])

            with pytest.raises(RuntimeError):
                consumer.paused()

            with pytest.raises(RuntimeError):
                consumer.stage_offsets({})

            with pytest.raises(RuntimeError):
                consumer.commit_offsets()

            consumer.close()  # should be safe, even if the consumer is already closed

            consumer = self.get_consumer(group)

            revocation_callback = mock.MagicMock()

            consumer.subscribe([topic], on_revoke=revocation_callback)

            message = consumer.poll(10.0)  # XXX: getting the subscription is slow
            assert isinstance(message, Message)
            assert message.partition == Partition(topic, 0)
            assert message.offset == messages[1].offset
            assert message.payload == messages[1].payload

            try:
                assert consumer.poll(1.0) is None
            except EndOfPartition as error:
                assert error.partition == Partition(topic, 0)
                assert error.offset == message.next_offset
            else:
                raise AssertionError("expected EndOfPartition error")

            with assert_changes(lambda: revocation_callback.called, False,
                                True):
                consumer.close()
Example 25
def test_stream_processor_lifecycle() -> None:
    topic = Topic("topic")

    consumer = mock.Mock()
    strategy = mock.Mock()
    factory = mock.Mock()
    factory.create.return_value = strategy

    metrics = TestingMetricsBackend()

    with assert_changes(lambda: consumer.subscribe.call_count, 0, 1):
        processor: StreamProcessor[int] = StreamProcessor(
            consumer, topic, factory, metrics)

    # The processor should accept heartbeat messages without an assignment or
    # active processor.
    consumer.poll.return_value = None
    processor._run_once()

    message = Message(Partition(topic, 0), 0, 0, datetime.now())

    # XXX: ``call().args``, ``call().kwargs`` are not available until 3.8
    subscribe_args, subscribe_kwargs = consumer.subscribe.call_args
    assert subscribe_args[0] == [topic]

    assignment_callback = subscribe_kwargs["on_assign"]
    revocation_callback = subscribe_kwargs["on_revoke"]

    # Assignment should succeed if no assignment already exists.
    offsets = {Partition(topic, 0): 0}
    assignment_callback(offsets)

    # If ``Consumer.poll`` doesn't return a message, we should poll the
    # processing strategy, but not submit anything for processing.
    consumer.poll.return_value = None
    with assert_changes(lambda: strategy.poll.call_count, 0,
                        1), assert_does_not_change(
                            lambda: strategy.submit.call_count, 0):
        processor._run_once()

    # If ``Consumer.poll`` **does** return a message, we should poll the
    # processing strategy and submit the message for processing.
    consumer.poll.return_value = message
    with assert_changes(lambda: strategy.poll.call_count, 1,
                        2), assert_changes(lambda: strategy.submit.call_count,
                                           0, 1):
        processor._run_once()
        assert strategy.submit.call_args_list[-1] == mock.call(message)

    # If the message is rejected by the processing strategy, the consumer
    # should be paused and the message should be held for later.
    consumer.tell.return_value = offsets
    consumer.poll.return_value = message
    strategy.submit.side_effect = MessageRejected()
    with assert_changes(lambda: consumer.pause.call_count, 0, 1):
        processor._run_once()
        assert strategy.submit.call_args_list[-1] == mock.call(message)

    # If ``Consumer.poll`` returns a message when we expect it to be paused,
    # we should raise an exception.
    with pytest.raises(InvalidStateError):
        processor._run_once()

    # Once the message is accepted by the processing strategy, the consumer
    # should be resumed.
    consumer.poll.return_value = None
    strategy.submit.return_value = None
    strategy.submit.side_effect = None
    with assert_changes(lambda: consumer.resume.call_count, 0, 1):
        processor._run_once()
        assert strategy.submit.call_args_list[-1] == mock.call(message)

    metric = metrics.calls[0]
    assert isinstance(metric, Timing)
    assert metric.name == "pause_duration_ms"

    # Assignment should fail if one already exists.
    with pytest.raises(InvalidStateError):
        assignment_callback({Partition(topic, 0): 0})

    # Revocation should succeed with an active assignment, and cause the
    # strategy instance to be closed.
    with assert_changes(lambda: strategy.close.call_count, 0, 1):
        revocation_callback([Partition(topic, 0)])

    # Revocation should fail without an active assignment.
    with pytest.raises(InvalidStateError):
        revocation_callback([Partition(topic, 0)])

    # The processor should not accept non-heartbeat messages without an
    # assignment or active processor.
    consumer.poll.return_value = message
    with pytest.raises(InvalidStateError):
        processor._run_once()

    with assert_changes(lambda: consumer.close.call_count, 0, 1):
        processor._shutdown()
Example 26
def test_message_pickling() -> None:
    message = Message(Partition(Topic("topic"), 0), 0, b"", datetime.now())
    assert pickle.loads(pickle.dumps(message)) == message