Code example #1
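Verifies that ConsumerWorker stores the Kafka offset and partition in ClickHouse alongside the processed event.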
    def test_offsets(self):
        event = self.event

        message: Message[KafkaPayload] = Message(
            Partition(Topic("events"), 456),
            123,
            KafkaPayload(
                None, json.dumps((0, "insert", event)).encode("utf-8")
            ),  # event doesn't really matter
            datetime.now(),
        )

        test_worker = ConsumerWorker(
            self.dataset,
            producer=FakeConfluentKafkaProducer(),
            replacements_topic=Topic(
                enforce_table_writer(self.dataset)
                .get_stream_loader()
                .get_replacement_topic_spec()
                .topic_name
            ),
            metrics=self.metrics,
        )
        batch = [test_worker.process_message(message)]
        test_worker.flush_batch(batch)

        assert self.clickhouse.execute(
            "SELECT project_id, event_id, offset, partition FROM %s" % self.table
        ) == [(self.event["project_id"], self.event["event_id"], 123, 456)]
Code example #2
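Verifies that an exception raised by the commit log consumer before any partition is assigned propagates out of the SynchronizedConsumer constructor.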
def test_synchronized_consumer_worker_crash_before_assignment(
    broker: Broker[KafkaPayload],
) -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker.create_topic(topic, partitions=1)
    broker.create_topic(commit_log_topic, partitions=1)

    poll_called = Event()

    class BrokenConsumerException(Exception):
        pass

    class BrokenConsumer(LocalConsumer[KafkaPayload]):
        def poll(
            self,
            timeout: Optional[float] = None
        ) -> Optional[Message[KafkaPayload]]:
            try:
                raise BrokenConsumerException()
            finally:
                poll_called.set()

    consumer = broker.get_consumer("consumer")
    commit_log_consumer: Consumer[KafkaPayload] = BrokenConsumer(
        broker, "commit-log-consumer")

    with pytest.raises(BrokenConsumerException):
        SynchronizedConsumer(
            consumer,
            commit_log_consumer,
            commit_log_topic=commit_log_topic,
            commit_log_groups={"leader"},
        )
Code example #3
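Verifies that ConsumerWorker.process_message drops (returns None for) an event whose timestamp is 300 days old.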
    def test_skip_too_old(self):
        test_worker = ConsumerWorker(
            self.dataset,
            producer=FakeConfluentKafkaProducer(),
            replacements_topic=Topic(
                enforce_table_writer(self.dataset)
                .get_stream_loader()
                .get_replacement_topic_spec()
                .topic_name
            ),
            metrics=self.metrics,
        )

        event = self.event
        old_timestamp = datetime.utcnow() - timedelta(days=300)
        old_timestamp_str = old_timestamp.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
        event["datetime"] = old_timestamp_str
        event["data"]["datetime"] = old_timestamp_str
        event["data"]["received"] = int(calendar.timegm(old_timestamp.timetuple()))

        message: Message[KafkaPayload] = Message(
            Partition(Topic("events"), 1),
            42,
            KafkaPayload(None, json.dumps((0, "insert", event)).encode("utf-8")),
            datetime.now(),
        )

        assert test_worker.process_message(message) is None
Code example #4
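Exercises FilterStep: a message whose payload is False is dropped, a True payload is forwarded to the next step, and poll, close and join are delegated.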
def test_filter() -> None:
    next_step = Mock()

    def test_function(message: Message[bool]) -> bool:
        return message.payload

    filter_step = FilterStep(test_function, next_step)

    fail_message = Message(Partition(Topic("topic"), 0), 0, False,
                           datetime.now())

    with assert_does_not_change(lambda: next_step.submit.call_count, 0):
        filter_step.submit(fail_message)

    pass_message = Message(Partition(Topic("topic"), 0), 0, True,
                           datetime.now())

    with assert_changes(lambda: next_step.submit.call_count, 0, 1):
        filter_step.submit(pass_message)

    assert next_step.submit.call_args == call(pass_message)

    with assert_changes(lambda: next_step.poll.call_count, 0, 1):
        filter_step.poll()

    with assert_changes(lambda: next_step.close.call_count, 0,
                        1), assert_changes(lambda: next_step.join.call_count,
                                           0, 1):
        filter_step.join()
Code example #5
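Verifies that a commit log consumer crash after assignment surfaces on the next poll as a RuntimeError whose __cause__ is the original exception, and that the usual closed-consumer error is raised after close.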
def test_synchronized_consumer_worker_crash_after_assignment(
    broker: Broker[KafkaPayload],
) -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker.create_topic(topic, partitions=1)
    broker.create_topic(commit_log_topic, partitions=1)

    poll_called = Event()

    class BrokenConsumerException(Exception):
        pass

    class BrokenConsumer(LocalConsumer[KafkaPayload]):
        def poll(
            self,
            timeout: Optional[float] = None
        ) -> Optional[Message[KafkaPayload]]:
            if not self.tell():
                return super().poll(timeout)
            else:
                try:
                    raise BrokenConsumerException()
                finally:
                    poll_called.set()

    consumer: Consumer[KafkaPayload] = broker.get_consumer("consumer")
    commit_log_consumer: Consumer[KafkaPayload] = BrokenConsumer(
        broker, "commit-log-consumer")

    synchronized_consumer: Consumer[KafkaPayload] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    assert poll_called.wait(1.0) is True

    # If the worker thread has exited without a close request, calling ``poll``
    # should raise an error that originated from the worker thread.

    with pytest.raises(RuntimeError) as e:
        synchronized_consumer.poll(0.0)

    assert type(e.value.__cause__) is BrokenConsumerException

    # If a close request has been sent, the normal runtime error due to the
    # closed consumer should be raised instead.

    synchronized_consumer.close()

    with pytest.raises(RuntimeError) as e:
        synchronized_consumer.poll(0.0)

    assert type(e.value.__cause__) is not BrokenConsumerException
Code example #6
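Verifies that the SynchronizedConsumer keeps delivering messages when the commit log consumer reaches the end of its partition.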
def test_synchronized_consumer_handles_end_of_partition(
    broker: Broker[KafkaPayload],
) -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker.create_topic(topic, partitions=1)
    broker.create_topic(commit_log_topic, partitions=1)

    consumer = broker.get_consumer("consumer", enable_end_of_partition=True)
    producer = broker.get_producer()
    commit_log_consumer = broker.get_consumer("commit-log-consumer")

    messages = [
        producer.produce(topic, KafkaPayload(None, f"{i}".encode("utf8"),
                                             [])).result(1.0) for i in range(2)
    ]

    synchronized_consumer: Consumer[KafkaPayload] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader", Partition(topic, 0),
                           messages[0].next_offset), ),
            ).result(),
        )

        assert synchronized_consumer.poll(0) == messages[0]

        # If the commit log consumer does not handle EOF, it will have crashed
        # here and will never return the next message.
        wait_for_consumer(
            commit_log_consumer,
            producer.produce(
                commit_log_topic,
                commit_codec.encode(
                    Commit("leader", Partition(topic, 0),
                           messages[1].next_offset), ),
            ).result(),
        )

        assert synchronized_consumer.poll(0) == messages[1]
Code example #7
File: test_synchronized.py  Project: ruezetle/snuba
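The same end-of-partition test from an older revision, built on DummyBroker, DummyConsumer and DummyProducer with separate brokers for the data and commit log topics.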
def test_synchronized_consumer_handles_end_of_partition() -> None:
    topic = Topic("topic")
    commit_log_topic = Topic("commit-log")

    broker: DummyBroker[int] = DummyBroker()
    broker.create_topic(topic, partitions=1)
    consumer: Consumer[int] = DummyConsumer(broker, "consumer")
    producer: Producer[int] = DummyProducer(broker)
    messages = [producer.produce(topic, i).result(1.0) for i in range(2)]

    commit_log_broker: DummyBroker[Commit] = DummyBroker()
    commit_log_broker.create_topic(commit_log_topic, partitions=1)
    commit_log_consumer: Consumer[Commit] = DummyConsumer(
        commit_log_broker, "commit-log-consumer", enable_end_of_partition=True)
    commit_log_producer: Producer[Commit] = DummyProducer(commit_log_broker)

    synchronized_consumer: Consumer[int] = SynchronizedConsumer(
        consumer,
        commit_log_consumer,
        commit_log_topic=commit_log_topic,
        commit_log_groups={"leader"},
    )

    with closing(synchronized_consumer):
        synchronized_consumer.subscribe([topic])

        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader", Partition(topic, 0),
                       messages[0].get_next_offset()),
            ).result(),
        )

        assert synchronized_consumer.poll(0) == messages[0]

        # If the commit log consumer does not handle EOF, it will have crashed
        # here and will never return the next message.
        wait_for_consumer(
            commit_log_consumer,
            commit_log_producer.produce(
                commit_log_topic,
                Commit("leader", Partition(topic, 0),
                       messages[1].get_next_offset()),
            ).result(),
        )

        assert synchronized_consumer.poll(0) == messages[1]
Code example #8
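Shows ProducerEncodingWrapper: the delivery future resolves with the original payload while the consumer receives the encoded (reversed) payload, with all other message attributes unchanged.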
def test_encoding_producer(broker: Broker[str]) -> None:
    topic = Topic("test")
    broker.create_topic(topic, 1)

    class ReverseEncoder(Encoder[str, str]):
        def encode(self, value: str) -> str:
            return "".join(value[::-1])

    producer = ProducerEncodingWrapper(broker.get_producer(), ReverseEncoder())
    decoded_message = producer.produce(topic, "hello").result()
    assert decoded_message.payload == "hello"

    consumer = broker.get_consumer("group")
    consumer.subscribe([topic])

    encoded_message = consumer.poll()
    assert encoded_message is not None

    # The payload returned by the consumer should not be decoded.
    assert encoded_message.payload == "olleh"

    # All other attributes should be the same.
    for attribute in set(Message.__slots__) - {"payload"}:
        assert getattr(encoded_message,
                       attribute) == getattr(decoded_message, attribute)
Code example #9
File: views.py  Project: jiankunking/snuba
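Debug HTTP endpoint that wraps the request body in a Message[KafkaPayload] and dispatches it to ConsumerWorker for inserts or ReplacerWorker for replacements.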
    def eventstream(*, dataset: Dataset):
        ensure_table_exists(dataset)
        record = json.loads(http_request.data)

        version = record[0]
        if version != 2:
            raise RuntimeError("Unsupported protocol version: %s" % record)

        message: Message[KafkaPayload] = Message(
            Partition(Topic("topic"), 0),
            0,
            KafkaPayload(None, http_request.data),
            datetime.now(),
        )

        type_ = record[1]
        metrics = DummyMetricsBackend()
        if type_ == "insert":
            from snuba.consumer import ConsumerWorker

            worker = ConsumerWorker(dataset, metrics=metrics)
        else:
            from snuba.replacer import ReplacerWorker

            worker = ReplacerWorker(clickhouse_rw, dataset, metrics=metrics)

        processed = worker.process_message(message)
        if processed is not None:
            batch = [processed]
            worker.flush_batch(batch)

        return ("ok", 200, {"Content-Type": "text/plain"})
Code example #10
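Verifies that StreamProcessor.run terminates the strategy, closes the consumer and re-raises when the strategy's submit raises.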
def test_stream_processor_termination_on_error() -> None:
    topic = Topic("test")

    consumer = mock.Mock()
    consumer.poll.return_value = Message(Partition(topic, 0), 0, 0,
                                         datetime.now())

    exception = NotImplementedError("error")

    strategy = mock.Mock()
    strategy.submit.side_effect = exception

    factory = mock.Mock()
    factory.create.return_value = strategy

    processor: StreamProcessor[int] = StreamProcessor(consumer, topic, factory,
                                                      TestingMetricsBackend())

    assignment_callback = consumer.subscribe.call_args.kwargs["on_assign"]
    assignment_callback({Partition(topic, 0): 0})

    with pytest.raises(Exception) as e, assert_changes(
            lambda: strategy.terminate.call_count, 0,
            1), assert_changes(lambda: consumer.close.call_count, 0, 1):
        processor.run()

    assert e.value == exception
Code example #11
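Processes an end_delete_groups replacement message and asserts the group's events no longer appear in the issue count.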
    def test_delete_groups_insert(self):
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.write_raw_events(self.event)

        assert self._issue_count(self.project_id) == [{"count": 1, "group_id": 1}]

        timestamp = datetime.now(tz=pytz.utc)

        project_id = self.project_id

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps(
                    (
                        2,
                        "end_delete_groups",
                        {
                            "project_id": project_id,
                            "group_ids": [1],
                            "datetime": timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                        },
                    )
                ).encode("utf-8"),
            ),
            datetime.now(),
        )

        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        assert self._issue_count(self.project_id) == []
Code example #12
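Exercises TransformStep: the transformed payload is submitted to the next step, and poll, close and join are delegated.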
def test_transform() -> None:
    next_step = Mock()

    def transform_function(message: Message[int]) -> int:
        return message.payload * 2

    transform_step = TransformStep(transform_function, next_step)

    original_message = Message(Partition(Topic("topic"), 0), 0, 1,
                               datetime.now())

    with assert_changes(lambda: next_step.submit.call_count, 0, 1):
        transform_step.submit(original_message)

    assert next_step.submit.call_args == call(
        Message(
            original_message.partition,
            original_message.offset,
            transform_function(original_message),
            original_message.timestamp,
        ))

    with assert_changes(lambda: next_step.poll.call_count, 0, 1):
        transform_step.poll()

    with assert_changes(lambda: next_step.close.call_count, 0,
                        1), assert_changes(lambda: next_step.join.call_count,
                                           0, 1):
        transform_step.join()
Code example #13
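Verifies that a batch is flushed as soon as max_batch_size (2) messages have been processed.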
    def test_batch_size(self, broker: Broker[int]) -> None:
        topic = Topic("topic")
        broker.create_topic(topic, partitions=1)
        producer = broker.get_producer()
        for i in [1, 2, 3]:
            producer.produce(topic, i).result()

        consumer = broker.get_consumer("group")

        worker = FakeWorker()
        batching_consumer = StreamProcessor(
            consumer,
            topic,
            BatchProcessingStrategyFactory(
                worker=worker,
                max_batch_size=2,
                max_batch_time=100,
                metrics=DummyMetricsBackend(strict=True),
            ),
        )

        for _ in range(3):
            batching_consumer._run_once()

        batching_consumer._shutdown()

        assert worker.processed == [1, 2, 3]
        assert worker.flushed == [[1, 2]]
        assert consumer.commit_offsets_calls == 1
        assert consumer.close_calls == 1
Code example #14
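Feeds a CDC payload carrying a sentry_groupedmessage table header to SnapshotAwareWorker and compares the result with the expected ProcessedMessage.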
    def test_send_message(
        self,
        value: str,
        expected: Optional[ProcessedMessage],
    ) -> None:
        storage = get_storage("groupedmessages")
        snapshot_id = uuid1()
        transact_data = TransactionData(xmin=100,
                                        xmax=200,
                                        xip_list=[120, 130])

        worker = SnapshotAwareWorker(
            storage=storage,
            producer=FakeConfluentKafkaProducer(),
            snapshot_id=str(snapshot_id),
            transaction_data=transact_data,
            replacements_topic=None,
            metrics=DummyMetricsBackend(strict=True),
        )

        message: Message[KafkaPayload] = Message(
            Partition(Topic("topic"), 0),
            1,
            KafkaPayload(
                None,
                value.encode("utf-8"),
                [("table", "sentry_groupedmessage".encode())],
            ),
            datetime.now(),
        )

        ret = worker.process_message(message)
        assert ret == expected
Code example #15
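Test helper that wraps a payload in a Message on partition 0 of the replacements topic.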
 def _wrap(self, msg: str) -> Message[KafkaPayload]:
     return Message(
         Partition(Topic("replacements"), 0),
         0,
         KafkaPayload(None, json.dumps(msg).encode("utf-8")),
         datetime.now(),
     )
Code example #16
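Verifies that ConsumerWorker.flush_batch publishes REPLACE results to the event-replacements topic with the expected keys and values.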
    def test_produce_replacement_messages(self):
        producer = FakeConfluentKafkaProducer()
        test_worker = ConsumerWorker(
            self.dataset,
            producer=producer,
            replacements_topic=Topic(
                enforce_table_writer(self.dataset)
                .get_stream_loader()
                .get_replacement_topic_spec()
                .topic_name
            ),
            metrics=self.metrics,
        )

        test_worker.flush_batch(
            [
                ProcessedMessage(
                    action=ProcessorAction.REPLACE, data=[("1", {"project_id": 1})],
                ),
                ProcessedMessage(
                    action=ProcessorAction.REPLACE, data=[("2", {"project_id": 2})],
                ),
            ]
        )

        assert [(m._topic, m._key, m._value) for m in producer.messages] == [
            ("event-replacements", b"1", b'{"project_id": 1}'),
            ("event-replacements", b"2", b'{"project_id": 2}'),
        ]
Code example #17
File: kafka.py  Project: fpacifici/snuba
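Delivery callback for the Kafka producer: on success it resolves the future with a Message built from the broker metadata, otherwise it sets a TransportError.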
    def __delivery_callback(
        self,
        future: Future[Message[KafkaPayload]],
        payload: KafkaPayload,
        error: KafkaError,
        message: ConfluentMessage,
    ) -> None:
        if error is not None:
            future.set_exception(TransportError(error))
        else:
            try:
                timestamp_type, timestamp_value = message.timestamp()
                if timestamp_type is TIMESTAMP_NOT_AVAILABLE:
                    raise ValueError("timestamp not available")

                future.set_result(
                    Message(
                        Partition(Topic(message.topic()), message.partition()),
                        message.offset(),
                        payload,
                        datetime.utcfromtimestamp(timestamp_value / 1000.0),
                    )
                )
            except Exception as error:
                future.set_exception(error)
Code example #18
File: test_groupedmessage.py  Project: ruezetle/snuba
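Test helper that builds a Message[KafkaPayload] with a fixed timestamp from a partition, offset, payload and headers.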
 def __make_msg(self, partition: int, offset: int, payload: str,
                headers: Headers) -> Message[KafkaPayload]:
     return Message(
         partition=Partition(Topic("topic"), partition),
         offset=offset,
         payload=KafkaPayload(b"key", payload.encode(), headers),
         timestamp=datetime(2019, 6, 19, 6, 46, 28),
     )
Code example #19
File: test_batching.py  Project: isabella232/snuba
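Verifies that a batch is flushed once max_batch_time (2000 ms) has elapsed, using a mocked clock.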
    def test_batch_time(self, mock_time: Any, broker: Broker[int]) -> None:
        topic = Topic("topic")
        broker.create_topic(topic, partitions=1)
        producer = broker.get_producer()
        consumer = broker.get_consumer("group")

        worker = FakeWorker()
        metrics = DummyMetricsBackend(strict=True)
        batching_consumer = StreamProcessor(
            consumer,
            topic,
            BatchProcessingStrategyFactory(
                worker=worker,
                max_batch_size=100,
                max_batch_time=2000,
                metrics=metrics,
            ),
            metrics=metrics,
        )

        mock_time.return_value = time.mktime(
            datetime(2018, 1, 1, 0, 0, 0).timetuple())

        for i in [1, 2, 3]:
            producer.produce(topic, i).result()

        for _ in range(3):
            batching_consumer._run_once()

        mock_time.return_value = time.mktime(
            datetime(2018, 1, 1, 0, 0, 1).timetuple())

        for i in [4, 5, 6]:
            producer.produce(topic, i).result()

        for _ in range(3):
            batching_consumer._run_once()

        mock_time.return_value = time.mktime(
            datetime(2018, 1, 1, 0, 0, 5).timetuple())

        for i in [7, 8, 9]:
            producer.produce(topic, i).result()

        for _ in range(3):
            batching_consumer._run_once()

        batching_consumer._shutdown()

        assert worker.processed == [1, 2, 3, 4, 5, 6, 7, 8, 9]
        assert worker.flushed == [[1, 2, 3, 4, 5, 6]]
        assert consumer.commit_offsets_calls == 1
        assert consumer.close_calls == 1
Code example #20
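Verifies that consuming from an offset that does not fall on a message boundary raises InvalidChecksum.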
    def test_unaligned_offset(self) -> None:
        topic = Topic(uuid.uuid1().hex)
        partition = Partition(topic, 0)
        self.storage.create_topic(topic, 1)

        message = self.storage.produce(partition, 1, datetime.now())

        invalid_offset = message.offset + 4
        assert message.next_offset > invalid_offset > message.offset

        with pytest.raises(InvalidChecksum):
            self.storage.consume(partition, invalid_offset)
Code example #21
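Exercises the message storage API: topic creation and deletion, partition counts, and the TopicDoesNotExist / PartitionDoesNotExist errors for invalid targets.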
    def test_storage(self) -> None:
        topic = Topic(uuid.uuid1().hex)
        partitions = 3

        self.storage.create_topic(topic, partitions)

        assert [*self.storage.list_topics()] == [topic]

        assert self.storage.get_partition_count(topic) == partitions

        with pytest.raises(TopicExists):
            self.storage.create_topic(topic, partitions)

        with pytest.raises(TopicDoesNotExist):
            self.storage.get_partition_count(Topic("invalid"))

        with pytest.raises(TopicDoesNotExist):
            self.storage.consume(Partition(Topic("invalid"), 0), 0)

        with pytest.raises(TopicDoesNotExist):
            self.storage.produce(Partition(Topic("invalid"), 0), 0,
                                 datetime.now())

        with pytest.raises(PartitionDoesNotExist):
            self.storage.consume(Partition(topic, -1), 0)

        with pytest.raises(PartitionDoesNotExist):
            self.storage.consume(Partition(topic, partitions + 1), 0)

        with pytest.raises(PartitionDoesNotExist):
            self.storage.produce(Partition(topic, -1), 0, datetime.now())

        with pytest.raises(PartitionDoesNotExist):
            self.storage.produce(Partition(topic, partitions + 1), 0,
                                 datetime.now())

        self.storage.delete_topic(topic)

        with pytest.raises(TopicDoesNotExist):
            self.storage.delete_topic(topic)
Code example #22
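Shows parallel_transform_worker_apply filling a shared-memory output block until a message no longer fits, at which point ValueTooLarge is raised.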
def test_parallel_transform_worker_apply() -> None:
    messages = [
        Message(
            Partition(Topic("test"), 0),
            i,
            KafkaPayload(None, b"\x00" * size, None),
            datetime.now(),
        ) for i, size in enumerate([1000, 1000, 2000, 4000])
    ]

    with SharedMemoryManager() as smm:
        input_block = smm.SharedMemory(8192)
        assert input_block.size == 8192

        input_batch = MessageBatch(input_block)
        for message in messages:
            input_batch.append(message)

        assert len(input_batch) == 4

        output_block = smm.SharedMemory(4096)
        assert output_block.size == 4096

        index, output_batch = parallel_transform_worker_apply(
            transform_payload_expand,
            input_batch,
            output_block,
        )

        # The first batch should be able to fit 2 messages.
        assert index == 2
        assert len(output_batch) == 2

        index, output_batch = parallel_transform_worker_apply(
            transform_payload_expand,
            input_batch,
            output_block,
            index,
        )

        # The second batch should be able to fit one message.
        assert index == 3
        assert len(output_batch) == 1

        # The last message is too large to fit in the batch.
        with pytest.raises(ValueTooLarge):
            parallel_transform_worker_apply(
                transform_payload_expand,
                input_batch,
                output_block,
                index,
            )
Code example #23
File: kafka.py  Project: ruezetle/snuba
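Decodes a commit log entry: the payload key encodes topic, partition and group, and the value holds the committed offset.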
    def decode(self, value: KafkaPayload) -> Commit:
        key = value.key
        if not isinstance(key, bytes):
            raise TypeError("payload key must be a bytes object")

        val = value.value
        if not isinstance(val, bytes):
            raise TypeError("payload value must be a bytes object")

        topic_name, partition_index, group = key.decode("utf-8").split(":", 3)
        offset = int(val.decode("utf-8"))
        return Commit(group, Partition(Topic(topic_name),
                                       int(partition_index)), offset)
Code example #24
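Processes an end_delete_tag replacement for a promoted tag and asserts the tag-filtered count drops to zero while the total count is unchanged.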
    def test_delete_tag_promoted_insert(self):
        self.event["project_id"] = self.project_id
        self.event["group_id"] = 1
        self.event["data"]["tags"].append(["browser.name", "foo"])
        self.event["data"]["tags"].append(["notbrowser", "foo"])
        self.write_raw_events(self.event)

        project_id = self.project_id

        def _issue_count(total=False):
            return json.loads(
                self.app.post(
                    "/query",
                    data=json.dumps({
                        "project": [project_id],
                        "aggregations": [["count()", "", "count"]],
                        "conditions": [["tags[browser.name]", "=", "foo"]]
                        if not total else [],
                        "groupby": ["group_id"],
                    }),
                ).data)["data"]

        assert _issue_count() == [{"count": 1, "group_id": 1}]
        assert _issue_count(total=True) == [{"count": 1, "group_id": 1}]

        timestamp = datetime.now(tz=pytz.utc)

        message: Message[KafkaPayload] = Message(
            Partition(Topic("replacements"), 1),
            42,
            KafkaPayload(
                None,
                json.dumps((
                    2,
                    "end_delete_tag",
                    {
                        "project_id": project_id,
                        "tag": "browser.name",
                        "datetime":
                        timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )).encode("utf-8"),
            ),
            datetime.now(),
        )

        processed = self.replacer.process_message(message)
        self.replacer.flush_batch([processed])

        assert _issue_count() == []
        assert _issue_count(total=True) == [{"count": 1, "group_id": 1}]
Code example #25
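Fixture that creates a uniquely named topic through the Kafka AdminClient, yields it to the test and deletes it afterwards.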
 def get_topic(self, partitions: int = 1) -> Iterator[Topic]:
     name = f"test-{uuid.uuid1().hex}"
     client = AdminClient(self.configuration)
     [[key, future]] = client.create_topics(
         [NewTopic(name, num_partitions=partitions,
                   replication_factor=1)]).items()
     assert key == name
     assert future.result() is None
     try:
         yield Topic(name)
     finally:
         [[key, future]] = client.delete_topics([name]).items()
         assert key == name
         assert future.result() is None
Code example #26
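Builds a fake Kafka Message for every JSON event line in the given file.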
def get_messages(events_file) -> Sequence[Message[KafkaPayload]]:
    "Create a fake Kafka message for each JSON event in the file."
    messages: MutableSequence[Message[KafkaPayload]] = []
    raw_events = open(events_file).readlines()
    for raw_event in raw_events:
        messages.append(
            Message(
                Partition(Topic("events"), 1),
                0,
                KafkaPayload(None, raw_event.encode("utf-8")),
                datetime.now(),
            ),
        )
    return messages
Code example #27
File: test_kafka.py  Project: ruezetle/snuba
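Verifies that KafkaConsumerWithCommitLog publishes a Commit record to the commit log topic when staged offsets are committed.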
    def test_commit_log_consumer(self) -> None:
        # XXX: This would be better as an integration test (or at least a test
        # against an abstract Producer interface) instead of against a test against
        # a mock.
        commit_log_producer = FakeConfluentKafkaProducer()

        consumer: KafkaConsumer[int] = KafkaConsumerWithCommitLog(
            {
                **self.configuration,
                "auto.offset.reset": "earliest",
                "enable.auto.commit": "false",
                "enable.auto.offset.store": "false",
                "enable.partition.eof": "true",
                "group.id": "test",
                "session.timeout.ms": 10000,
            },
            codec=self.codec,
            producer=commit_log_producer,
            commit_log_topic=Topic("commit-log"),
        )

        with self.get_topic() as topic, closing(consumer) as consumer:
            consumer.subscribe([topic])

            with closing(self.get_producer()) as producer:
                producer.produce(topic, 0).result(5.0)

            message = consumer.poll(
                10.0)  # XXX: getting the subscription is slow
            assert isinstance(message, Message)

            consumer.stage_offsets(
                {message.partition: message.get_next_offset()})

            assert consumer.commit_offsets() == {
                Partition(topic, 0): message.get_next_offset()
            }

            assert len(commit_log_producer.messages) == 1
            commit_message = commit_log_producer.messages[0]
            assert commit_message.topic() == "commit-log"

            assert CommitCodec().decode(
                KafkaPayload(commit_message.key(),
                             commit_message.value())) == Commit(
                                 "test", Partition(topic, 0),
                                 message.get_next_offset())
Code example #28
File: kafka.py  Project: fpacifici/snuba
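Rebalance callback: resolves committed or starting offsets for the newly assigned partitions, seeks to them, resumes them and then invokes the user-supplied on_assign callback.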
        def assignment_callback(
            consumer: ConfluentConsumer, partitions: Sequence[ConfluentTopicPartition]
        ) -> None:
            self.__state = KafkaConsumerState.ASSIGNING

            try:
                assignment: MutableSequence[ConfluentTopicPartition] = []

                for partition in self.__consumer.committed(partitions):
                    if partition.offset >= 0:
                        assignment.append(partition)
                    elif partition.offset == OFFSET_INVALID:
                        assignment.append(
                            self.__resolve_partition_starting_offset(partition)
                        )
                    else:
                        raise ValueError("received unexpected offset")

                offsets: MutableMapping[Partition, int] = {
                    Partition(Topic(i.topic), i.partition): i.offset for i in assignment
                }
                self.__seek(offsets)

                # Ensure that all partitions are resumed on assignment to avoid
                # carrying over state from a previous assignment.
                self.__consumer.resume(
                    [
                        ConfluentTopicPartition(
                            partition.topic.name, partition.index, offset
                        )
                        for partition, offset in offsets.items()
                    ]
                )

                for partition in offsets:
                    self.__paused.discard(partition)
            except Exception:
                self.__state = KafkaConsumerState.ERROR
                raise

            try:
                if on_assign is not None:
                    on_assign(offsets)
            finally:
                self.__state = KafkaConsumerState.CONSUMING
Code example #29
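Exercises CollectStep: a batch is created on the first message, reused for subsequent ones, and closed, joined and committed once the batch size is reached.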
def test_collect() -> None:
    step_factory = Mock()
    step_factory.return_value = inner_step = Mock()

    commit_function = Mock()
    partition = Partition(Topic("topic"), 0)
    messages = message_generator(partition, 0)

    collect_step = CollectStep(step_factory, commit_function, 2, 60)

    # A batch should be started the first time the step receives a message.
    with assert_changes(lambda: step_factory.call_count, 0, 1):
        collect_step.poll()
        collect_step.submit(next(messages))  # offset 0

    # Subsequent messages should reuse the existing batch, ...
    with assert_does_not_change(lambda: step_factory.call_count, 1):
        collect_step.poll()
        collect_step.submit(next(messages))  # offset 1

    # ...until we hit the batch size limit.
    with assert_changes(lambda: inner_step.close.call_count,
                        0, 1), assert_changes(
                            lambda: inner_step.join.call_count, 0,
                            1), assert_changes(
                                lambda: commit_function.call_count, 0, 1):
        collect_step.poll()
        assert commit_function.call_args == call({partition: 2})

    step_factory.return_value = inner_step = Mock()

    # The next message should create a new batch.
    with assert_changes(lambda: step_factory.call_count, 1, 2):
        collect_step.submit(next(messages))

    with assert_changes(lambda: inner_step.close.call_count, 0, 1):
        collect_step.close()

    with assert_changes(lambda: inner_step.join.call_count, 0,
                        1), assert_changes(lambda: commit_function.call_count,
                                           1, 2):
        collect_step.join()
Code example #30
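Shows a MessageBatch backed by shared memory: appending a message that exceeds the remaining block capacity raises ValueTooLarge.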
def test_message_batch() -> None:
    partition = Partition(Topic("test"), 0)

    with SharedMemoryManager() as smm:
        block = smm.SharedMemory(4096)
        assert block.size == 4096

        message = Message(partition, 0, KafkaPayload(None, b"\x00" * 4000,
                                                     None), datetime.now())

        batch: MessageBatch[KafkaPayload] = MessageBatch(block)
        with assert_changes(lambda: len(batch), 0, 1):
            batch.append(message)

        assert batch[0] == message
        assert list(batch) == [message]

        with assert_does_not_change(lambda: len(batch),
                                    1), pytest.raises(ValueTooLarge):
            batch.append(message)