def test_table_name_filter() -> None:
    table_name = "table_name"
    message_filter = CdcTableNameMessageFilter(table_name)

    # Messages that match the table should not be dropped.
    assert not message_filter.should_drop(
        Message(
            Partition(Topic("topic"), 0),
            0,
            KafkaPayload(None, b"", [("table", table_name.encode("utf8"))]),
            datetime.now(),
        )
    )

    # Messages without a table should be dropped.
    assert message_filter.should_drop(
        Message(
            Partition(Topic("topic"), 0),
            0,
            KafkaPayload(None, b"", []),
            datetime.now(),
        )
    )

    # Messages from a different table should be dropped.
    assert message_filter.should_drop(
        Message(
            Partition(Topic("topic"), 0),
            0,
            KafkaPayload(None, b"", [("table", b"other_table")]),
            datetime.now(),
        )
    )
def submit(
    self,
    message: Message[
        Sequence[Tuple[StorageKey, Union[None, BytesInsertBatch, ReplacementBatch]]]
    ],
) -> None:
    assert not self.__closed

    for storage_key, payload in message.payload:
        writer_message = Message(
            message.partition,
            message.offset,
            payload,
            message.timestamp,
        )
        self.__steps[storage_key].submit(writer_message)

        # We collect the messages in self.__messages on the off chance
        # that we get an error submitting a batch and need to forward
        # these messages to the dead letter topic. The payload doesn't
        # have storage information, so we need to keep the storage_key.
        other_message = Message(
            message.partition,
            message.offset,
            (storage_key, payload),
            message.timestamp,
        )
        self.__messages[storage_key].append(other_message)
def test_multiple_partitions(self) -> None:
    """
    Different partitions should have independent offset checks.
    """
    set_config("skip_seen_offsets", True)
    self.event["project_id"] = self.project_id
    self.event["group_id"] = 1
    self.event["primary_hash"] = "a" * 32
    write_unprocessed_events(self.storage, [self.event])

    payload = KafkaPayload(
        None,
        json.dumps(
            (
                2,
                ReplacementType.END_UNMERGE,
                {
                    "project_id": self.project_id,
                    "previous_group_id": 1,
                    "new_group_id": 2,
                    "hashes": ["a" * 32],
                    "datetime": datetime.utcnow().strftime(PAYLOAD_DATETIME_FORMAT),
                },
            )
        ).encode("utf-8"),
        [],
    )
    offset = 42
    timestamp = datetime.now()

    partition_one: Message[KafkaPayload] = Message(
        Partition(Topic("replacements"), 1),
        offset,
        payload,
        timestamp,
    )
    partition_two: Message[KafkaPayload] = Message(
        Partition(Topic("replacements"), 2),
        offset,
        payload,
        timestamp,
    )

    processed = self.replacer.process_message(partition_one)
    self.replacer.flush_batch([processed])

    # different partition should be unaffected even if it's the same offset
    assert self.replacer.process_message(partition_two) is not None
def join(self, timeout: Optional[float] = None) -> None:
    start = time.time()

    while self.__queue:
        remaining = timeout - (time.time() - start) if timeout is not None else None
        if remaining is not None and remaining <= 0:
            logger.warning(f"Timed out with {len(self.__queue)} futures in queue")
            break

        message, result_future = self.__queue.popleft()

        subscription_task_result = SubscriptionTaskResult(
            result_future.task, result_future.future.result(remaining)
        )

        self.__next_step.submit(
            Message(
                message.partition,
                message.offset,
                subscription_task_result,
                message.timestamp,
            )
        )

    remaining = timeout - (time.time() - start) if timeout is not None else None
    self.__executor.shutdown()

    self.__next_step.close()
    self.__next_step.join(remaining)
def test_delete_tag_promoted_insert(self) -> None:
    self.event["project_id"] = self.project_id
    self.event["group_id"] = 1
    self.event["data"]["tags"].append(["browser.name", "foo"])
    self.event["data"]["tags"].append(["notbrowser", "foo"])
    write_unprocessed_events(self.storage, [self.event])

    project_id = self.project_id

    def _issue_count(total: bool = False) -> Sequence[Mapping[str, Any]]:
        clickhouse = self.storage.get_cluster().get_query_connection(
            ClickhouseClientSettings.QUERY
        )

        total_cond = (
            "AND has(_tags_hash_map, cityHash64('browser.name=foo'))"
            if not total
            else ""
        )

        data = clickhouse.execute(
            f"""
            SELECT group_id, count()
            FROM errors_local FINAL
            WHERE deleted = 0
            AND project_id = {project_id}
            {total_cond}
            GROUP BY group_id
            """
        ).results

        return [{"group_id": row[0], "count": row[1]} for row in data]

    assert _issue_count() == [{"count": 1, "group_id": 1}]
    assert _issue_count(total=True) == [{"count": 1, "group_id": 1}]

    timestamp = datetime.now(tz=pytz.utc)

    message: Message[KafkaPayload] = Message(
        Partition(Topic("replacements"), 1),
        42,
        KafkaPayload(
            None,
            json.dumps(
                (
                    2,
                    ReplacementType.END_DELETE_TAG,
                    {
                        "project_id": project_id,
                        "tag": "browser.name",
                        "datetime": timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )
            ).encode("utf-8"),
            [],
        ),
        datetime.now(),
    )

    processed = self.replacer.process_message(message)
    assert processed is not None
    self.replacer.flush_batch([processed])

    assert _issue_count() == []
    assert _issue_count(total=True) == [{"count": 1, "group_id": 1}]
def _wrap(self, msg: Tuple[Any, ...]) -> Message[KafkaPayload]:
    return Message(
        Partition(Topic("replacements"), 0),
        0,
        KafkaPayload(None, json.dumps(msg).encode("utf-8"), []),
        datetime.now(),
    )
def test_offset_already_processed(self) -> None:
    """
    Don't process an offset that already exists in Redis.
    """
    set_config("skip_seen_offsets", True)
    self.event["project_id"] = self.project_id
    self.event["group_id"] = 1
    self.event["primary_hash"] = "a" * 32
    write_unprocessed_events(self.storage, [self.event])

    key = f"replacement:{CONSUMER_GROUP}:errors:1"
    redis_client.set(key, 42)

    old_offset: Message[KafkaPayload] = Message(
        Partition(Topic("replacements"), 1),
        41,
        KafkaPayload(
            None,
            json.dumps(
                (
                    2,
                    ReplacementType.END_UNMERGE,
                    {},
                )
            ).encode("utf-8"),
            [],
        ),
        datetime.now(),
    )

    same_offset: Message[KafkaPayload] = Message(
        Partition(Topic("replacements"), 1),
        42,
        KafkaPayload(
            None,
            json.dumps(
                (
                    2,
                    ReplacementType.END_UNMERGE,
                    {},
                )
            ).encode("utf-8"),
            [],
        ),
        datetime.now(),
    )

    assert self.replacer.process_message(old_offset) is None
    assert self.replacer.process_message(same_offset) is None
def test_multistorage_strategy(
    processes: Optional[int],
    input_block_size: Optional[int],
    output_block_size: Optional[int],
) -> None:
    from snuba.datasets.storages import groupassignees, groupedmessages
    from tests.datasets.cdc.test_groupassignee import TestGroupassignee
    from tests.datasets.cdc.test_groupedmessage import TestGroupedMessage

    commit = Mock()

    storages = [groupassignees.storage, groupedmessages.storage]

    strategy = MultistorageConsumerProcessingStrategyFactory(
        storages,
        10,
        10,
        processes,
        input_block_size,
        output_block_size,
        TestingMetricsBackend(),
    ).create(commit)

    payloads = [
        KafkaPayload(None, b"{}", [("table", b"ignored")]),
        KafkaPayload(
            None,
            json.dumps(TestGroupassignee.INSERT_MSG).encode("utf8"),
            [("table", groupassignees.storage.get_postgres_table().encode("utf8"))],
        ),
        KafkaPayload(
            None,
            json.dumps(TestGroupedMessage.INSERT_MSG).encode("utf8"),
            [("table", groupedmessages.storage.get_postgres_table().encode("utf8"))],
        ),
    ]

    messages = [
        Message(
            Partition(Topic("topic"), 0), offset, payload, datetime.now(), offset + 1
        )
        for offset, payload in enumerate(payloads)
    ]

    with assert_changes(
        lambda: get_row_count(groupassignees.storage), 0, 1
    ), assert_changes(lambda: get_row_count(groupedmessages.storage), 0, 1):
        for message in messages:
            strategy.submit(message)

        with assert_changes(
            lambda: commit.call_args_list, [], [call({Partition(Topic("topic"), 0): 3})]
        ):
            strategy.close()
            strategy.join()
def poll(self, timeout: Optional[float] = None) -> Optional[Message[Tick]]:
    message = self.__consumer.poll(timeout)
    if message is None:
        return None

    try:
        commit = commit_codec.decode(message.payload)
        assert commit.orig_message_ts is not None
    except Exception:
        logger.error(
            f"Error decoding commit log message for followed group: {self.__followed_consumer_group}.",
            extra={"payload": str(message.payload), "offset": message.offset},
            exc_info=True,
        )
        return None

    if commit.group != self.__followed_consumer_group:
        return None

    previous_message = self.__previous_messages.get(commit.partition)

    result: Optional[Message[Tick]]
    if previous_message is not None:
        try:
            time_interval = Interval(
                previous_message.orig_message_ts, commit.orig_message_ts
            )
        except InvalidRangeError:
            logger.warning(
                "Could not construct valid time interval between %r and %r!",
                previous_message,
                MessageDetails(commit.offset, commit.orig_message_ts),
                exc_info=True,
            )
            return None
        else:
            result = Message(
                message.partition,
                message.offset,
                Tick(
                    commit.partition.index,
                    Interval(previous_message.offset, commit.offset),
                    time_interval,
                ).time_shift(self.__time_shift),
                message.timestamp,
            )
    else:
        result = None

    self.__previous_messages[commit.partition] = MessageDetails(
        commit.offset, commit.orig_message_ts
    )

    return result
def test_subscription_worker_consistent(
    subscription_data: SubscriptionData,
) -> None:
    state.set_config("event_subscription_non_consistent_sample_rate", 1)
    broker: Broker[SubscriptionTaskResult] = Broker(
        MemoryMessageStorage(), TestingClock()
    )

    result_topic = Topic("subscription-results")

    broker.create_topic(result_topic, partitions=1)

    frequency = timedelta(minutes=1)
    evaluations = 1

    subscription = Subscription(
        SubscriptionIdentifier(PartitionId(0), uuid1()),
        subscription_data,
    )

    store = DummySubscriptionDataStore()
    store.create(subscription.identifier.uuid, subscription.data)

    metrics = TestingMetricsBackend()

    dataset = get_dataset("events")
    worker = SubscriptionWorker(
        dataset,
        ThreadPoolExecutor(),
        {
            0: SubscriptionScheduler(
                store, PartitionId(0), timedelta(), DummyMetricsBackend(strict=True)
            )
        },
        broker.get_producer(),
        result_topic,
        metrics,
    )

    now = datetime(2000, 1, 1)

    tick = Tick(
        offsets=Interval(0, 1),
        timestamps=Interval(now - (frequency * evaluations), now),
    )

    worker.process_message(Message(Partition(Topic("events"), 0), 0, tick, now))

    time.sleep(0.1)

    assert (
        len(
            [
                m
                for m in metrics.calls
                if isinstance(m, Increment) and m.name == "consistent"
            ]
        )
        == 1
    )
def eventstream(*, dataset: Dataset) -> RespTuple:
    record = json.loads(http_request.data)

    version = record[0]
    if version != 2:
        raise RuntimeError("Unsupported protocol version: %s" % record)

    message: Message[KafkaPayload] = Message(
        Partition(Topic("topic"), 0),
        0,
        KafkaPayload(None, http_request.data, []),
        datetime.now(),
    )

    type_ = record[1]

    storage = dataset.get_default_entity().get_writable_storage()
    assert storage is not None

    if type_ == "insert":
        from arroyo.processing.strategies.streaming import (
            KafkaConsumerStrategyFactory,
        )

        from snuba.consumers.consumer import build_batch_writer, process_message

        table_writer = storage.get_table_writer()
        stream_loader = table_writer.get_stream_loader()
        strategy = KafkaConsumerStrategyFactory(
            stream_loader.get_pre_filter(),
            functools.partial(
                process_message, stream_loader.get_processor(), "consumer_group"
            ),
            build_batch_writer(table_writer, metrics=metrics),
            max_batch_size=1,
            max_batch_time=1.0,
            processes=None,
            input_block_size=None,
            output_block_size=None,
        ).create(lambda offsets: None)
        strategy.submit(message)
        strategy.close()
        strategy.join()
    else:
        from snuba.replacer import ReplacerWorker

        worker = ReplacerWorker(storage, "consumer_group", metrics=metrics)
        processed = worker.process_message(message)
        if processed is not None:
            batch = [processed]
            worker.flush_batch(batch)

    return ("ok", 200, {"Content-Type": "text/plain"})
def set_decoded_future_result(
    encoded_future: Future[Message[TEncoded]],
) -> None:
    try:
        message = encoded_future.result()
    except Exception as e:
        decoded_future.set_exception(e)
    else:
        decoded_future.set_result(
            Message(
                message.partition,
                message.offset,
                payload,
                message.timestamp,
            )
        )
def test_skip_kafka_message(self) -> None:
    state.set_config(
        "kafka_messages_to_skip", "[snuba-test-lol:1:2,snuba-test-yeet:0:1]"
    )
    assert skip_kafka_message(
        Message(
            Partition(Topic("snuba-test-lol"), 1),
            2,
            KafkaPayload(None, b"", []),
            datetime.now(),
        )
    )
    assert skip_kafka_message(
        Message(
            Partition(Topic("snuba-test-yeet"), 0),
            1,
            KafkaPayload(None, b"", []),
            datetime.now(),
        )
    )
    assert not skip_kafka_message(
        Message(
            Partition(Topic("snuba-test-lol"), 2),
            1,
            KafkaPayload(None, b"", []),
            datetime.now(),
        )
    )
def test_kafka_filter_header_with_bypass() -> None:
    header_filter = KafkaHeaderFilterWithBypass("should_drop", "1", 5)
    message = Message(
        Partition(Topic("random"), 1),
        1,
        KafkaPayload(b"key", b"value", [("should_drop", b"1")]),
        datetime.now(),
    )

    for _ in range(3):
        assert header_filter.should_drop(message) is True
        assert header_filter.should_drop(message) is True
        assert header_filter.should_drop(message) is True
        assert header_filter.should_drop(message) is True
        assert header_filter.should_drop(message) is False
def test_unmerge_hierarchical_insert(self) -> None:
    self.event["project_id"] = self.project_id
    self.event["group_id"] = 1
    self.event["primary_hash"] = "b" * 32
    self.event["data"]["hierarchical_hashes"] = ["a" * 32]
    write_unprocessed_events(self.storage, [self.event])

    assert self._issue_count(self.project_id) == [{"count": 1, "group_id": 1}]

    timestamp = datetime.now(tz=pytz.utc)

    project_id = self.project_id

    message: Message[KafkaPayload] = Message(
        Partition(Topic("replacements"), 1),
        42,
        KafkaPayload(
            None,
            json.dumps(
                (
                    2,
                    ReplacementType.END_UNMERGE_HIERARCHICAL,
                    {
                        "project_id": project_id,
                        "previous_group_id": 1,
                        "new_group_id": 2,
                        "hierarchical_hash": "a" * 32,
                        "primary_hash": "b" * 32,
                        "datetime": timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )
            ).encode("utf-8"),
            [],
        ),
        datetime.now(),
    )

    processed = self.replacer.process_message(message)
    assert processed is not None
    self.replacer.flush_batch([processed])

    assert self._issue_count(self.project_id) == [{"count": 1, "group_id": 2}]
def poll(self) -> None:
    while self.__queue:
        if not self.__queue[0][1].future.done():
            break

        message, result_future = self.__queue.popleft()

        self.__next_step.submit(
            Message(
                message.partition,
                message.offset,
                SubscriptionTaskResult(
                    result_future.task, result_future.future.result()
                ),
                message.timestamp,
            )
        )

    self.__next_step.poll()
def submit(
    self,
    message: Message[
        Sequence[Tuple[StorageKey, Union[None, JSONRowInsertBatch, ReplacementBatch]]]
    ],
) -> None:
    assert not self.__closed

    for storage_key, payload in message.payload:
        self.__steps[storage_key].submit(
            Message(
                message.partition,
                message.offset,
                payload,
                message.timestamp,
                message.next_offset,
            )
        )
def submit(self, message: Message[Tick]) -> None:
    assert not self.__closed

    # Update self.__offset_high_watermark
    self.__update_offset_high_watermark(message)

    should_commit = self.__should_commit(message)

    offset_to_commit = self.__offset_high_watermark if should_commit else None

    self.__next_step.submit(
        Message(
            message.partition,
            message.offset,
            CommittableTick(message.payload, offset_to_commit),
            message.timestamp,
        )
    )

    if should_commit:
        self.__offset_low_watermark = self.__offset_high_watermark
def test_unmerge_insert(self) -> None:
    self.event["project_id"] = self.project_id
    self.event["group_id"] = 1
    self.event["primary_hash"] = "a" * 32
    write_unprocessed_events(self.storage, [self.event])

    assert self._issue_count(self.project_id) == [{"count": 1, "group_id": 1}]

    timestamp = datetime.now(tz=pytz.utc)

    project_id = self.project_id

    message: Message[KafkaPayload] = Message(
        Partition(Topic("replacements"), 1),
        42,
        KafkaPayload(
            None,
            json.dumps(
                (
                    2,
                    "end_unmerge",
                    {
                        "project_id": project_id,
                        "previous_group_id": 1,
                        "new_group_id": 2,
                        "hashes": ["a" * 32],
                        "datetime": timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )
            ).encode("utf-8"),
            [],
        ),
        datetime.now(),
    )

    processed = self.replacer.process_message(message)
    self.replacer.flush_batch([processed])

    assert self._issue_count(self.project_id) == [{"count": 1, "group_id": 2}]
def poll(self, timeout: Optional[float] = None) -> Optional[Message[Tick]]:
    message = self.__consumer.poll(timeout)
    if message is None:
        return None

    previous_message = self.__previous_messages.get(message.partition)

    result: Optional[Message[Tick]]
    if previous_message is not None:
        try:
            time_interval = Interval(previous_message.timestamp, message.timestamp)
            if (
                self.__min_interval is not None
                and time_interval.upper - time_interval.lower < self.__min_interval
            ):
                return None
        except InvalidRangeError:
            logger.warning(
                "Could not construct valid time interval between %r and %r!",
                previous_message,
                message,
                exc_info=True,
            )
            return None
        else:
            result = Message(
                message.partition,
                previous_message.offset,
                Tick(
                    None,
                    Interval(previous_message.offset, message.offset),
                    time_interval,
                ).time_shift(self.__time_shift),
                message.timestamp,
            )
    else:
        result = None

    self.__previous_messages[message.partition] = MessageDetails(
        message.offset, message.timestamp
    )

    return result
def test_delete_groups_insert(self) -> None:
    self.event["project_id"] = self.project_id
    self.event["group_id"] = 1
    write_unprocessed_events(self.storage, [self.event])

    assert self._issue_count(self.project_id) == [{"count": 1, "group_id": 1}]

    timestamp = datetime.utcnow()

    project_id = self.project_id

    message: Message[KafkaPayload] = Message(
        Partition(Topic("replacements"), 1),
        42,
        KafkaPayload(
            None,
            json.dumps(
                (
                    2,
                    ReplacementType.END_DELETE_GROUPS,
                    {
                        "project_id": project_id,
                        "group_ids": [1],
                        "datetime": timestamp.strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )
            ).encode("utf-8"),
            [],
        ),
        datetime.now(),
    )

    processed = self.replacer.process_message(message)
    self.replacer.flush_batch([processed])

    assert self._issue_count(self.project_id) == []

    # Count is still zero after Redis flushed and parts merged
    self._clear_redis_and_force_merge()
    assert self._issue_count(self.project_id) == []
def generate_message(
    entity_key: EntityKey,
    subscription_identifier: Optional[SubscriptionIdentifier] = None,
) -> Iterator[Message[KafkaPayload]]:
    codec = SubscriptionScheduledTaskEncoder()
    epoch = datetime(1970, 1, 1)
    i = 0

    if subscription_identifier is None:
        subscription_identifier = SubscriptionIdentifier(PartitionId(1), uuid.uuid1())

    data_dict = {}
    if entity_key in (EntityKey.METRICS_SETS, EntityKey.METRICS_COUNTERS):
        data_dict = {"organization": 1}

    entity_subscription = ENTITY_KEY_TO_SUBSCRIPTION_MAPPER[entity_key](
        data_dict=data_dict
    )

    while True:
        payload = codec.encode(
            ScheduledSubscriptionTask(
                epoch + timedelta(minutes=i),
                SubscriptionWithMetadata(
                    entity_key,
                    Subscription(
                        subscription_identifier,
                        SubscriptionData(
                            project_id=1,
                            time_window_sec=60,
                            resolution_sec=60,
                            query=f"MATCH ({entity_key.value}) SELECT count()",
                            entity_subscription=entity_subscription,
                        ),
                    ),
                    i + 1,
                ),
            )
        )

        yield Message(Partition(Topic("test"), 0), i, payload, epoch)
        i += 1
def test_reset_consumer_group_offset_check(self) -> None:
    set_config("skip_seen_offsets", True)
    self.event["project_id"] = self.project_id
    self.event["group_id"] = 1
    self.event["primary_hash"] = "a" * 32
    write_unprocessed_events(self.storage, [self.event])

    message: Message[KafkaPayload] = Message(
        Partition(Topic("replacements"), 1),
        42,
        KafkaPayload(
            None,
            json.dumps(
                (
                    2,
                    ReplacementType.END_UNMERGE,
                    {
                        "project_id": self.project_id,
                        "previous_group_id": 1,
                        "new_group_id": 2,
                        "hashes": ["a" * 32],
                        "datetime": datetime.utcnow().strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )
            ).encode("utf-8"),
            [],
        ),
        datetime.now(),
    )

    self.replacer.flush_batch([self.replacer.process_message(message)])

    set_config(replacer.RESET_CHECK_CONFIG, f"[{CONSUMER_GROUP}]")

    # Offset to check against should be reset so this message shouldn't be skipped
    assert self.replacer.process_message(message) is not None
def test_process_offset_twice(self) -> None:
    set_config("skip_seen_offsets", True)
    self.event["project_id"] = self.project_id
    self.event["group_id"] = 1
    self.event["primary_hash"] = "a" * 32
    write_unprocessed_events(self.storage, [self.event])

    message: Message[KafkaPayload] = Message(
        Partition(Topic("replacements"), 1),
        42,
        KafkaPayload(
            None,
            json.dumps(
                (
                    2,
                    ReplacementType.END_UNMERGE,
                    {
                        "project_id": self.project_id,
                        "previous_group_id": 1,
                        "new_group_id": 2,
                        "hashes": ["a" * 32],
                        "datetime": datetime.utcnow().strftime(PAYLOAD_DATETIME_FORMAT),
                    },
                )
            ).encode("utf-8"),
            [],
        ),
        datetime.now(),
    )

    processed = self.replacer.process_message(message)
    self.replacer.flush_batch([processed])

    # should be None since the offset should be in Redis, indicating it should be skipped
    assert self.replacer.process_message(message) is None
def test_reprocessing_flow_insert(self) -> None:
    # We have a group that contains two events, 1 and 2.
    self.event["project_id"] = self.project_id
    self.event["group_id"] = 1
    self.event["event_id"] = event_id = "00e24a150d7f4ee4b142b61b4d893b6d"
    write_unprocessed_events(self.storage, [self.event])
    self.event["event_id"] = event_id2 = "00e24a150d7f4ee4b142b61b4d893b6e"
    write_unprocessed_events(self.storage, [self.event])

    assert self._issue_count(self.project_id) == [{"count": 2, "group_id": 1}]

    project_id = self.project_id

    message: Message[KafkaPayload] = Message(
        Partition(Topic("replacements"), 1),
        41,
        KafkaPayload(
            None,
            json.dumps(
                (
                    2,
                    ReplacementType.TOMBSTONE_EVENTS,
                    {"project_id": project_id, "event_ids": [event_id]},
                )
            ).encode("utf-8"),
            [],
        ),
        datetime.now(),
    )

    # The user chooses to reprocess a subset of the group and throw away
    # the other events. Event 1 gets manually tombstoned by Sentry while
    # Event 2 prevails.
    processed = self.replacer.process_message(message)
    self.replacer.flush_batch([processed])

    # At this point the count doesn't make any sense but we don't care.
    assert self._issue_count(self.project_id) == [{"count": 2, "group_id": 1}]

    # The reprocessed event is inserted with a guaranteed-new group ID but
    # the *same* event ID (this is why we need to skip tombstoning this
    # event ID)
    self.event["group_id"] = 2
    write_unprocessed_events(self.storage, [self.event])

    message = Message(
        Partition(Topic("replacements"), 1),
        42,
        KafkaPayload(
            None,
            json.dumps(
                (
                    2,
                    ReplacementType.EXCLUDE_GROUPS,
                    {"project_id": project_id, "group_ids": [1]},
                )
            ).encode("utf-8"),
            [],
        ),
        datetime.now(),
    )

    # Group 1 is excluded from queries. At this point we have almost a
    # regular group deletion, except only a subset of events have been
    # tombstoned (the ones that will *not* be reprocessed).
    processed = self.replacer.process_message(message)
    self.replacer.flush_batch([processed])

    # Group 2 should contain the one event that the user chose to
    # reprocess, and Group 1 should be gone. (Note: In the product Group 2
    # looks identical to Group 1, including short ID).
    assert self._issue_count(self.project_id) == [{"count": 1, "group_id": 2}]
    assert self._get_group_id(project_id, event_id2) == 2
    assert not self._get_group_id(project_id, event_id)
def test_combined_scheduler_and_executor() -> None:
    state.set_config("subscription_mode_events", "new")
    create_subscription()
    epoch = datetime(1970, 1, 1)

    dataset = get_dataset("events")
    entity_names = ["events"]
    num_partitions = 2
    max_concurrent_queries = 2
    total_concurrent_queries = 2
    metrics = TestingMetricsBackend()

    commit = mock.Mock()
    partitions = mock.Mock()

    topic = Topic("snuba-commit-log")
    partition = Partition(topic, 0)
    stale_threshold_seconds = None
    result_topic = "events-subscription-results"
    schedule_ttl = 60

    producer = KafkaProducer(
        build_kafka_producer_configuration(SnubaTopic.SUBSCRIPTION_RESULTS_EVENTS)
    )

    with closing(producer):
        factory = CombinedSchedulerExecutorFactory(
            dataset,
            entity_names,
            num_partitions,
            max_concurrent_queries,
            total_concurrent_queries,
            producer,
            metrics,
            stale_threshold_seconds,
            result_topic,
            schedule_ttl,
        )

        strategy = factory.create_with_partitions(commit, partitions)

        message = Message(
            partition,
            4,
            Tick(
                0,
                offsets=Interval(1, 3),
                timestamps=Interval(epoch, epoch + timedelta(seconds=60)),
            ),
            epoch,
        )
        strategy.submit(message)

        # Wait for the query to be executed and the result message produced
        for i in range(10):
            time.sleep(0.5)
            strategy.poll()
            if commit.call_count == 1:
                break

        assert commit.call_count == 1

        strategy.close()
        strategy.join()
def test_tick_consumer_non_monotonic() -> None:
    clock = TestingClock()
    broker: Broker[KafkaPayload] = Broker(MemoryMessageStorage(), clock)

    epoch = datetime.fromtimestamp(clock.time())

    topic = Topic("messages")
    followed_consumer_group = "events"
    partition = Partition(topic, 0)

    broker.create_topic(topic, partitions=1)

    producer = broker.get_producer()

    inner_consumer = broker.get_consumer("group")

    consumer = CommitLogTickConsumer(inner_consumer, followed_consumer_group)

    def _assignment_callback(offsets: Mapping[Partition, int]) -> None:
        assert inner_consumer.tell() == {partition: 0}
        assert consumer.tell() == {partition: 0}

    assignment_callback = mock.Mock(side_effect=_assignment_callback)

    consumer.subscribe([topic], on_assign=assignment_callback)

    producer.produce(
        partition,
        commit_codec.encode(Commit(followed_consumer_group, partition, 0, epoch)),
    ).result()

    clock.sleep(1)

    producer.produce(
        partition,
        commit_codec.encode(
            Commit(followed_consumer_group, partition, 1, epoch + timedelta(seconds=1))
        ),
    ).result()

    with assert_changes(lambda: assignment_callback.called, False, True):
        assert consumer.poll() is None

    assert consumer.tell() == {partition: 1}

    with assert_changes(consumer.tell, {partition: 1}, {partition: 2}):
        assert consumer.poll() == Message(
            partition,
            1,
            Tick(
                0,
                offsets=Interval(0, 1),
                timestamps=Interval(epoch, epoch + timedelta(seconds=1)),
            ),
            epoch + timedelta(seconds=1),
        )

    clock.sleep(-1)

    producer.produce(
        partition,
        commit_codec.encode(Commit(followed_consumer_group, partition, 2, epoch)),
    ).result()

    with assert_changes(consumer.tell, {partition: 2}, {partition: 3}):
        assert consumer.poll() is None

    clock.sleep(2)

    producer.produce(
        partition,
        commit_codec.encode(
            Commit(followed_consumer_group, partition, 3, epoch + timedelta(seconds=2))
        ),
    ).result()

    with assert_changes(consumer.tell, {partition: 3}, {partition: 4}):
        assert consumer.poll() == Message(
            partition,
            3,
            Tick(
                0,
                offsets=Interval(1, 3),
                timestamps=Interval(
                    epoch + timedelta(seconds=1), epoch + timedelta(seconds=2)
                ),
            ),
            epoch + timedelta(seconds=2),
        )
def test_tick_consumer(time_shift: Optional[timedelta]) -> None:
    clock = TestingClock()
    broker: Broker[KafkaPayload] = Broker(MemoryMessageStorage(), clock)

    epoch = datetime.fromtimestamp(clock.time())

    topic = Topic("messages")
    followed_consumer_group = "events"

    broker.create_topic(topic, partitions=1)

    producer = broker.get_producer()

    for partition, offsets in enumerate([[0, 1, 2], [0]]):
        for offset in offsets:
            payload = commit_codec.encode(
                Commit(
                    followed_consumer_group, Partition(topic, partition), offset, epoch
                )
            )
            producer.produce(Partition(topic, 0), payload).result()

    inner_consumer = broker.get_consumer("group")

    consumer = CommitLogTickConsumer(
        inner_consumer, followed_consumer_group, time_shift=time_shift
    )

    if time_shift is None:
        time_shift = timedelta()

    def _assignment_callback(offsets: Mapping[Partition, int]) -> None:
        assert consumer.tell() == {Partition(topic, 0): 0}

    assignment_callback = mock.Mock(side_effect=_assignment_callback)

    consumer.subscribe([topic], on_assign=assignment_callback)

    with assert_changes(lambda: assignment_callback.called, False, True):
        # consume 0, 0
        assert consumer.poll() is None

    assert consumer.tell() == {Partition(topic, 0): 1}

    # consume 0, 1
    assert consumer.poll() == Message(
        Partition(topic, 0),
        1,
        Tick(
            0, offsets=Interval(0, 1), timestamps=Interval(epoch, epoch)
        ).time_shift(time_shift),
        epoch,
    )

    assert consumer.tell() == {Partition(topic, 0): 2}

    # consume 0, 2
    assert consumer.poll() == Message(
        Partition(topic, 0),
        2,
        Tick(
            0, offsets=Interval(1, 2), timestamps=Interval(epoch, epoch)
        ).time_shift(time_shift),
        epoch,
    )

    assert consumer.tell() == {Partition(topic, 0): 3}

    # consume 1, 0
    assert consumer.poll() is None

    assert consumer.tell() == {Partition(topic, 0): 4}

    # consume no message
    assert consumer.poll() is None

    assert consumer.tell() == {Partition(topic, 0): 4}

    consumer.seek({Partition(topic, 0): 1})

    assert consumer.tell() == {Partition(topic, 0): 1}

    # consume 0, 1
    assert consumer.poll() is None

    assert consumer.tell() == {Partition(topic, 0): 2}

    # consume 0, 2
    assert consumer.poll() == Message(
        Partition(topic, 0),
        2,
        Tick(
            0, offsets=Interval(1, 2), timestamps=Interval(epoch, epoch)
        ).time_shift(time_shift),
        epoch,
    )

    assert consumer.tell() == {Partition(topic, 0): 3}

    with pytest.raises(ConsumerError):
        consumer.seek({Partition(topic, -1): 0})
def test_subscription_worker(subscription_data: SubscriptionData) -> None:
    broker: Broker[SubscriptionTaskResult] = Broker(
        MemoryMessageStorage(), TestingClock()
    )

    result_topic = Topic("subscription-results")

    broker.create_topic(result_topic, partitions=1)

    frequency = timedelta(minutes=1)
    evaluations = 3

    subscription = Subscription(
        SubscriptionIdentifier(PartitionId(0), uuid1()),
        subscription_data,
    )

    store = DummySubscriptionDataStore()
    store.create(subscription.identifier.uuid, subscription.data)

    metrics = DummyMetricsBackend(strict=True)

    dataset = get_dataset("events")
    worker = SubscriptionWorker(
        dataset,
        ThreadPoolExecutor(),
        {0: SubscriptionScheduler(store, PartitionId(0), timedelta(), metrics)},
        broker.get_producer(),
        result_topic,
        metrics,
    )

    now = datetime(2000, 1, 1)

    tick = Tick(
        offsets=Interval(0, 1),
        timestamps=Interval(now - (frequency * evaluations), now),
    )

    result_futures = worker.process_message(
        Message(Partition(Topic("events"), 0), 0, tick, now)
    )

    assert result_futures is not None and len(result_futures) == evaluations

    # Publish the results.
    worker.flush_batch([result_futures])

    # Check to make sure the results were published.
    # NOTE: This does not cover the ``SubscriptionTaskResultCodec``!
    consumer = broker.get_consumer("group")
    consumer.subscribe([result_topic])

    for i in range(evaluations):
        timestamp = now - frequency * (evaluations - i)

        message = consumer.poll()
        assert message is not None
        assert message.partition.topic == result_topic

        task, future = result_futures[i]
        future_result = request, result = future.result()
        assert message.payload.task.timestamp == timestamp
        assert message.payload == SubscriptionTaskResult(task, future_result)

        # NOTE: The time series extension is folded back into the request
        # body, ideally this would reference the timeseries options in
        # isolation.
        from_pattern = FunctionCall(
            String(ConditionFunctions.GTE),
            (
                Column(None, String("timestamp")),
                Literal(Datetime(timestamp - subscription.data.time_window)),
            ),
        )
        to_pattern = FunctionCall(
            String(ConditionFunctions.LT),
            (Column(None, String("timestamp")), Literal(Datetime(timestamp))),
        )

        condition = request.query.get_condition()
        assert condition is not None

        conditions = get_first_level_and_conditions(condition)

        assert any([from_pattern.match(e) for e in conditions])
        assert any([to_pattern.match(e) for e in conditions])

        assert result == {
            "meta": [{"name": "count", "type": "UInt64"}],
            "data": [{"count": 0}],
        }
def test_tick_consumer_non_monotonic() -> None:
    clock = TestingClock()
    broker: Broker[int] = Broker(MemoryMessageStorage(), clock)

    epoch = datetime.fromtimestamp(clock.time())

    topic = Topic("messages")
    partition = Partition(topic, 0)

    broker.create_topic(topic, partitions=1)

    producer = broker.get_producer()

    inner_consumer = broker.get_consumer("group")

    consumer = TickConsumer(inner_consumer)

    def _assignment_callback(offsets: Mapping[Partition, int]) -> None:
        assert inner_consumer.tell() == {partition: 0}
        assert consumer.tell() == {partition: 0}

    assignment_callback = mock.Mock(side_effect=_assignment_callback)

    consumer.subscribe([topic], on_assign=assignment_callback)

    producer.produce(partition, 0)

    clock.sleep(1)

    producer.produce(partition, 1)

    with assert_changes(lambda: assignment_callback.called, False, True):
        assert consumer.poll() is None

    assert inner_consumer.tell() == {partition: 1}
    assert consumer.tell() == {partition: 0}

    with assert_changes(
        inner_consumer.tell, {partition: 1}, {partition: 2}
    ), assert_changes(consumer.tell, {partition: 0}, {partition: 1}):
        assert consumer.poll() == Message(
            partition,
            0,
            Tick(
                offsets=Interval(0, 1),
                timestamps=Interval(epoch, epoch + timedelta(seconds=1)),
            ),
            epoch + timedelta(seconds=1),
        )

    clock.sleep(-1)

    producer.produce(partition, 2)

    with assert_changes(
        inner_consumer.tell, {partition: 2}, {partition: 3}
    ), assert_does_not_change(consumer.tell, {partition: 1}):
        assert consumer.poll() is None

    clock.sleep(2)

    producer.produce(partition, 3)

    with assert_changes(
        inner_consumer.tell, {partition: 3}, {partition: 4}
    ), assert_changes(consumer.tell, {partition: 1}, {partition: 3}):
        assert consumer.poll() == Message(
            partition,
            1,
            Tick(
                offsets=Interval(1, 3),
                timestamps=Interval(
                    epoch + timedelta(seconds=1), epoch + timedelta(seconds=2)
                ),
            ),
            epoch + timedelta(seconds=2),
        )