def _get_task_kwargs(message: Message) -> Optional[Mapping[str, Any]]:
    use_kafka_headers = options.get("post-process-forwarder:kafka-headers")

    if use_kafka_headers:
        try:
            with _sampled_eventstream_timer(instance="get_task_kwargs_for_message_from_headers"):
                return get_task_kwargs_for_message_from_headers(message.headers())
        except Exception as error:
            logger.error("Could not forward message: %s", error, exc_info=True)
            with metrics.timer(_DURATION_METRIC, instance="get_task_kwargs_for_message"):
                return get_task_kwargs_for_message(message.value())
    else:
        with metrics.timer(_DURATION_METRIC, instance="get_task_kwargs_for_message"):
            return get_task_kwargs_for_message(message.value())
def test_get_task_kwargs_for_message_version_1():
    event_data = {
        "project_id": 1,
        "group_id": 2,
        "event_id": "00000000000010008080808080808080",
        "message": "message",
        "platform": "python",
        "datetime": "2018-07-20T21:04:27.600640Z",
        "data": {
            "logentry": {"formatted": "message"},
            "platform": "python",
            "timestamp": 1532120667.60064,
        },
        "extra": {},
        "primary_hash": "49f68a5c8493ec2c0bf489821c21fc3b",
    }

    task_state = {"is_new": True, "is_regression": False, "is_new_group_environment": True}

    kwargs = get_task_kwargs_for_message(json.dumps([1, "insert", event_data, task_state]))

    event = kwargs.pop("event")
    assert event.project_id == 1
    assert event.group_id == 2
    assert event.event_id == "00000000000010008080808080808080"
    assert event.message == "message"
    assert event.platform == "python"
    assert event.datetime == datetime(2018, 7, 20, 21, 4, 27, 600640, tzinfo=pytz.utc)

    assert kwargs.pop("primary_hash") == "49f68a5c8493ec2c0bf489821c21fc3b"
    assert kwargs.pop("is_new") is True
    assert kwargs.pop("is_regression") is False
    assert kwargs.pop("is_new_group_environment") is True
    assert not kwargs, f"unexpected values remaining: {kwargs!r}"
def _get_task_kwargs_and_dispatch(self, message) -> None:
    with metrics.timer("eventstream.duration", instance="get_task_kwargs_for_message"):
        task_kwargs = get_task_kwargs_for_message(message.value())

    if task_kwargs is not None:
        if task_kwargs["group_id"] is None:
            metrics.incr(
                "eventstream.messages",
                tags={"partition": message.partition(), "type": "transactions"},
            )
        else:
            metrics.incr(
                "eventstream.messages",
                tags={"partition": message.partition(), "type": "errors"},
            )
        with metrics.timer("eventstream.duration", instance="dispatch_post_process_group_task"):
            self._dispatch_post_process_group_task(**task_kwargs)
def test_get_task_kwargs_for_message_version_1():
    event_data = {
        'project_id': 1,
        'group_id': 2,
        'event_id': '00000000000010008080808080808080',
        'message': 'message',
        'platform': 'python',
        'datetime': '2018-07-20T21:04:27.600640Z',
        'data': {},
        'extra': {},
        'primary_hash': '49f68a5c8493ec2c0bf489821c21fc3b',
    }

    task_state = {
        'is_new': True,
        'is_sample': False,
        'is_regression': False,
        'is_new_group_environment': True,
    }

    kwargs = get_task_kwargs_for_message(
        json.dumps([1, 'insert', event_data, task_state]))

    event = kwargs.pop('event')
    assert event.project_id == 1
    assert event.group_id == 2
    assert event.event_id == '00000000000010008080808080808080'
    assert event.message == 'message'
    assert event.platform == 'python'
    assert event.datetime == datetime(2018, 7, 20, 21, 4, 27, 600640, tzinfo=pytz.utc)
    assert dict(event.data) == {}

    assert kwargs.pop('primary_hash') == '49f68a5c8493ec2c0bf489821c21fc3b'
    assert kwargs.pop('is_new') is True
    assert kwargs.pop('is_sample') is False
    assert kwargs.pop('is_regression') is False
    assert kwargs.pop('is_new_group_environment') is True
    assert not kwargs, 'unexpected values remaining: {!r}'.format(kwargs)
def run_post_process_forwarder(self, consumer_group, commit_log_topic,
                               synchronize_commit_group, commit_batch_size=100,
                               initial_offset_reset='latest'):
    logger.debug('Starting post-process forwarder...')

    cluster_name = settings.KAFKA_TOPICS[settings.KAFKA_EVENTS]['cluster']
    bootstrap_servers = settings.KAFKA_CLUSTERS[cluster_name]['bootstrap.servers']

    consumer = SynchronizedConsumer(
        bootstrap_servers=bootstrap_servers,
        consumer_group=consumer_group,
        commit_log_topic=commit_log_topic,
        synchronize_commit_group=synchronize_commit_group,
        initial_offset_reset=initial_offset_reset,
    )

    # Next offset to commit for each owned (topic, partition); ``None`` means the
    # partition has been assigned but no message from it has been processed yet.
    owned_partition_offsets = {}

    def commit(partitions):
        results = consumer.commit(offsets=partitions, asynchronous=False)

        errors = [i for i in results if i.error is not None]
        if errors:
            raise Exception(
                'Failed to commit %s/%s partitions: %r' % (len(errors), len(partitions), errors))

        return results

    def on_assign(consumer, partitions):
        logger.debug('Received partition assignment: %r', partitions)

        for i in partitions:
            if i.offset == OFFSET_INVALID:
                updated_offset = None
            elif i.offset < 0:
                raise Exception(
                    'Received unexpected negative offset during partition assignment: %r' % (i,))
            else:
                updated_offset = i.offset

            key = (i.topic, i.partition)
            previous_offset = owned_partition_offsets.get(key, None)
            if previous_offset is not None and previous_offset != updated_offset:
                logger.warning(
                    'Received new offset for owned partition %r, will overwrite previous stored offset %r with %r.',
                    key, previous_offset, updated_offset)

            owned_partition_offsets[key] = updated_offset

    def on_revoke(consumer, partitions):
        logger.debug('Revoked partition assignment: %r', partitions)

        offsets_to_commit = []

        for i in partitions:
            key = (i.topic, i.partition)

            try:
                offset = owned_partition_offsets.pop(key)
            except KeyError:
                logger.warning(
                    'Received unexpected partition revocation for unowned partition: %r',
                    i, exc_info=True)
                continue

            if offset is None:
                logger.debug('Skipping commit of unprocessed partition: %r', i)
                continue

            offsets_to_commit.append(TopicPartition(i.topic, i.partition, offset))

        if offsets_to_commit:
            logger.debug(
                'Committing offset(s) for %s revoked partition(s): %r',
                len(offsets_to_commit), offsets_to_commit)
            commit(offsets_to_commit)

    consumer.subscribe(
        [self.topic],
        on_assign=on_assign,
        on_revoke=on_revoke,
    )

    def commit_offsets():
        offsets_to_commit = []
        for (topic, partition), offset in owned_partition_offsets.items():
            if offset is None:
                logger.debug('Skipping commit of unprocessed partition: %r', (topic, partition))
                continue

            offsets_to_commit.append(TopicPartition(topic, partition, offset))

        if offsets_to_commit:
            logger.debug(
                'Committing offset(s) for %s owned partition(s): %r',
                len(offsets_to_commit), offsets_to_commit)
            commit(offsets_to_commit)

    try:
        i = 0
        while True:
            message = consumer.poll(0.1)
            if message is None:
                continue

            error = message.error()
            if error is not None:
                raise Exception(error)

            key = (message.topic(), message.partition())
            if key not in owned_partition_offsets:
                logger.warning('Skipping message for unowned partition: %r', key)
                continue

            i = i + 1
            owned_partition_offsets[key] = message.offset() + 1

            with metrics.timer('eventstream.duration', instance='get_task_kwargs_for_message'):
                task_kwargs = get_task_kwargs_for_message(message.value())

            if task_kwargs is not None:
                with metrics.timer('eventstream.duration', instance='dispatch_post_process_group_task'):
                    self._dispatch_post_process_group_task(**task_kwargs)

            if i % commit_batch_size == 0:
                commit_offsets()
    except KeyboardInterrupt:
        pass

    logger.debug('Committing offsets and closing consumer...')
    commit_offsets()
    consumer.close()
def test_get_task_kwargs_for_message_version_1_unexpected_operation():
    with pytest.raises(UnexpectedOperation):
        get_task_kwargs_for_message(json.dumps([1, "invalid", {}, {}]))
def test_get_task_kwargs_for_message_version_1_unsupported_operation():
    assert get_task_kwargs_for_message(json.dumps([1, "delete", {}])) is None
def test_get_task_kwargs_for_message_version_1_skip_consume():
    assert (
        get_task_kwargs_for_message(json.dumps([1, "insert", {}, {"skip_consume": True}]))
        is None
    )
def test_get_task_kwargs_for_message_invalid_version():
    with pytest.raises(InvalidVersion):
        get_task_kwargs_for_message(json.dumps([0, "insert", {}]))
def test_get_task_kwargs_for_message_invalid_payload():
    with pytest.raises(InvalidPayload):
        get_task_kwargs_for_message('{"format": "invalid"}')
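# Taken together, the tests above pin down the version-1 envelope consumed by
# get_task_kwargs_for_message: a JSON array of [version, operation, event_data,
# task_state]. The sketch below is a non-authoritative summary of those cases;
# the expected outcomes in the trailing comments come directly from the
# assertions above, the variable names are purely illustrative, and only the
# standard-library json module is assumed.
import json

insert_message = json.dumps([1, "insert", {"project_id": 1}, {"is_new": True}])  # dispatched with task kwargs
skipped_message = json.dumps([1, "insert", {}, {"skip_consume": True}])          # returns None (explicitly skipped)
delete_message = json.dumps([1, "delete", {}])                                   # returns None (unsupported operation)
unknown_operation = json.dumps([1, "invalid", {}, {}])                           # raises UnexpectedOperation
unknown_version = json.dumps([0, "insert", {}])                                  # raises InvalidVersion
malformed_payload = '{"format": "invalid"}'                                      # raises InvalidPayload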
def test_get_task_kwargs_for_message_version_1_skip_consume():
    assert get_task_kwargs_for_message(
        json.dumps([1, 'insert', {}, {'skip_consume': True}])) is None
def run_post_process_forwarder(
    self,
    consumer_group,
    commit_log_topic,
    synchronize_commit_group,
    commit_batch_size=100,
    initial_offset_reset="latest",
):
    logger.debug("Starting post-process forwarder...")

    cluster_name = settings.KAFKA_TOPICS[settings.KAFKA_EVENTS]["cluster"]

    consumer = SynchronizedConsumer(
        cluster_name=cluster_name,
        consumer_group=consumer_group,
        commit_log_topic=commit_log_topic,
        synchronize_commit_group=synchronize_commit_group,
        initial_offset_reset=initial_offset_reset,
    )

    # Next offset to commit for each owned (topic, partition); ``None`` means the
    # partition has been assigned but no message from it has been processed yet.
    owned_partition_offsets = {}

    def commit(partitions):
        results = consumer.commit(offsets=partitions, asynchronous=False)

        errors = [i for i in results if i.error is not None]
        if errors:
            raise Exception(
                "Failed to commit {}/{} partitions: {!r}".format(
                    len(errors), len(partitions), errors
                )
            )

        return results

    def on_assign(consumer, partitions):
        logger.info("Received partition assignment: %r", partitions)

        for i in partitions:
            if i.offset == OFFSET_INVALID:
                updated_offset = None
            elif i.offset < 0:
                raise Exception(
                    f"Received unexpected negative offset during partition assignment: {i!r}"
                )
            else:
                updated_offset = i.offset

            key = (i.topic, i.partition)
            previous_offset = owned_partition_offsets.get(key, None)
            if previous_offset is not None and previous_offset != updated_offset:
                logger.warning(
                    "Received new offset for owned partition %r, will overwrite previous stored offset %r with %r.",
                    key,
                    previous_offset,
                    updated_offset,
                )

            owned_partition_offsets[key] = updated_offset

    def on_revoke(consumer, partitions):
        logger.info("Revoked partition assignment: %r", partitions)

        offsets_to_commit = []

        for i in partitions:
            key = (i.topic, i.partition)

            try:
                offset = owned_partition_offsets.pop(key)
            except KeyError:
                logger.warning(
                    "Received unexpected partition revocation for unowned partition: %r",
                    i,
                    exc_info=True,
                )
                continue

            if offset is None:
                logger.debug("Skipping commit of unprocessed partition: %r", i)
                continue

            offsets_to_commit.append(TopicPartition(i.topic, i.partition, offset))

        if offsets_to_commit:
            logger.debug(
                "Committing offset(s) for %s revoked partition(s): %r",
                len(offsets_to_commit),
                offsets_to_commit,
            )
            commit(offsets_to_commit)

    consumer.subscribe([self.topic], on_assign=on_assign, on_revoke=on_revoke)

    def commit_offsets():
        offsets_to_commit = []
        for (topic, partition), offset in owned_partition_offsets.items():
            if offset is None:
                logger.debug("Skipping commit of unprocessed partition: %r", (topic, partition))
                continue

            offsets_to_commit.append(TopicPartition(topic, partition, offset))

        if offsets_to_commit:
            logger.debug(
                "Committing offset(s) for %s owned partition(s): %r",
                len(offsets_to_commit),
                offsets_to_commit,
            )
            commit(offsets_to_commit)

    # Shut down cleanly on SIGINT/SIGTERM rather than relying on KeyboardInterrupt,
    # so pending offsets are still committed when the process is terminated.
    shutdown_requested = False

    def handle_shutdown_request(signum: int, frame: Any) -> None:
        nonlocal shutdown_requested
        logger.debug("Received signal %r, requesting shutdown...", signum)
        shutdown_requested = True

    signal.signal(signal.SIGINT, handle_shutdown_request)
    signal.signal(signal.SIGTERM, handle_shutdown_request)

    i = 0
    while not shutdown_requested:
        message = consumer.poll(0.1)
        if message is None:
            continue

        error = message.error()
        if error is not None:
            raise Exception(error)

        key = (message.topic(), message.partition())
        if key not in owned_partition_offsets:
            logger.warning("Skipping message for unowned partition: %r", key)
            continue

        i = i + 1
        owned_partition_offsets[key] = message.offset() + 1

        with metrics.timer("eventstream.duration", instance="get_task_kwargs_for_message"):
            task_kwargs = get_task_kwargs_for_message(message.value())

        if task_kwargs is not None:
            with metrics.timer("eventstream.duration", instance="dispatch_post_process_group_task"):
                self._dispatch_post_process_group_task(**task_kwargs)

        if i % commit_batch_size == 0:
            commit_offsets()

    logger.debug("Committing offsets and closing consumer...")
    commit_offsets()
    consumer.close()