def __delivery_callback(
    self,
    future: Future[Message[KafkaPayload]],
    payload: KafkaPayload,
    error: KafkaError,
    message: ConfluentMessage,
) -> None:
    """Resolve ``future`` with the outcome of a produce request.

    On failure the future receives a ``TransportError`` wrapping ``error``.
    On success it receives a ``Message`` built from the topic, partition,
    offset and timestamp reported by ``message`` together with the original
    ``payload``. Any exception raised while building that result (including
    a missing timestamp) is stored on the future instead of propagating.

    :param future: Future to complete.
    :param payload: Payload that was handed to the producer.
    :param error: Delivery error, or ``None`` when delivery succeeded.
    :param message: Delivery report for the produced message.
    """
    if error is not None:
        future.set_exception(TransportError(error))
        return

    try:
        timestamp_type, timestamp_value = message.timestamp()
        # Compare by value: identity comparison against a constant only works
        # by accident of interpreter-level object caching.
        if timestamp_type == TIMESTAMP_NOT_AVAILABLE:
            raise ValueError("timestamp not available")

        future.set_result(
            Message(
                Partition(Topic(message.topic()), message.partition()),
                message.offset(),
                payload,
                # Presumably the timestamp is epoch milliseconds (hence the
                # division); the resulting datetime is naive.
                datetime.utcfromtimestamp(timestamp_value / 1000.0),
            )
        )
    except Exception as exc:  # named ``exc`` so the ``error`` parameter is not shadowed
        future.set_exception(exc)
def msg_to_topic_partition(msg: Message) -> TopicPartition:
    """
    Build a :class:`confluent_kafka.TopicPartition` from the topic, partition
    and offset reported by a :class:`confluent_kafka.Message`.

    :param msg: Message to read topic, partition and offset from.
    :return: A topic partition carrying the message's offset.
    """
    topic = msg.topic()
    partition = msg.partition()
    offset = msg.offset()
    return TopicPartition(topic, partition, offset)
def from_message(cls, msg: confluent_kafka.Message) -> 'Metadata':
    """Alternate constructor: build an instance from a consumed message.

    Copies topic, partition, offset and key from ``msg``, takes the second
    element of ``msg.timestamp()`` as the timestamp, and keeps the message
    itself under ``_raw``.
    """
    timestamp_info = msg.timestamp()
    fields = {
        'topic': msg.topic(),
        'partition': msg.partition(),
        'offset': msg.offset(),
        'timestamp': timestamp_info[1],
        'key': msg.key(),
        '_raw': msg,
    }
    return cls(**fields)
def __on_delivery(self, error: kafka.KafkaError, message: kafka.Message):
    """Delivery callback that only logs.

    A non-``None`` ``error`` is logged at error level. Otherwise the topic,
    partition and offset of ``message`` are logged at debug level, but only
    when ``globals.__DEBUG__`` is truthy.
    """
    if error is not None:
        failure_template = "KafkaStorage:__on_delivery():error:{}"
        logging.error(failure_template.format(error))
        return

    if not globals.__DEBUG__:
        return

    success_template = "KafkaStorage:__on_delivery():topic:{}:partition:{}:offset{}"
    logging.debug(
        success_template.format(
            message.topic(), message.partition(), message.offset()))
def commit_offset(context: RequestContext, data: Any, message: confluent_kafka.Message) -> None:
    """Synchronously commit the offset of ``message`` inside a child span.

    ``self`` is not a parameter here; it is resolved from the enclosing
    scope. ``data`` is accepted but not used.
    """
    topic, partition, offset = message.topic(), message.partition(), message.offset()
    logger.debug("committing topic %s partition %s offset %s", topic, partition, offset)

    commit_span = context.span.make_child("kafka.commit")
    with commit_span:
        self.consumer.commit(message=message, asynchronous=False)
def _confluent_to_binary_message(self, consumed_message: Message) -> BinaryMessage:
    """Convert a consumed message into a ``BinaryMessage``.

    Key, value, partition and offset are copied as-is; timestamp and headers
    go through ``_confluent_to_io_timestamp`` and ``_confluent_to_io_headers``.
    """
    fields = {
        "key": consumed_message.key(),
        "value": consumed_message.value(),
        "partition": consumed_message.partition(),
        "offset": consumed_message.offset(),
        "timestamp": self._confluent_to_io_timestamp(consumed_message),
        "headers": self._confluent_to_io_headers(consumed_message.headers()),
    }
    return BinaryMessage(**fields)
def _on_eof_received(self, msg: Message):
    """Record end-of-partition for ``msg`` and forward it to ``on_eof_received``.

    Marks the (topic, partition) of ``msg`` as having reached EOF, stores the
    topic partition derived from ``msg`` as the current offset for that
    partition, then invokes the ``on_eof_received`` hook through
    ``_unsafe_call_and_set_status``.
    """
    self.__eof_reached[(msg.topic(), msg.partition())] = True

    # When the sink has been restarted but is already at the end of the topic,
    # this is how we get the current offset. We need to keep committing it so
    # the offsets in kafka won't get deleted.
    topic_partition = msg_to_topic_partition(msg)
    offset_key = (topic_partition.topic, topic_partition.partition)
    logger.debug(f"Updating offset: {topic_partition}")
    self.__offsets[offset_key] = topic_partition

    self._unsafe_call_and_set_status(self.on_eof_received, msg)
def check_msg_equality(msgnum, expected, given: Message, expectFail=False):
    """Compare an expected message description with a received message.

    ``expected`` is a mapping with ``'value'`` and ``'key'`` entries and an
    optional ``'headers'`` entry; each is compared with the corresponding
    accessor of ``given``.

    * ``expectFail`` false: any mismatch is printed (prefixed with
      ``msgnum``) and the process exits with status 255.
    * ``expectFail`` true: a full match is printed as unexpected and the
      process exits with status 1; a mismatch returns normally.

    Raises:
        SystemExit: with code 255 or 1 as described above.
    """
    errs = []
    if expected['value'] != given.value():
        errs.append(f"Values do not match! Expected {expected['value']} got {given.value()}")
    if expected['key'] != given.key():
        errs.append(f"Keys do not match! Expected {expected['key']} got {given.key()}")
    if 'headers' in expected and expected['headers'] != given.headers():
        errs.append(f"Headers do not match! Expected {expected['headers']} got {given.headers()}")

    if expectFail:
        if not errs:
            print(f"{msgnum}: expected ({expected}) matched given. Did not expect that")
            # Raise SystemExit directly: the ``exit`` builtin is injected by
            # the ``site`` module and is not guaranteed to exist.
            raise SystemExit(1)
        return

    if errs:
        for err in errs:
            # Separate the message number from the text, matching the
            # "<msgnum>: ..." format used for the unexpected-match report.
            print(f"{msgnum}: {err}")
        raise SystemExit(255)
def notify(slack_client: WebClient, msg: confluent_kafka.Message) -> None:
    """Render the Slack notification for a plan message and post it.

    Aggregates the ``prior_plans`` entries of the message value, derives
    per-execution averages for the prior plans and for the plan described by
    the message itself, renders ``config.SLACK_MESSAGE_TEMPLATE`` with those
    figures and, when a client is supplied, posts the result to
    ``config.SLACK_NOTIFY_CHANNEL``.

    Every ratio falls back to ``0`` when its denominator is zero (no prior
    plans, or zero recorded executions) instead of raising
    ``ZeroDivisionError``; this mirrors the fallback the read-increase factor
    already had.

    :param slack_client: Client used to post the message; when falsy the
        template is still rendered but nothing is sent.
    :param msg: Consumed message whose value can be converted with ``dict()``.
    """

    def _ratio(numerator, denominator):
        # Zero-safe division shared by all averages and increase factors.
        return numerator / denominator if denominator else 0

    prior_plans_count, prior_times_sum, prior_reads_sum, prior_execs_sum, prior_last_execution = 0, 0, 0, 0, 0
    msg_val = dict(msg.value())

    for pp in msg_val['prior_plans']:
        prior_plans_count += 1
        prior_times_sum += pp['total_elapsed_time']
        prior_reads_sum += pp['total_logical_reads']
        prior_execs_sum += pp['execution_count']
        prior_last_execution = max(prior_last_execution, pp['last_execution_time'])

    # NOTE(review): the "/ 1000" suggests elapsed times arrive in
    # microseconds and are reported in milliseconds - confirm upstream.
    avg_prior_time_ms = _ratio(prior_times_sum, prior_execs_sum) / 1000
    avg_prior_reads = _ratio(prior_reads_sum, prior_execs_sum)
    avg_time_ms = _ratio(msg_val['total_elapsed_time'], msg_val['execution_count']) / 1000
    avg_reads = _ratio(msg_val['total_logical_reads'], msg_val['execution_count'])
    time_increase_factor = _ratio(avg_time_ms, avg_prior_time_ms)
    read_increase_factor = _ratio(avg_reads, avg_prior_reads)
    eviction_latency_seconds = int((msg_val['eviction_time'] - msg_val['creation_time']) / 1000)

    template = Template(config.SLACK_MESSAGE_TEMPLATE)
    template.globals['format_ts'] = common.format_ts
    rendered = template.render(
        msg=msg,
        prior_plans_count=prior_plans_count,
        prior_times_sum=prior_times_sum,
        prior_reads_sum=prior_reads_sum,
        prior_execs_sum=prior_execs_sum,
        prior_last_execution=prior_last_execution,
        avg_prior_time_ms=avg_prior_time_ms,
        avg_prior_reads=avg_prior_reads,
        avg_time_ms=avg_time_ms,
        avg_reads=avg_reads,
        time_increase_factor=time_increase_factor,
        read_increase_factor=read_increase_factor,
        eviction_latency_seconds=eviction_latency_seconds,
        hostname=socket.getfqdn(),
    )

    if slack_client:
        try:
            if config.SLACK_POST_AS_BLOCKS:
                # In block mode the rendered template must itself be JSON.
                slack_client.chat_postMessage(channel=config.SLACK_NOTIFY_CHANNEL, blocks=json.loads(rendered))
            else:
                slack_client.chat_postMessage(channel=config.SLACK_NOTIFY_CHANNEL, text=rendered)
        except SlackApiError as e:
            # Posting is best-effort: a Slack API failure is logged, not raised.
            logger.warning(f"Error sending message to Slack: {e.response.get('error', '<none>')}")
def process_message(self, message: Message) -> Optional[MutableMapping[str, Any]]:
    """Unpack a msgpack-encoded message into a profile mapping.

    The message value is unpacked with msgpack, its ``"payload"`` entry is
    parsed as JSON, and the envelope's ``organization_id``, ``project_id``
    and ``received`` entries are copied onto the parsed profile.
    """
    envelope = msgpack.unpackb(message.value(), use_list=False)
    profile = cast(Dict[str, Any], json.loads(envelope["payload"]))

    copied_fields = ("organization_id", "project_id", "received")
    profile.update({name: envelope[name] for name in copied_fields})
    return profile
def __decode(self, message: Message):
    """
    Decode the JSON value of a message coming from Kafka.

    A truthy value is parsed with ``json.loads`` (typically yielding a
    Python dict); a falsy value (``None`` or empty) is returned unchanged.

    :param message: Message whose ``value()`` holds the JSON document.
    :return: The parsed JSON, or the original falsy value.
    """
    value = message.value()  # can be None, str, bytes
    if value:
        # ``json.loads`` decodes bytes itself. Its ``encoding`` keyword was
        # removed in Python 3.9, where passing it raises TypeError.
        value = json.loads(value)
    return value
def __handle_msg(self, message: Message) -> CommitDecision:
    """Parse a control message and let the recovery state decide on committing.

    The JSON value is parsed with ``parse_control_message`` and dispatched on
    its type to the matching ``process_*`` method of the recovery state.
    Unrecognized messages are logged and yield ``DO_NOT_COMMIT``.
    """
    parsed_message = parse_control_message(json.loads(message.value()))

    if isinstance(parsed_message, SnapshotInit):
        return self.__recovery_state.process_init(parsed_message)
    if isinstance(parsed_message, SnapshotAbort):
        return self.__recovery_state.process_abort(parsed_message)
    if isinstance(parsed_message, SnapshotLoaded):
        return self.__recovery_state.process_snapshot_loaded(parsed_message)

    logger.warning("Received an unrecognized message: %r", parsed_message)
    return CommitDecision.DO_NOT_COMMIT
def create_incident(message: Message, topic: str) -> dict:
    """Build an incident dictionary from a Kafka message.

    Args:
        message: Kafka message to create the incident from
        topic: Topic the message was read from

    Returns:
        Incident with ``name``, ``details`` and ``rawJSON`` entries, plus
        ``occurred`` when the message carries an available timestamp.
    """
    payload = message.value()
    partition, offset = message.partition(), message.offset()
    text = payload.decode('utf-8')

    incident = {
        'name': 'Kafka {} partition:{} offset:{}'.format(topic, partition, offset),
        'details': text,
        'rawJSON': json.dumps({
            'Topic': topic,
            'Partition': partition,
            'Offset': offset,
            'Message': text,
        }),
    }

    # timestamp() returns a pair of [timestamp_type, timestamp]
    timestamp = message.timestamp()
    if timestamp and len(timestamp) == 2 and timestamp[0] != TIMESTAMP_NOT_AVAILABLE:
        incident['occurred'] = timestamp_to_datestring(timestamp[1])

    demisto.debug(
        f"Creating incident from topic {topic} partition {partition} offset {offset}"
    )
    return incident
def message_to_metadata(self, message: _Message) -> RecordMetadata:
    """Build the ``RecordMetadata`` (topic, partition, TP, offset) for ``message``."""
    tp = TP(message.topic(), message.partition())
    topic, partition = tp
    offset = message.offset()
    return RecordMetadata(topic, partition, tp, offset)
def process_message(self, message: confluent_kafka.Message) -> None:
    """Update the running statistics with one consumed message.

    Every message is counted and checked for offset gaps on its partition.
    Messages without a value are counted as tombstones. Snapshot messages
    update the snapshot counters and key bounds; all other messages are
    treated as change entries and update the per-operation counters, the
    change-index bounds and the latest event time seen.

    :param message: Consumed message; its value is either ``None`` or
        something ``dict()`` accepts.
    """
    self.total_count += 1

    # Offset-gap accounting. A partition seen for the first time is treated
    # as if offset -1 preceded it, so a first offset of N adds N to the count.
    if message.partition() not in self._last_processed_offset_by_partition:
        self._last_processed_offset_by_partition[message.partition()] = -1
    self.missing_offsets += (
        message.offset() -
        self._last_processed_offset_by_partition[message.partition()] - 1)
    self._last_processed_offset_by_partition[
        message.partition()] = message.offset()

    if message.value() is None:
        self.tombstone_count += 1
        return

    # The payload may be nested under a wrapper key; unwrap it if present.
    if constants.UNIFIED_TOPIC_MSG_DATA_WRAPPER_NAME in message.value():
        message_body = dict(
            message.value()[constants.UNIFIED_TOPIC_MSG_DATA_WRAPPER_NAME])
    else:
        message_body = dict(message.value())

    key = extract_key_tuple(self.table, message_body)
    operation_name = message_body[constants.OPERATION_NAME]

    if operation_name == constants.SNAPSHOT_OPERATION_NAME:
        self.snapshot_count += 1
        self.keys_seen_in_snapshots.add(key)
        if self.min_snapshot_key_seen is None or key < self.min_snapshot_key_seen:
            self.min_snapshot_key_seen = key
        if self.max_snapshot_key_seen is None or key > self.max_snapshot_key_seen:
            self.max_snapshot_key_seen = key
        # A regression is counted when the previous snapshot key on this
        # partition is smaller than the current one.
        if message.partition() in self._last_snapshot_key_seen_for_partition and \
                self._last_snapshot_key_seen_for_partition[message.partition()] < key:
            self.snapshot_key_order_regressions_count += 1
            # NOTE(review): presumably leftover debug output of the offset
            # where the regression was seen - confirm it is intended.
            print(message.offset())
        self._last_snapshot_key_seen_for_partition[
            message.partition()] = key
        return

    # Deletes are tallied here regardless of the min-LSN cutoff applied below.
    if operation_name == constants.DELETE_OPERATION_NAME:
        self.all_deletes_in_topic += 1
        self.deleted_keys.add(key)

    msg_change_index = change_index.ChangeIndex.from_avro_ready_dict(
        message_body)
    if msg_change_index.lsn < self.table.min_lsn:
        # the live change table has been truncated and no longer has this entry
        return

    if operation_name == constants.DELETE_OPERATION_NAME:
        self.delete_count += 1
    elif operation_name == constants.INSERT_OPERATION_NAME:
        self.insert_count += 1
    elif operation_name == constants.POST_UPDATE_OPERATION_NAME:
        self.update_count += 1
    else:
        # Unknown operations are counted but excluded from the stats below.
        self.unknown_operation_count += 1
        return

    # Built from the same message body as ``msg_change_index`` above.
    change_idx = change_index.ChangeIndex.from_avro_ready_dict(
        message_body)
    self.keys_seen_in_changes.add(key)
    if self.min_change_index_seen is None or change_idx < self.min_change_index_seen:
        self.min_change_index_seen = change_idx
    if self.max_change_index_seen is None or change_idx > self.max_change_index_seen:
        self.max_change_index_seen = change_idx
    # A regression is counted when the previous change index on this
    # partition is greater than the current one.
    if message.partition() in self._last_change_index_seen_for_partition and \
            self._last_change_index_seen_for_partition[message.partition()] > change_idx:
        self.change_index_order_regressions_count += 1
    self._last_change_index_seen_for_partition[
        message.partition()] = change_idx

    event_time = datetime.datetime.fromisoformat(
        message_body[constants.EVENT_TIME_NAME])
    if self.latest_change_seen is None or event_time > self.latest_change_seen:
        self.latest_change_seen = event_time
    return
def raise_for_message(message: Message):
    """Raise if ``message`` is missing or carries a Kafka error.

    ``None`` raises ``MessageEmptyException``; a message whose ``error()`` is
    not ``None`` is passed to ``raise_for_kafka_error``. Otherwise returns
    ``None``.
    """
    if message is None:
        raise MessageEmptyException

    kafka_error = message.error()
    if kafka_error is not None:
        raise_for_kafka_error(kafka_error)
def _on_message_received(self, msg: Message):
    """Handle a regular message.

    Clears the EOF flag for the message's (topic, partition), invokes the
    ``on_message_received`` hook through ``_unsafe_call_and_set_status`` and
    then records the message's offset.
    """
    partition_key = (msg.topic(), msg.partition())
    self.__eof_reached[partition_key] = False

    self._unsafe_call_and_set_status(self.on_message_received, msg)
    self._update_offset_from_message(msg)
def delivery_callback(error: KafkaError, message: Message) -> None:
    """Producer delivery callback: raise on failure, log the value on success.

    :param error: Delivery error, or ``None`` when the message was delivered.
    :param message: The message the delivery report refers to.
    :raises KafkaException: wrapping ``error`` when it is not itself an
        exception instance.
    """
    if error is None:
        logger.info("Message sent %r", message.value())
        return

    if isinstance(error, BaseException):
        # Already raisable: keep the original behaviour.
        raise error

    # A confluent-kafka ``KafkaError`` is a plain object, not an exception;
    # raising it directly fails with "exceptions must derive from
    # BaseException" and hides the real error. Wrap it instead.
    from confluent_kafka import KafkaException

    raise KafkaException(error)
def _confluent_to_io_timestamp(consumed_message: Message) -> datetime.datetime:
    """Return the message timestamp as a timezone-aware UTC ``datetime``.

    The second element of ``consumed_message.timestamp()`` is divided by 1000
    and interpreted as seconds since the epoch.
    """
    epoch_value = consumed_message.timestamp()[1]
    epoch_seconds = epoch_value / 1000
    utc = datetime.timezone.utc
    return datetime.datetime.fromtimestamp(epoch_seconds, tz=utc)
def handle(self, message: confluent_kafka.Message) -> None:
    """Process one consumed message inside a server span.

    Tags the span with the message's Kafka coordinates, unpacks the value
    with ``message_unpack_fn``, calls ``handler_fn`` (and ``on_success_fn``
    when set) and reports latency and offset metrics. A message that cannot
    be unpacked is skipped; any other exception is logged and re-raised.

    :param message: Message, or error event, returned by the consumer.
    :raises Exception: Any unhandled error, including the ``ValueError``
        raised when ``message`` carries a Kafka error.
    """
    context = self.baseplate.make_context_object()
    try:
        # We place the call to ``baseplate.make_server_span`` inside the
        # try/except block because we still want Baseplate to see and
        # handle the error (publish it to error reporting)
        with self.baseplate.make_server_span(
                context, f"{self.name}.handler") as span:
            error = message.error()
            if error:
                # this isn't a real message, but is an error from Kafka
                raise ValueError(f"KafkaError: {error.str()}")

            topic = message.topic()
            offset = message.offset()
            partition = message.partition()

            span.set_tag("kind", "consumer")
            span.set_tag("kafka.topic", topic)
            span.set_tag("kafka.key", message.key())
            span.set_tag("kafka.partition", partition)
            span.set_tag("kafka.offset", offset)
            span.set_tag("kafka.timestamp", message.timestamp())

            blob: bytes = message.value()

            try:
                data = self.message_unpack_fn(blob)
            except Exception:
                # An undecodable payload is counted and skipped; returning
                # here does not reach the outer handler that re-raises.
                logger.error("skipping invalid message")
                context.span.incr_tag(
                    f"{self.name}.{topic}.invalid_message")
                return

            try:
                # Presumably ``endpoint_timestamp`` is epoch milliseconds,
                # which makes ``message_latency`` a value in seconds - confirm.
                ingest_timestamp_ms = data["endpoint_timestamp"]
                now_ms = int(time.time() * 1000)
                message_latency = (now_ms - ingest_timestamp_ms) / 1000
            except (KeyError, TypeError):
                # we can't guarantee that all publishers populate this field
                # v2 events publishers (event collectors) do, but future
                # kafka publishers may not
                message_latency = None

            self.handler_fn(context, data, message)

            if self.on_success_fn:
                self.on_success_fn(context, data, message)

            if message_latency is not None:
                context.metrics.timer(f"{self.name}.{topic}.latency").send(
                    message_latency)

            # Record the offset just handled for this topic/partition.
            context.metrics.gauge(
                f"{self.name}.{topic}.offset.{partition}").replace(offset)
    except Exception:
        # let this exception crash the server so we'll stop processing messages
        # and won't commit offsets. when the server restarts it will get
        # this message again and try to process it.
        logger.exception(
            "Unhandled error while trying to process a message, terminating the server"
        )
        raise
def raise_delivery_errors(kafka_error: confluent_kafka.KafkaError,
                          msg: confluent_kafka.Message) -> None:
    """Raise a ``KafkaException`` if a delivery report signals failure.

    The callback-level ``kafka_error`` takes precedence; when it is ``None``
    the error attached to ``msg`` is checked instead.
    """
    failure = kafka_error if kafka_error is not None else msg.error()
    if failure is not None:
        raise KafkaException.from_kafka_error(failure)
def kafka_producer_delivery_cb(err: confluent_kafka.KafkaError,
                               msg: confluent_kafka.Message) -> None:
    """Delivery callback that logs failed deliveries with the record key.

    Successful deliveries (``err`` is ``None``) are ignored.
    """
    if err is None:
        return
    record_key = msg.key()
    logger.error("Delivery failed for record %s: %s", record_key, err)
def decode_standard_message(msg: Message) -> dict:
    """Decode the message value as UTF-8 and parse it as JSON."""
    raw_value = msg.value()
    text = raw_value.decode('utf-8')
    return json.loads(text)
def handle_message(self, message: Message) -> None:
    """
    Parses the value from Kafka, and if valid passes the payload to the callback defined by the
    subscription. If the subscription has been removed, or no longer has a valid callback then
    just log metrics/errors and continue.

    The callback is skipped (with a metric and/or log entry) when the value cannot be
    parsed, the subscription is not active, the subscription does not exist (in which
    case its deletion from snuba is attempted), or no handler is registered for the
    subscription type.

    :param message: Message whose value holds the subscription update.
    :return: None
    """
    with sentry_sdk.push_scope() as scope:
        try:
            with metrics.timer(
                    "snuba_query_subscriber.parse_message_value"):
                contents = self.parse_message_value(message.value())
        except InvalidMessageError:
            # If the message is in an invalid format, just log the error
            # and continue
            logger.exception(
                "Subscription update could not be parsed",
                extra={
                    "offset": message.offset(),
                    "partition": message.partition(),
                    "value": message.value(),
                },
            )
            return

        scope.set_tag("query_subscription_id", contents["subscription_id"])

        try:
            with metrics.timer(
                    "snuba_query_subscriber.fetch_subscription"):
                subscription: QuerySubscription = QuerySubscription.objects.get_from_cache(
                    subscription_id=contents["subscription_id"])
                # Updates for subscriptions that are not active are dropped.
                if subscription.status != QuerySubscription.Status.ACTIVE.value:
                    metrics.incr(
                        "snuba_query_subscriber.subscription_inactive")
                    return
        except QuerySubscription.DoesNotExist:
            metrics.incr(
                "snuba_query_subscriber.subscription_doesnt_exist")
            logger.error(
                "Received subscription update, but subscription does not exist",
                extra={
                    "offset": message.offset(),
                    "partition": message.partition(),
                    "value": message.value(),
                },
            )
            # Best-effort cleanup of the unknown subscription: a failure is
            # logged and the update is dropped either way.
            try:
                _delete_from_snuba(self.topic_to_dataset[message.topic()],
                                   contents["subscription_id"])
            except Exception:
                logger.exception(
                    "Failed to delete unused subscription from snuba.")
            return

        if subscription.type not in subscriber_registry:
            metrics.incr(
                "snuba_query_subscriber.subscription_type_not_registered")
            logger.error(
                "Received subscription update, but no subscription handler registered",
                extra={
                    "offset": message.offset(),
                    "partition": message.partition(),
                    "value": message.value(),
                },
            )
            return

        sentry_sdk.set_tag("project_id", subscription.project_id)
        sentry_sdk.set_tag("query_subscription_id", contents["subscription_id"])

        callback = subscriber_registry[subscription.type]
        # The callback runs inside both the span and the duration timer.
        with sentry_sdk.start_span(
                op="process_message") as span, metrics.timer(
                    "snuba_query_subscriber.callback.duration",
                    instance=subscription.type):
            span.set_data("payload", contents)
            span.set_data("subscription_dataset", subscription.snuba_query.dataset)
            span.set_data("subscription_query", subscription.snuba_query.query)
            span.set_data("subscription_aggregation", subscription.snuba_query.aggregate)
            span.set_data("subscription_time_window", subscription.snuba_query.time_window)
            span.set_data("subscription_resolution", subscription.snuba_query.resolution)
            span.set_data("message_offset", message.offset())
            span.set_data("message_partition", message.partition())
            span.set_data("message_value", message.value())
            callback(contents, subscription)
def decode_image_message(msg: Message) -> np.ndarray:
    """Decode the encoded image bytes carried by ``msg`` into an array.

    The message value is viewed as a flat ``uint8`` buffer and handed to
    ``cv2.imdecode`` with flag ``-1`` (OpenCV's ``IMREAD_UNCHANGED``).

    :param msg: Message whose value holds the encoded image bytes.
    :return: Whatever ``cv2.imdecode`` returns for that buffer.
    """
    # ``np.frombuffer`` replaces the deprecated binary mode of
    # ``np.fromstring`` (removed in recent NumPy) and avoids copying.
    img_mat = np.frombuffer(msg.value(), dtype=np.uint8)
    return cv2.imdecode(img_mat, -1)
def handle_message(self, message: Message) -> None:
    """
    Parses the value from Kafka, and if valid passes the payload to the callback defined by the
    subscription. If the subscription has been removed, or no longer has a valid callback then
    just log metrics/errors and continue.

    The callback is skipped (with a metric and/or log entry) when the value cannot be
    parsed, the subscription is not active, the subscription does not exist (in which
    case its deletion from snuba is attempted), or no handler is registered for the
    subscription type.

    :param message: Message whose value holds the subscription update.
    :return: None
    """
    # set a commit time deadline only after the first message for this batch is seen
    if not self.__batch_deadline:
        # Absolute deadline: now plus the configured timeout converted from
        # milliseconds to seconds.
        self.__batch_deadline = self.commit_batch_timeout_ms / 1000.0 + time.time()

    with sentry_sdk.push_scope() as scope:
        try:
            with metrics.timer("snuba_query_subscriber.parse_message_value"):
                contents = self.parse_message_value(message.value())
        except InvalidMessageError:
            # If the message is in an invalid format, just log the error
            # and continue
            logger.exception(
                "Subscription update could not be parsed",
                extra={
                    "offset": message.offset(),
                    "partition": message.partition(),
                    "value": message.value(),
                },
            )
            return

        scope.set_tag("query_subscription_id", contents["subscription_id"])

        try:
            with metrics.timer("snuba_query_subscriber.fetch_subscription"):
                subscription: QuerySubscription = QuerySubscription.objects.get_from_cache(
                    subscription_id=contents["subscription_id"]
                )
                # Updates for subscriptions that are not active are dropped.
                if subscription.status != QuerySubscription.Status.ACTIVE.value:
                    metrics.incr("snuba_query_subscriber.subscription_inactive")
                    return
        except QuerySubscription.DoesNotExist:
            metrics.incr("snuba_query_subscriber.subscription_doesnt_exist")
            logger.error(
                "Received subscription update, but subscription does not exist",
                extra={
                    "offset": message.offset(),
                    "partition": message.partition(),
                    "value": message.value(),
                },
            )
            # Best-effort cleanup of the unknown subscription: every failure
            # below is logged and the update is dropped either way.
            try:
                if "entity" in contents:
                    entity_key = contents["entity"]
                else:
                    # XXX(ahmed): Remove this logic. This was kept here as backwards compat
                    # for subscription updates with schema version `2`. However schema version 3
                    # sends the "entity" in the payload
                    # The entity is the text inside the parentheses that
                    # follow a leading MATCH/match in the query.
                    entity_regex = r"^(MATCH|match)[ ]*\(([^)]+)\)"
                    entity_match = re.match(entity_regex, contents["request"]["query"])
                    if not entity_match:
                        raise InvalidMessageError(
                            "Unable to fetch entity from query in message"
                        )
                    entity_key = entity_match.group(2)
                _delete_from_snuba(
                    self.topic_to_dataset[message.topic()],
                    contents["subscription_id"],
                    EntityKey(entity_key),
                )
            except InvalidMessageError as e:
                logger.exception(e)
            except Exception:
                logger.exception("Failed to delete unused subscription from snuba.")
            return

        if subscription.type not in subscriber_registry:
            metrics.incr("snuba_query_subscriber.subscription_type_not_registered")
            logger.error(
                "Received subscription update, but no subscription handler registered",
                extra={
                    "offset": message.offset(),
                    "partition": message.partition(),
                    "value": message.value(),
                },
            )
            return

        sentry_sdk.set_tag("project_id", subscription.project_id)
        sentry_sdk.set_tag("query_subscription_id", contents["subscription_id"])

        callback = subscriber_registry[subscription.type]
        # The callback runs inside both the span and the duration timer.
        with sentry_sdk.start_span(op="process_message") as span, metrics.timer(
            "snuba_query_subscriber.callback.duration", instance=subscription.type
        ):
            span.set_data("payload", contents)
            span.set_data("subscription_dataset", subscription.snuba_query.dataset)
            span.set_data("subscription_query", subscription.snuba_query.query)
            span.set_data("subscription_aggregation", subscription.snuba_query.aggregate)
            span.set_data("subscription_time_window", subscription.snuba_query.time_window)
            span.set_data("subscription_resolution", subscription.snuba_query.resolution)
            span.set_data("message_offset", message.offset())
            span.set_data("message_partition", message.partition())
            span.set_data("message_value", message.value())
            callback(contents, subscription)