async def commit_offsets(
    self, internal_name: Tuple[str, str], content_type: str, request_data: dict, cluster_metadata: dict
):
    self.log.info("Committing offsets for %s", internal_name)
    self._assert_consumer_exists(internal_name, content_type)
    if request_data:
        self._assert_has_key(request_data, "offsets", content_type)
    payload = {}
    for el in request_data.get("offsets", []):
        for k in ["partition", "offset"]:
            convert_to_int(el, k, content_type)
        # If we commit for a partition that does not belong to this consumer, then the internal error raised
        # is marked as retriable, and thus the commit method will remain blocked in what looks like an
        # infinite loop, so validate the topic and partition up front.
        self._topic_and_partition_valid(cluster_metadata, el, content_type)
        payload[TopicPartition(el["topic"], el["partition"])] = OffsetAndMetadata(el["offset"] + 1, None)

    async with self.consumer_locks[internal_name]:
        consumer = self.consumers[internal_name].consumer
        payload = payload or None
        try:
            consumer.commit(offsets=payload)
        except KafkaError as e:
            KarapaceBase.internal_error(message=f"error sending commit request: {e}", content_type=content_type)
    empty_response()
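# Illustrative only: the request shape commit_offsets() above expects, and the payload it
# builds from it, assuming kafka-python (which these snippets appear to use). The topic
# name and numbers are invented. Note the stored offset is el["offset"] + 1, i.e. the
# next offset to read, matching the Kafka convention for committed offsets.
from kafka.structs import OffsetAndMetadata, TopicPartition

example_commit_request = {"offsets": [{"topic": "test-topic", "partition": 0, "offset": 41}]}
example_commit_payload = {TopicPartition("test-topic", 0): OffsetAndMetadata(42, None)}  # 41 + 1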
def _assert_consumer_exists(self, internal_name: Tuple[str, str], content_type: str) -> None:
    if internal_name not in self.consumers:
        KarapaceBase.not_found(
            message=f"Consumer for {internal_name} not found among {list(self.consumers.keys())}",
            content_type=content_type,
            sub_code=RESTErrorCodes.CONSUMER_NOT_FOUND.value,
        )
@staticmethod
def _assert(cond: bool, code: HTTPStatus, sub_code: int, message: str, content_type: str) -> None:
    # No `self` in the signature, so this must be a staticmethod; callers invoke it
    # as self._assert(...) with keyword arguments only.
    if not cond:
        KarapaceBase.r(
            content_type=content_type,
            status=code,
            body={"message": message, "error_code": sub_code},
        )
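# Illustrative only: a failed _assert() answers the request instead of raising. With
# code=HTTPStatus.NOT_ACCEPTABLE and sub_code=40601 (the values used by fetch() further
# down), the response body would look like this:
example_assert_error_body = {
    "message": "Consumer format avro does not match the embedded format binary",
    "error_code": 40601,
}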
def __init__(self, config_file_path: str, config: dict) -> None:
    KarapaceBase.__init__(self, config_file_path=config_file_path, config=config)
    KafkaRest._init(self, config=config)
    KafkaRest._add_routes(self)
    KarapaceSchemaRegistry._init(self, config=config)
    KarapaceSchemaRegistry._add_routes(self)
    self.log = logging.getLogger("KarapaceAll")
    self.app.on_shutdown.append(self.close_by_app)
async def get_subscription(self, internal_name: Tuple[str, str], content_type: str):
    self.log.info("Retrieving subscription for %s", internal_name)
    self._assert_consumer_exists(internal_name, content_type)
    async with self.consumer_locks[internal_name]:
        consumer = self.consumers[internal_name].consumer
        if consumer.subscription() is None:
            topics = []
        else:
            topics = list(consumer.subscription())
        KarapaceBase.r(content_type=content_type, body={"topics": topics})
@staticmethod
def _topic_and_partition_valid(cluster_metadata: dict, topic_data: dict, content_type: str):
    # No `self` in the signature, so this must be a staticmethod.
    ConsumerManager._has_topic_and_partition_keys(topic_data, content_type)
    topic = topic_data["topic"]
    partition = topic_data["partition"]
    if topic not in cluster_metadata["topics"]:
        KarapaceBase.not_found(message=f"Topic {topic} not found", content_type=content_type, sub_code=40401)
    partitions = {pi["partition"] for pi in cluster_metadata["topics"][topic]["partitions"]}
    if partition not in partitions:
        KarapaceBase.not_found(
            message=f"Partition {partition} not found for topic {topic}",
            content_type=content_type,
            sub_code=40402,
        )
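# A sketch of the cluster_metadata shape _topic_and_partition_valid() walks; the topic
# name and partition ids are invented, and only the keys the check actually reads
# ("topics", per-topic "partitions", per-partition "partition") are shown.
example_cluster_metadata = {
    "topics": {
        "test-topic": {
            "partitions": [{"partition": 0}, {"partition": 1}],
        },
    },
}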
async def get_assignments(self, internal_name: Tuple[str, str], content_type: str):
    self.log.info("Retrieving assignment for %s", internal_name)
    self._assert_consumer_exists(internal_name, content_type)
    async with self.consumer_locks[internal_name]:
        consumer = self.consumers[internal_name].consumer
        KarapaceBase.r(
            content_type=content_type,
            body={"partitions": [{"topic": pd.topic, "partition": pd.partition} for pd in consumer.assignment()]},
        )
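# Illustrative response body from get_assignments(), assuming the consumer is assigned
# two partitions of a hypothetical "test-topic":
example_assignments_body = {
    "partitions": [
        {"topic": "test-topic", "partition": 0},
        {"topic": "test-topic", "partition": 1},
    ],
}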
def __init__(self, config_path: str):
    self.config = KarapaceBase.read_config(config_path)
    self.hostname = f"http://{self.config['advertised_hostname']}:{self.config['port']}"
    self.log = logging.getLogger("RestConsumerManager")
    self.deserializer = SchemaRegistryDeserializer(config_path=config_path)
    self.consumers = {}
    self.consumer_locks = defaultdict(Lock)
def __init__(self, config_path, backup_path, topic_option=None):
    self.config = KarapaceBase.read_config(config_path)
    self.backup_location = backup_path
    self.topic_name = topic_option or self.config["topic_name"]
    self.log = logging.getLogger("SchemaBackup")
    self.consumer = None
    self.producer = None
    self.admin_client = None
    self.timeout_ms = 1000
async def get_offsets(self, internal_name: Tuple[str, str], content_type: str, request_data: dict):
    self.log.info("Retrieving offsets for %s", internal_name)
    self._assert_consumer_exists(internal_name, content_type)
    self._assert_has_key(request_data, "partitions", content_type)
    response = {"offsets": []}
    async with self.consumer_locks[internal_name]:
        consumer = self.consumers[internal_name].consumer
        for el in request_data["partitions"]:
            convert_to_int(el, "partition", content_type)
            tp = TopicPartition(el["topic"], el["partition"])
            commit_info = consumer.committed(tp, metadata=True)
            if not commit_info:
                continue
            response["offsets"].append({
                "topic": tp.topic,
                "partition": tp.partition,
                "metadata": commit_info.metadata,
                "offset": commit_info.offset,
            })
    KarapaceBase.r(body=response, content_type=content_type)
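# Illustrative request and response for get_offsets(); the topic and numbers are
# invented. Partitions without a committed offset are silently skipped, since
# consumer.committed() returns nothing for them.
example_offsets_request = {"partitions": [{"topic": "test-topic", "partition": 0}]}
example_offsets_response = {
    "offsets": [{"topic": "test-topic", "partition": 0, "metadata": "", "offset": 42}],
}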
async def create_consumer(self, group_name: str, request_data: dict, content_type: str):
    group_name = group_name.strip("/")
    self.log.info("Create consumer request for group %s", group_name)
    consumer_name = request_data.get("name") or self.new_name()
    internal_name = self.create_internal_name(group_name, consumer_name)
    async with self.consumer_locks[internal_name]:
        if internal_name in self.consumers:
            self.log.error("Error creating duplicate consumer in group %s with id %s", group_name, consumer_name)
            KarapaceBase.r(
                status=HTTPStatus.CONFLICT,
                content_type=content_type,
                body={
                    "error_code": RESTErrorCodes.CONSUMER_ALREADY_EXISTS.value,
                    "message": f"Consumer {consumer_name} already exists",
                },
            )
        self._validate_create_consumer(request_data, content_type)
        self.log.info(
            "Creating new consumer in group %s with id %s and request_info %r", group_name, consumer_name, request_data
        )
        for k in ["consumer.request.timeout.ms", "fetch_min_bytes"]:
            convert_to_int(request_data, k, content_type)
        try:
            enable_commit = request_data.get("auto.commit.enable", self.config["consumer_enable_auto_commit"])
            if isinstance(enable_commit, str):
                enable_commit = enable_commit.lower() == "true"
            request_data["consumer.request.timeout.ms"] = request_data.get(
                "consumer.request.timeout.ms", self.config["consumer_request_timeout_ms"]
            )
            request_data["auto.commit.enable"] = enable_commit
            request_data["auto.offset.reset"] = request_data.get("auto.offset.reset", "earliest")
            fetch_min_bytes = request_data.get("fetch.min.bytes", self.config["fetch_min_bytes"])
            c = await self.create_kafka_consumer(fetch_min_bytes, group_name, internal_name, request_data)
        except KafkaConfigurationError as e:
            KarapaceBase.internal_error(str(e), content_type)
        self.consumers[internal_name] = TypedConsumer(
            consumer=c, serialization_format=request_data["format"], config=request_data
        )
        base_uri = urljoin(self.hostname, f"consumers/{group_name}/instances/{consumer_name}")
        KarapaceBase.r(content_type=content_type, body={"base_uri": base_uri, "instance_id": consumer_name})
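# Illustrative only: a create-consumer request body with the knobs create_consumer()
# reads, and the response it builds. The consumer name, group, and host below are
# invented.
example_create_request = {
    "name": "my-consumer",
    "format": "binary",
    "auto.offset.reset": "earliest",
    "auto.commit.enable": "false",  # string booleans are normalized above
    "consumer.request.timeout.ms": 11000,
}
example_create_response = {
    "base_uri": "http://proxy.example:8082/consumers/my-group/instances/my-consumer",
    "instance_id": "my-consumer",
}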
async def fetch(self, internal_name: Tuple[str, str], content_type: str, formats: dict, query_params: dict):
    self.log.info(
        "Running fetch for name %s with parameters %r and formats %r", internal_name, query_params, formats
    )
    self._assert_consumer_exists(internal_name, content_type)
    async with self.consumer_locks[internal_name]:
        consumer = self.consumers[internal_name].consumer
        serialization_format = self.consumers[internal_name].serialization_format
        config = self.consumers[internal_name].config
        request_format = formats["embedded_format"]
        self._assert(
            cond=serialization_format == request_format,
            code=HTTPStatus.NOT_ACCEPTABLE,
            sub_code=40601,
            content_type=content_type,
            message=f"Consumer format {serialization_format} does not match the embedded format {request_format}",
        )
        self.log.info("Fetch request for %s with params %r", internal_name, query_params)
        try:
            timeout = (
                int(query_params["timeout"]) if "timeout" in query_params else config["consumer.request.timeout.ms"]
            )
            # To stay more in line with the Confluent proxy, do a number of fetches per request
            # while respecting the max fetch request size.
            max_bytes = (
                int(query_params["max_bytes"]) if "max_bytes" in query_params else consumer.config["fetch_max_bytes"]
            )
        except ValueError:
            KarapaceBase.internal_error(message=f"Invalid request parameters: {query_params}", content_type=content_type)
        for val in [timeout, max_bytes]:
            if not val:
                continue
            if val <= 0:
                KarapaceBase.internal_error(message=f"Invalid request parameter {val}", content_type=content_type)
        response = []
        self.log.info(
            "Will poll multiple times for a single message with a total timeout of %dms, "
            "until at least %d bytes have been fetched",
            timeout,
            max_bytes,
        )
        read_bytes = 0
        start_time = time.monotonic()
        poll_data = defaultdict(list)
        message_count = 0
        while read_bytes < max_bytes and start_time + timeout / 1000 > time.monotonic():
            time_left = start_time + timeout / 1000 - time.monotonic()
            bytes_left = max_bytes - read_bytes
            self.log.info(
                "Polling with %r time left and %d bytes left, gathered %d messages so far",
                time_left,
                bytes_left,
                message_count,
            )
            data = consumer.poll(timeout_ms=timeout, max_records=1)
            self.log.debug("Successfully polled for messages")
            for topic, records in data.items():
                for rec in records:
                    message_count += 1
                    read_bytes += (
                        max(0, rec.serialized_key_size)
                        + max(0, rec.serialized_value_size)
                        + max(0, rec.serialized_header_size)
                    )
                    poll_data[topic].append(rec)
        self.log.info("Gathered %d total messages", message_count)
        for tp in poll_data:
            for msg in poll_data[tp]:
                try:
                    key = await self.deserialize(msg.key, request_format) if msg.key else None
                    value = await self.deserialize(msg.value, request_format) if msg.value else None
                except (UnpackError, InvalidMessageHeader, InvalidPayload) as e:
                    KarapaceBase.internal_error(message=f"deserialization error: {e}", content_type=content_type)
                element = {
                    "topic": tp.topic,
                    "partition": tp.partition,
                    "offset": msg.offset,
                    "key": key,
                    "value": value,
                }
                response.append(element)
        KarapaceBase.r(content_type=content_type, body=response)
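# A standalone sketch of the bounded poll loop in fetch() above, assuming a
# poll(timeout_ms=..., max_records=...) callable that returns {TopicPartition: [records]}
# as kafka-python's KafkaConsumer.poll() does. It stops once max_bytes or the timeout is
# exhausted; negative size fields (absent key/value/headers) count as zero, as in fetch().
import time
from collections import defaultdict


def bounded_poll(poll, timeout_ms: int, max_bytes: int) -> dict:
    read_bytes = 0
    start_time = time.monotonic()
    poll_data = defaultdict(list)
    while read_bytes < max_bytes and start_time + timeout_ms / 1000 > time.monotonic():
        for topic, records in poll(timeout_ms=timeout_ms, max_records=1).items():
            for rec in records:
                read_bytes += (
                    max(0, rec.serialized_key_size)
                    + max(0, rec.serialized_value_size)
                    + max(0, rec.serialized_header_size)
                )
                poll_data[topic].append(rec)
    return poll_data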