Example #1
    async def commit_offsets(self, internal_name: Tuple[str, str],
                             content_type: str, request_data: dict,
                             cluster_metadata: dict):
        self.log.info("Committing offsets for %s", internal_name)
        self._assert_consumer_exists(internal_name, content_type)
        if request_data:
            self._assert_has_key(request_data, "offsets", content_type)
        payload = {}
        for el in request_data.get("offsets", []):
            for k in ["partition", "offset"]:
                convert_to_int(el, k, content_type)
            # If we commit for a partition that does not belong to this consumer, the internal error raised
            # is marked as retriable, so the commit call would stay blocked in what looks like an infinite loop.
            self._topic_and_partition_valid(cluster_metadata, el, content_type)
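            # Kafka stores the offset of the *next* record to consume, hence the committed value is offset + 1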
            payload[TopicPartition(el["topic"],
                                   el["partition"])] = OffsetAndMetadata(
                                       el["offset"] + 1, None)

        async with self.consumer_locks[internal_name]:
            consumer = self.consumers[internal_name].consumer
            payload = payload or None
            try:
                consumer.commit(offsets=payload)
            except KafkaError as e:
                KarapaceBase.internal_error(
                    message=f"error sending commit request: {e}",
                    content_type=content_type)
        empty_response()
 def _assert_consumer_exists(self, internal_name: Tuple[str, str], content_type: str):
     if internal_name not in self.consumers:
         KarapaceBase.not_found(
             message=f"Consumer for {internal_name} not found among {list(self.consumers.keys())}",
             content_type=content_type,
             sub_code=40403
         )
Example #3
 def __init__(self, config):
     KarapaceBase.__init__(self, config)
     KafkaRest._init(self, config)
     KafkaRest._add_routes(self)
     KarapaceSchemaRegistry._init(self)
     KarapaceSchemaRegistry._add_routes(self)
     self.log = logging.getLogger("KarapaceAll")
     self.app.on_shutdown.append(self.close_by_app)
Example #4
 def _assert_consumer_exists(self, internal_name: Tuple[str, str],
                             content_type: str) -> None:
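     # Reply with 404 / CONSUMER_NOT_FOUND (40403) when the consumer instance has not been registered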
     if internal_name not in self.consumers:
         KarapaceBase.not_found(
             message=f"Consumer for {internal_name} not found among {list(self.consumers.keys())}",
             content_type=content_type,
             sub_code=RESTErrorCodes.CONSUMER_NOT_FOUND.value,
         )
Example #5
 @staticmethod
 def _assert(cond: bool, code: HTTPStatus, sub_code: int, message: str,
             content_type: str) -> None:
     if not cond:
         KarapaceBase.r(content_type=content_type,
                        status=code,
                        body={
                            "message": message,
                            "error_code": sub_code
                        })
Example #6
 def __init__(self, config_file_path: str, config: dict) -> None:
     KarapaceBase.__init__(self,
                           config_file_path=config_file_path,
                           config=config)
     KafkaRest._init(self, config=config)
     KafkaRest._add_routes(self)
     KarapaceSchemaRegistry._init(self, config=config)
     KarapaceSchemaRegistry._add_routes(self)
     self.log = logging.getLogger("KarapaceAll")
     self.app.on_shutdown.append(self.close_by_app)
 async def get_subscription(self, internal_name: Tuple[str, str], content_type: str):
     self.log.info("Retrieving subscription for %s", internal_name)
     self._assert_consumer_exists(internal_name, content_type)
     async with self.consumer_locks[internal_name]:
         consumer = self.consumers[internal_name].consumer
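         # subscription() is None until the consumer subscribes to something; report that as an empty topic list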
         if consumer.subscription() is None:
             topics = []
         else:
             topics = list(consumer.subscription())
         KarapaceBase.r(content_type=content_type, body={"topics": topics})
 @staticmethod
 def _topic_and_partition_valid(cluster_metadata: dict, topic_data: dict, content_type: str):
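     # Validate the requested topic/partition against the cluster metadata; reply 404 with
     # sub_code 40401 (unknown topic) or 40402 (unknown partition) otherwise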
     ConsumerManager._has_topic_and_partition_keys(topic_data, content_type)
     topic = topic_data["topic"]
     partition = topic_data["partition"]
     if topic not in cluster_metadata["topics"]:
         KarapaceBase.not_found(message=f"Topic {topic} not found", content_type=content_type, sub_code=40401)
     partitions = {pi["partition"] for pi in cluster_metadata["topics"][topic]["partitions"]}
     if partition not in partitions:
         KarapaceBase.not_found(
             message=f"Partition {partition} not found for topic {topic}", content_type=content_type, sub_code=40402
         )
 async def get_assignments(self, internal_name: Tuple[str, str], content_type: str):
     self.log.info("Retrieving assignment for %s", internal_name)
     self._assert_consumer_exists(internal_name, content_type)
     async with self.consumer_locks[internal_name]:
         consumer = self.consumers[internal_name].consumer
         KarapaceBase.r(
             content_type=content_type,
             body={"partitions": [{
                 "topic": pd.topic,
                 "partition": pd.partition
             } for pd in consumer.assignment()]}
         )
 def __init__(self, config_path: str):
     self.config = KarapaceBase.read_config(config_path)
     self.hostname = f"http://{self.config['advertised_hostname']}:{self.config['port']}"
     self.log = logging.getLogger("RestConsumerManager")
     self.deserializer = SchemaRegistryDeserializer(config_path=config_path)
     self.consumers = {}
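     # One asyncio lock per consumer instance, created on first use; handlers take it with "async with"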
     self.consumer_locks = defaultdict(Lock)
Example #11
 def __init__(self, config_path, backup_path, topic_option=None):
     self.config = KarapaceBase.read_config(config_path)
     self.backup_location = backup_path
     self.topic_name = topic_option or self.config["topic_name"]
     self.log = logging.getLogger("SchemaBackup")
     self.consumer = None
     self.producer = None
     self.admin_client = None
     self.timeout_ms = 1000
 async def get_offsets(self, internal_name: Tuple[str, str], content_type: str, request_data: dict):
     self.log.info("Retrieving offsets for %s", internal_name)
     self._assert_consumer_exists(internal_name, content_type)
     self._assert_has_key(request_data, "partitions", content_type)
     response = {"offsets": []}
     async with self.consumer_locks[internal_name]:
         consumer = self.consumers[internal_name].consumer
         for el in request_data["partitions"]:
             convert_to_int(el, "partition", content_type)
             tp = TopicPartition(el["topic"], el["partition"])
             commit_info = consumer.committed(tp, metadata=True)
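              # committed() returns None when no offset has ever been committed for this partition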
             if not commit_info:
                 continue
             response["offsets"].append({
                 "topic": tp.topic,
                 "partition": tp.partition,
                 "metadata": commit_info.metadata,
                 "offset": commit_info.offset
             })
     KarapaceBase.r(body=response, content_type=content_type)
Example #13
 async def create_consumer(self, group_name: str, request_data: dict,
                           content_type: str):
     group_name = group_name.strip("/")
     self.log.info("Create consumer request for group  %s", group_name)
     consumer_name = request_data.get("name") or self.new_name()
     internal_name = self.create_internal_name(group_name, consumer_name)
     async with self.consumer_locks[internal_name]:
         if internal_name in self.consumers:
             self.log.error(
                 "Error creating duplicate consumer in group %s with id %s",
                 group_name, consumer_name)
             KarapaceBase.r(
                 status=HTTPStatus.CONFLICT,
                 content_type=content_type,
                 body={
                     "error_code":
                     RESTErrorCodes.CONSUMER_ALREADY_EXISTS.value,
                     "message": f"Consumer {consumer_name} already exists",
                 },
             )
         self._validate_create_consumer(request_data, content_type)
         self.log.info(
             "Creating new consumer in group %s with id %s and request_info %r",
             group_name, consumer_name, request_data)
         for k in ["consumer.request.timeout.ms", "fetch_min_bytes"]:
             convert_to_int(request_data, k, content_type)
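          # Settings the client did not provide fall back to the service-level defaults from self.config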
         try:
             enable_commit = request_data.get(
                 "auto.commit.enable",
                 self.config["consumer_enable_auto_commit"])
             if isinstance(enable_commit, str):
                 enable_commit = enable_commit.lower() == "true"
             request_data["consumer.request.timeout.ms"] = request_data.get(
                 "consumer.request.timeout.ms",
                 self.config["consumer_request_timeout_ms"])
             request_data["auto.commit.enable"] = enable_commit
             request_data["auto.offset.reset"] = request_data.get(
                 "auto.offset.reset", "earliest")
             fetch_min_bytes = request_data.get(
                 "fetch.min.bytes", self.config["fetch_min_bytes"])
             c = await self.create_kafka_consumer(fetch_min_bytes,
                                                  group_name, internal_name,
                                                  request_data)
         except KafkaConfigurationError as e:
             KarapaceBase.internal_error(str(e), content_type)
         self.consumers[internal_name] = TypedConsumer(
             consumer=c,
             serialization_format=request_data["format"],
             config=request_data)
         base_uri = urljoin(
             self.hostname,
             f"consumers/{group_name}/instances/{consumer_name}")
         KarapaceBase.r(content_type=content_type,
                        body={
                            "base_uri": base_uri,
                            "instance_id": consumer_name
                        })
    async def fetch(self, internal_name: Tuple[str, str], content_type: str, formats: dict, query_params: dict):
        self.log.info("Running fetch for name %s with parameters %r and formats %r", internal_name, query_params, formats)
        self._assert_consumer_exists(internal_name, content_type)
        async with self.consumer_locks[internal_name]:
            consumer = self.consumers[internal_name].consumer
            serialization_format = self.consumers[internal_name].serialization_format
            config = self.consumers[internal_name].config
            request_format = formats["embedded_format"]
            self._assert(
                cond=serialization_format == request_format,
                code=HTTPStatus.NOT_ACCEPTABLE,
                sub_code=40601,
                content_type=content_type,
                message=f"Consumer format {serialization_format} does not match the embedded format {request_format}"
            )
            self.log.info("Fetch request for %s with params %r", internal_name, query_params)
            try:
                timeout = int(query_params["timeout"]) if "timeout" in query_params \
                    else config["consumer.request.timeout.ms"]
                # we get to be more in line with the confluent proxy by doing a bunch of fetches each time and
                # respecting the max fetch request size
                max_bytes = int(query_params["max_bytes"]) if "max_bytes" in query_params \
                    else consumer.config["fetch_max_bytes"]
            except ValueError:
                KarapaceBase.internal_error(message=f"Invalid request parameters: {query_params}", content_type=content_type)
            for val in [timeout, max_bytes]:
                if not val:
                    continue
                if val <= 0:
                    KarapaceBase.internal_error(message=f"Invalid request parameter {val}", content_type=content_type)
            response = []
            self.log.info(
                "Will poll multiple times for a single message with a total timeout of %dms, "
                "until at least %d bytes have been fetched", timeout, max_bytes
            )
            read_bytes = 0
            start_time = time.monotonic()
            poll_data = defaultdict(list)
            message_count = 0
            while read_bytes < max_bytes and start_time + timeout / 1000 > time.monotonic():
                time_left = start_time + timeout / 1000 - time.monotonic()
                bytes_left = max_bytes - read_bytes
                self.log.info(
                    "Polling with %r time left and %d bytes left, gathered %d messages so far", time_left, bytes_left,
                    message_count
                )
                data = consumer.poll(timeout_ms=timeout, max_records=1)
                self.log.debug("Successfully polled for messages")
                for topic, records in data.items():
                    for rec in records:
                        message_count += 1
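                        # serialized_*_size is -1 when the key/value/headers are absent, so clamp to zero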
                        read_bytes += \
                            max(0, rec.serialized_key_size) + \
                            max(0, rec.serialized_value_size) + \
                            max(0, rec.serialized_header_size)
                        poll_data[topic].append(rec)
            self.log.info("Gathered %d total messages", message_count)
            for tp in poll_data:
                for msg in poll_data[tp]:
                    try:
                        key = await self.deserialize(msg.key, request_format) if msg.key else None
                        value = await self.deserialize(msg.value, request_format) if msg.value else None
                    except (UnpackError, InvalidMessageHeader, InvalidPayload) as e:
                        KarapaceBase.internal_error(message=f"deserialization error: {e}", content_type=content_type)
                    element = {
                        "topic": tp.topic,
                        "partition": tp.partition,
                        "offset": msg.offset,
                        "key": key,
                        "value": value,
                    }
                    response.append(element)

            KarapaceBase.r(content_type=content_type, body=response)