def _get_partitions(
    self, topic: Topic, retrieve_last_timestamp: bool, get_partition_watermarks: bool = True
) -> List[Partition]:
    assert not (
        retrieve_last_timestamp and not get_partition_watermarks
    ), "Cannot retrieve timestamp without partition watermarks"
    config = Config.get_instance().create_confluent_config()
    config.update({"group.id": ESQUE_GROUP_ID, "topic.metadata.refresh.interval.ms": "250"})
    with closing(confluent_kafka.Consumer(config)) as consumer:
        confluent_topic = consumer.list_topics(topic=topic.name).topics[topic.name]
        partitions: List[Partition] = []
        if not get_partition_watermarks:
            return [
                Partition(partition_id, -1, -1, meta.isrs, meta.leader, meta.replicas, None)
                for partition_id, meta in confluent_topic.partitions.items()
            ]
        for partition_id, meta in confluent_topic.partitions.items():
            try:
                low, high = consumer.get_watermark_offsets(TopicPartition(topic=topic.name, partition=partition_id))
            except KafkaException:
                # Retry once the metadata should have been refreshed (also covers small network delays).
                # Unfortunately we cannot explicitly trigger and wait for a metadata refresh.
                time.sleep(1)
                low, high = consumer.get_watermark_offsets(TopicPartition(topic=topic.name, partition=partition_id))
            latest_timestamp = None
            if high > low and retrieve_last_timestamp:
                assignment = [TopicPartition(topic=topic.name, partition=partition_id, offset=high - 1)]
                consumer.assign(assignment)
                msg = consumer.poll(timeout=10)
                if msg is None:
                    logger.warning(
                        f"Due to timeout, the latest timestamp for topic `{topic.name}` "
                        f"and partition `{partition_id}` is missing."
                    )
                else:
                    latest_timestamp = float(msg.timestamp()[1]) / 1000
            partition = Partition(partition_id, low, high, meta.isrs, meta.leader, meta.replicas, latest_timestamp)
            partitions.append(partition)
        return partitions
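# --- Added illustration (not from the original snippets) ---------------------------------------
# A stripped-down sketch of the per-partition lookup _get_partitions() performs: read the
# watermarks, then poll the single message at offset high - 1 to obtain the timestamp of the
# most recent record. Broker address, topic name and group id are assumptions for the example.
from confluent_kafka import Consumer, TopicPartition

probe = Consumer({"bootstrap.servers": "localhost:9092", "group.id": "watermark-probe"})
tp = TopicPartition("my-topic", 0)  # assumed topic/partition

low, high = probe.get_watermark_offsets(tp)
latest_timestamp = None
if high > low:
    # Assign directly at the last written offset and fetch that one message.
    probe.assign([TopicPartition("my-topic", 0, high - 1)])
    msg = probe.poll(timeout=10)
    if msg is not None:
        # timestamp() returns (timestamp_type, timestamp_ms); convert to seconds.
        latest_timestamp = msg.timestamp()[1] / 1000
probe.close()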
def _assign_consumer_to_last_offset(self):
    off_topic = self.config["offset_topic"]
    partition = TopicPartition(off_topic, 0)
    try:
        _, high_offset = self._offset_consumer.get_watermark_offsets(partition, timeout=10)
    except KafkaException:
        logger.warning(f"Offset topic {off_topic} was not found, creating it now.")
        self._admin.create_topics(
            [NewTopic(off_topic, num_partitions=1, replication_factor=1)], operation_timeout=120
        )
        high_offset = 0
    partition.offset = max(0, high_offset - 1)
    self._offset_consumer.assign([partition])
def test_edit_offsets(
    monkeypatch: MonkeyPatch,
    interactive_cli_runner,
    topic: str,
    producer: ConfluenceProducer,
    consumer_group: str,
    consumergroup_controller: ConsumerGroupController,
):
    produce_text_test_messages(producer=producer, topic_name=topic, amount=10)

    consumergroup_controller.commit_offsets(consumer_group, [TopicPartition(topic=topic, partition=0, offset=10)])

    consumergroup_desc_before = consumergroup_controller.get_consumer_group(consumer_id=consumer_group).describe(
        partitions=True
    )

    offset_config = {"offsets": [{"topic": topic, "partition": 0, "offset": 1}]}

    def mock_edit_function(text=None, editor=None, env=None, require_save=None, extension=None, filename=None):
        return yaml.dump(offset_config, default_flow_style=False)

    monkeypatch.setattr(click, "edit", mock_edit_function)

    result = interactive_cli_runner.invoke(
        esque, args=["edit", "offsets", consumer_group, "-t", topic], input="y\n", catch_exceptions=False
    )
    assert result.exit_code == 0

    # Check assertions:
    consumergroup_desc_after = consumergroup_controller.get_consumer_group(consumer_id=consumer_group).describe(
        partitions=True
    )
    assert consumergroup_desc_before["offsets"][topic][0]["consumer_offset"] == 10
    assert consumergroup_desc_after["offsets"][topic][0]["consumer_offset"] == 1
def edit_consumer_group_offsets(self, consumer_id: str, offset_plan: List[ConsumerGroupOffsetPlan]):
    """
    Commit consumergroup offsets to specific values
    :param consumer_id: ID of the consumer group
    :param offset_plan: List of ConsumerGroupOffsetPlan objects denoting the offsets for each partition in different topics
    :return:
    """
    consumer = ConsumerFactory().create_consumer(
        group_id=consumer_id,
        topic_name=None,
        output_directory=None,
        last=False,
        avro=False,
        initialize_default_output_directory=False,
        match=None,
        enable_auto_commit=False,
    )
    offsets = [
        TopicPartition(topic=plan_element.topic_name, partition=plan_element.partition_id, offset=plan_element.proposed_offset)
        for plan_element in offset_plan
        if not plan_element.offset_equal
    ]
    consumer.commit(offsets=offsets)
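# --- Added illustration (not from the original snippets) ---------------------------------------
# A minimal sketch of what edit_consumer_group_offsets() boils down to with plain
# confluent_kafka: committing explicit offsets on behalf of a consumer group. Broker address,
# group id, topic name and the offset value are assumptions for the example.
from confluent_kafka import Consumer, TopicPartition

group_consumer = Consumer({
    "bootstrap.servers": "localhost:9092",  # assumed broker
    "group.id": "my-group",                 # assumed consumer group
    "enable.auto.commit": False,            # offsets are committed explicitly below
})
# Pin partition 0 of "my-topic" to offset 42 for this group.
group_consumer.commit(offsets=[TopicPartition("my-topic", 0, 42)], asynchronous=False)
group_consumer.close()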
def test_set_offsets_offset_to_delta_all_topics(
    topic: str,
    interactive_cli_runner,
    producer: ConfluenceProducer,
    consumer_group: str,
    consumergroup_controller: ConsumerGroupController,
):
    produce_text_test_messages(producer=producer, topic_name=topic, amount=10)

    consumergroup_controller.commit_offsets(consumer_group, [TopicPartition(topic=topic, partition=0, offset=10)])

    consumergroup_desc_before = consumergroup_controller.get_consumer_group(consumer_id=consumer_group).describe(
        partitions=True
    )
    interactive_cli_runner.invoke(
        esque, args=["set", "offsets", consumer_group, "--offset-by-delta", "-2"], input="y\n", catch_exceptions=False
    )

    # Check assertions:
    consumergroup_desc_after = consumergroup_controller.get_consumer_group(consumer_id=consumer_group).describe(
        partitions=True
    )
    assert consumergroup_desc_before["offsets"][topic][0]["consumer_offset"] == 10
    assert consumergroup_desc_after["offsets"][topic][0]["consumer_offset"] == 8
def assign_specific_partitions(self, topic_name: str, partitions: list = None, offset: int = 0):
    self._topic_name = topic_name
    if partitions is not None:
        topic_partitions = [
            TopicPartition(self._topic_name, partition=partition, offset=offset) for partition in partitions
        ]
    else:
        topic_partitions = [TopicPartition(self._topic_name, partition=0, offset=offset)]
    self._consumer.assign(topic_partitions)
def produced_messages(
    records: RecordList,
    plain_avro_producer,
    plain_avro_consumer,
    topic_and_partitions: Tuple[str, int],
    running_cluster_config: Dict[str, str],
    consume_all,
) -> Iterable[List[Tuple[str, dict]]]:
    """
    Creates 15 random messages, produces them to the currently active topic and then yields them for the test.
    """
    topic_id, partitions = topic_and_partitions
    for key, value in records:
        plain_avro_producer.produce(key=key, value=value)
    plain_avro_producer.flush()

    cluster_metadata: ClusterMetadata = plain_avro_consumer.list_topics(topic=topic_id)
    topic_metadata: TopicMetadata = cluster_metadata.topics[topic_id]
    logger.info(f"Topic partitions: {topic_metadata.partitions.keys()}")
    assert partitions == len(topic_metadata.partitions.keys()), "Not all partitions present"
    offsets = 0
    for partition in topic_metadata.partitions.keys():
        _, ho = plain_avro_consumer.get_watermark_offsets(TopicPartition(topic_id, partition))
        offsets += ho
    assert len(records) == offsets, ""
    yield records
def test_set_offsets_offset_from_group(
    topic: str,
    interactive_cli_runner,
    producer: ConfluenceProducer,
    consumer_group: str,
    target_consumer_group: str,
    consumergroup_controller: ConsumerGroupController,
):
    produce_text_test_messages(producer=producer, topic_name=topic, amount=10)

    consumergroup_controller.commit_offsets(consumer_group, [TopicPartition(topic=topic, partition=0, offset=10)])

    consumergroup_desc_before = consumergroup_controller.get_consumer_group(consumer_id=consumer_group).describe(
        partitions=True
    )
    interactive_cli_runner.invoke(
        esque, args=["set", "offsets", consumer_group, "--offset-by-delta", "-2"], input="y\n", catch_exceptions=False
    )
    consumergroup_desc_after = consumergroup_controller.get_consumer_group(consumer_id=consumer_group).describe(
        partitions=True
    )

    # create a new consumer in a separate group and consume just one message
    consumergroup_controller.commit_offsets(target_consumer_group, [TopicPartition(topic=topic, partition=0, offset=1)])

    interactive_cli_runner.invoke(
        esque,
        args=["set", "offsets", target_consumer_group, "--offset-from-group", consumer_group],
        input="y\n",
        catch_exceptions=False,
    )
    consumergroup_desc_target = consumergroup_controller.get_consumer_group(consumer_id=target_consumer_group).describe(
        partitions=True
    )

    assert consumergroup_desc_before["offsets"][topic][0]["consumer_offset"] == 10
    assert consumergroup_desc_after["offsets"][topic][0]["consumer_offset"] == 8
    assert consumergroup_desc_target["offsets"][topic][0]["consumer_offset"] == 8
def pause_topic(self):
    if not self.paused:
        logging.info('Topic {} paused. Last event timestamp: {}'.format(
            self.topic_name,
            self.queue[-1].message.timestamp() if len(self.queue) > 0 else None))
        self.paused = True
        self.consumer_ref.pause([TopicPartition(topic=self.topic_name, partition=self.partition)])
def get_messages(self, timestamp):
    ret = []
    while len(self.queue) > 0 and self.queue[0].message.timestamp()[1] <= timestamp:
        ret.append(self.queue.popleft().message)
        if len(self.queue) < self.min_limit and self.paused and not self.stopped:
            logging.info('Resume reading on topic: {}'.format(self.topic_name))
            self.paused = False
            self.consumer_ref.resume([TopicPartition(topic=self.topic_name, partition=self.partition)])
    self.last_message_ts = timestamp
    return ret
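# --- Added illustration (not from the original snippets) ---------------------------------------
# A minimal sketch of the pause()/resume() flow-control pattern used by pause_topic() and
# get_messages() above: stop fetching when a local buffer grows too large and resume once it
# drains. Broker address, topic name, group id and thresholds are assumptions; a separate
# worker would drain `buffer` in a real application.
from collections import deque
from confluent_kafka import Consumer, TopicPartition

buffered_consumer = Consumer({"bootstrap.servers": "localhost:9092", "group.id": "buffered-reader"})
tp = TopicPartition("events", 0)  # assumed topic/partition
buffered_consumer.assign([tp])

buffer = deque()
MAX_BUFFERED, MIN_BUFFERED = 1000, 100  # assumed thresholds
paused = False

while True:  # the surrounding application decides when to stop
    msg = buffered_consumer.poll(timeout=0.1)
    if msg is not None and msg.error() is None:
        buffer.append(msg)
    if not paused and len(buffer) >= MAX_BUFFERED:
        buffered_consumer.pause([tp])   # stop fetching until the buffer drains
        paused = True
    elif paused and len(buffer) <= MIN_BUFFERED:
        buffered_consumer.resume([tp])  # safe to fetch again
        paused = False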
def poll_next_message(c, partition, resolution, topic, transactional):
    msg = None
    try:
        offset = get_next_offset(c, partition, resolution, topic, transactional)
        c.seek(TopicPartition(topic, partition, offset))
        msg = c.poll(timeout=0.05)
    except Exception as e:
        print(e)
    return msg
def target_topic_avro_consumer(unittest_config: Config, target_topic: Tuple[str, int]) -> AvroConsumer:
    consumer = AvroConsumer(
        {
            "group.id": "asdf",
            "enable.auto.commit": False,
            "enable.partition.eof": False,
            **unittest_config.create_confluent_config(include_schema_registry=True),
        }
    )
    consumer.assign([TopicPartition(topic=target_topic[0], partition=i, offset=0) for i in range(target_topic[1])])
    yield consumer
    consumer.close()
def test_json_record_serialization_custom(kafka_cluster, load_file):
    """
    Ensures to_dict and from_dict hooks are properly applied by the serializer.

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture
        load_file (callable(str)): JSON Schema file reader
    """
    topic = kafka_cluster.create_topic("serialization-json")
    sr = kafka_cluster.schema_registry({'url': 'http://localhost:8081'})

    schema_str = load_file("product.json")
    value_serializer = JSONSerializer(sr, schema_str, to_dict=_testProduct_to_dict)
    value_deserializer = JSONDeserializer(schema_str, from_dict=_testProduct_from_dict)

    producer = kafka_cluster.producer(value_serializer=value_serializer)

    record = _TestProduct(product_id=1,
                          name="The ice sculpture",
                          price=12.50,
                          tags=["cold", "ice"],
                          dimensions={"length": 7.0, "width": 12.0, "height": 9.5},
                          location={"latitude": -78.75, "longitude": 20.4})

    producer.produce(topic, value=record, partition=0)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    msg = consumer.poll()
    actual = msg.value()

    assert all([getattr(actual, attribute) == getattr(record, attribute) for attribute in vars(record)])
def consumer_factory_(topic: str) -> Consumer:
    consumer = Consumer(
        {
            "group.id": "asdf",
            "enable.auto.commit": False,
            "enable.partition.eof": False,
            **unittest_config.create_confluent_config(),
        }
    )
    partitions = consumer.list_topics(topic=topic).topics[topic].partitions
    consumer.assign([TopicPartition(topic=topic, partition=p, offset=0) for p in partitions])
    consumers.append(consumer)
    return consumer
def test_json_record_serialization(kafka_cluster, load_file):
    """
    Tests basic JSONSerializer and JSONDeserializer functionality.

    product.json from:
        https://json-schema.org/learn/getting-started-step-by-step.html

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture
        load_file (callable(str)): JSON Schema file reader
    """
    topic = kafka_cluster.create_topic("serialization-json")
    sr = kafka_cluster.schema_registry({'url': 'http://localhost:8081'})

    schema_str = load_file("product.json")
    value_serializer = JSONSerializer(sr, schema_str)
    value_deserializer = JSONDeserializer(schema_str)

    producer = kafka_cluster.producer(value_serializer=value_serializer)

    record = {
        "productId": 1,
        "productName": "An ice sculpture",
        "price": 12.50,
        "tags": ["cold", "ice"],
        "dimensions": {"length": 7.0, "width": 12.0, "height": 9.5},
        "warehouseLocation": {"latitude": -78.75, "longitude": 20.4},
    }

    producer.produce(topic, value=record, partition=0)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    msg = consumer.poll()
    actual = msg.value()

    assert all([actual[k] == v for k, v in record.items()])
def edit_consumer_group_offsets(self, consumer_id: str, offset_plan: List[ConsumerGroupOffsetPlan]):
    """
    Commit consumergroup offsets to specific values
    :param consumer_id: ID of the consumer group
    :param offset_plan: List of ConsumerGroupOffsetPlan objects denoting the offsets for each partition in different topics
    :return:
    """
    offsets = [
        TopicPartition(topic=plan_element.topic_name, partition=plan_element.partition_id, offset=plan_element.proposed_offset)
        for plan_element in offset_plan
        if not plan_element.offset_equal
    ]
    self.commit_offsets(consumer_id, offsets)
def _manually_commit(self):
    """
    Kafka expects the number of consumer threads per partition to be one-to-one or one-to-many,
    never many-to-one, so concurrency is normally limited by the partition count. This consumer
    supports very high thread counts (e.g. 200 threads on a single partition), which makes
    committing complex: consumption and fetching from Kafka happen in different threads, and a
    task with a larger offset may finish before one with a smaller offset. Every 2 seconds, for
    each partition, commit the largest offset up to which all messages have been consumed
    (consume status == 1).
    :return:
    """
    # Imported lazily: this package can be hard to install, so users of this middleware
    # have to install it themselves.
    from confluent_kafka.cimpl import TopicPartition

    if time.time() - self._recent_commit_time > 2:
        partion_max_consumed_offset_map = dict()
        to_be_remove_from_partion_max_consumed_offset_map = defaultdict(list)
        for partion, offset_consume_status in self._partion__offset_consume_status_map.items():
            max_consumed_offset = 0
            for offset, consume_status in offset_consume_status.items():
                # print(offset, consume_status)
                if consume_status == 1:
                    max_consumed_offset = offset
                    to_be_remove_from_partion_max_consumed_offset_map[partion].append(offset)
                else:
                    break
            if max_consumed_offset:
                partion_max_consumed_offset_map[partion] = max_consumed_offset
        # self.logger.info(partion_max_consumed_offset_map)
        offsets = list()
        for partion, max_consumed_offset in partion_max_consumed_offset_map.items():
            # print(partion, max_consumed_offset)
            offsets.append(TopicPartition(topic=self._queue_name, partition=partion, offset=max_consumed_offset + 1))
        if len(offsets):
            self._confluent_consumer.commit(offsets=offsets, asynchronous=False)
        self._recent_commit_time = time.time()
        for partion, offset_list in to_be_remove_from_partion_max_consumed_offset_map.items():
            for offset in offset_list:
                del self._partion__offset_consume_status_map[partion][offset]
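# --- Added illustration (not from the original snippets) ---------------------------------------
# A minimal sketch of the commit rule described in the docstring above, with illustrative names
# only: given per-offset completion flags for one partition, commit N + 1 where N is the largest
# offset such that every offset up to and including N has been processed.
from typing import Dict, Optional
from confluent_kafka import Consumer, TopicPartition

def highest_contiguous_done(status_by_offset: Dict[int, int]) -> Optional[int]:
    """Return the largest offset whose predecessors are all processed (status == 1), or None."""
    done = None
    for offset in sorted(status_by_offset):
        if status_by_offset[offset] == 1:
            done = offset
        else:
            break
    return done

def commit_progress(consumer: Consumer, topic: str, partition: int, status_by_offset: Dict[int, int]) -> None:
    done = highest_contiguous_done(status_by_offset)
    if done is not None:
        # Kafka commits point at the next offset to read, hence done + 1.
        consumer.commit(offsets=[TopicPartition(topic, partition, done + 1)], asynchronous=False)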
def reset_offsets_from_partitions(client: AdminClient, brokers: str, app_name: str, input_topic: str):
    topic_description = get_topic(client, input_topic)
    partition_ids = [partition_metadata.id for partition_metadata in topic_description.partitions.values()]
    partitions = [TopicPartition(input_topic, id_partition, 0) for id_partition in partition_ids]
    consumer = Consumer({
        'bootstrap.servers': brokers,
        'group.id': app_name,
        'session.timeout.ms': 6000
    })
    response = consumer.commit(offsets=partitions, asynchronous=False)
    if not isinstance(response, list):
        raise FaustAppCleanException("Error while cleaning the Faust app!")
def test_consume_error(kafka_cluster):
    """
    Tests to ensure librdkafka errors are propagated as an instance of ConsumeError.
    """
    topic = kafka_cluster.create_topic("test_commit_transaction")
    consumer_conf = {'enable.partition.eof': True}

    producer = kafka_cluster.producer()
    producer.produce(topic=topic, value="a")
    producer.flush()

    consumer = kafka_cluster.consumer(consumer_conf, value_deserializer=StringSerializer())
    consumer.assign([TopicPartition(topic, 0, OFFSET_END)])

    with pytest.raises(ConsumeError, match="No more messages"):
        # Trigger EOF error
        consumer.poll()
def consumer(topic_object: Topic, consumer_group):
    _config = Config().create_confluent_config()
    _config.update({
        "group.id": consumer_group,
        "error_cb": raise_for_kafka_error,
        # We need to commit offsets manually once we're sure it got saved to the sink
        "enable.auto.commit": False,
        "enable.partition.eof": False,
        # We need this to start at the last committed offset instead of the latest
        # when subscribing for the first time
        "default.topic.config": {"auto.offset.reset": "latest"},
    })
    _consumer = confluent_kafka.Consumer(_config)
    _consumer.assign([TopicPartition(topic=topic_object.name, partition=0, offset=0)])
    yield _consumer
def test_set_offsets_offset_to_timestamp_value(
    topic: str,
    interactive_cli_runner,
    producer: ConfluenceProducer,
    consumer_group: str,
    consumergroup_controller: ConsumerGroupController,
):
    messages = produce_text_test_messages(producer=producer, topic_name=topic, amount=10)

    consumergroup_controller.commit_offsets(consumer_group, [TopicPartition(topic=topic, partition=0, offset=10)])

    consumergroup_desc_before = consumergroup_controller.get_consumer_group(consumer_id=consumer_group).describe(
        partitions=True
    )
    fifth_message = messages[4]
    timestamp = fifth_message.timestamp
    dt = pendulum.from_timestamp(round(timestamp / 1000) - 1)
    interactive_cli_runner.invoke(
        esque,
        args=[
            "set",
            "offsets",
            consumer_group,
            "--topic-name",
            topic,
            "--offset-to-timestamp",
            dt.format("YYYY-MM-DDTHH:mm:ss"),
        ],
        input="y\n",
        catch_exceptions=False,
    )

    # Check assertions:
    consumergroup_desc_after = consumergroup_controller.get_consumer_group(consumer_id=consumer_group).describe(
        partitions=True
    )
    assert consumergroup_desc_before["offsets"][topic][0]["consumer_offset"] == 10
    assert consumergroup_desc_after["offsets"][topic][0]["consumer_offset"] == 4
def randomly_generated_consumer_groups(filled_topic, unittest_config: Config, prefix="") -> str:
    randomly_generated_consumer_group = prefix + "".join(random.choices(ascii_letters, k=8))
    _config = unittest_config.create_confluent_config()
    _config.update({
        "group.id": randomly_generated_consumer_group,
        "enable.auto.commit": False,
        "default.topic.config": {"auto.offset.reset": "latest"},
    })
    _consumer = confluent_kafka.Consumer(_config)
    _consumer.assign([TopicPartition(topic=filled_topic.name, partition=0, offset=0)])
    for i in range(2):
        msg = _consumer.consume(timeout=10)[0]
        _consumer.commit(msg, asynchronous=False)
    return randomly_generated_consumer_group
def test_json_record_deserialization_mismatch(kafka_cluster, load_file):
    """
    Ensures that deserializing a record against a non-matching schema raises a ConsumeError.

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture
        load_file (callable(str)): JSON Schema file reader
    """
    topic = kafka_cluster.create_topic("serialization-json")
    sr = kafka_cluster.schema_registry({'url': 'http://localhost:8081'})

    schema_str = load_file("contractor.json")
    schema_str2 = load_file("product.json")

    value_serializer = JSONSerializer(sr, schema_str)
    value_deserializer = JSONDeserializer(schema_str2)

    producer = kafka_cluster.producer(value_serializer=value_serializer)

    record = {
        "contractorId": 2,
        "contractorName": "Magnus Edenhill",
        "contractRate": 30,
        "trades": ["pickling"],
    }

    producer.produce(topic, value=record, partition=0)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    with pytest.raises(
            ConsumeError,
            match=r"(.*) is a required property \(KafkaError code {}\)".format(KafkaError._VALUE_DESERIALIZATION)):
        consumer.poll()
def create_consumers(args, num_partitions, partition_table):
    consumers = []
    transactional = args["transactional"]
    for i in range(num_partitions):
        partition_table[i] = []
        oc = Consumer({
            'bootstrap.servers': args["kafka"],
            'group.id': str(uuid.uuid4()),
            'auto.offset.reset': 'latest',
            'api.version.request': True,
            'isolation.level': ('read_committed' if transactional else 'read_uncommitted'),
            'max.poll.interval.ms': 86400000
        })
        oc.assign([TopicPartition(args["output_topic"], i)])
        oc.poll(0.5)
        consumers.append(oc)
    return consumers
def _assign_consumer_to_last_offset(self):
    partition = TopicPartition(self.config["offset_topic"], 0)
    _, high_offset = self._offset_consumer.get_watermark_offsets(partition)
    partition.offset = max(0, high_offset - 1)
    self._offset_consumer.assign([partition])
def __init__(self, broker, groupid, topics_infos: List[TopicInfo], latency_ms, commit_interval_sec=None,
             group_by_time=False, begin_timestamp=None, begin_flag=None, end_timestamp=None, end_flag=None,
             heartbeat_interval_ms=-1):
    """
    :param broker: Broker to connect to.
    :param groupid: Group id of the consumer.
    :param topics_infos: [TopicInfo()] - list of TopicInfo objects.
    :param latency_ms: (integer >= 0) Latency to wait before serving a message.
        After this, messages with lower or equal timestamps will be discarded.
    :param commit_interval_sec: How many seconds to wait between commits. -1 does not commit with the given group id.
    :param group_by_time: Group messages with the same timestamp. This will yield a list of messages.
    :param begin_timestamp: Timestamp of the kafka messages where the generator will start.
    :param begin_flag: BEGINNING, CONTINUE, LIVE - CONTINUE will continue from the last committed offset.
        If there was no committed offset, it will start from the end of the stream.
    :param end_timestamp: Timestamp where to end the reading.
    :param end_flag: NEVER, END_OF_PARTITION
    :param heartbeat_interval_ms: -1 does not produce heartbeats. After every interval it will produce a
        HeartBeat typed message with the timestamp.
    """
    if begin_timestamp is not None and begin_flag is not None:
        raise Exception('You cannot set the begin timestamp and a flag at the same time.')
    if end_timestamp is not None and end_flag is not None:
        raise Exception('You cannot set the end timestamp and a flag at the same time.')
    if begin_timestamp is not None and end_timestamp is not None and begin_timestamp >= end_timestamp:
        raise Exception('The begin timestamp is larger than the end timestamp.')
    if begin_flag is not None and end_flag is not None and \
            begin_flag == BeginFlag.LIVE and end_flag == EndFlag.END_OF_PARTITION:
        raise Exception('You cannot start live and process until the end of the streams.')
    if end_flag is not None and not (end_flag == EndFlag.END_OF_PARTITION or end_flag == EndFlag.NEVER):
        raise Exception('Unknown end flag: {}. Please use the given enum to provide a proper end flag.'.format(end_flag))
    self.end_ts = end_timestamp
    self.end_flag = end_flag
    self.commit_interval_sec = commit_interval_sec
    self.latency_ms = latency_ms
    self.group_by_time = group_by_time
    self.max_poll_interval_ms = 5 * 60 * 1000
    self.consumer = Consumer({
        'bootstrap.servers': broker,
        'group.id': groupid,
        'enable.auto.commit': False,
        'auto.offset.reset': 'earliest' if begin_flag == BeginFlag.CONTINUE_OR_BEGINNING else 'latest',
        'fetch.wait.max.ms': 20,
        'max.poll.interval.ms': self.max_poll_interval_ms,
        'enable.partition.eof': True
    })
    self.last_poll = None
    self.tps = []
    self.queues = {}
    self.messages_to_be_committed = {}
    self.begin_timestamp = begin_timestamp
    for ti in topics_infos:
        topic_name = ti.topic
        self.messages_to_be_committed[topic_name] = {'last_msg': None, 'committed': True}
        if begin_timestamp is not None:
            self.tps.extend(self.consumer.offsets_for_times(
                [TopicPartition(topic_name, partition=ti.partition, offset=begin_timestamp)]))
        elif begin_flag is not None:
            if begin_flag == BeginFlag.BEGINNING:
                self.tps.append(TopicPartition(topic_name, partition=ti.partition, offset=OFFSET_BEGINNING))
            elif begin_flag in (BeginFlag.CONTINUE, BeginFlag.CONTINUE_OR_BEGINNING):
                self.tps.append(TopicPartition(topic_name, partition=ti.partition, offset=OFFSET_STORED))
            elif begin_flag == BeginFlag.LIVE:
                self.tps.append(TopicPartition(topic_name, partition=ti.partition, offset=OFFSET_END))
            else:
                raise Exception('Unknown begin flag. Please use the enum to provide a proper begin flag.')
        else:
            self.tps.append(TopicPartition(topic_name, partition=ti.partition, offset=OFFSET_END))
        end_offset = None
        if end_flag is not None and end_flag == EndFlag.END_OF_PARTITION:
            end_offset = self.consumer.get_watermark_offsets(TopicPartition(topic_name, 0))[1] - 1
        if end_offset is None or end_offset >= 0:
            self.queues[topic_name] = Topic(topic_name, self.consumer, end_offset=end_offset,
                                            partition=ti.partition, drop=ti.drop)
    self.consumer.assign(self.tps)
    self.last_commit = time.time()
    self.running = True
    self.heartbeat_interval_ms = heartbeat_interval_ms
    self.next_hb = None
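# --- Added illustration (not from the original snippets) ---------------------------------------
# A minimal sketch of how the constructor above resolves begin_timestamp: offsets_for_times()
# takes TopicPartition objects whose `offset` field carries a millisecond timestamp and returns
# the matching real offsets. Broker address, topic name, group id and timestamp are assumptions.
from confluent_kafka import Consumer, TopicPartition

replay_consumer = Consumer({"bootstrap.servers": "localhost:9092", "group.id": "replay"})
begin_ts_ms = 1_600_000_000_000  # assumed start timestamp in milliseconds

# The broker returns, per partition, the earliest offset whose message timestamp is
# >= begin_ts_ms (the returned offset is -1 if no such message exists).
start_positions = replay_consumer.offsets_for_times([TopicPartition("events", 0, begin_ts_ms)], timeout=10)
replay_consumer.assign(start_positions)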
def main():
    parser = argparse.ArgumentParser(
        epilog="""Description:
           Reidentification demo using any number of cameras: Either camera can be used for registration or
           reidentification only, or for both. Plays a video from a jpeg topic, visualizes head detection with
           a gray bounding box around a head. When a detection is identified, changes the bounding box color
           to orange and writes the dwell time, age and ID (derived from the reid MS ID) above the heads.

           Displays ('-d') or stores ('-o') the result of this demo in kafka topics.

           Required topics (example):
           - <prefix>.cam.0.original.Image.jpg
           - <prefix>.cam.0.dets.ObjectDetectionRecord.json
           - <prefix>.cam.0.frameinfo.FrameInfoRecord.json
           - <prefix>.cam.0.ages.AgeRecord.json
           - <prefix>.cam.1.original.Image.jpg
           - <prefix>.cam.1.dets.ObjectDetectionRecord.json
           - <prefix>.cam.1.frameinfo.FrameInfoRecord.json
           - <prefix>.cam.1.ages.AgeRecord.json
           ...
           - <prefix>.cam.1.reids.ReidRecord.json
           """,
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("broker", help="The name of the kafka broker.", type=str)
    parser.add_argument("prefix", help="Prefix of topics (base|skeleton).", type=str)
    parser.add_argument('-d', "--display", action='store_true')
    parser.add_argument('-o', '--output', help='write output image into kafka topic', action='store_true')
    parser.add_argument('text', help='Text to display (age|dwell_time|both).', type=str)
    args = parser.parse_args()

    if not args.display and not args.output:
        parser.error("Missing argument: -d (display output) or -o (write output to kafka) is needed")

    if args.output:
        producer = Producer({'bootstrap.servers': args.broker})

    overlay = cv2.imread('resources/powered_by_white.png', cv2.IMREAD_UNCHANGED)

    # Prepare the topics to read
    input_topics = [f"{args.prefix}.cam.{id}.{topic_postfix}"
                    for id in CAMERA_TOPIC_IDS for topic_postfix in TOPIC_POSTFIXES]
    reid_topics = [f"{args.prefix}.cam.{id}.{topic_postfix}"
                   for id in REID_TOPIC_IDS for topic_postfix in REID_TOPIC_POSTFIXES]
    consumable_topics = list(map(TopicInfo, input_topics)) \
        + (list(map(lambda t: TopicInfo(t, drop=False), reid_topics)))

    # TODO (when names via person stream): Remove this consumer
    reg_consumer = Consumer({
        'bootstrap.servers': args.broker,
        'group.id': 'multicamreid_reg',
        'auto.offset.reset': 'earliest'
    })
    reg_consumer.assign([TopicPartition(topic="named.records.json", partition=0, offset=0)])

    output_topics = dict((id, f"{args.prefix}.cam.{id}.{OUTPUT_TOPIC_POSTFIX}") for id in CAMERA_TOPIC_IDS)

    # read message, draw and display them
    consumer = TimeOrderedGeneratorWithTimeout(
        broker=args.broker,
        groupid="detection",
        topics_infos=consumable_topics,
        latency_ms=200,
        commit_interval_sec=None,
        group_by_time=True)

    registrations: Dict[str, Registration] = {}
    i = 0
    inner_id = 0
    scaling = 1.0
    for msgs in consumer.getMessages():
        k = -1
        for time, v in message_list_to_frame_structure(msgs).items():
            message = v.get(args.prefix, {})

            # Collect Reid records
            reid_records = {}
            for reid_id in REID_TOPIC_IDS:
                reid_message = message.get(reid_id, {})
                reid_records.update(reid_message.get("reid", {}))

            # Process the image
            for topic_key, topic_message in filter(lambda t: t[0] not in REID_TOPIC_IDS, message.items()):
                img = topic_message.get("image", {})
                if not isinstance(img, np.ndarray):
                    continue
                head_detections = topic_message.get("head_detection", {})

                # Set the image scale
                shape_orig = head_detections.pop("image", {})
                if shape_orig:
                    scaling = img.shape[1] / shape_orig["frame_info"]["columns"]

                # Processing the detections of the image
                for detection_key, detection_record in head_detections.items():
                    object_detection_record = detection_record.get("bounding_box", {})
                    if not object_detection_record:
                        continue
                    key_to_display = ""
                    color = COLOR_DARK_GREY

                    face_detection = detection_record.get("unknown", {})
                    if face_detection:
                        color = COLOR_LIGHT_GREY

                    age = None
                    age_detection_record = detection_record.get("age", {})
                    if age_detection_record:
                        age = age_detection_record["age"]
                    if args.text == "age" or args.text == "both":
                        key_to_display = f"Age: {age}" if age else ""

                    # Reidentification received for the detection
                    reid_records_for_det = reid_records.get(detection_key, {})
                    if reid_records_for_det:
                        for reid_record in filter(lambda r: "reid_event" in r, reid_records_for_det):
                            # We only use the first [0] identified face now
                            reid_key = reid_record["reid_event"]["match_list"][0]["id"]["first_detection_key"]
                            registered = registrations.get(reid_key, None)
                            if registered:
                                age_to_display = ""
                                if age:
                                    registered.addAge(age)
                                if args.text == "age" or args.text == "both":
                                    age_to_display = f"; Age: {registered.age:d}" if age else ""
                                # Calculate the dwell time if required
                                dwell_time_display = ""
                                if args.text == "dwell_time" or args.text == "both":
                                    detection_time = reid_record["reid_event"]["match_list"][0]["id"]["first_detection_time"]
                                    dwell_time = time - int(detection_time)
                                    dwell_time_display = f"; Dwell time: {dwell_time}ms"
                                color = COLOR_ORANGE
                                name_to_display = registered.name if registered.name else f"ID: {registered.id}"
                                key_to_display = f"{name_to_display}{age_to_display}{dwell_time_display}"
                            else:
                                inner_id += 1
                                registrations[reid_key] = Registration(id=inner_id)
                                if age:
                                    registrations[reid_key].addAge(age)
                                # Update the technical naming topic
                                # TODO (when names via person stream): remove
                                producer.produce("detected.records.json",
                                                 key=str(reid_key).encode("utf-8"),
                                                 value=(str(inner_id) + ";").encode("utf-8"),
                                                 timestamp=time)

                    # Read the technical naming topic
                    # TODO (when names via person stream): remove
                    reg_msg = reg_consumer.poll(0.01)
                    if reg_msg is not None:
                        try:
                            key = reg_msg.key().decode("utf-8")
                            name = reg_msg.value().decode("utf-8")
                            # Update the person name
                            reg_to_update = registrations.get(key)
                            if reg_to_update:
                                reg_to_update.addName(name)
                            else:
                                registrations[key] = Registration(name=name)
                        except:
                            print("Decoding entry of the named.records topic failed.", flush=True)

                    # draw text above bounding box
                    img = draw_nice_text(
                        canvas=img,
                        text=key_to_display,
                        bounding_box=object_detection_record["bounding_box"],
                        color=color,
                        scale=scaling)

                    # draw bounding_box
                    img = draw_nice_bounding_box(
                        canvas=img,
                        bounding_box=object_detection_record["bounding_box"],
                        color=color,
                        scaling=scaling)

                # draw ultinous logo
                img = draw_overlay(canvas=img, overlay=overlay, position=Position.BOTTOM_RIGHT, scale=scaling)

                # produce output topic
                if args.output:
                    out_topic = output_topics.get(topic_key)
                    producer.produce(out_topic, value=encode_image_to_message(img), timestamp=time)
                    producer.poll(0)
                    if i % 1000 == 0:
                        producer.flush()
                    i += 1

                # display
                if args.display:
                    cv2.imshow(f"DEMO Camera {topic_key}", img)
                    k = cv2.waitKey(33)
        if k == 113:  # The 'q' key to stop
            break
        elif k == -1:  # normally -1 returned, so don't print it
            continue
        else:
            print(f"Press 'q' key for EXIT!")
def get_next_offset(c, partition, resolution, topic, transactional):
    if transactional:
        return c.position([TopicPartition(topic, partition)])[0].offset + resolution
    else:
        return OFFSET_END
latest: if a partition has a committed offset, start consuming from that offset; if there is no
    committed offset, consume only data newly produced to that partition.
none: if every partition of the topic has a committed offset, start consuming after those offsets;
    if any partition lacks a committed offset, an exception is raised.
"""
c = Consumer({
    'bootstrap.servers': '192.168.198.133:29092',
    'group.id': 'mygroup',
    'auto.offset.reset': 'earliest'
})

# Assign a specific partition and start consuming from a given offset within it.
# TopicPartition(topic[, partition][, offset])
tp = TopicPartition('mytopic', 0, 0)
c.assign([tp])
c.seek(tp)

# c.subscribe(['mytopic'])

while True:
    msg = c.poll(1.0)
    if msg is None:
        continue
    if msg.error():
        print("Consumer error: {}".format(msg.error()))
        continue
    print('Received message {} [{}]: {}'.format(msg.topic(), msg.partition(), msg.value().decode('utf-8')))
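# --- Added illustration (not from the original snippets) ---------------------------------------
# A minimal sketch of how committed offsets (which the auto.offset.reset values described above
# fall back on) come about: with auto commit disabled, the group's position only advances when
# commit() is called explicitly. Broker address, topic and group names are assumptions.
from confluent_kafka import Consumer

cc = Consumer({
    'bootstrap.servers': 'localhost:9092',   # assumed broker
    'group.id': 'mygroup',
    'enable.auto.commit': False,
    'auto.offset.reset': 'earliest',         # only consulted when the group has no committed offset
})
cc.subscribe(['mytopic'])

msg = cc.poll(10.0)
if msg is not None and msg.error() is None:
    # ... process the message here ...
    cc.commit(message=msg, asynchronous=False)  # stores this message's offset + 1 for the group
cc.close()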