Example 1
    def _get_partitions(
            self,
            topic: Topic,
            retrieve_last_timestamp: bool,
            get_partition_watermarks: bool = True) -> List[Partition]:
        assert not (
            retrieve_last_timestamp and not get_partition_watermarks
        ), "Can not retrieve timestamp without partition watermarks"

        config = Config.get_instance().create_confluent_config()
        config.update({
            "group.id": ESQUE_GROUP_ID,
            "topic.metadata.refresh.interval.ms": "250"
        })
        with closing(confluent_kafka.Consumer(config)) as consumer:
            confluent_topic = consumer.list_topics(
                topic=topic.name).topics[topic.name]
            partitions: List[Partition] = []
            if not get_partition_watermarks:
                return [
                    Partition(partition_id, -1, -1, meta.isrs, meta.leader,
                              meta.replicas, None) for partition_id, meta in
                    confluent_topic.partitions.items()
                ]
            for partition_id, meta in confluent_topic.partitions.items():
                try:
                    low, high = consumer.get_watermark_offsets(
                        TopicPartition(topic=topic.name,
                                       partition=partition_id))
                except KafkaException:
                    # retry after metadata should be refreshed (also consider small network delays)
                    # unfortunately we cannot explicitly cause and wait for a metadata refresh
                    time.sleep(1)
                    low, high = consumer.get_watermark_offsets(
                        TopicPartition(topic=topic.name,
                                       partition=partition_id))

                latest_timestamp = None
                if high > low and retrieve_last_timestamp:
                    assignment = [
                        TopicPartition(topic=topic.name,
                                       partition=partition_id,
                                       offset=high - 1)
                    ]
                    consumer.assign(assignment)
                    msg = consumer.poll(timeout=10)
                    if msg is None:
                        logger.warning(
                            f"Due to timeout latest timestamp for topic `{topic.name}` "
                            f"and partition `{partition_id}` is missing.")
                    else:
                        latest_timestamp = float(msg.timestamp()[1]) / 1000
                partition = Partition(partition_id, low, high, meta.isrs,
                                      meta.leader, meta.replicas,
                                      latest_timestamp)
                partitions.append(partition)
        return partitions
Example 2
 def _assign_consumer_to_last_offset(self):
     off_topic = self.config["offset_topic"]
     partition = TopicPartition(off_topic, 0)
     try:
         _, high_offset = self._offset_consumer.get_watermark_offsets(
             partition, timeout=10)
     except KafkaException:
         logger.warning(
             f"Offset topic {off_topic} was not found, creating it now.")
         self._admin.create_topics(
             [NewTopic(off_topic, num_partitions=1, replication_factor=1)],
             operation_timeout=120)
         high_offset = 0
     partition.offset = max(0, high_offset - 1)
     self._offset_consumer.assign([partition])
Example 3
def test_edit_offsets(
    monkeypatch: MonkeyPatch,
    interactive_cli_runner,
    topic: str,
    producer: ConfluenceProducer,
    consumer_group: str,
    consumergroup_controller: ConsumerGroupController,
):
    produce_text_test_messages(producer=producer, topic_name=topic, amount=10)

    consumergroup_controller.commit_offsets(consumer_group, [TopicPartition(topic=topic, partition=0, offset=10)])

    consumergroup_desc_before = consumergroup_controller.get_consumer_group(consumer_id=consumer_group).describe(
        partitions=True
    )

    offset_config = {"offsets": [{"topic": topic, "partition": 0, "offset": 1}]}

    def mock_edit_function(text=None, editor=None, env=None, require_save=None, extension=None, filename=None):
        return yaml.dump(offset_config, default_flow_style=False)

    monkeypatch.setattr(click, "edit", mock_edit_function)
    result = interactive_cli_runner.invoke(
        esque, args=["edit", "offsets", consumer_group, "-t", topic], input="y\n", catch_exceptions=False
    )
    assert result.exit_code == 0

    # Check assertions:
    consumergroup_desc_after = consumergroup_controller.get_consumer_group(consumer_id=consumer_group).describe(
        partitions=True
    )
    assert consumergroup_desc_before["offsets"][topic][0]["consumer_offset"] == 10
    assert consumergroup_desc_after["offsets"][topic][0]["consumer_offset"] == 1
Example 4
    def edit_consumer_group_offsets(
            self, consumer_id: str,
            offset_plan: List[ConsumerGroupOffsetPlan]):
        """
        Commit consumergroup offsets to specific values
        :param consumer_id: ID of the consumer group
        :param offset_plan: List of ConsumerGroupOffsetPlan objects denoting the offsets for each partition in different topics
        :return:
        """
        consumer = ConsumerFactory().create_consumer(
            group_id=consumer_id,
            topic_name=None,
            output_directory=None,
            last=False,
            avro=False,
            initialize_default_output_directory=False,
            match=None,
            enable_auto_commit=False,
        )

        offsets = [
            TopicPartition(topic=plan_element.topic_name,
                           partition=plan_element.partition_id,
                           offset=plan_element.proposed_offset)
            for plan_element in offset_plan if not plan_element.offset_equal
        ]
        consumer.commit(offsets=offsets)
Example 5
def test_set_offsets_offset_to_delta_all_topics(
    topic: str,
    interactive_cli_runner,
    producer: ConfluenceProducer,
    consumer_group: str,
    consumergroup_controller: ConsumerGroupController,
):
    produce_text_test_messages(producer=producer, topic_name=topic, amount=10)

    consumergroup_controller.commit_offsets(
        consumer_group, [TopicPartition(topic=topic, partition=0, offset=10)])

    consumergroup_desc_before = consumergroup_controller.get_consumer_group(
        consumer_id=consumer_group).describe(partitions=True)

    interactive_cli_runner.invoke(
        esque,
        args=["set", "offsets", consumer_group, "--offset-by-delta", "-2"],
        input="y\n",
        catch_exceptions=False)
    # Check assertions:
    consumergroup_desc_after = consumergroup_controller.get_consumer_group(
        consumer_id=consumer_group).describe(partitions=True)
    assert consumergroup_desc_before["offsets"][topic][0]["consumer_offset"] == 10
    assert consumergroup_desc_after["offsets"][topic][0]["consumer_offset"] == 8
Example 6
 def assign_specific_partitions(self,
                                topic_name: str,
                                partitions: list = None,
                                offset: int = 0):
     self._topic_name = topic_name
     if partitions is not None:
         topic_partitions = [
             TopicPartition(self._topic_name,
                            partition=partition,
                            offset=offset) for partition in partitions
         ]
     else:
         topic_partitions = [
             TopicPartition(self._topic_name, partition=0, offset=offset)
         ]
     self._consumer.assign(topic_partitions)
Example 7
def produced_messages(
    records: RecordList,
    plain_avro_producer,
    plain_avro_consumer,
    topic_and_partitions: Tuple[str, int],
    running_cluster_config: Dict[str, str],
    consume_all,
) -> Iterable[List[Tuple[str, dict]]]:
    """
    Creates 15 random messages, produces them to the currently active topic and then yields them for the test.
    """
    topic_id, partitions = topic_and_partitions

    for key, value in records:
        plain_avro_producer.produce(key=key, value=value)

    plain_avro_producer.flush()

    cluster_metadata: ClusterMetadata = plain_avro_consumer.list_topics(
        topic=topic_id)
    topic_metadata: TopicMetadata = cluster_metadata.topics[topic_id]
    logger.info(f"Topic partitions: {topic_metadata.partitions.keys()}")
    assert partitions == len(
        topic_metadata.partitions.keys()), "Not all partitions present"
    offsets = 0
    for partition in topic_metadata.partitions.keys():
        _, ho = plain_avro_consumer.get_watermark_offsets(
            TopicPartition(topic_id, partition))
        offsets += ho

    assert len(records) == offsets, ""
    yield records
Example 8
def test_set_offsets_offset_from_group(
    topic: str,
    interactive_cli_runner,
    producer: ConfluenceProducer,
    consumer_group: str,
    target_consumer_group: str,
    consumergroup_controller: ConsumerGroupController,
):
    produce_text_test_messages(producer=producer, topic_name=topic, amount=10)

    consumergroup_controller.commit_offsets(
        consumer_group, [TopicPartition(topic=topic, partition=0, offset=10)])

    consumergroup_desc_before = consumergroup_controller.get_consumer_group(
        consumer_id=consumer_group).describe(partitions=True)

    interactive_cli_runner.invoke(
        esque,
        args=["set", "offsets", consumer_group, "--offset-by-delta", "-2"],
        input="y\n",
        catch_exceptions=False)
    consumergroup_desc_after = consumergroup_controller.get_consumer_group(
        consumer_id=consumer_group).describe(partitions=True)

    # create a new consumer in a separate group and consume just one message
    consumergroup_controller.commit_offsets(
        target_consumer_group,
        [TopicPartition(topic=topic, partition=0, offset=1)])

    interactive_cli_runner.invoke(
        esque,
        args=[
            "set", "offsets", target_consumer_group, "--offset-from-group",
            consumer_group
        ],
        input="y\n",
        catch_exceptions=False,
    )
    consumergroup_desc_target = consumergroup_controller.get_consumer_group(
        consumer_id=target_consumer_group).describe(partitions=True)

    assert consumergroup_desc_before["offsets"][topic][0]["consumer_offset"] == 10
    assert consumergroup_desc_after["offsets"][topic][0]["consumer_offset"] == 8
    assert consumergroup_desc_target["offsets"][topic][0]["consumer_offset"] == 8
Example 9
 def pause_topic(self):
     if not self.paused:
         logging.info('Topic {} paused. Last event timestamp: {}'.format(
             self.topic_name, self.queue[-1].message.timestamp()
             if len(self.queue) > 0 else None))
         self.paused = True
         self.consumer_ref.pause([
             TopicPartition(topic=self.topic_name, partition=self.partition)
         ])
Example 10
 def get_messages(self, timestamp):
     ret = []
     while len(self.queue) > 0 and self.queue[0].message.timestamp()[1] <= timestamp:
         ret.append(self.queue.popleft().message)
     if len(self.queue) < self.min_limit and self.paused and not self.stopped:
         logging.info('Resume reading on topic: {}'.format(self.topic_name))
         self.paused = False
         self.consumer_ref.resume([TopicPartition(topic=self.topic_name, partition=self.partition)])
     self.last_message_ts = timestamp
     return ret
Example 11
def poll_next_message(c, partition, resolution, topic, transactional):
    msg = None
    try:
        offset = get_next_offset(c, partition, resolution, topic,
                                 transactional)
        c.seek(TopicPartition(topic, partition, offset))
        msg = c.poll(timeout=0.05)
    except Exception as e:
        print(e)
    return msg
Example 12
def target_topic_avro_consumer(unittest_config: Config, target_topic: Tuple[str, int]) -> AvroConsumer:
    consumer = AvroConsumer(
        {
            "group.id": "asdf",
            "enable.auto.commit": False,
            "enable.partition.eof": False,
            **unittest_config.create_confluent_config(include_schema_registry=True),
        }
    )
    consumer.assign([TopicPartition(topic=target_topic[0], partition=i, offset=0) for i in range(target_topic[1])])
    yield consumer
    consumer.close()
Example 13
def test_json_record_serialization_custom(kafka_cluster, load_file):
    """
    Ensures to_dict and from_dict hooks are properly applied by the serializer.

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture

        load_file (callable(str)): JSON Schema file reader

    """
    topic = kafka_cluster.create_topic("serialization-json")
    sr = kafka_cluster.schema_registry({'url': 'http://localhost:8081'})

    schema_str = load_file("product.json")
    value_serializer = JSONSerializer(sr,
                                      schema_str,
                                      to_dict=_testProduct_to_dict)
    value_deserializer = JSONDeserializer(schema_str,
                                          from_dict=_testProduct_from_dict)

    producer = kafka_cluster.producer(value_serializer=value_serializer)

    record = _TestProduct(product_id=1,
                          name="The ice sculpture",
                          price=12.50,
                          tags=["cold", "ice"],
                          dimensions={
                              "length": 7.0,
                              "width": 12.0,
                              "height": 9.5
                          },
                          location={
                              "latitude": -78.75,
                              "longitude": 20.4
                          })

    producer.produce(topic, value=record, partition=0)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    msg = consumer.poll()
    actual = msg.value()

    assert all([
        getattr(actual, attribute) == getattr(record, attribute)
        for attribute in vars(record)
    ])
Example 14
    def consumer_factory_(topic: str) -> Consumer:
        consumer = Consumer({
            "group.id": "asdf",
            "enable.auto.commit": False,
            "enable.partition.eof": False,
            **unittest_config.create_confluent_config(),
        })
        partitions = consumer.list_topics(topic=topic).topics[topic].partitions

        consumer.assign([
            TopicPartition(topic=topic, partition=p, offset=0)
            for p in partitions
        ])
        consumers.append(consumer)
        return consumer
Example 15
def test_json_record_serialization(kafka_cluster, load_file):
    """
    Tests basic JsonSerializer and JsonDeserializer basic functionality.

    product.json from:
        https://json-schema.org/learn/getting-started-step-by-step.html

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture

        load_file (callable(str)): JSON Schema file reader

    """
    topic = kafka_cluster.create_topic("serialization-json")
    sr = kafka_cluster.schema_registry({'url': 'http://localhost:8081'})

    schema_str = load_file("product.json")
    value_serializer = JSONSerializer(sr, schema_str)
    value_deserializer = JSONDeserializer(schema_str)

    producer = kafka_cluster.producer(value_serializer=value_serializer)

    record = {
        "productId": 1,
        "productName": "An ice sculpture",
        "price": 12.50,
        "tags": ["cold", "ice"],
        "dimensions": {
            "length": 7.0,
            "width": 12.0,
            "height": 9.5
        },
        "warehouseLocation": {
            "latitude": -78.75,
            "longitude": 20.4
        }
    }

    producer.produce(topic, value=record, partition=0)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    msg = consumer.poll()
    actual = msg.value()

    assert all([actual[k] == v for k, v in record.items()])
Example 16
    def edit_consumer_group_offsets(
            self, consumer_id: str,
            offset_plan: List[ConsumerGroupOffsetPlan]):
        """
        Commit consumergroup offsets to specific values
        :param consumer_id: ID of the consumer group
        :param offset_plan: List of ConsumerGroupOffsetPlan objects denoting the offsets for each partition in different topics
        :return:
        """

        offsets = [
            TopicPartition(topic=plan_element.topic_name,
                           partition=plan_element.partition_id,
                           offset=plan_element.proposed_offset)
            for plan_element in offset_plan if not plan_element.offset_equal
        ]
        self.commit_offsets(consumer_id, offsets)
Example 17
 def _manually_commit(self):
     """
     Kafka requires the ratio of consumer threads to partitions to be one-to-one or one-to-many,
     never many-to-one, so concurrent message processing is limited by the partition count. This
     implementation supports consuming with a very large number of threads (e.g. 200 threads on a
     single partition); consumption does not happen in the thread that pulls from Kafka, and a task
     with a larger offset may finish before one with a smaller offset, which makes committing complex.
     Every 2 seconds, for each partition, the largest offset of the leading run of offsets whose
     consume status is 1 is committed.
     :return:
     """
     from confluent_kafka.cimpl import TopicPartition  # This package is not easy to install; users of this middleware have to sort out the installation themselves.
     if time.time() - self._recent_commit_time > 2:
         partion_max_consumed_offset_map = dict()
         to_be_remove_from_partion_max_consumed_offset_map = defaultdict(list)
         for partion, offset_consume_status in self._partion__offset_consume_status_map.items():
             max_consumed_offset = 0
             for offset, consume_status in offset_consume_status.items():
                 if consume_status == 1:
                     max_consumed_offset = offset
                     to_be_remove_from_partion_max_consumed_offset_map[partion].append(offset)
                 else:
                     break
             if max_consumed_offset:
                 partion_max_consumed_offset_map[partion] = max_consumed_offset
         offsets = [
             TopicPartition(topic=self._queue_name,
                            partition=partion,
                            offset=max_consumed_offset + 1)
             for partion, max_consumed_offset in partion_max_consumed_offset_map.items()
         ]
         if offsets:
             self._confluent_consumer.commit(offsets=offsets, asynchronous=False)
         self._recent_commit_time = time.time()
         for partion, offset_list in to_be_remove_from_partion_max_consumed_offset_map.items():
             for offset in offset_list:
                 del self._partion__offset_consume_status_map[partion][offset]
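
The approach in Example 17 boils down to: per partition, commit the offset just past the leading run of consecutively finished messages. The sketch below illustrates only that idea; the function name and the status-map layout are invented for illustration and are not taken from the example above.

from confluent_kafka import TopicPartition


def contiguous_commit_offsets(topic, status_by_partition):
    """Given {partition: {offset: 1 if processed else 0}}, return the TopicPartitions that are
    safe to commit: for each partition, the offset right after the leading run of processed
    offsets (a committed offset means "next offset to consume")."""
    commits = []
    for partition, status_by_offset in status_by_partition.items():
        last_done = None
        for offset in sorted(status_by_offset):
            if status_by_offset[offset] == 1:
                last_done = offset
            else:
                break  # stop at the first offset that is still in flight
        if last_done is not None:
            commits.append(TopicPartition(topic, partition, last_done + 1))
    return commits


# contiguous_commit_offsets("my-topic", {0: {10: 1, 11: 1, 12: 0, 13: 1}})
# -> [TopicPartition("my-topic", 0, 12)]: offset 13 is finished, but it cannot be committed
#    yet because offset 12 is still being processed.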
Example 18
def reset_offsets_from_partitions(client: AdminClient, brokers: str,
                                  app_name: str, input_topic: str):
    topic_description = get_topic(client, input_topic)
    partition_ids = [
        partition_metada.id
        for partition_metada in topic_description.partitions.values()
    ]
    partitions = [
        TopicPartition(input_topic, id_partition, 0)
        for id_partition in partition_ids
    ]
    consumer = Consumer({
        'bootstrap.servers': brokers,
        'group.id': app_name,
        'session.timeout.ms': 6000
    })
    response = consumer.commit(offsets=partitions, asynchronous=False)
    if not isinstance(response, list):
        raise FaustAppCleanException("Error while cleaning the Faust app!")
Example 19
def test_consume_error(kafka_cluster):
    """
    Tests to ensure librdkafka errors are propagated as
    an instance of ConsumeError.
    """
    topic = kafka_cluster.create_topic("test_commit_transaction")
    consumer_conf = {'enable.partition.eof': True}

    producer = kafka_cluster.producer()
    producer.produce(topic=topic, value="a")
    producer.flush()

    consumer = kafka_cluster.consumer(consumer_conf,
                                      value_deserializer=StringSerializer())
    consumer.assign([TopicPartition(topic, 0, OFFSET_END)])

    with pytest.raises(ConsumeError, match="No more messages"):
        # Trigger EOF error
        consumer.poll()
Example 20
def consumer(topic_object: Topic, consumer_group):
    _config = Config().create_confluent_config()
    _config.update({
        "group.id": consumer_group,
        "error_cb": raise_for_kafka_error,
        # We need to commit offsets manually once we're sure it got saved
        # to the sink
        "enable.auto.commit": False,
        "enable.partition.eof": False,
        # We need this to start at the last committed offset instead of the
        # latest when subscribing for the first time
        "default.topic.config": {
            "auto.offset.reset": "latest"
        },
    })
    _consumer = confluent_kafka.Consumer(_config)
    _consumer.assign(
        [TopicPartition(topic=topic_object.name, partition=0, offset=0)])
    yield _consumer
Example 21
def test_set_offsets_offset_to_timestamp_value(
    topic: str,
    interactive_cli_runner,
    producer: ConfluenceProducer,
    consumer_group: str,
    consumergroup_controller: ConsumerGroupController,
):
    messages = produce_text_test_messages(producer=producer,
                                          topic_name=topic,
                                          amount=10)

    consumergroup_controller.commit_offsets(
        consumer_group, [TopicPartition(topic=topic, partition=0, offset=10)])

    consumergroup_desc_before = consumergroup_controller.get_consumer_group(
        consumer_id=consumer_group).describe(partitions=True)

    fifth_message = messages[4]
    timestamp = fifth_message.timestamp
    dt = pendulum.from_timestamp(round(timestamp / 1000) - 1)

    interactive_cli_runner.invoke(
        esque,
        args=[
            "set",
            "offsets",
            consumer_group,
            "--topic-name",
            topic,
            "--offset-to-timestamp",
            dt.format("YYYY-MM-DDTHH:mm:ss"),
        ],
        input="y\n",
        catch_exceptions=False,
    )
    # Check assertions:
    consumergroup_desc_after = consumergroup_controller.get_consumer_group(
        consumer_id=consumer_group).describe(partitions=True)
    assert consumergroup_desc_before["offsets"][topic][0]["consumer_offset"] == 10
    assert consumergroup_desc_after["offsets"][topic][0]["consumer_offset"] == 4
Example 22
def randomly_generated_consumer_groups(filled_topic,
                                       unittest_config: Config,
                                       prefix="") -> str:
    randomly_generated_consumer_group = prefix + "".join(
        random.choices(ascii_letters, k=8))
    _config = unittest_config.create_confluent_config()
    _config.update({
        "group.id": randomly_generated_consumer_group,
        "enable.auto.commit": False,
        "default.topic.config": {
            "auto.offset.reset": "latest"
        },
    })
    _consumer = confluent_kafka.Consumer(_config)
    _consumer.assign(
        [TopicPartition(topic=filled_topic.name, partition=0, offset=0)])
    for i in range(2):
        msg = _consumer.consume(timeout=10)[0]
        _consumer.commit(msg, asynchronous=False)
    return randomly_generated_consumer_group
Example 23
def test_json_record_deserialization_mismatch(kafka_cluster, load_file):
    """
    Ensures to_dict and from_dict hooks are properly applied by the serializer.

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture

        load_file (callable(str)): JSON Schema file reader

    """
    topic = kafka_cluster.create_topic("serialization-json")
    sr = kafka_cluster.schema_registry({'url': 'http://localhost:8081'})

    schema_str = load_file("contractor.json")
    schema_str2 = load_file("product.json")

    value_serializer = JSONSerializer(sr, schema_str)
    value_deserializer = JSONDeserializer(schema_str2)

    producer = kafka_cluster.producer(value_serializer=value_serializer)

    record = {
        "contractorId": 2,
        "contractorName": "Magnus Edenhill",
        "contractRate": 30,
        "trades": ["pickling"]
    }

    producer.produce(topic, value=record, partition=0)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    with pytest.raises(
            ConsumeError,
            match=r"(.*) is a required property \(KafkaError code {}\)".format(
                KafkaError._VALUE_DESERIALIZATION)):
        consumer.poll()
Example 24
def create_consumers(args, num_partitions, partition_table):
    consumers = []
    transactional = args["transactional"]
    for i in range(num_partitions):
        partition_table[i] = []
        oc = Consumer({
            'bootstrap.servers': args["kafka"],
            'group.id': str(uuid.uuid4()),
            'auto.offset.reset': 'latest',
            'api.version.request': True,
            'isolation.level': ('read_committed' if transactional else 'read_uncommitted'),
            'max.poll.interval.ms': 86400000
        })
        oc.assign([TopicPartition(args["output_topic"], i)])
        oc.poll(0.5)
        consumers.append(oc)
    return consumers
Example 25
 def _assign_consumer_to_last_offset(self):
     partition = TopicPartition(self.config["offset_topic"], 0)
     _, high_offset = self._offset_consumer.get_watermark_offsets(partition)
     partition.offset = max(0, high_offset - 1)
     self._offset_consumer.assign([partition])
Example 26
    def __init__(self,
                 broker,
                 groupid,
                 topics_infos: List[TopicInfo],
                 latency_ms,
                 commit_interval_sec=None,
                 group_by_time=False,
                 begin_timestamp=None,
                 begin_flag=None,
                 end_timestamp=None,
                 end_flag=None,
                 heartbeat_interval_ms=-1):
        """
        :param broker: Broker to connect to.
        :param groupid: Group id of the consumer.
        :param topics_infos: [TopicInfo()] - list of TopicInfo objects.
        :param latency_ms: (integer >= 0) Latency to wait before serving a message.
                            After this, messages with lower or equal timestamps will be discarded.
        :param commit_interval_sec: How many seconds to wait between commits. -1 does not commit with the given group id.
        :param group_by_time: Group messages with the same timestamp. This will yield a list of messages.
        :param begin_timestamp: Timestamp of the kafka messages where the generator will start.
        :param begin_flag: BEGINNING, CONTINUE, LIVE - CONTINUE will continue from the last committed offset.
                            If there was no committed offset, it will start from the end of the stream.
        :param end_timestamp: Timestamp where to end the reading.
        :param end_flag: NEVER, END_OF_PARTITION
        :param heartbeat_interval_ms: -1 does not produce heartbeats. Otherwise, after every interval a HeartBeat-typed
                                        message with the timestamp is produced.
        """
        if begin_timestamp is not None and begin_flag is not None:
            raise Exception(
                'You cannot set the begin timestamp and a flag at the same time.'
            )
        if end_timestamp is not None and end_flag is not None:
            raise Exception(
                'You cannot set the end timestamp and a flag at the same time.'
            )
        if begin_timestamp is not None and end_timestamp is not None and begin_timestamp >= end_timestamp:
            raise Exception(
                'The begin timestamp is larger than the end timestamp.')
        if begin_flag is not None and end_flag is not None and \
                begin_flag == BeginFlag.LIVE and end_flag == EndFlag.END_OF_PARTITION:
            raise Exception(
                'You cannot start in live mode and process until the end of the stream.'
            )
        if end_flag is not None and not (end_flag == EndFlag.END_OF_PARTITION
                                         or end_flag == EndFlag.NEVER):
            raise Exception(
                'Unknown end flag: {}. Please use the given enum to set a proper end flag.'
                .format(end_flag))
        self.end_ts = end_timestamp
        self.end_flag = end_flag
        self.commit_interval_sec = commit_interval_sec
        self.latency_ms = latency_ms
        self.group_by_time = group_by_time
        self.max_poll_interval_ms = 5 * 60 * 1000
        self.consumer = Consumer({
            'bootstrap.servers': broker,
            'group.id': groupid,
            'enable.auto.commit': False,
            'auto.offset.reset': 'earliest' if begin_flag == BeginFlag.CONTINUE_OR_BEGINNING else 'latest',
            'fetch.wait.max.ms': 20,
            'max.poll.interval.ms': self.max_poll_interval_ms,
            'enable.partition.eof': True
        })
        self.last_poll = None

        self.tps = []
        self.queues = {}
        self.messages_to_be_committed = {}
        self.begin_timestamp = begin_timestamp
        for ti in topics_infos:
            topic_name = ti.topic
            self.messages_to_be_committed[topic_name] = {
                'last_msg': None,
                'committed': True
            }
            if begin_timestamp is not None:
                self.tps.extend(
                    self.consumer.offsets_for_times([
                        TopicPartition(topic_name,
                                       partition=ti.partition,
                                       offset=begin_timestamp)
                    ]))
            elif begin_flag is not None:
                if begin_flag == BeginFlag.BEGINNING:
                    self.tps.append(
                        TopicPartition(topic_name,
                                       partition=ti.partition,
                                       offset=OFFSET_BEGINNING))
                elif begin_flag in (BeginFlag.CONTINUE,
                                    BeginFlag.CONTINUE_OR_BEGINNING):
                    self.tps.append(
                        TopicPartition(topic_name,
                                       partition=ti.partition,
                                       offset=OFFSET_STORED))
                elif begin_flag == BeginFlag.LIVE:
                    self.tps.append(
                        TopicPartition(topic_name,
                                       partition=ti.partition,
                                       offset=OFFSET_END))
                else:
                    raise Exception(
                        'Unknown begin flag. Please use the enum to provide proper begin flag.'
                    )
            else:
                self.tps.append(
                    TopicPartition(topic_name,
                                   partition=ti.partition,
                                   offset=OFFSET_END))
            end_offset = None
            if end_flag is not None and end_flag == EndFlag.END_OF_PARTITION:
                end_offset = self.consumer.get_watermark_offsets(
                    TopicPartition(topic_name, 0))[1] - 1
            if end_offset is None or end_offset >= 0:
                self.queues[topic_name] = Topic(topic_name,
                                                self.consumer,
                                                end_offset=end_offset,
                                                partition=ti.partition,
                                                drop=ti.drop)
        self.consumer.assign(self.tps)
        self.last_commit = time.time()
        self.running = True
        self.heartbeat_interval_ms = heartbeat_interval_ms
        self.next_hb = None
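
Given the constructor parameters documented above, construction might look like the sketch below. This is an assumed usage pattern only: the broker address, group id and topic name are placeholders, TopicInfo, BeginFlag and TimeOrderedGeneratorWithTimeout come from this example's module, and getMessages() is the iteration entry point used in the next example.

# Hypothetical usage sketch; all literal values are placeholders.
consumer = TimeOrderedGeneratorWithTimeout(
    broker="localhost:9092",                  # placeholder broker address
    groupid="replay-group",                   # placeholder consumer group
    topics_infos=[TopicInfo("demo.topic")],   # topics to merge in timestamp order
    latency_ms=200,                           # wait 200 ms before serving a message
    commit_interval_sec=5,                    # commit every 5 seconds
    begin_flag=BeginFlag.CONTINUE,            # resume from the last committed offset
)
for msg in consumer.getMessages():
    ...  # messages are served in timestamp order after the configured latency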
Example 27
def main():
    parser = argparse.ArgumentParser(
        epilog="""Description:
           Reidentification demo using any number of cameras: 
           Each camera can be used for registration only, for reidentification only, or for both.
           
           Plays a video from a jpeg topic,
           visualizes head detection with a gray bounding box around a head.
           When a detection is identified, changes the bounding box color to orange
           and writes the dwell time, age and ID (derived from the reid MS ID) above the heads.
           
           Displays ('-d') or stores ('-o') the result of this demo in kafka topics.

           Required topics (example):
           - <prefix>.cam.0.original.Image.jpg
           - <prefix>.cam.0.dets.ObjectDetectionRecord.json
           - <prefix>.cam.0.frameinfo.FrameInfoRecord.json
           - <prefix>.cam.0.ages.AgeRecord.json
           - <prefix>.cam.1.original.Image.jpg
           - <prefix>.cam.1.dets.ObjectDetectionRecord.json
           - <prefix>.cam.1.frameinfo.FrameInfoRecord.json
           - <prefix>.cam.1.ages.AgeRecord.json
           ...
           - <prefix>.cam.1.reids.ReidRecord.json
           """,
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument("broker",
                        help="The name of the kafka broker.",
                        type=str)
    parser.add_argument("prefix",
                        help="Prefix of topics (base|skeleton).",
                        type=str)
    parser.add_argument('-d', "--display", action='store_true')
    parser.add_argument('-o',
                        '--output',
                        help='write output image into kafka topic',
                        action='store_true')
    parser.add_argument('text',
                        help='Text to display (age|dwell_time|both).',
                        type=str)
    args = parser.parse_args()

    if not args.display and not args.output:
        parser.error(
            "Missing argument: -d (display output) or -o (write output to kafka) is needed"
        )

    if args.output:
        producer = Producer({'bootstrap.servers': args.broker})

    overlay = cv2.imread('resources/powered_by_white.png',
                         cv2.IMREAD_UNCHANGED)

    # Prepare the topics to read
    input_topics = [
        f"{args.prefix}.cam.{id}.{topic_postfix}" for id in CAMERA_TOPIC_IDS
        for topic_postfix in TOPIC_POSTFIXES
    ]
    reid_topics = [
        f"{args.prefix}.cam.{id}.{topic_postfix}" for id in REID_TOPIC_IDS
        for topic_postfix in REID_TOPIC_POSTFIXES
    ]
    consumable_topics = list(map(TopicInfo, input_topics)) \
                        + (list(map(lambda t: TopicInfo(t, drop=False), reid_topics)))

    # TODO (when names via person stream): Remove this consumer
    reg_consumer = Consumer({
        'bootstrap.servers': args.broker,
        'group.id': 'multicamreid_reg',
        'auto.offset.reset': 'earliest'
    })
    reg_consumer.assign(
        [TopicPartition(topic="named.records.json", partition=0, offset=0)])

    output_topics = dict((id, f"{args.prefix}.cam.{id}.{OUTPUT_TOPIC_POSTFIX}")
                         for id in CAMERA_TOPIC_IDS)

    # read message, draw and display them
    consumer = TimeOrderedGeneratorWithTimeout(broker=args.broker,
                                               groupid="detection",
                                               topics_infos=consumable_topics,
                                               latency_ms=200,
                                               commit_interval_sec=None,
                                               group_by_time=True)

    registrations: Dict[str, Registration] = {}
    i = 0
    inner_id = 0
    scaling = 1.0
    for msgs in consumer.getMessages():
        k = -1
        for time, v in message_list_to_frame_structure(msgs).items():
            message = v.get(args.prefix, {})

            # Collect Reid records
            reid_records = {}
            for reid_id in REID_TOPIC_IDS:
                reid_message = message.get(reid_id, {})
                reid_records.update(reid_message.get("reid", {}))

            # Process the image
            for topic_key, topic_message in filter(
                    lambda t: t[0] not in REID_TOPIC_IDS, message.items()):
                img = topic_message.get("image", {})
                if not isinstance(img, np.ndarray):
                    continue
                head_detections = topic_message.get("head_detection", {})
                # Set the image scale
                shape_orig = head_detections.pop("image", {})
                if shape_orig:
                    scaling = img.shape[1] / shape_orig["frame_info"]["columns"]

                # Processing the detections of the image
                for detection_key, detection_record in head_detections.items():
                    object_detection_record = detection_record.get(
                        "bounding_box", {})
                    if not object_detection_record:
                        continue
                    key_to_display = ""
                    color = COLOR_DARK_GREY

                    face_detection = detection_record.get("unknown", {})
                    if face_detection:
                        color = COLOR_LIGHT_GREY

                    age = None
                    age_detection_record = detection_record.get("age", {})
                    if age_detection_record:
                        age = age_detection_record["age"]
                    if args.text == "age" or args.text == "both":
                        key_to_display = f"Age: {age}" if age else ""

                    # Reidentification received for the detection
                    reid_records_for_det = reid_records.get(detection_key, {})
                    if reid_records_for_det:
                        for reid_record in filter(lambda r: "reid_event" in r,
                                                  reid_records_for_det):
                            # We only use the first [0] identified face now
                            reid_key = reid_record["reid_event"]["match_list"][
                                0]["id"]["first_detection_key"]
                            registered = registrations.get(reid_key, None)
                            if registered:
                                age_to_display = ""
                                if age:
                                    registered.addAge(age)
                                if args.text == "age" or args.text == "both":
                                    age_to_display = f"; Age: {registered.age:d}" if age else ""
                                # Calculate the dwell time if required
                                dwell_time_display = ""
                                if args.text == "dwell_time" or args.text == "both":
                                    detection_time = reid_record["reid_event"][
                                        "match_list"][0]["id"][
                                            "first_detection_time"]
                                    dwell_time = time - int(detection_time)
                                    dwell_time_display = f"; Dwell time: {dwell_time}ms"
                                color = COLOR_ORANGE
                                name_to_display = registered.name if registered.name else f"ID: {registered.id}"
                                key_to_display = f"{name_to_display}{age_to_display}{dwell_time_display}"

                            else:
                                inner_id += 1
                                registrations[reid_key] = Registration(
                                    id=inner_id)
                                if age:
                                    registrations[reid_key].addAge(age)

                                # Update the technical naming topic
                                #  TODO (when names via person stream): remove
                                producer.produce(
                                    "detected.records.json",
                                    key=str(reid_key).encode("utf-8"),
                                    value=(str(inner_id) +
                                           ";").encode("utf-8"),
                                    timestamp=time)

                    # Read the technical naming topic
                    #  TODO (when names via person stream): remove
                    reg_msg = reg_consumer.poll(0.01)
                    if reg_msg is not None:
                        try:
                            key = reg_msg.key().decode("utf-8")
                            name = reg_msg.value().decode("utf-8")
                            # Update the person name
                            reg_to_update = registrations.get(key)
                            if reg_to_update:
                                reg_to_update.addName(name)
                            else:
                                registrations[key] = Registration(name=name)
                        except:
                            print(
                                "Decoding entry of the named.records topic failed.",
                                flush=True)

                    # draw text above bounding box
                    img = draw_nice_text(
                        canvas=img,
                        text=key_to_display,
                        bounding_box=object_detection_record["bounding_box"],
                        color=color,
                        scale=scaling)

                    # draw bounding_box
                    img = draw_nice_bounding_box(
                        canvas=img,
                        bounding_box=object_detection_record["bounding_box"],
                        color=color,
                        scaling=scaling)

                # draw ultinous logo
                img = draw_overlay(canvas=img,
                                   overlay=overlay,
                                   position=Position.BOTTOM_RIGHT,
                                   scale=scaling)

                # produce output topic
                if args.output:
                    out_topic = output_topics.get(topic_key)
                    producer.produce(out_topic,
                                     value=encode_image_to_message(img),
                                     timestamp=time)
                    producer.poll(0)
                    if i % 1000 == 0:
                        producer.flush()
                    i += 1

                # display #
                if args.display:
                    cv2.imshow(f"DEMO Camera {topic_key}", img)
                    k = cv2.waitKey(33)

        if k == 113:  # The 'q' key to stop
            break
        elif k == -1:  # normally -1 is returned, so don't print it
            continue
        else:
            print(f"Press 'q' key for EXIT!")
Example 28
def get_next_offset(c, partition, resolution, topic, transactional):
    if transactional:
        return c.position([TopicPartition(topic, partition)])[0].offset + resolution
    else:
        return OFFSET_END
Example 29
latest
When the partitions already have committed offsets, consumption starts from the committed offsets; for a partition without a committed offset, only newly produced messages in that partition are consumed.

none
When every partition of the topic has a committed offset, consumption starts after those offsets; if even one partition has no committed offset, an exception is raised.
"""
c = Consumer({
    'bootstrap.servers': '192.168.198.133:29092',
    'group.id': 'mygroup',
    'auto.offset.reset': 'earliest'
})

# Assign the consumer to a specific partition and start consuming from the given offset within it
# TopicPartition(topic[, partition][, offset])
tp = TopicPartition('mytopic', 0, 0)
c.assign([tp])
c.seek(tp)

# c.subscribe(['mytopic'])

while True:
    msg = c.poll(1.0)

    if msg is None:
        continue
    if msg.error():
        print("Consumer error: {}".format(msg.error()))
        continue

    print('Received message {} [{}]: {}'.format(msg.topic(), msg.partition(), msg.value().decode('utf-8')))
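
The 'latest'/'none' descriptions above only matter when the group has no committed offset for a partition. The hedged sketch below (reusing the broker, topic and group from this example as placeholders) checks what, if anything, the group has committed, using Consumer.committed():

from confluent_kafka import Consumer, TopicPartition, OFFSET_INVALID

check = Consumer({
    'bootstrap.servers': '192.168.198.133:29092',  # same placeholder broker as above
    'group.id': 'mygroup',
    'enable.auto.commit': False,
})
# committed() returns one TopicPartition per requested partition, with .offset set to the
# committed offset, or OFFSET_INVALID if the group has committed nothing for that partition.
for tp in check.committed([TopicPartition('mytopic', 0)], timeout=10):
    if tp.offset == OFFSET_INVALID:
        print('partition {}: no committed offset, auto.offset.reset decides where to start'.format(tp.partition))
    else:
        print('partition {}: committed offset {}'.format(tp.partition, tp.offset))
check.close()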