Example #1
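Transfers binary messages between two topics by way of files: esque's consume command writes ten messages from the source topic into a temporary directory, produce reads them back into the target topic, and the test asserts that keys, values, and partitions survive the round trip.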
def test_transfer_binary_message_using_file(
    producer: ConfluentProducer,
    target_topic_consumer: Consumer,
    source_topic: Tuple[str, int],
    target_topic: Tuple[str, int],
    non_interactive_cli_runner: CliRunner,
    tmpdir_factory,
):
    output_directory = tmpdir_factory.mktemp("output_directory")
    expected_messages = produce_binary_test_messages(topic_name=source_topic[0], producer=producer)

    non_interactive_cli_runner.invoke(
        esque,
        args=["consume", "-d", str(output_directory), "--binary", "--number", "10", source_topic[0]],
        catch_exceptions=False,
    )
    non_interactive_cli_runner.invoke(
        esque, args=["produce", "-d", str(output_directory), "--binary", target_topic[0]], catch_exceptions=False
    )

    actual_messages = {
        (msg.key(), msg.value(), msg.partition()) for msg in target_topic_consumer.consume(10, timeout=20)
    }
    expected_messages = {(msg.key, msg.value, msg.partition) for msg in expected_messages}
    assert expected_messages == actual_messages
Example #2
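The same binary round trip as Example #1, performed in a single step with esque's transfer command and its --from-topic/--to-topic options.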
def test_transfer_binary_with_single_command(
    producer: ConfluentProducer,
    target_topic_consumer: Consumer,
    source_topic: Tuple[str, int],
    target_topic: Tuple[str, int],
    non_interactive_cli_runner: CliRunner,
):
    expected_messages = produce_binary_test_messages(topic_name=source_topic[0], producer=producer)

    non_interactive_cli_runner.invoke(
        esque,
        args=[
            "transfer",
            "--from-topic",
            source_topic[0],
            "--to-topic",
            target_topic[0],
            "--binary",
            "--number",
            "10",
            "--first",
        ],
        catch_exceptions=False,
    )

    actual_messages = {
        (msg.key(), msg.value(), msg.partition()) for msg in target_topic_consumer.consume(10, timeout=20)
    }
    expected_messages = {(msg.key, msg.value, msg.partition) for msg in expected_messages}
    assert expected_messages == actual_messages
Example #3
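Creates two confluent-kafka Consumers with throwaway group IDs, consumes once so that partitions get assigned, and returns every assigned partition with its offset set to the current high watermark, ready to be handed to assign() later (see Example #10).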
def get_partitions_with_offsets(broker):
    input_consumer = Consumer({
        'bootstrap.servers': broker,
        'group.id': str(uuid.uuid4()),
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': True,
        'auto.commit.interval.ms': 1000,
        'api.version.request': True,
        'max.poll.interval.ms': 60000
    })

    output_consumer = Consumer({
        'bootstrap.servers': broker,
        'group.id': str(uuid.uuid4()),
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': True,
        'auto.commit.interval.ms': 1000,
        'api.version.request': True,
        'max.poll.interval.ms': 60000
    })

    input_consumer.subscribe(['read', 'update', 'transfer'])
    output_consumer.subscribe(['responses'])

    # The first consume() call makes the consumer join the group and get
    # partitions assigned; assignment() returns an empty list before then.
    msgs = input_consumer.consume(timeout=5, num_messages=100)
    if len(msgs) == 0:
        print("no messages consumed; returning empty offsets")
        return {}

    partitions_with_offsets = {'input': [], 'output': []}

    input_partitions = input_consumer.assignment()
    for p in input_partitions:
        _, h = input_consumer.get_watermark_offsets(p)
        p.offset = h
        partitions_with_offsets['input'].append(p)

    output_consumer.consume(timeout=5, num_messages=100)  # poll only to trigger assignment
    output_partitions = output_consumer.assignment()
    for p in output_partitions:
        _, h = output_consumer.get_watermark_offsets(p)
        p.offset = h
        partitions_with_offsets['output'].append(p)

    return partitions_with_offsets
Example #4
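A Consumer subclass overriding consume() to build and finish a tracing child span for every message it returns; build_and_finish_child_span is defined elsewhere in the wrapping class.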
    def consume(self, num_messages=1, *args, **kwargs):
        """
        overridden method

        :param num_messages:
        :param args:
        :param kwargs:
        :return:
        """
        msgs = Consumer.consume(self, num_messages, *args, **kwargs)

        for msg in msgs:
            if msg is not None:
                self.build_and_finish_child_span(msg)

        return msgs
Example #5
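A third variant of the esque binary transfer test: consume --stdout captures the messages on standard output, and the captured output is piped into produce --stdin.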
def test_transfer_binary_message_using_cli_pipe(
    producer: ConfluentProducer,
    target_topic_consumer: Consumer,
    source_topic: Tuple[str, int],
    target_topic: Tuple[str, int],
    non_interactive_cli_runner,
):
    expected_messages = produce_binary_test_messages(topic_name=source_topic[0], producer=producer)

    result1 = non_interactive_cli_runner.invoke(
        esque, args=["consume", "--stdout", "--binary", "--number", "10", source_topic[0]], catch_exceptions=False
    )
    non_interactive_cli_runner.invoke(
        esque, args=["produce", "--stdin", "--binary", target_topic[0]], input=result1.output, catch_exceptions=False
    )

    actual_messages = {
        (msg.key(), msg.value(), msg.partition()) for msg in target_topic_consumer.consume(10, timeout=20)
    }
    expected_messages = {(msg.key, msg.value, msg.partition) for msg in expected_messages}
    assert expected_messages == actual_messages
Example #6
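A minimal private wrapper that forwards straight to the base-class Consumer.consume.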
    def __consume_msgs(self, num_messages, *args, **kwargs):
        return Consumer.consume(self, num_messages, *args, **kwargs)
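
For reference, a minimal self-contained sketch of the underlying confluent-kafka call that the wrapper above delegates to; the broker address, group ID, and topic name are placeholders:

from confluent_kafka import Consumer

# Placeholder connection settings; adjust for a real cluster.
consumer = Consumer({
    "bootstrap.servers": "localhost:9092",
    "group.id": "example-group",
    "auto.offset.reset": "earliest",
})
consumer.subscribe(["example-topic"])

try:
    # consume() returns up to num_messages Message objects; fewer (or an
    # empty list) if the timeout expires first.
    for msg in consumer.consume(num_messages=10, timeout=1.0):
        if msg.error():
            print(f"Error: {msg.error()}")
            continue
        print(f"{msg.topic()}[{msg.partition()}]@{msg.offset()}: {msg.value()!r}")
finally:
    consumer.close()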
Example #7
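A consumer class for EEG streamer messages: listen() pulls up to num_messages per call in batches of at most 100, buffers them in a deque, decodes them with a project-specific msg_decode helper, and appends a (nchannel x nsample) chunk to self.data.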
class MsgConsumer:
    def __init__(self,
                 topic,
                 broker_address,
                 group_id='group',
                 client_id='client',
                 auto_offset_reset='earliest',
                 num_messages=1,
                 verbose=False):
        """Consumer for handling EEG Streamer messages.

        Args:
            topic: Topic to subscribe to
            broker_address: Broker address
            group_id: group ID
            client_id: client ID
            auto_offset_reset: (default: 'earliest')
            num_messages: Maximum number of messages to consume each time (default: 1)
            verbose: verbose mode. (default: False)
        """
        self.data = deque()
        self.timestamps = deque()

        self.__num_msgs = num_messages
        """Maximum number of messages to consume each time (default: 1)"""

        self.__verbose = verbose

        self.__streamqueue = deque()

        self.__consumer = Consumer({
            'bootstrap.servers': broker_address,
            'auto.offset.reset': auto_offset_reset,
            'group.id': group_id,
            'client.id': client_id,
            'enable.auto.commit': True,
            'session.timeout.ms': 6000,
            'max.poll.interval.ms': 10000
        })
        """consumer that reads stream of EEG signal"""
        self.__consumer.subscribe([topic])

    def listen(self):
        """Read a chunk of messages from Kafka and buffer it.

        Consumes up to ``num_messages`` messages per call, enqueues them,
        and once a full chunk has accumulated decodes it and appends the
        (nchannel x nsample) dataset to ``self.data`` and its first
        timestamp to ``self.timestamps``. Returns None early when a full
        chunk is not yet available.
        """
        # If the chunk size is too large, consume it in multiple batches
        # of at most 100 messages each
        chunk_size = self.__num_msgs
        msgs = []
        while chunk_size > 100:
            msgs.extend(self.__consumer.consume(num_messages=100, timeout=1))
            chunk_size -= 100
        msgs.extend(self.__consumer.consume(num_messages=chunk_size,
                                            timeout=1))

        print(f"INFO: Received {str(len(msgs))} messages"
              ) if self.__verbose else None

        if msgs is None or len(msgs) <= 0:
            return None

        self.__streamqueue.extendleft(msgs)  # Enqueue

        if len(self.__streamqueue) < self.__num_msgs:
            return None

        # Dequeue
        msgs__ = [self.__streamqueue.pop() for i in range(0, self.__num_msgs)]

        timestamps, data = [], []
        for msg in msgs__:
            time, values = msg_decode(msg.value())
            if time is not None:
                timestamps.append(time)
                data.append(values)
        # TODO: assert there is no big time gap in the data

        if len(data) < self.__num_msgs:
            return None

        if self.__verbose:
            print(timestamps[0], data[0])

        data = tuple(zip(*data))
        self.data.append(data)
        self.timestamps.append(timestamps[0])

        print(f"INFO: Sucessfully Read a chunk") if self.__verbose else None

    def stop(self):
        self.__consumer.close()

    def drain(self):
        self.__num_msgs = 100000
        for i in range(0, 10):
            self.listen()
Example #8
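Consumes a topic for test assertions: the on_assign callback (callback_on_assignment, defined on the same class) flips consume_lock once partitions are assigned, at which point the timeout is re-armed with topic_timeout and every message value is collected into events.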
    def consume(self, topic, topic_timeout):
        kafka_config_consumer = ConfigFactory(kafka_client="consumer")
        config = kafka_config_consumer.config
        log.info("kafka config for consume %s", config)
        consumer = Consumer(config)

        events = []

        start_time = time.monotonic()
        timeout_start_time = start_time
        timeout_consumer = 10.0

        # actual consumer starts now
        # subscribe to 1 or more topics and define the callback function
        # callback is only received after consumer.consume() is called!
        consumer.subscribe([topic], on_assign=self.callback_on_assignment)
        log.info(
            f"Waiting for partition assignment ... (timeout at {timeout_consumer} seconds)"
        )
        try:
            while (time.monotonic() - timeout_start_time) < timeout_consumer:
                # start consumption
                messages = consumer.consume(timeout=0.1)
                # check for partition assignment
                if self.consume_lock == ConsumerState.PARTITIONS_UNASSIGNED:
                    # this should not happen but we are not 100% sure
                    if messages:
                        log.error("messages consumed but lock is unopened")
                        break
                    continue
                # after partition assignment set the timeout again
                # and reset the start time from which to determine timeout
                # violation
                elif self.consume_lock == ConsumerState.PARTITIONS_ASSIGNED:

                    timeout_start_time = time.monotonic()
                    timeout_consumer = topic_timeout

                    self.consume_lock = ConsumerState.TIMEOUT_SET
                    log.info("Lock has been opened, consuming ...")

                # append messages to the events list to be returned
                if messages:
                    for msg in messages:
                        log.info(f"message at offset: {msg.offset()}, \
                                partition: {msg.partition()}, \
                                topic: {msg.topic()}")
                        # TODO: allow assertions to be on message headers etc.
                        # events.append({
                        #     "key": msg.key,
                        #     "headers": msg.headers,
                        #     "value": msg.value()
                        # })
                        events.append(msg.value())
            # only executed when while condition becomes false
            else:
                # at the end check if the partition assignment was achieved
                if self.consume_lock != ConsumerState.TIMEOUT_SET:
                    log.error("No partition assignments received in time")

        except KafkaException as e:
            log.error(f"Kafka error: {e}")

        finally:
            consumer.close()

        end_time = time.monotonic()
        log.debug(f"this cycle took: {(end_time - start_time)} seconds")

        return events
Example #9
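Benchmarks throughput on the output topic by sampling each partition's high watermark roughly fps times per second for duration_s seconds; current_milli_time is assumed to be a project helper returning the current wall-clock time in milliseconds.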
def exec_benchmark(duration_s, fps, kafka_loc, output_topic, silent):
    """Measures throughput at the output Kafka topic,
    by checking the growth in all partitions"""

    c = Consumer({
        'bootstrap.servers': kafka_loc,
        'group.id': 'benchmark-' + str(uuid.uuid4()),
        'auto.offset.reset': 'latest',
        'max.poll.interval.ms': 86400000,
        'isolation.level': 'read_committed'
    })

    # === Get topic partitions

    topic_partitions = None

    def store_topic_partition(consumer, partitions):
        nonlocal topic_partitions
        topic_partitions = partitions

    c.subscribe([output_topic], on_assign=store_topic_partition)
    while topic_partitions is None:
        c.consume(timeout=0.5)

    # === Record each partition's starting high-watermark offset

    throughput_measured = []
    throughput_measured_per_partition = {}
    last_values = {}
    for p in topic_partitions:
        low, high = c.get_watermark_offsets(p)
        throughput_measured_per_partition[p.partition] = []
        last_values[p.partition] = high
        #if silent != "silent":
        #    print("Starting value for partition {}: {}".format(p.partition, high))

    MS_PER_UPDATE = 1000 / fps

    start_time = current_milli_time()
    last_time = start_time
    current_time = start_time
    last_write_time = current_time

    lag = 0.0

    while current_time < start_time + duration_s * 1000:
        current_time = current_milli_time()
        elapsed = current_time - last_time
        last_time = current_time
        lag += elapsed
        while lag >= MS_PER_UPDATE:
            # compute a new throughput sample
            total_new = 0
            curr_time_for_print = current_milli_time()
            time_delta = ((curr_time_for_print - last_write_time) / 1000)
            if time_delta > 0:
                for p in topic_partitions:
                    low, high = c.get_watermark_offsets(p)
                    delta = high - last_values[p.partition]
                    total_new += delta
                    throughput_measured_per_partition[p.partition].append(
                        (delta / time_delta, curr_time_for_print))
                    last_values[p.partition] = high
                throughput_measured.append(
                    (total_new / time_delta, curr_time_for_print))
                last_write_time = curr_time_for_print

            lag -= MS_PER_UPDATE

    if silent != "silent":
        # Print column names:
        # TIME  THROUGHPUT  PART-0 ... PART-N
        columns = "TIME\tTHROUGHPUT"
        for i in range(len(topic_partitions)):
            columns += "\tPART-{}".format(str(i))
        print(columns)
        for row in range(len(throughput_measured)):
            row_data = "{}\t{}".format(throughput_measured[row][1],
                                       int(throughput_measured[row][0]))
            for i in range(len(topic_partitions)):
                row_data += "\t{}".format(
                    int(throughput_measured_per_partition[i][row][0]))
            print(row_data)
    else:
        print(
            int(
                statistics.mean(
                    [x[0] for x in throughput_measured if x[0] > 0.0])))
Example #10
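Measures end-to-end throughput: request IDs parsed from the input topics (protobuf Wrapper messages) are matched against the responses topic (protobuf Response messages), and the matched message count is divided by the time between the first and last response.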
def compute_achieved_throughput(broker, partitions_with_offsets, result_dict):
    # partitions_with_offsets carries positions recorded by an earlier run
    # (see get_partitions_with_offsets in Example #3); when present, the
    # consumers resume from them via assign() instead of subscribing from
    # the earliest offset.
    input_consumer = Consumer({
        'bootstrap.servers': broker,
        'group.id': str(uuid.uuid4()),
        # 'group.id': 'achieved_throughput_measurer',
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': True,
        'auto.commit.interval.ms': 1000,
        'api.version.request': True,
        'max.poll.interval.ms': 60000
    })

    output_consumer = Consumer({
        'bootstrap.servers': broker,
        'group.id': str(uuid.uuid4()),
        # 'group.id': 'achieved_throughput_measurer',
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': True,
        'auto.commit.interval.ms': 1000,
        'api.version.request': True,
        'max.poll.interval.ms': 60000
    })

    if 'input' in partitions_with_offsets and len(
            partitions_with_offsets['input']) > 0:
        input_consumer.assign(partitions_with_offsets['input'])
    else:
        input_consumer.subscribe(['read', 'update', 'transfer'])

    if 'output' in partitions_with_offsets and len(
            partitions_with_offsets['output']) > 0:
        output_consumer.assign(partitions_with_offsets['output'])
    else:
        output_consumer.subscribe(['responses'])

    while True:
        msgs = input_consumer.consume(timeout=5, num_messages=500)
        if len(msgs) == 0:
            break
        for msg in msgs:
            try:
                wrapped = Wrapper()
                wrapped.ParseFromString(msg.value())

                result = {}
                result['operation'] = msg.topic()
                result['input_time'] = msg.timestamp()[1]
                result_dict[wrapped.request_id] = result
            except DecodeError:
                print("Could not decode message, skipping")

    partitions_with_offsets['input'] = input_consumer.position(
        input_consumer.assignment())
    input_consumer.close()

    total_messages = 0
    start_time = 0
    end_time = 0
    first = True

    while True:
        msgs = output_consumer.consume(timeout=5, num_messages=500)
        if len(msgs) == 0:
            break
        for msg in msgs:
            response = Response()
            response.ParseFromString(msg.value())
            key = response.request_id
            status_code = response.status_code
            if key in result_dict:
                if first:
                    start_time = msg.timestamp()[1] / 1000
                    first = False
                total_messages += 1
                end_time = msg.timestamp()[1] / 1000
                result_dict[key]['output_time'] = msg.timestamp()[1]
                result_dict[key]['status_code'] = status_code

    partitions_with_offsets['output'] = output_consumer.position(
        output_consumer.assignment())
    output_consumer.close()

    print("Total messages considered: " + str(total_messages))

    if total_messages == 0 or end_time - start_time == 0:
        return 0

    return total_messages / (end_time - start_time)