def test_transfer_binary_message_using_file(
    producer: ConfluentProducer,
    target_topic_consumer: Consumer,
    source_topic: Tuple[str, int],
    target_topic: Tuple[str, int],
    non_interactive_cli_runner: CliRunner,
    tmpdir_factory,
):
    output_directory = tmpdir_factory.mktemp("output_directory")
    expected_messages = produce_binary_test_messages(topic_name=source_topic[0], producer=producer)

    non_interactive_cli_runner.invoke(
        esque,
        args=["consume", "-d", str(output_directory), "--binary", "--number", "10", source_topic[0]],
        catch_exceptions=False,
    )
    non_interactive_cli_runner.invoke(
        esque, args=["produce", "-d", str(output_directory), "--binary", target_topic[0]], catch_exceptions=False
    )

    actual_messages = {
        (msg.key(), msg.value(), msg.partition()) for msg in target_topic_consumer.consume(10, timeout=20)
    }
    expected_messages = {(msg.key, msg.value, msg.partition) for msg in expected_messages}
    assert expected_messages == actual_messages
def test_transfer_binary_with_single_command(
    producer: ConfluentProducer,
    target_topic_consumer: Consumer,
    source_topic: Tuple[str, int],
    target_topic: Tuple[str, int],
    non_interactive_cli_runner: CliRunner,
):
    expected_messages = produce_binary_test_messages(topic_name=source_topic[0], producer=producer)

    non_interactive_cli_runner.invoke(
        esque,
        args=[
            "transfer",
            "--from-topic",
            source_topic[0],
            "--to-topic",
            target_topic[0],
            "--binary",
            "--number",
            "10",
            "--first",
        ],
        catch_exceptions=False,
    )

    actual_messages = {
        (msg.key(), msg.value(), msg.partition()) for msg in target_topic_consumer.consume(10, timeout=20)
    }
    expected_messages = {(msg.key, msg.value, msg.partition) for msg in expected_messages}
    assert expected_messages == actual_messages
def get_partitions_with_offsets(broker):
    input_consumer = Consumer({
        'bootstrap.servers': broker,
        'group.id': str(uuid.uuid4()),
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': True,
        'auto.commit.interval.ms': 1000,
        'api.version.request': True,
        'max.poll.interval.ms': 60000
    })
    output_consumer = Consumer({
        'bootstrap.servers': broker,
        'group.id': str(uuid.uuid4()),
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': True,
        'auto.commit.interval.ms': 1000,
        'api.version.request': True,
        'max.poll.interval.ms': 60000
    })

    input_consumer.subscribe(['read', 'update', 'transfer'])
    output_consumer.subscribe(['responses'])

    # Consume once so the input consumer receives its partition assignment.
    msgs = input_consumer.consume(timeout=5, num_messages=100)
    if len(msgs) == 0:
        print("returned empty")
        return {}

    partitions_with_offsets = {'input': [], 'output': []}

    # Record the current high watermark of every assigned input partition.
    input_partitions = input_consumer.assignment()
    for p in input_partitions:
        _, high = input_consumer.get_watermark_offsets(p)
        p.offset = high
        partitions_with_offsets['input'].append(p)

    # Same for the output consumer: consume once to trigger assignment,
    # then record the high watermarks of the output partitions.
    output_consumer.consume(timeout=5, num_messages=100)
    output_partitions = output_consumer.assignment()
    for p in output_partitions:
        _, high = output_consumer.get_watermark_offsets(p)
        p.offset = high
        partitions_with_offsets['output'].append(p)

    return partitions_with_offsets
def consume(self, num_messages=1, *args, **kwargs):
    """Overridden ``Consumer.consume``.

    Consumes up to ``num_messages`` messages and builds and finishes a
    child span for every message that was received.

    :param num_messages: maximum number of messages to return
    :param args: positional arguments forwarded to ``Consumer.consume``
    :param kwargs: keyword arguments forwarded to ``Consumer.consume``
    :return: list of consumed messages
    """
    msgs = Consumer.consume(self, num_messages, *args, **kwargs)
    for msg in msgs:
        if msg is not None:
            self.build_and_finish_child_span(msg)
    return msgs
def test_transfer_binary_message_using_cli_pipe(
    producer: ConfluentProducer,
    target_topic_consumer: Consumer,
    source_topic: Tuple[str, int],
    target_topic: Tuple[str, int],
    non_interactive_cli_runner,
):
    expected_messages = produce_binary_test_messages(topic_name=source_topic[0], producer=producer)

    result1 = non_interactive_cli_runner.invoke(
        esque, args=["consume", "--stdout", "--binary", "--number", "10", source_topic[0]], catch_exceptions=False
    )
    non_interactive_cli_runner.invoke(
        esque, args=["produce", "--stdin", "--binary", target_topic[0]], input=result1.output, catch_exceptions=False
    )

    actual_messages = {
        (msg.key(), msg.value(), msg.partition()) for msg in target_topic_consumer.consume(10, timeout=20)
    }
    expected_messages = {(msg.key, msg.value, msg.partition) for msg in expected_messages}
    assert expected_messages == actual_messages
def __consume_msgs(self, num_messages, *args, **kwargs):
    return Consumer.consume(self, num_messages, *args, **kwargs)
class MsgConsumer:
    def __init__(self, topic, broker_address, group_id='group', client_id='client',
                 auto_offset_reset='earliest', num_messages=1, verbose=False):
        """Consumer for handling EEG Streamer messages.

        Args:
            topic: Topic to subscribe to.
            broker_address: Broker address.
            group_id: Consumer group ID.
            client_id: Client ID.
            auto_offset_reset: Where to start when no committed offset exists (default: 'earliest').
            num_messages: Maximum number of messages to consume each time (default: 1).
            verbose: Verbose mode (default: False).
        """
        self.data = deque()
        self.timestamps = deque()
        self.__num_msgs = num_messages
        """Maximum number of messages to consume each time (default: 1)"""
        self.__verbose = verbose
        self.__streamqueue = deque()
        self.__consumer = Consumer({
            'bootstrap.servers': broker_address,
            'auto.offset.reset': auto_offset_reset,
            'group.id': group_id,
            'client.id': client_id,
            'enable.auto.commit': True,
            'session.timeout.ms': 6000,
            'max.poll.interval.ms': 10000
        })
        """Consumer that reads the stream of EEG signal."""
        self.__consumer.subscribe([topic])

    def listen(self):
        """Read the stream from Kafka and append it to the stream queue.

        Returns:
            list of list: dataset (nchannel x nsample) or None
        """
        # If the chunk size is too large, consume it over multiple calls.
        chunk_size = self.__num_msgs
        msgs = []
        while chunk_size > 100:
            msgs.extend(self.__consumer.consume(num_messages=100, timeout=1))
            chunk_size -= 100
        msgs.extend(self.__consumer.consume(num_messages=chunk_size, timeout=1))

        if self.__verbose:
            print(f"INFO: Received {len(msgs)} messages")
        if not msgs:
            return None

        # Enqueue the newly received messages.
        self.__streamqueue.extendleft(msgs)
        if len(self.__streamqueue) < self.__num_msgs:
            return None

        # Dequeue one full chunk of messages and decode it.
        msgs__ = [self.__streamqueue.pop() for _ in range(self.__num_msgs)]
        timestamps, data = [], []
        for msg in msgs__:
            time, values = msg_decode(msg.value())
            if time is not None:
                timestamps.append(time)
                data.append(values)
        # TODO: assert there is no big time gap in the data
        if len(data) < self.__num_msgs:
            return None

        if self.__verbose:
            print(timestamps[0], data[0])
        data = tuple(zip(*data))
        self.data.append(data)
        self.timestamps.append(timestamps[0])
        if self.__verbose:
            print("INFO: Successfully read a chunk")

    def stop(self):
        self.__consumer.close()

    def drain(self):
        self.__num_msgs = 100000
        for _ in range(10):
            self.listen()
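# A minimal usage sketch for MsgConsumer (not part of the original source).
# The broker address, topic name, and chunk size below are illustrative
# assumptions; msg_decode must be importable for listen() to work.
consumer = MsgConsumer(
    topic="eeg-stream",               # hypothetical topic name
    broker_address="localhost:9092",  # hypothetical broker
    num_messages=250,                 # one chunk = 250 samples
    verbose=True,
)
try:
    while True:
        consumer.listen()  # appends one chunk to consumer.data when enough messages arrived
        if consumer.data:
            chunk = consumer.data.popleft()  # nchannel x nsample tuple of tuples
            print(len(chunk), "channels in chunk")
except KeyboardInterrupt:
    consumer.stop()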
def consume(self, topic, topic_timeout):
    kafka_config_consumer = ConfigFactory(kafka_client="consumer")
    config = kafka_config_consumer.config
    log.info("kafka config for consume %s", config)
    consumer = Consumer(config)

    events = []
    start_time = time.monotonic()
    timeout_start_time = start_time
    timeout_consumer = 10.0

    # The actual consumer starts now.
    # Subscribe to 1 or more topics and define the callback function;
    # the callback is only invoked after consumer.consume() is called!
    consumer.subscribe([topic], on_assign=self.callback_on_assignment)
    log.info(
        f"Waiting for partition assignment ... (timeout at {timeout_consumer} seconds)"
    )
    try:
        while (time.monotonic() - timeout_start_time) < timeout_consumer:
            # Start consumption.
            messages = consumer.consume(timeout=0.1)

            # Check for partition assignment.
            if self.consume_lock == ConsumerState.PARTITIONS_UNASSIGNED:
                # This should not happen, but we are not 100% sure.
                if messages:
                    log.error("messages consumed but lock is unopened")
                    break
                continue
            # After partition assignment, set the timeout again and reset the
            # start time from which to determine timeout violation.
            elif self.consume_lock == ConsumerState.PARTITIONS_ASSIGNED:
                timeout_start_time = time.monotonic()
                timeout_consumer = topic_timeout
                self.consume_lock = ConsumerState.TIMEOUT_SET
                log.info("Lock has been opened, consuming ...")

            # Append messages to the events list to be returned.
            if messages:
                for msg in messages:
                    log.info(
                        f"message at offset: {msg.offset()}, "
                        f"partition: {msg.partition()}, "
                        f"topic: {msg.topic()}"
                    )
                    # TODO: allow assertions to be on message headers etc.
                    # events.append({
                    #     "key": msg.key,
                    #     "headers": msg.headers,
                    #     "value": msg.value()
                    # })
                    events.append(msg.value())
        # Only executed when the while condition becomes false.
        else:
            # At the end, check whether the partition assignment was achieved.
            if self.consume_lock != ConsumerState.TIMEOUT_SET:
                log.error("No partition assignments received in time")
    except KafkaException as e:
        log.error(f"Kafka error: {e}")
    finally:
        consumer.close()

    end_time = time.monotonic()
    log.debug(f"this cycle took: {(end_time - start_time)} seconds")
    return events
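# The on_assign callback referenced above is not shown in this snippet. A
# plausible sketch, assuming it only needs to flip consume_lock so the loop
# above can detect the assignment; the (consumer, partitions) signature is
# the standard confluent_kafka on_assign contract.
def callback_on_assignment(self, consumer, partitions):
    log.info(f"Partitions assigned: {partitions}")
    self.consume_lock = ConsumerState.PARTITIONS_ASSIGNED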
def exec_benchmark(duration_s, fps, kafka_loc, output_topic, silent):
    """Measures throughput at the output Kafka topic by checking the growth in all partitions."""
    c = Consumer({
        'bootstrap.servers': kafka_loc,
        'group.id': 'benchmark-' + str(uuid.uuid4()),
        'auto.offset.reset': 'latest',
        'max.poll.interval.ms': 86400000,
        'isolation.level': 'read_committed'
    })

    # === Get topic partitions
    topic_partitions = None

    def store_topic_partition(consumer, partitions):
        nonlocal topic_partitions
        topic_partitions = partitions

    c.subscribe([output_topic], on_assign=store_topic_partition)
    while topic_partitions is None:
        c.consume(timeout=0.5)  # loop until the assignment callback has fired

    # Record the starting high watermark of every partition.
    throughput_measured = []
    throughput_measured_per_partition = {}
    last_values = {}
    for p in topic_partitions:
        low, high = c.get_watermark_offsets(p)
        throughput_measured_per_partition[p.partition] = []
        last_values[p.partition] = high
        # if silent != "silent":
        #     print("Starting value for partition {}: {}".format(p.partition, high))

    MS_PER_UPDATE = 1000 / fps

    start_time = current_milli_time()
    last_time = start_time
    current_time = start_time
    last_write_time = current_time
    lag = 0.0

    while current_time < start_time + duration_s * 1000:
        current_time = current_milli_time()
        elapsed = current_time - last_time
        last_time = current_time
        lag += elapsed

        while lag >= MS_PER_UPDATE:
            # Calculate the new per-partition and total throughput values.
            total_new = 0
            curr_time_for_print = current_milli_time()
            time_delta = (curr_time_for_print - last_write_time) / 1000
            if time_delta > 0:
                for p in topic_partitions:
                    low, high = c.get_watermark_offsets(p)
                    delta = high - last_values[p.partition]
                    total_new += delta
                    throughput_measured_per_partition[p.partition].append(
                        (delta / time_delta, curr_time_for_print))
                    last_values[p.partition] = high
                throughput_measured.append(
                    (total_new / time_delta, curr_time_for_print))
                last_write_time = curr_time_for_print
            lag -= MS_PER_UPDATE

    if silent != "silent":
        # Print column names: TIME THROUGHPUT PART-0 ... PART-N
        columns = "TIME\tTHROUGHPUT"
        for i in range(len(topic_partitions)):
            columns += "\tPART-{}".format(str(i))
        print(columns)
        for row in range(len(throughput_measured)):
            row_data = "{}\t{}".format(throughput_measured[row][1],
                                       int(throughput_measured[row][0]))
            for i in range(len(topic_partitions)):
                row_data += "\t{}".format(
                    int(throughput_measured_per_partition[i][row][0]))
            print(row_data)
    else:
        print(
            int(
                statistics.mean(
                    [x[0] for x in throughput_measured if x[0] > 0.0])))
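# Illustrative invocation of exec_benchmark; the values are assumptions, not
# taken from the original source. This would sample the hypothetical topic
# "responses" for 60 seconds, compute a throughput estimate twice per second,
# and print the per-partition table because silent != "silent".
exec_benchmark(
    duration_s=60,
    fps=2,
    kafka_loc="localhost:9092",
    output_topic="responses",
    silent="verbose",
)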
def compute_achieved_throughput(broker, partitions_with_offsets, result_dict):
    input_consumer = Consumer({
        'bootstrap.servers': broker,
        'group.id': str(uuid.uuid4()),
        # 'group.id': 'achieved_throughput_measurer',
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': True,
        'auto.commit.interval.ms': 1000,
        'api.version.request': True,
        'max.poll.interval.ms': 60000
    })
    output_consumer = Consumer({
        'bootstrap.servers': broker,
        'group.id': str(uuid.uuid4()),
        # 'group.id': 'achieved_throughput_measurer',
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': True,
        'auto.commit.interval.ms': 1000,
        'api.version.request': True,
        'max.poll.interval.ms': 60000
    })

    # Resume from previously recorded offsets if they are available,
    # otherwise subscribe and start from the earliest offsets.
    if 'input' in partitions_with_offsets and len(partitions_with_offsets['input']) > 0:
        input_consumer.assign(partitions_with_offsets['input'])
    else:
        input_consumer.subscribe(['read', 'update', 'transfer'])

    if 'output' in partitions_with_offsets and len(partitions_with_offsets['output']) > 0:
        output_consumer.assign(partitions_with_offsets['output'])
    else:
        output_consumer.subscribe(['responses'])

    # First pass: read all requests and remember when each one entered the system.
    while True:
        msgs = input_consumer.consume(timeout=5, num_messages=500)
        if len(msgs) == 0:
            break

        for msg in msgs:
            try:
                wrapped = Wrapper()
                wrapped.ParseFromString(msg.value())

                result = {}
                result['operation'] = msg.topic()
                result['input_time'] = msg.timestamp()[1]
                result_dict[wrapped.request_id] = result
            except DecodeError:
                print("Could not decode message")

    partitions_with_offsets['input'] = input_consumer.position(input_consumer.assignment())
    input_consumer.close()

    # Second pass: read all responses and match them to the recorded requests.
    total_messages = 0
    start_time = 0
    end_time = 0
    first = True

    while True:
        msgs = output_consumer.consume(timeout=5, num_messages=500)
        if len(msgs) == 0:
            break

        for msg in msgs:
            response = Response()
            response.ParseFromString(msg.value())
            key = response.request_id
            status_code = response.status_code
            if key in result_dict:
                if first:
                    start_time = msg.timestamp()[1] / 1000
                    first = False
                total_messages += 1
                end_time = msg.timestamp()[1] / 1000
                result_dict[key]['output_time'] = msg.timestamp()[1]
                result_dict[key]['status_code'] = status_code

    partitions_with_offsets['output'] = output_consumer.position(output_consumer.assignment())
    output_consumer.close()

    print("Total messages considered: " + str(total_messages))
    if total_messages == 0 or end_time - start_time == 0:
        return 0
    return total_messages / (end_time - start_time)
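# A sketch of how the two helpers above might be wired together; the broker
# address is an assumption, and it presumes get_partitions_with_offsets from
# the earlier snippet is importable alongside compute_achieved_throughput.
# get_partitions_with_offsets records the current high watermarks, and a later
# call to compute_achieved_throughput resumes from those offsets, filling
# result_dict and returning the achieved messages per second.
broker = "localhost:9092"  # hypothetical broker address
offsets = get_partitions_with_offsets(broker)
result_dict = {}
throughput = compute_achieved_throughput(broker, offsets, result_dict)
print(f"Achieved throughput: {throughput:.2f} messages/s "
      f"({len(result_dict)} requests tracked)")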