async def _get_kafka_messages(topic: str, start: int) -> List[ConsumerRecord]:
    """Fetch one batch of records from ``topic``, starting at timestamp ``start``.

    A short-lived consumer is started, every partition of the topic is
    positioned at the first offset whose timestamp is >= ``start`` (or at the
    partition end when no such record exists), and a single ``getmany`` call
    is issued with a 60 second timeout.  The consumer is always stopped
    before returning.

    Args:
        topic: Name of the Kafka topic to read.
        start: Timestamp handed to ``offsets_for_times``
            (NOTE(review): aiokafka expects epoch milliseconds - confirm
            against callers).

    Returns:
        The records returned by that single fetch, concatenated partition by
        partition.  Empty when the topic has no known partitions or nothing
        arrived before the timeout.
    """

    def _value_deserializer(value):
        # Payloads are tried as JSON first; anything that is not valid JSON
        # is parsed as a Python literal instead.
        text = value.decode("utf-8")
        try:
            return json.loads(text)
        except JSONDecodeError:
            return ast.literal_eval(text)

    loop = asyncio.get_event_loop()
    consumer = AIOKafkaConsumer(
        topic,
        value_deserializer=_value_deserializer,
        loop=loop,
        bootstrap_servers=settings.KAFKA_SERVER,
    )
    await consumer.start()
    try:
        partitions = consumer.partitions_for_topic(topic)
        if not partitions:
            # Metadata for the topic is not available (None) or it has no
            # partitions: there is nothing to seek or fetch.
            logger.warning("No partitions known for kafka topic %s", topic)
            return []

        tps = [TopicPartition(topic, p) for p in partitions]
        offsets = await consumer.offsets_for_times({tp: start for tp in tps})

        # Partitions without a record at/after ``start`` are positioned at
        # their end; resolve all of those end offsets in one request.
        without_match = [tp for tp, found in offsets.items() if found is None]
        end_offsets = (
            await consumer.end_offsets(without_match) if without_match else {}
        )

        for tp, found in offsets.items():
            position = found.offset if found is not None else end_offsets[tp]
            consumer.seek(tp, position)

        records = await consumer.getmany(*tps, timeout_ms=1000 * 60)

        messages = []
        for tp in tps:
            messages += records.get(tp, [])

        logger.info(f"Got kafka messages {messages} by key {topic}")
        return messages
    finally:
        # Will leave consumer group; perform autocommit if enabled.
        await consumer.stop()
async def seek_to_offset(consumer: AIOKafkaConsumer, topic: str, start: int = -1):
    """
    Seek relative to the end of the partition with the highest end offset.

    Every partition of ``topic`` is first moved to its end.  The partition
    whose end position is the largest is then re-positioned to
    ``end_position - start``; all other partitions stay at their end.
    On a tie the partition seen first wins.

    Args:
        consumer: A started consumer that owns the partitions of ``topic``.
        topic: Name of the topic to seek in.
        start: Distance to step back from the end position of the selected
            partition.
            NOTE(review): with the default of -1 the target is
            ``end_position + 1``, i.e. past the end - confirm that callers
            always pass a positive value.

    Returns:
        None.  Nothing is sought when the topic has no known partitions.
    """
    partitions = consumer.partitions_for_topic(topic)
    if not partitions:
        # No metadata / no partitions: there is no valid partition to seek
        # on, so leave the consumer untouched.
        return

    partition_number, offset = -1, -1

    # Loop through partitions and find the latest offset.  The committed
    # offset is deliberately not fetched: it was never used here and only
    # added a broker round trip per partition.
    for p in partitions:
        tp = TopicPartition(topic, p)
        await consumer.seek_to_end(tp)
        last_offset = await consumer.position(tp)
        if offset < last_offset:
            offset = last_offset
            partition_number = p

    consumer.seek(TopicPartition(topic, partition_number), offset - start)
async def pull(loop, server, topic, group_id, batch_size=1, shuffle=False):
    """Yield the records currently stored in ``topic`` as lists of messages.

    The end offset of every partition is captured once after the consumer
    starts.  Partitions are then drained one after another, in list order,
    up to that captured end offset; a batch may span two partitions when one
    runs out part-way through.  Offsets are never committed by this function
    (auto commit is disabled and no commit call is made).

    Args:
        loop: Event loop forwarded to ``AIOKafkaConsumer``.
        server: Bootstrap server address(es).
        topic: Name of the topic to read.
        group_id: Consumer group id.
        batch_size: Maximum number of messages per yielded batch; must be
            at least 1.
        shuffle: Accepted for interface compatibility; currently unused.

    Yields:
        Non-empty lists of consumed messages, each at most ``batch_size``
        long.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 (the read loop could
            never make progress).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1")

    client = AIOKafkaConsumer(
        topic,
        loop=loop,
        bootstrap_servers=server,
        group_id=group_id,
        auto_offset_reset='earliest',
        enable_auto_commit=False,
    )
    await client.start()
    try:
        # Topic metadata may not be available yet; poll until it is.  The
        # lookup has to be repeated inside the loop or it never terminates.
        partition_ids = client.partitions_for_topic(topic)
        while partition_ids is None:
            await asyncio.sleep(0.1)
            partition_ids = client.partitions_for_topic(topic)

        partitions = [TopicPartition(topic, p) for p in partition_ids]
        end_offsets = await client.end_offsets(partitions)

        # Read by the debug output in next_partition(), so it must exist
        # before the first call.
        remaining_records = batch_size

        async def is_drained(index):
            # True once the consumer position reached the captured end offset.
            position = await client.position(partitions[index])
            return position >= end_offsets[partitions[index]]

        async def next_partition(current_partition):
            # Index of the next partition that still has records, or None.
            current_partition += 1
            if current_partition >= len(partitions):
                return None
            if await is_drained(current_partition):
                current_partition = await next_partition(current_partition)
                print("remaining record: {}, partition: {}".format(
                    remaining_records, current_partition))
            return current_partition

        # Start at the first partition that actually has something to read,
        # rather than giving up when partition 0 happens to be drained.
        current_partition = await next_partition(-1)

        while current_partition is not None:
            remaining_records = batch_size
            batch = []
            while remaining_records > 0:
                msg = await client.getone(partitions[current_partition])
                batch.append(msg)
                remaining_records -= 1
                if await is_drained(current_partition):
                    current_partition = await next_partition(current_partition)
                    print("remaining record: {}, partition: {}".format(
                        remaining_records, current_partition))
                    if current_partition is None:
                        break
            if batch:
                yield batch
    finally:
        # Runs on normal exhaustion, on errors and when the generator is
        # closed early, so the consumer is never left running.
        await client.stop()