Beispiel #1
0
async def _get_kafka_messages(topic: str, start: int) -> List[ConsumerRecord]:
    """Fetch one batch of records from ``topic`` beginning at ``start``.

    A short-lived consumer is started, every partition of the topic is
    positioned via ``offsets_for_times`` and a single ``getmany`` call
    (waiting up to 60 seconds) collects whatever is available.

    Args:
        topic: Name of the Kafka topic to read.
        start: Timestamp handed to ``offsets_for_times`` for every partition
            (aiokafka documents this value as epoch milliseconds).

    Returns:
        The records of all partitions concatenated partition by partition.
        An empty list is returned when no partition metadata is available
        for the topic.
    """
    def _value_deserializer(value):
        # Payloads are tried as JSON first; anything that is not valid JSON
        # is parsed as a Python literal instead.
        text = value.decode("utf-8")
        try:
            return json.loads(text)
        except JSONDecodeError:
            return ast.literal_eval(text)

    loop = asyncio.get_event_loop()
    consumer = AIOKafkaConsumer(
        topic, value_deserializer=_value_deserializer,
        loop=loop, bootstrap_servers=settings.KAFKA_SERVER,
    )

    await consumer.start()
    try:
        partitions = consumer.partitions_for_topic(topic)
        if not partitions:
            # partitions_for_topic() yields None when the topic metadata is
            # unknown; iterating over it would raise TypeError.
            logger.warning("No partition metadata for kafka topic %s", topic)
            return []
        tps = [TopicPartition(topic, p) for p in partitions]

        offsets = await consumer.offsets_for_times({tp: start for tp in tps})

        # Partitions without a record at/after ``start`` are positioned at
        # their end. Resolve all of those end offsets in one request instead
        # of one request per partition.
        without_offset = [tp for tp, found in offsets.items() if not found]
        end_offsets = (
            await consumer.end_offsets(without_offset) if without_offset else {}
        )
        for tp, found in offsets.items():
            consumer.seek(tp, found.offset if found else end_offsets[tp])

        records = await consumer.getmany(*tps, timeout_ms=1000 * 60)

        messages = []
        for tp in tps:
            messages.extend(records.get(tp, []))
        logger.info("Got kafka messages %s by key %s", messages, topic)
        return messages
    finally:
        # Will leave consumer group; perform autocommit if enabled.
        await consumer.stop()
Beispiel #2
0
async def seek_to_offset(consumer: AIOKafkaConsumer, topic: str, start: int = -1):
    """
    Position the consumer relative to the end of the fullest partition of ``topic``.

    Every partition of the topic is first moved to its end (this is a side
    effect that remains in place for all partitions). The partition with the
    highest end offset is then re-positioned at ``end_offset - start``.

    If no partitions are known for the topic, nothing is sought and the
    function returns immediately.

    NOTE(review): with the default ``start=-1`` the target is
    ``end_offset + 1``, i.e. one past the end of the partition. That looks
    unintended, but it is kept for backward compatibility - confirm with callers.
    """
    partitions = consumer.partitions_for_topic(topic)
    if not partitions:
        # Metadata may be missing (None) or empty; there is nothing to seek.
        return

    partition_number, offset = -1, -1
    # Loop through partitions and find the one with the highest end offset
    for p in partitions:
        tp = TopicPartition(topic, p)
        await consumer.seek_to_end(tp)
        last_offset = await consumer.position(tp)
        if offset < last_offset:
            offset = last_offset
            partition_number = p
    tp = TopicPartition(topic, partition_number)
    consumer.seek(tp, offset - start)
Beispiel #3
0
async def pull(loop, server, topic, group_id, batch_size=1, shuffle=False):
    """Yield batches of records from ``topic`` until its current end is reached.

    The end offsets of all partitions are captured once after start-up.
    Partitions are then drained one after another, in list order, until the
    consumer position of each has reached its captured end offset. Records
    that arrive after the snapshot was taken are not waited for.

    The consumer is created with ``auto_offset_reset='earliest'`` and
    ``enable_auto_commit=False``; this function never commits offsets.

    Args:
        loop: Event loop forwarded to ``AIOKafkaConsumer``.
        server: Value for ``bootstrap_servers``.
        topic: Topic to read from.
        group_id: Consumer group id.
        batch_size: Maximum number of records per yielded batch. A batch may
            span several partitions; the final batch may be shorter.
        shuffle: Currently unused.

    Yields:
        Non-empty lists of records as returned by ``getone``.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 (the read loop could
            never make progress).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    client = AIOKafkaConsumer(
        topic,
        loop=loop,
        bootstrap_servers=server,
        group_id=group_id,
        auto_offset_reset='earliest',
        enable_auto_commit=False,
    )
    await client.start()
    try:
        partitions = client.partitions_for_topic(topic)
        while partitions is None:
            # Metadata not available yet: wait and ask again, otherwise this
            # loop could never terminate.
            await asyncio.sleep(0.1)
            partitions = client.partitions_for_topic(topic)

        partitions = [TopicPartition(topic, partition) for partition in partitions]
        end_offsets = await client.end_offsets(partitions)

        async def next_partition(index):
            """Return the first index >= ``index`` with unread records, else None."""
            while index < len(partitions):
                candidate = partitions[index]
                if await client.position(candidate) < end_offsets[candidate]:
                    return index
                index += 1
            return None

        # Start at the first partition that still has data instead of giving
        # up when partition 0 happens to be drained already.
        current_partition = await next_partition(0)

        while current_partition is not None:
            remaining_records = batch_size
            batch = []
            while remaining_records > 0:
                tp = partitions[current_partition]
                batch.append(await client.getone(tp))
                remaining_records -= 1

                if await client.position(tp) >= end_offsets[tp]:
                    # Current partition is drained; move on to the next one
                    # that still has records before the captured end offset.
                    current_partition = await next_partition(current_partition + 1)
                    print("remaining record: {}, partition: {}".format(
                        remaining_records, current_partition))
                    if current_partition is None:
                        break

            if batch:
                yield batch
    finally:
        # Runs on normal exhaustion, on errors and when the generator is
        # closed early, so the consumer never stays connected.
        await client.stop()