    def __seek_from_to_offsets(self, partition, start_offset, end_offset, fft):
        self.log.info(
            f'Start : __seek_from_to_offsets({partition}, {start_offset}, {end_offset})'
        )

        consumer = AvroConsumer({
            'bootstrap.servers': self.bootstrap_servers,
            'group.id': self.group_id,
            'schema.registry.url': self.schema_registry_url
        })

        topic_partition = TopicPartition(self.topic, partition)
        topic_partition.offset = start_offset
        consumer.assign([topic_partition])

        messages = []

        try:
            while True:
                message = consumer.poll(10)
                if message is None:
                    # poll() timed out; keep waiting for the next message
                    continue
                if fft:
                    # enrich the message in place with the FFT of its amplitudes
                    dasfft = DasFft()
                    message.value()['fft'] = dasfft.amplitudes_fft(
                        message.value()['amplitudes'])

                messages.append(message)
                if message.offset() >= end_offset:
                    self.log.info(
                        f'End : __seek_from_to_offsets({partition}, {start_offset}, {end_offset})'
                    )
                    return messages
        finally:
            consumer.close()

    def __get_message(self, partition, offset, fft):
        self.log.info(f'Start : __get_message({partition},{offset})')

        consumer = AvroConsumer({
            'bootstrap.servers': self.bootstrap_servers,
            'group.id': self.group_id,
            'schema.registry.url': self.schema_registry_url
        })

        topic_partition = TopicPartition(self.topic, partition)
        topic_partition.offset = offset
        consumer.assign([topic_partition])

        message = consumer.poll(10)

        consumer.close()

        if message is None:
            # nothing arrived within the poll timeout
            self.log.warning(f'No message at ({partition},{offset})')
            return None

        if fft:
            # enrich the message in place with the FFT of its amplitudes
            dasfft = DasFft()
            message.value()['fft'] = dasfft.amplitudes_fft(
                message.value()['amplitudes'])

        self.log.info(f'End : __get_message({partition},{offset})')

        return message

    def get_offsets(self, topic_partition, timestamps):
        # offsets_for_times() expects each timestamp (in milliseconds) to be
        # passed in the offset field of a TopicPartition
        topic_partitions = []
        for timestamp in timestamps:
            tp = TopicPartition(topic_partition.topic,
                                topic_partition.partition)
            tp.offset = timestamp
            topic_partitions.append(tp)

        offsets = self.consumer.offsets_for_times(topic_partitions)
        return offsets
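Under the hood, offsets_for_times() asks the broker for the earliest offset whose timestamp is at or after each requested timestamp. A minimal standalone sketch of the same call, assuming a broker at localhost:9092 and a topic named 'readings' (both hypothetical):

from confluent_kafka import Consumer, TopicPartition

consumer = Consumer({
    'bootstrap.servers': 'localhost:9092',  # hypothetical broker
    'group.id': 'offset-lookup',            # hypothetical group id
})

ts_ms = 1609459200000  # 2021-01-01T00:00:00Z, illustrative timestamp
tp = TopicPartition('readings', 0)
tp.offset = ts_ms  # the timestamp rides in the offset field

offsets = consumer.offsets_for_times([tp], timeout=10)
print(offsets[0].offset)  # offset of the first message at/after ts_ms
consumer.close()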
Example #4
    def retrieve_configuration(self):
        # Retrieve last message
        topic = TopicPartition(self._topic, 0)
        _, high_offset = self._consumer.get_watermark_offsets(topic)
        topic.offset = high_offset - 1
        self._consumer.assign([topic])

        # consume() returns a list of messages; msg[~0] (i.e. msg[-1])
        # is the last one in the batch
        msg = self._consumer.consume(timeout=2)

        if msg:
            return msg[~0].value()
        else:
            raise RuntimeError("Could not retrieve stored configuration")
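The key step above is the watermark arithmetic: get_watermark_offsets() returns a (low, high) pair where high is the offset the next produced message will receive, so high - 1 addresses the last message already stored. A minimal sketch of just that step, assuming a broker at localhost:9092 and a topic named 'config' (both hypothetical):

from confluent_kafka import Consumer, TopicPartition

c = Consumer({
    'bootstrap.servers': 'localhost:9092',  # hypothetical broker
    'group.id': 'config-reader',            # hypothetical group id
})

tp = TopicPartition('config', 0)
low, high = c.get_watermark_offsets(tp)
tp.offset = high - 1  # last message currently in the partition
c.assign([tp])
msgs = c.consume(timeout=2)
c.close()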
Example #5
def reset_offset(consumer, topic, number):
    latest_offset = get_latest_watermark_offset(consumer, topic)
    target_offset = max(latest_offset - number, 0)
    filtered_topics = consumer.list_topics(topic)
    partitions_dict = filtered_topics.topics[topic].partitions
    # collect every partition first: calling assign() once per partition
    # would replace the previous assignment instead of adding to it
    partitions = []
    for index in partitions_dict.keys():
        partition = TopicPartition(topic, index)
        partition.offset = target_offset
        partitions.append(partition)
        print("Offset assigned to " + topic + " partition " + str(index) +
              " " + str(target_offset))
    consumer.assign(partitions)

    return consumer
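A hedged usage sketch for reset_offset, assuming get_latest_watermark_offset is defined as in the surrounding example and a broker at localhost:9092 (hypothetical), rewinding every partition of a topic named 'events':

from confluent_kafka import Consumer

consumer = Consumer({
    'bootstrap.servers': 'localhost:9092',  # hypothetical broker
    'group.id': 'replay-group',             # hypothetical group id
    'enable.auto.commit': False,
})

# rewind every partition of 'events' to (latest offset - 100)
consumer = reset_offset(consumer, 'events', 100)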
Example #6
    def _copyKafkaOffset(self):
        """
        将新的消费者的offset设置为 latest
        """
        # 首先要获取kafka topic的所有分区
        topicName = config().get('kafka', 'topic')

        if self.status.nextConfig:
            nextStatusConfig = RedisStatusConfig(self.status.nextConfig, forceSync=True)

            # initialize before the try block so the finally clause below
            # cannot hit an unbound name if consumer creation fails
            nextConsumer = None
            try:
                nextConsumer = remote.getKafkaConsumer(
                        nextStatusConfig.kafkaGroupId,
                        autoCommit=False,
                        autoOffsetReset='latest'
                        )
                
                _logger.debug('next kafka groupid is: %s', nextStatusConfig.kafkaGroupId)

                clusterMetadata = nextConsumer.list_topics(topicName)
                topicMetadata = clusterMetadata.topics.get(topicName, {})
                partitions = topicMetadata.partitions

                for pid in partitions.keys():
                    p = TopicPartition(topicName, pid)
                    nextConsumer.assign([p])

                    msg = nextConsumer.poll(10)
                    if msg:
                        offset = msg.offset() - 1
                        _logger.debug('pid[%s] topic[%s] offset[%s]', pid, topicName, offset)

                        if offset >= 0:
                            p.offset = offset
                            nextConsumer.commit(offsets=[p])
            except Exception:
                _logger.error('exception occurs when setting offset for new consumer: %s', Failure())
                raise
            finally:
                if nextConsumer:
                    nextConsumer.close()
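The commit used above passes explicit offsets rather than committing the consumer's current position. A minimal standalone sketch of that pattern, assuming a broker at localhost:9092 and a topic named 'events' (both hypothetical); note that the committed offset is the offset of the next message the group will read, so committing msg.offset() + 1 marks a message as fully processed:

from confluent_kafka import Consumer, TopicPartition

c = Consumer({
    'bootstrap.servers': 'localhost:9092',  # hypothetical broker
    'group.id': 'manual-commit',            # hypothetical group id
    'enable.auto.commit': False,
})

tp = TopicPartition('events', 0)
tp.offset = 42  # illustrative offset to commit
c.commit(offsets=[tp], asynchronous=False)  # synchronous explicit commit
c.close()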
Example #7
# assumes an existing Consumer `c` and a TopicPartition `tp`
# for the topic being exported
first_offset, next_offset_to_create = c.get_watermark_offsets(tp,
                                                              timeout=1,
                                                              cached=False)
last_offset = next_offset_to_create - 1

f = open('pure_project.xml', 'w')
f.write(
    '<?xml version="1.0"?>' + "\n" +
    '<project:upmprojects xmlns:common="v3.commons.pure.atira.dk" xmlns:project="v1.upmproject.pure.atira.dk">'
    + "\n")

# range values explained: We read the topic backwards, starting with the
# last offset. We use `first_offset - 1` because Python's range will stop
# before it reaches that value. So the last offset used will actually be
# the first offset. The last argument is the step, for which we pass -1,
# because we're reading backwards.
for offset in range(last_offset, first_offset - 1, -1):

    # Since Kafka consumers normally read messages from oldest to newest, we
    # manually set the offset to read:
    # TODO: Can we ensure that this offset actually exists somehow?
    tp.offset = offset
    c.assign([tp])

    msg = c.poll(10)
    if msg is None or msg.error():
        # nothing readable at this offset within the timeout; skip it
        continue
    value = msg.value()
    f.write(value['xml'] + "\n")

c.close()

f.write('</project:upmprojects>' + "\n")
f.close()
Example #8
def replicate(topic, rerun, delete, source, src_groupid, target, trg_groupid,
              trg_partitions):
    global source_partitions

    # Connect to source kafka cluster
    src = Consumer({
        'bootstrap.servers': source,
        'group.id': src_groupid,
        'auto.offset.reset': 'smallest',
        'enable.auto.commit': False
    })

    # Connect to target kafka cluster
    trg = Consumer({
        'bootstrap.servers': target,
        'group.id': trg_groupid,
    })

    admin_client = KafkaAdminClient(bootstrap_servers=TRG_BOOTSTRAP_SERVERS,
                                    client_id=TRG_GROUP_ID)

    if delete:
        logger.warning(
            f"DELETING topic {topic} on {TRG_BOOTSTRAP_SERVERS} as requested")
        admin_client.delete_topics([topic])
        logger.warning(f"DELETION of {topic} completed.")

    logger.info(f"source cluster: {source}  source group_id: {src_groupid}")
    logger.info(f"target cluster: {target}  target group_id: {trg_groupid}")

    # Determine if latest source topic is at least partially loaded to target
    trg_topics, the_topic, offset_sum_delta = determine_topic(
        topic, src, trg, rerun)

    src_cm = src.list_topics()  # returns ClusterMetadata
    if the_topic not in src_cm.topics:
        logger.error(
            f"Current topics in {source} with group id {src_groupid} are:")
        logger.error(f"{src_cm.topics}")
        logger.error(
            f"Topic {topic} not in cluster {source} with group id {src_groupid}"
        )
        sys.exit(1)

    src_partition_count = len(src_cm.topics[the_topic].partitions)

    logger.info(
        f"topic: {the_topic} has # of partitions: {src_partition_count}")
    # Calculate multiplier for demuxing
    # Example:
    #    source = 4 target = 9 then multiplier is 9/4=2.25
    #    int(2.25) = 2
    multiplier = int(trg_partitions / src_partition_count)
    trg_partition_count = src_partition_count * multiplier
    logger.info(
        f"multiplier={multiplier} target_partition_count={trg_partition_count}"
    )

    # Add the new topic in target cluster
    if the_topic not in trg_topics:
        logger.info(
            f"replicate {the_topic} to {TRG_BOOTSTRAP_SERVERS} with source group id: {src_groupid}"
        )

        topic_list = [
            NewTopic(name=the_topic,
                     num_partitions=trg_partition_count,
                     replication_factor=1)
        ]
        try:
            logger.info(
                f"Creating topic {the_topic} with {trg_partition_count} partitions"
            )
            admin_client.create_topics(new_topics=topic_list,
                                       validate_only=False)
        except kafka.errors.TopicAlreadyExistsError:
            logger.info(f"Topic already exists in {TRG_BOOTSTRAP_SERVERS} ")
    part_map = create_part_map(src_partition_count, multiplier)

    # Get offset status for each partition
    logger.info(f"Source broker partitions for topic {the_topic}")
    logger.info(
        "-------------------------------------------------------------------------"
    )
    parts = {}
    total_committed = 0
    total_offsets = 0

    for part in src_cm.topics[the_topic].partitions:
        tp = TopicPartition(the_topic, part)
        tp.offset = confluent_kafka.OFFSET_BEGINNING
        src.assign([tp])
        any_committed = src.committed([tp])
        committed = any_committed[0].offset
        total_committed += committed
        end_offset = src.get_watermark_offsets(tp, cached=False)[1]
        position = src.position([tp])[0].offset
        if position == confluent_kafka.OFFSET_BEGINNING:
            position = 0
        elif position == confluent_kafka.OFFSET_END:
            position = end_offset
        elif position == confluent_kafka.OFFSET_INVALID:
            position = 0

        parts[str(part)] = end_offset
        total_offsets += end_offset
        logger.info(
            "Source topic: %s partition: %s end offset: %s committed: %s position: %s lag: %s"
            % (the_topic, part, end_offset, committed, position,
               (position - committed)))

    src.close()
    logger.info(
        f"Source: total_committed={total_committed} total_offsets={total_offsets}"
    )
    logger.info(
        "========================================================================="
    )

    logger.info(
        f"Starting multi-process: the_topic={the_topic} rerun={rerun} src_partition_count={src_partition_count}"
    )
    procs = [
        mp.Process(target=proc_replicate,
                   args=(the_topic, part, parts[str(part)], part_map, rerun))
        for part in range(0, src_partition_count)
    ]

    for proc in procs:
        proc.start()
    for proc in procs:
        proc.join()

    logger.info(f"END")
Example #9
from confluent_kafka import Consumer, TopicPartition, OFFSET_BEGINNING

conf = {
    'bootstrap.servers': "localhost:9092",
    'group.id': 'my-new-group',
    'auto.offset.reset': 'earliest',
}

# consumer1 = Consumer(conf)
consumer = Consumer(conf)

topic = 'first_topic'

# create a topic partition for topic 'first_topic' and partition 2, starting
# from the very first offset; setting the offset before assign() avoids
# calling seek() on an assignment that is not active yet
topicPartition = TopicPartition(topic=topic, partition=2, offset=OFFSET_BEGINNING)
print(topicPartition)

consumer.assign([topicPartition])

while True:
    message = consumer.poll(timeout=1.0)
    if message is None:
        # poll() timed out; nothing to read yet
        continue
    if message.error():
        # e.g. partition EOF or a broker error
        print(message.error())
        continue
    print(message.value())
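The loop above polls forever. A hedged variant with a bounded read and a clean shutdown, under the same assumptions (local broker, topic 'first_topic' with at least 3 partitions):

import time
from confluent_kafka import Consumer, TopicPartition, OFFSET_BEGINNING

c = Consumer({
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'my-new-group-2',  # hypothetical group id
})
c.assign([TopicPartition('first_topic', 2, OFFSET_BEGINNING)])

try:
    deadline = time.time() + 30  # read for at most 30 seconds
    while time.time() < deadline:
        m = c.poll(timeout=1.0)
        if m is None or m.error():
            continue
        print(m.value())
finally:
    c.close()  # leave the group cleanly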