def __seek_from_to_offsets(self, partition, start_offset, end_offset, fft):
    self.log.info(
        f'Start : __seek_from_to_offsets({partition}, {start_offset}, {end_offset})')
    consumer = AvroConsumer({
        'bootstrap.servers': self.bootstrap_servers,
        'group.id': self.group_id,
        'schema.registry.url': self.schema_registry_url
    })
    # Start reading the partition at start_offset.
    topic_partition = TopicPartition(self.topic, partition)
    topic_partition.offset = start_offset
    consumer.assign([topic_partition])
    messages = []
    while True:
        message = consumer.poll(10)
        if message is None:  # poll timed out; keep waiting
            continue
        if fft:
            dasfft = DasFft()
            message.value()['fft'] = dasfft.amplitudes_fft(
                message.value()['amplitudes'])
        messages.append(message)
        if message.offset() >= end_offset:
            consumer.close()
            self.log.info(
                f'End : __seek_from_to_offsets({partition}, {start_offset}, {end_offset})')
            return messages
def __get_message(self, partition, offset, fft):
    self.log.info(f'Start : __get_message({partition},{offset})')
    consumer = AvroConsumer({
        'bootstrap.servers': self.bootstrap_servers,
        'group.id': self.group_id,
        'schema.registry.url': self.schema_registry_url
    })
    # Fetch exactly one message at the requested offset.
    topic_partition = TopicPartition(self.topic, partition)
    topic_partition.offset = offset
    consumer.assign([topic_partition])
    message = consumer.poll(10)
    consumer.close()
    if message is None:
        raise RuntimeError(
            f'No message at partition {partition}, offset {offset}')
    if fft:
        dasfft = DasFft()
        message.value()['fft'] = dasfft.amplitudes_fft(
            message.value()['amplitudes'])
    self.log.info(f'End : __get_message({partition},{offset})')
    return message
def get_offsets(self, topic_partition, timestamps):
    # offsets_for_times() expects the timestamp (epoch ms) in the
    # TopicPartition.offset field and returns the matching offsets.
    topic_partitions = []
    for timestamp in timestamps:
        tp = TopicPartition(topic_partition.topic, topic_partition.partition)
        tp.offset = timestamp
        topic_partitions.append(tp)
    offsets = self.consumer.offsets_for_times(topic_partitions)
    return offsets
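# A minimal usage sketch for get_offsets() above (hedged: the broker
# address, topic name, partition, and timestamp below are hypothetical).
# offsets_for_times() treats TopicPartition.offset as an epoch-millisecond
# timestamp and returns, per partition, the earliest offset whose message
# timestamp is >= that value; the result can then be passed to assign()
# to start consuming from that point in time.
from confluent_kafka import Consumer, TopicPartition

consumer = Consumer({
    'bootstrap.servers': 'localhost:9092',  # hypothetical broker
    'group.id': 'ts-lookup',                # hypothetical group id
})
tp = TopicPartition('events', 0)            # hypothetical topic/partition
tp.offset = 1600000000000                   # epoch ms to look up
[result] = consumer.offsets_for_times([tp], timeout=10)
if result.offset >= 0:                      # -1 means no message at/after ts
    consumer.assign([result])               # start consuming from there
consumer.close()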
def retrieve_configuration(self):
    # Retrieve the last message on partition 0: the high watermark is the
    # next offset to be written, so the last message sits at high - 1.
    topic = TopicPartition(self._topic, 0)
    _, high_offset = self._consumer.get_watermark_offsets(topic)
    topic.offset = high_offset - 1
    self._consumer.assign([topic])
    msg = self._consumer.consume(timeout=2)
    if msg:
        return msg[-1].value()
    raise RuntimeError("Could not retrieve stored configuration")
def reset_offset(consumer, topic, number):
    # Rewind every partition of `topic` to `number` messages before the
    # latest watermark offset.
    latest_offset = get_latest_watermark_offset(consumer, topic)
    target_offset = max(latest_offset - number, 0)
    filtered_topics = consumer.list_topics(topic)
    partitions_dict = filtered_topics.topics[topic].partitions
    partitions = []
    for index in partitions_dict.keys():
        partition = TopicPartition(topic, index)
        partition.offset = target_offset
        partitions.append(partition)
        print(f"Offset assigned to {topic} partition {index} {target_offset}")
    # assign() replaces the previous assignment, so assign all partitions
    # at once rather than one per loop iteration.
    consumer.assign(partitions)
    return consumer
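# Hedged usage sketch for reset_offset() above: rewind a consumer by the
# last 100 messages per partition and resume polling. The broker address,
# group id, and topic name are hypothetical, and get_latest_watermark_offset()
# is the helper the function above assumes is defined elsewhere.
from confluent_kafka import Consumer

consumer = Consumer({
    'bootstrap.servers': 'localhost:9092',  # hypothetical broker
    'group.id': 'replay-group',             # hypothetical group id
    'enable.auto.commit': False,
})
consumer = reset_offset(consumer, 'events', 100)  # hypothetical topic
msg = consumer.poll(10)
if msg is not None and not msg.error():
    print(msg.value())
consumer.close()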
def _copyKafkaOffset(self):
    """Set the new consumer's offset to latest."""
    # First, fetch all partitions of the kafka topic.
    topicName = config().get('kafka', 'topic')
    if self.status.nextConfig:
        nextStatusConfig = RedisStatusConfig(self.status.nextConfig, forceSync=True)
        nextConsumer = None
        try:
            nextConsumer = remote.getKafkaConsumer(
                nextStatusConfig.kafkaGroupId,
                autoCommit=False,
                autoOffsetReset='latest'
            )
            _logger.debug('next kafka groupid is: %s', nextStatusConfig.kafkaGroupId)
            clusterMetadata = nextConsumer.list_topics(topicName)
            topicMetadata = clusterMetadata.topics.get(topicName)
            partitions = topicMetadata.partitions if topicMetadata else {}
            for pid in partitions.keys():
                p = TopicPartition(topicName, pid)
                nextConsumer.assign([p])
                msg = nextConsumer.poll(10)
                if msg:
                    offset = msg.offset() - 1
                    _logger.debug('pid[%s] topic[%s] offset[%s]', pid, topicName, offset)
                    if offset >= 0:
                        p.offset = offset
                        nextConsumer.commit(offsets=[p])
        except Exception:
            _logger.error('exception occurs when setting offset for new consumer: %s', Failure())
            raise
        finally:
            # nextConsumer may still be None if getKafkaConsumer() raised.
            if nextConsumer:
                nextConsumer.close()
# Reconstructed opening (the original snippet starts mid-call): the low and
# high watermarks give the first offset and the next offset to be created.
first_offset, next_offset_to_create = c.get_watermark_offsets(
    tp, timeout=1, cached=False)
last_offset = next_offset_to_create - 1

f = open('pure_project.xml', 'w')
f.write('<?xml version="1.0"?>' + "\n" +
        '<project:upmprojects xmlns:common="v3.commons.pure.atira.dk" '
        'xmlns:project="v1.upmproject.pure.atira.dk">' + "\n")

# range values explained: we read the topic backwards, starting with the
# last offset. We use `first_offset - 1` because Python's range will stop
# before it reaches that value, so the last offset used will actually be
# the first offset. The last argument is the step, for which we pass -1,
# because we're reading backwards.
for offset in range(last_offset, first_offset - 1, -1):
    # Since Kafka consumers normally read messages from oldest to newest,
    # we manually set the offset to read.
    # TODO: Can we ensure that this offset actually exists somehow?
    tp.offset = offset
    c.assign([tp])
    msg = c.poll(10)
    if msg is None:
        continue  # poll timed out; skip this offset
    value = msg.value()
    f.write(value['xml'] + "\n")

c.close()
f.write('</project:upmprojects>' + "\n")
f.close()
def replicate(topic, rerun, delete, source, src_groupid, target, trg_groupid,
              trg_partitions):
    global source_partitions
    # Connect to source kafka cluster
    src = Consumer({
        'bootstrap.servers': source,
        'group.id': src_groupid,
        'auto.offset.reset': 'smallest',
        'enable.auto.commit': False
    })
    # Connect to target kafka cluster
    trg = Consumer({
        'bootstrap.servers': target,
        'group.id': trg_groupid,
    })
    admin_client = KafkaAdminClient(bootstrap_servers=TRG_BOOTSTRAP_SERVERS,
                                    client_id=TRG_GROUP_ID)
    if delete:
        logger.warning(
            f"DELETING topic {topic} on {TRG_BOOTSTRAP_SERVERS} as requested")
        admin_client.delete_topics([topic])
        logger.warning(f"DELETION of {topic} completed.")
    logger.info(f"source cluster: {source} source group_id: {src_groupid}")
    logger.info(f"target cluster: {target} target group_id: {trg_groupid}")

    # Determine if the latest source topic is at least partially loaded to target
    trg_topics, the_topic, offset_sum_delta = determine_topic(
        topic, src, trg, rerun)

    src_cm = src.list_topics()  # returns ClusterMetadata
    if the_topic not in src_cm.topics:
        logger.error(
            f"Current topics in {source} with group id {src_groupid} are:")
        logger.error(f"{src_cm.topics}")
        logger.error(
            f"Topic {topic} not in cluster {source} with group id {src_groupid}")
        sys.exit(1)

    src_partition_count = len(src_cm.topics[the_topic].partitions)
    logger.info(
        f"topic: {the_topic} has # of partitions: {src_partition_count}")

    # Calculate the multiplier for demuxing.
    # Example: source = 4 and target = 9, so the multiplier is
    # int(9 / 4) = int(2.25) = 2.
    multiplier = int(trg_partitions / src_partition_count)
    trg_partition_count = src_partition_count * multiplier
    logger.info(
        f"multiplier={multiplier} target_partition_count={trg_partition_count}")

    # Add the new topic in the target cluster
    if the_topic not in trg_topics:
        logger.info(
            f"replicate {the_topic} to {TRG_BOOTSTRAP_SERVERS} with source group id: {src_groupid}")
        topic_list = [
            NewTopic(name=the_topic,
                     num_partitions=trg_partition_count,
                     replication_factor=1)
        ]
        try:
            logger.info(
                f"Creating topic {the_topic} with {trg_partition_count} partitions")
            admin_client.create_topics(new_topics=topic_list,
                                       validate_only=False)
        except kafka.errors.TopicAlreadyExistsError:
            logger.info(f"Topic already exists in {TRG_BOOTSTRAP_SERVERS} ")

    part_map = create_part_map(src_partition_count, multiplier)

    # Get offset status for each partition
    logger.info(f"Source broker partitions for topic {the_topic}")
    logger.info(
        "-------------------------------------------------------------------------")
    parts = {}
    total_committed = 0
    total_offsets = 0
    for part in src_cm.topics[the_topic].partitions:
        tp = TopicPartition(the_topic, part)
        tp.offset = confluent_kafka.OFFSET_BEGINNING
        src.assign([tp])
        any_committed = src.committed([tp])
        committed = any_committed[0].offset
        total_committed += committed
        end_offset = src.get_watermark_offsets(tp, cached=False)[1]
        position = src.position([tp])[0].offset
        if position == confluent_kafka.OFFSET_BEGINNING:
            position = 0
        elif position == confluent_kafka.OFFSET_END:
            position = end_offset
        elif position == confluent_kafka.OFFSET_INVALID:
            position = 0
        parts[str(part)] = end_offset
        total_offsets += end_offset
        logger.info(
            "Source topic: %s partition: %s end offset: %s committed: %s position: %s lag: %s"
            % (the_topic, part, end_offset, committed, position,
               (position - committed)))
    src.close()
    logger.info(
        f"Source: total_committed={total_committed} total_offsets={total_offsets}")
    logger.info(
        "=========================================================================")
    logger.info(
        f"Starting multi-process: the_topic={the_topic} rerun={rerun} src_partition_count={src_partition_count}")
    procs = [
        mp.Process(target=proc_replicate,
                   args=(the_topic, part, parts[str(part)], part_map, rerun))
        for part in range(0, src_partition_count)
    ]
    for proc in procs:
        proc.start()
    for proc in procs:
        proc.join()
    logger.info("END")
from confluent_kafka import Consumer, TopicPartition, OFFSET_BEGINNING

conf = {
    'bootstrap.servers': "localhost:9092",
    'group.id': 'my-new-group',
    'auto.offset.reset': 'earliest',
}

# consumer1 = Consumer(conf)
consumer = Consumer(conf)
topic = 'first_topic'

# creating a topic partition with topic - 'first_topic' and partition - 2
topicPartition = TopicPartition(topic=topic, partition=2)
print(topicPartition)
consumer.assign([topicPartition])

# Rewind the assigned partition to the beginning of the log.
topicPartition.offset = OFFSET_BEGINNING
consumer.seek(topicPartition)

while True:
    message = consumer.poll(timeout=1.0)
    if message is None:
        continue  # no message within the timeout
    if message.error():
        print(message.error())
        continue
    print(message.value())
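# An alternative sketch for the snippet above (same hypothetical broker,
# topic, and partition). seek() is only valid once the assignment is active
# inside the consumer, so setting TopicPartition.offset *before* assign()
# is often the more reliable way to start from the beginning of a partition.
from confluent_kafka import Consumer, TopicPartition, OFFSET_BEGINNING

consumer = Consumer({
    'bootstrap.servers': "localhost:9092",
    'group.id': 'my-new-group',
    'auto.offset.reset': 'earliest',
})
tp = TopicPartition('first_topic', 2)
tp.offset = OFFSET_BEGINNING  # start offset set before assignment
consumer.assign([tp])

try:
    while True:
        message = consumer.poll(timeout=1.0)
        if message is None:
            continue
        if message.error():
            print(message.error())
            continue
        print(message.value())
finally:
    consumer.close()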