def set_offsets_to_time(start_from_seconds_ago: int, consumer: confluent_kafka.DeserializingConsumer, partitions: List[confluent_kafka.TopicPartition]) -> None:
    """Rewind *consumer* so it starts reading from approximately
    ``start_from_seconds_ago`` seconds before now.

    The confluent-kafka API expects the lookup timestamp (in milliseconds)
    to be carried in the ``offset`` field of each ``TopicPartition``;
    ``offsets_for_times`` then resolves those timestamps to real offsets,
    which we seek to one partition at a time.
    """
    target_time = datetime.now(timezone.utc) - timedelta(seconds=start_from_seconds_ago)
    logger.info('Setting consumer offsets to start from %s', target_time)
    target_ms = int(target_time.timestamp() * 1000)
    for partition in partitions:
        # yep, it's a weird API: the timestamp goes into the offset field
        partition.offset = target_ms
    consumer.assign(partitions)
    for resolved in consumer.offsets_for_times(partitions):
        logger.debug('Topic %s partition %s SEEKing to offset %s',
                     resolved.topic, resolved.partition, resolved.offset)
        consumer.seek(resolved)
def test_consumer(self):
    """Consume the test messages back from Kafka and assert they round-trip
    through Avro deserialization in order.

    Reads from partition 0 starting far enough back to cover exactly the
    messages this test suite produced, then polls until all expected
    messages arrive (or the retry budget for empty polls is exhausted).
    """
    consumer_config = {
        'bootstrap.servers': self.conf['bootstrap.servers'],
        'key.deserializer': self.key_avro_deserializer,
        'value.deserializer': self.value_avro_deserializer,
        'group.id': '1',
        'auto.offset.reset': 'earliest'
    }
    # Start offset chosen so exactly len(test_messages) records are re-read.
    offset = kafka_utils.offset - len(self.test_messages) + 1
    consumer = DeserializingConsumer(consumer_config)
    partitions = [TopicPartition(topic=self.topic, partition=0, offset=offset)]
    consumer.assign(partitions)

    # Process messages
    result = []
    empty_polls = 0  # consecutive polls that returned no message
    try:
        while len(result) < len(self.test_messages):
            try:
                msg = consumer.poll(1.0)
            except SerializerError as e:
                # Bug fix: previously the error was swallowed silently,
                # making deserialization failures indistinguishable from
                # an intentional early exit.
                print("deserialization failed: {}".format(e))
                break
            if msg is None:
                print("no message received")
                # Bug fix: count only *empty* polls toward the retry limit.
                # The old code incremented the counter on every poll, so a
                # single empty poll after 10 successful ones aborted the test.
                empty_polls += 1
                if empty_polls >= 10:
                    break
            elif msg.error():
                print("consumer error: {}".format(msg.error()))
                break
            else:
                text = msg.value().text
                print("adding {} to result".format(text))
                result.append(text)
                empty_polls = 0
    except KeyboardInterrupt:
        pass
    finally:
        # Leave group and commit final offsets even if we bailed out early.
        consumer.close()
    assert result == self.test_messages
# Wire up a Kafka consumer that streams ngram records into Postgres.
consumer_config = {
    'bootstrap.servers': conf['bootstrap.servers'],
    'key.deserializer': key_avro_deserializer,
    'value.deserializer': value_avro_deserializer,
    'group.id': '1'
}
consumer = DeserializingConsumer(consumer_config)

# Create the SQL interface.
# NOTE(review): the "postgres://" scheme was removed in SQLAlchemy >= 1.4
# ("postgresql://" is required) -- confirm the pinned SQLAlchemy version.
db = create_engine("postgres://*****:*****@postgres:5432/ngrams")

# Wait until the Kafka topic is up before proceeding.
kafka_utils.wait_topic(consumer, topic)

# Assign partition 0 from the beginning to this consumer.
# Maybe should change this to subscribe so we can use the on_assign and
# on_revoke callbacks.
consumer.assign([TopicPartition(topic=topic, partition=0, offset=0)])

# There is an infinite loop within this function that won't break until it
# sees a keyboard interrupt.
msg = kafka_utils.consume_messages(consumer, db, kafka_utils.send_ngrams_postgres)

# After you exit the poll loop commit the final offsets and close the consumer.
consumer.commit(message=msg, asynchronous=False)
consumer.close()

# Finally, disconnect from the redis server.
r.connection_pool.disconnect()