Beispiel #1
0
def set_offsets_to_time(start_from_seconds_ago: int, consumer: confluent_kafka.DeserializingConsumer,
                        partitions: List[confluent_kafka.TopicPartition]) -> None:
    """Assign ``partitions`` to ``consumer``, starting ``start_from_seconds_ago`` seconds in the past.

    The starting offset of every partition is resolved with
    ``consumer.offsets_for_times()`` and handed directly to
    ``consumer.assign()``.

    Args:
        start_from_seconds_ago: How far back from "now" (UTC) consumption
            should begin, in seconds.
        consumer: The consumer that will be assigned the partitions.
        partitions: Topic/partition pairs to assign. Only ``topic`` and
            ``partition`` are read; the objects are not modified.

    Raises:
        Whatever ``consumer.offsets_for_times()`` or ``consumer.assign()``
        raise (e.g. on broker timeout); nothing is caught here.
    """
    start_from = datetime.now(timezone.utc) - timedelta(seconds=start_from_seconds_ago)
    logger.info('Setting consumer offsets to start from %s', start_from)

    # offsets_for_times() takes its lookup timestamp (epoch milliseconds) via
    # the ``offset`` field of each TopicPartition. Use private lookup objects
    # so the caller's partitions are not left carrying timestamps as offsets.
    timestamp_ms = int(start_from.timestamp() * 1000)
    lookups = [confluent_kafka.TopicPartition(p.topic, p.partition, timestamp_ms) for p in partitions]

    resolved = consumer.offsets_for_times(lookups)
    for p in resolved:
        logger.debug('Topic %s partition %s SEEKing to offset %s', p.topic, p.partition, p.offset)

    # Pass the resolved offsets straight to assign() instead of assign()+seek():
    # seek() is only valid on a partition that is already actively being
    # consumed, and assignment is asynchronous, so seeking right after
    # assign() can fail with an "Erroneous state" error. It would also mean
    # the partitions are briefly assigned at a timestamp-valued offset.
    consumer.assign(resolved)
Beispiel #2
0
    def test_consumer(self):
        """Read the test messages back from Kafka and compare them to ``self.test_messages``.

        A ``DeserializingConsumer`` is assigned partition 0 of ``self.topic``
        at an explicit offset and polled until either the expected number of
        messages has been collected or polling gives up. The ``text``
        attribute of every deserialized value is collected and the resulting
        list must equal ``self.test_messages``.

        The consumer is always closed, even if an unexpected exception
        escapes the poll loop.
        """
        consumer_config = {
            'bootstrap.servers': self.conf['bootstrap.servers'],
            'key.deserializer': self.key_avro_deserializer,
            'value.deserializer': self.value_avro_deserializer,
            'group.id': '1',
            'auto.offset.reset': 'earliest'
        }
        # NOTE(review): presumably kafka_utils.offset is the offset of the last
        # message produced, so stepping back by the batch size lands on the
        # first test message -- confirm against kafka_utils.
        offset = kafka_utils.offset - len(self.test_messages) + 1
        consumer = DeserializingConsumer(consumer_config)

        result = []
        try:
            consumer.assign([
                TopicPartition(topic=self.topic,
                               partition=0,
                               offset=offset)
            ])

            # Process messages
            attempt = 0  # counts every poll, not only the empty ones
            while len(result) < len(self.test_messages):
                try:
                    msg = consumer.poll(1.0)
                    attempt += 1
                    if msg is None:
                        print("no message received")
                        # Give up once ten polls have been spent in total.
                        if attempt >= 10:
                            break
                    elif msg.error():
                        break
                    else:
                        text = msg.value().text
                        print("adding {} to result".format(text))
                        result.append(text)
                except (KeyboardInterrupt, SerializerError):
                    # Stop polling; the assertion below reports any shortfall.
                    break
        finally:
            # Leave group and commit final offsets
            consumer.close()

        assert result == self.test_messages
    # NOTE(review): this is the body of a function whose header is not visible
    # here; `conf`, `topic`, `r` and the two Avro deserializers are defined
    # outside this fragment.

    # Build a deserializing consumer from the shared connection settings.
    consumer_config = {
        'bootstrap.servers': conf['bootstrap.servers'],
        'key.deserializer': key_avro_deserializer,
        'value.deserializer': value_avro_deserializer,
        'group.id': '1'
    }
    consumer = DeserializingConsumer(consumer_config)

    # Create the SQL interface (credentials are masked in this listing).
    db = create_engine("postgres://*****:*****@postgres:5432/ngrams")

    # Wait until the Kafka topic is up before proceeding.
    kafka_utils.wait_topic(consumer, topic)

    # Assign partition 0 of the topic to the consumer, starting at offset 0.
    partitions = []
    partition = TopicPartition(topic=topic, partition=0, offset=0)
    partitions.append(partition)
    consumer.assign(partitions)
    # TODO: consider switching to subscribe() so the on_assign and on_revoke
    # callbacks can be used.

    # Per the original author, consume_messages() loops indefinitely and only
    # returns after a keyboard interrupt. Its return value is passed to
    # commit() below -- presumably the last message consumed; verify in
    # kafka_utils.
    msg = kafka_utils.consume_messages(consumer, db,
                                       kafka_utils.send_ngrams_postgres)

    # After exiting the poll loop, synchronously commit the final offsets and
    # close the consumer.
    consumer.commit(message=msg, asynchronous=False)
    consumer.close()
    # Finally, disconnect `r`'s connection pool (a Redis client according to
    # the original comment; `r` is created outside this fragment).
    r.connection_pool.disconnect()