def kafka_available():
    try:
        consumer_params = {
            'bootstrap.servers': '127.0.0.1',
            'group.id': 'kafka-unit-test',
            'auto.offset.reset': 'latest',
        }
        consumer = Consumer(consumer_params)
        consumer.list_topics(timeout=5)
        return True
    except KafkaException:
        return False
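A hedged usage sketch for a helper like the one above: gating integration tests with pytest. The marker name and the test below are illustrative assumptions, not part of the original snippet.

import pytest

# Hypothetical marker built on the kafka_available() helper above; skips
# integration tests when no broker answers on 127.0.0.1.
requires_kafka = pytest.mark.skipif(
    not kafka_available(), reason="no Kafka broker reachable on 127.0.0.1"
)

@requires_kafka
def test_broker_round_trip():
    ...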
def kafka_GetOffset(self, p_szTopicName, p_szGroupID=''):
    if self.__kafka_servers__ is None:
        raise SQLCliException(
            "Missed kafka server information. Please use set kafka server first ..")
    c = Consumer({
        'bootstrap.servers': self.__kafka_servers__,
        'group.id': p_szGroupID,
    })
    m_OffsetResults = []
    try:
        for pid in c.list_topics(topic=p_szTopicName).topics[p_szTopicName].partitions.keys():
            tp = TopicPartition(p_szTopicName, pid)
            (low, high) = c.get_watermark_offsets(tp)
            m_OffsetResults.append([pid, low, high])
        if len(m_OffsetResults) == 0:
            raise SQLCliException("Topic [" + p_szTopicName + "] does not exist!")
        return m_OffsetResults
    except KafkaException as ke:
        if "SQLCLI_DEBUG" in os.environ:
            print('traceback.print_exc():\n%s' % traceback.print_exc())
            print('traceback.format_exc():\n%s' % traceback.format_exc())
        raise ke
def desc_topic(args):
    c = Consumer({
        'bootstrap.servers': f'{args.broker}',
        'group.id': 'confluent-kafka-describe-topic',
    })
    topics = c.list_topics().topics
    if args.topic not in topics.keys():
        print(f'Topic "{args.topic}" not in cluster.')
    else:
        topic_metadata = topics[args.topic]
        partitions, leaders, replicas, isrs = [], [], [], []
        for metadata in topic_metadata.partitions.values():
            partitions.append(str(metadata.id))
            leaders.append(str(metadata.leader))
            replicas.append(str(metadata.replicas))
            isrs.append(str(metadata.isrs))
        partitions = ', '.join(partitions)
        leaders = ', '.join(leaders)
        replicas = ', '.join(replicas)
        isrs = ', '.join(isrs)
        print(f'Topic: {topic_metadata.topic}')
        print(f'Partition: {partitions}')
        print(f'Leader: {leaders}')
        print(f'Replica: {replicas}')
        print(f'ISRs: {isrs}')
    c.close()
def get_starting_offsets(topic_name):
    offsets_history_filename = get_topic_offsets_filename(topic_name)
    consumer = Consumer({
        'bootstrap.servers': BOOTSTRAP_SERVERS,
        'group.id': 'borisov_get_partitions_number',
    })
    n_partitions = len(consumer.list_topics().topics[topic_name].partitions)

    if os.path.isfile(offsets_history_filename):
        status, message = validate_offset_dump(topic_name, offsets_history_filename, n_partitions)
        if status == 'ERROR':
            raise Exception(message)
        else:
            return message
    else:
        starting_offsets_dict = {
            topic_name: {
                str(partition): -2 for partition in range(n_partitions)
            }
        }
        return json.dumps(starting_offsets_dict)
def test_storage_direct_writer_anonymized(
    kafka_prefix: str, kafka_server, consumer: Consumer
):
    writer_config = {
        "cls": "kafka",
        "brokers": [kafka_server],
        "client_id": "kafka_writer",
        "prefix": kafka_prefix,
        "anonymize": True,
    }
    storage_config: Dict[str, Any] = {
        "cls": "pipeline",
        "steps": [
            {"cls": "memory", "journal_writer": writer_config},
        ],
    }
    storage = get_storage(**storage_config)

    expected_messages = 0
    for obj_type, objs in TEST_OBJECTS.items():
        if obj_type == "origin_visit":
            # these have a non-consistent API and are unrelated to what we
            # want to test here
            continue
        method = getattr(storage, obj_type + "_add")
        method(objs)
        expected_messages += len(objs)

    existing_topics = set(
        topic
        for topic in consumer.list_topics(timeout=10).topics.keys()
        if topic.startswith(kafka_prefix)
    )
    assert existing_topics == {
        f"{kafka_prefix}.{obj_type}"
        for obj_type in (
            "content",
            "directory",
            "extid",
            "metadata_authority",
            "metadata_fetcher",
            "origin",
            "origin_visit",
            "origin_visit_status",
            "raw_extrinsic_metadata",
            "release",
            "revision",
            "snapshot",
            "skipped_content",
        )
    } | {
        f"{kafka_prefix}_privileged.{obj_type}"
        for obj_type in (
            "release",
            "revision",
        )
    }
class msgConsumer():
    def __init__(self, kafka_server, group_id):
        self.group_id = group_id
        conf = {
            'bootstrap.servers': kafka_server,
            'group.id': self.group_id,
            'default.topic.config': {
                'auto.offset.reset': 'smallest'
            }
        }
        self.streamReader = Consumer(conf)

    def topics(self):
        return list(self.streamReader.list_topics().topics.keys())

    def subscribe(self, topic):
        self.streamReader.subscribe([topic])

    def poll(self):
        try:
            msg = self.streamReader.poll(timeout=30)
            if msg is not None:
                return msg.value()
        except Exception:
            return None

    def close(self):
        self.streamReader.close()
def consume_topic(broker, topic, start_from_oldest=False, truncate=False):
    consumer = KafkaConsumer({
        "bootstrap.servers": broker,
        "group.id": f"get-topic-{time.time_ns()}",
        "auto.offset.reset": "earliest" if start_from_oldest else "latest",
    })
    metadata = consumer.list_topics(topic)
    if topic not in metadata.topics:
        raise Exception("Topic does not exist")
    topic_partitions = [
        TopicPartition(topic, p) for p in metadata.topics[topic].partitions
    ]
    consumer.assign(topic_partitions)
    while True:
        msg = consumer.poll(0.0)
        if msg:
            value = msg.value()[0:100] if truncate else msg.value()
            print(f"Timestamp: {msg.timestamp()[1]}\n{value}")
        time.sleep(0.1)
class KafkaQueryConsumer:
    """
    Wraps Kafka library consumer methods which query the broker for
    metadata and poll for single messages.
    It is a thin wrapper but allows a fake to be used in unit tests.
    """

    def __init__(self, broker: str):
        # Set "enable.auto.commit" to False, as we do not need to report to the
        # kafka broker where we got to (it usually does this in case of a
        # crash, but we simply restart the process and go and find the last
        # run_start message).
        #
        # Set "queued.min.messages" to 1 as we will consume backwards through
        # the partition one message at a time; we do not want to retrieve
        # multiple messages in the forward direction each time we step
        # backwards by 1 offset.
        conf = {
            "bootstrap.servers": broker,
            "group.id": "consumer_group_name",
            "auto.offset.reset": "latest",
            "enable.auto.commit": False,
            "queued.min.messages": 1,
        }
        self._consumer = Consumer(**conf)

    def get_topic_partitions(self, topic: str, offset: int = -1):
        metadata = self._consumer.list_topics(topic)
        return [
            TopicPartition(topic, partition[1].id, offset=offset)
            for partition in metadata.topics[topic].partitions.items()
        ]

    def seek(self, partition: TopicPartition):
        """
        Set offset in partition, the consumer will seek to that offset
        """
        self._consumer.seek(partition)

    def poll(self, timeout=2.):
        """
        Poll for a message from Kafka
        """
        return self._consumer.poll(timeout=timeout)

    def get_watermark_offsets(self, partition: TopicPartition) -> Tuple[int, int]:
        """
        Get the offset of the first and last available message in the given partition
        """
        return self._consumer.get_watermark_offsets(partition, cached=False)

    def assign(self, partitions: List[TopicPartition]):
        self._consumer.assign(partitions)

    def offsets_for_times(self, partitions: List[TopicPartition]):
        return self._consumer.offsets_for_times(partitions)
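A hedged sketch of how such a wrapper might be driven to read the newest message of each non-empty partition. It relies on the class above being importable; the broker address and topic name are assumptions, not from the original.

# Hypothetical driver for the wrapper above.
consumer = KafkaQueryConsumer("localhost:9092")  # assumed broker address
candidates = []
for partition in consumer.get_topic_partitions("run_topic"):  # assumed topic name
    low, high = consumer.get_watermark_offsets(partition)
    if high > low:  # skip partitions with no available messages
        partition.offset = high - 1  # offset of the newest message
        candidates.append(partition)
if candidates:
    consumer.assign(candidates)
    msg = consumer.poll(timeout=2.0)
    if msg is not None and msg.error() is None:
        print(msg.offset(), msg.value())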
def get_topics(broker):
    consumer = KafkaConsumer({
        "bootstrap.servers": broker,
        "group.id": f"get-topic-{time.time_ns()}",
        "auto.offset.reset": "latest",
    })
    metadata = consumer.list_topics()
    for n, v in metadata.topics.items():
        print(f"{n} {len(v.partitions)}")
def list_topics(args):
    c = Consumer({
        'bootstrap.servers': f'{args.broker}',
        'group.id': 'confluent-kafka-list-topic',
    })
    metadata = c.list_topics()
    c.close()
    for topic in metadata.topics.keys():
        print(topic)
class Kafka():
    def __init__(self, topic_name, group_id, auto_offset_reset):
        with open(config_file_path) as kafka_conf:
            self.conf = yaml.load(kafka_conf, Loader=yaml.FullLoader)
        self.group_id = group_id
        self.topic_name = topic_name
        self.auto_offset_reset = auto_offset_reset
        self.running_consumer = True
        self.c = Consumer({
            'bootstrap.servers': self.conf['bootstrap_servers'],
            'group.id': self.group_id,
            'auto.offset.reset': self.auto_offset_reset
        })
        self.c.subscribe([self.topic_name])
        print(self.c.list_topics())

    def consume(self):
        while self.running_consumer:
            a = 0
            msg = self.c.poll(1.0)
            if msg is None:
                print("empty message!")
                msg = "empty".encode('utf-8')
            else:
                a += 1
                msg = msg.value().decode('utf-8')
                print("message is : {}".format(msg))
                self.c.commit()
            if a % 10 == 0:
                self.running_consumer = False
        return msg

    def stop_consume(self):
        self.running_consumer = False
        time.sleep(10)
        self.consume()
def count_messages(bootstrap_servers):
    c = Consumer({
        'bootstrap.servers': bootstrap_servers,
        'group.id': 'group2',
        'enable.auto.commit': False,
        'auto.offset.reset': 'beginning'
    })
    metadata = c.list_topics()
    topics = metadata.topics
    for topic, topicMetadata in topics.items():
        for partition in topicMetadata.partitions:
            (low, high) = c.get_watermark_offsets(TopicPartition(topic, partition))
            print(f"{topic} {partition}: {high}")
class KafkaConsumer:
    def __init__(self, conf, group_id='kafka-rest-service'):
        conf = dict(conf)
        conf['group.id'] = group_id
        self.consumer = Consumer(conf)

    # @cached(cache=TTLCache(maxsize=1024, ttl=60))
    def get_topic_partition_count(self, topic_name):
        cmd = self.consumer.list_topics(topic_name)
        tmd = cmd.topics.get(topic_name, None)
        pcount = 0
        if tmd:
            pcount = len(tmd.partitions)
        return pcount

    # @cached(cache=TTLCache(maxsize=1024, ttl=60))
    def get_topic_offsets(self, topic_name):
        pcount = self.get_topic_partition_count(topic_name)
        if pcount == 0:
            return dict(error=f"Requested topic {topic_name} not found",
                        status="ERROR", report=None)

        part_status_map = {}
        for p in range(pcount):
            l, h = self.consumer.get_watermark_offsets(
                TopicPartition(topic_name, p))
            part_status_map[p] = [h, '1 month']

        def get_minute_report(minute, time_text):
            timestamp = (datetime.now() - timedelta(minutes=minute)).timestamp()
            timestamp = int(timestamp) * 1000
            partitions = [
                TopicPartition(topic_name, p, timestamp) for p in range(pcount)
            ]
            partitions = self.consumer.offsets_for_times(partitions)
            for par in partitions:
                if par.offset > -1:
                    part_status_map[par.partition][-1] = time_text

        get_minute_report(60 * 24 * 7, '1 week')
        get_minute_report(60 * 24, '1 day')
        get_minute_report(60, '1 hour')
        get_minute_report(10, '10 minutes')
        get_minute_report(1, '1 minute')

        part_status_map = {k: list(v) for k, v in part_status_map.items()}
        return dict(error=None, status="SUCCESS", topic=topic_name,
                    offsets=part_status_map)
def get_topics(kafka_broker):
    c_ = Consumer({
        'bootstrap.servers': kafka_broker,
        'group.id': "group" + str(uuid.uuid1()),
        'auto.offset.reset': 'earliest'
    })
    try:
        topics = c_.list_topics(timeout=2).topics
    except Exception:
        raise NoValidKafkaBroker("no valid broker: ", kafka_broker)
    return topics
class KafkaConsumer(object):
    """Consumer."""

    def __init__(self, kafka_url, topic, group_id):
        self.__kafka_url = kafka_url
        self.__topic = topic
        self.consumer = Consumer({
            'bootstrap.servers': self.__kafka_url,
            'group.id': group_id,
            'default.topic.config': {'auto.offset.reset': 'smallest'}
        })
        assert self.__topic in self.consumer.list_topics().topics, \
            'Kafka.Consumer.init: not found topic[{0}]'.format(self.__topic)
        self.consumer.subscribe([self.__topic])

    def run(self, callbacks=None):
        if callbacks is None:
            callbacks = []
        try:
            while True:
                msg = self.consumer.poll(1)
                if msg is None:
                    continue
                if not msg.error():
                    value = msg.value()
                    try:
                        data = json.loads(value.decode('utf-8'))
                    except Exception as ex:
                        print('[failed][kafka]json.loads message failed: {0}\nvalue: {1}'.format(ex, value))
                    else:
                        print('[succeed][kafka]message received from {0} [{1}] value: {2}'.format(
                            msg.topic(), msg.partition(), value))
                        for callback in callbacks:
                            callback(data)
                elif msg.error().code() != KafkaError._PARTITION_EOF:
                    break
        except KeyboardInterrupt:
            print('[failed][kafka]KeyboardInterrupt.')
        finally:
            self.consumer.close()
def consume():
    # `conf` and `topic_name` are assumed to be defined at module level.
    c = Consumer(conf)
    num_partitions = len(c.list_topics().topics[topic_name].partitions)
    # Manually assign every partition, starting from offset 0.
    topic_partitions = [
        TopicPartition(topic_name, partition_index, 0)
        for partition_index in range(num_partitions)
    ]
    c.assign(topic_partitions)
    while True:
        msg = c.poll(5)
        if msg is None:
            continue
        if msg.error():
            raise KafkaException(msg.error())
        print(msg.value())
def create_topic(brokers, topic, partition_count=1, replica_count=1):
    """Create a topic if it does not exist.

    Args:
        brokers (list): The 'host[:port]' list that the producer should
            contact to bootstrap initial cluster metadata.
        topic (str): Topic where the message will be published.
        partition_count (int): Specified partition number (default 1).
        replica_count (int): Specified replication factor (default 1).

    Returns:
        partitions (list): A list of partition numbers.
    """
    c = Consumer({
        'bootstrap.servers': ','.join(brokers),
        'group.id': 'a_consumer'
    })
    topics = c.list_topics().topics
    c.close()

    if topic in topics.keys():
        partitions = list(topics[topic].partitions.keys())
    else:
        new_topic = NewTopic(
            topic=topic,
            num_partitions=partition_count,
            replication_factor=replica_count,
        )
        admin = AdminClient({
            'bootstrap.servers': ','.join(brokers),
        })
        status = admin.create_topics([new_topic])
        while not status[topic].done():
            pass
        partitions = [p for p in range(partition_count)]
    return partitions
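A hedged call-site sketch for the helper above; the broker address and topic name are assumptions.

# Hypothetical call site: ensure a three-partition topic exists before producing.
partitions = create_topic(["localhost:9092"], "demo-events", partition_count=3)
print(f"'demo-events' partitions: {partitions}")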
def create_topic(self):
    """Creates the topic with the given topic name."""
    conf = {
        'bootstrap.servers': self.bootstrap_servers,
        'group.id': 'listTopics',
        'session.timeout.ms': 6000,
        'auto.offset.reset': 'latest'
    }
    consumer = Consumer(**conf)
    topics = consumer.list_topics().topics.keys()
    if self.topic not in topics:
        client = AdminClient({"bootstrap.servers": self.bootstrap_servers})
        futures = client.create_topics([
            NewTopic(topic=self.topic, num_partitions=1, replication_factor=1)
        ])
        for _, future in futures.items():
            try:
                future.result()
            except Exception:
                pass
group_id = 'consumer_group_' + sys.argv[1]

# Load config
config = yaml.load(open('./kafka_consumer.yaml'), Loader=yaml.FullLoader)
bootstrap_servers = ",".join(config['bootstrap_servers'])

# Initialize consumer
consumer = Consumer({
    'bootstrap.servers': bootstrap_servers,
    'group.id': group_id,
    'auto.offset.reset': 'end'
})

# Subscribe to all topics
topics = list(consumer.list_topics().topics.keys())
print("Total Topics Found: {}".format(len(topics)))
consumer.subscribe(topics)

count = 0
while 1:
    message = consumer.poll(1.0)
    if message is None:
        continue
    if message.error():
        print('ERROR: {}'.format(message.error()))
        continue
    print("[{}]: RECEIVE: {}:{}:{}: key={} value={}".format(
        datetime.now().isoformat(), message.topic(), message.partition(),
        message.offset(), message.key(), msgpack.unpackb(message.value())))
def get_cluster_metadata(broker: str, timeout: float) -> admin.ClusterMetadata:
    consumer = Consumer({'bootstrap.servers': broker, 'group.id': 'groupid'})
    ret = consumer.list_topics(timeout=timeout)
    consumer.close()
    return ret
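A brief, hedged call-site sketch; the broker address is an assumption. The returned ClusterMetadata maps topic names to TopicMetadata objects, so partition counts can be read directly.

# Hypothetical call site for the helper above.
metadata = get_cluster_metadata("localhost:9092", timeout=5.0)
for name, topic_md in metadata.topics.items():
    print(f"{name}: {len(topic_md.partitions)} partition(s)")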
def launch(api_key, port, data_directory=None, topic='announce'):
    logging.basicConfig(level=logging.DEBUG)

    # Initialize the database
    if data_directory is None:
        data_directory = os.getcwd()
    db = tinydb.TinyDB(os.path.join(data_directory, 'run_db.json'))

    logging.info('Constructing local consumer')
    consumer = Consumer({
        'bootstrap.servers': 'localhost:' + str(port),
        'group.id': 0,
        'auto.offset.reset': 'earliest',
        'sasl.username': '******',
        'sasl.password': api_key,
        'security.protocol': 'sasl_plaintext',
        'sasl.mechanism': 'PLAIN',
    })
    adm_client = AdminClient({
        'bootstrap.servers': 'localhost:' + str(port),
        'group.id': 0,
        'auto.offset.reset': 'earliest',
        'sasl.username': '******',
        'sasl.password': api_key,
        'security.protocol': 'sasl_plaintext',
        'sasl.mechanism': 'PLAIN',
    })

    # Clean up the Kafka board
    try:
        results = adm_client.delete_topics(
            list(consumer.list_topics().topics.keys()))
        for v in results.values():
            v.result()
    except ValueError:
        pass

    # Create the announce topic
    try:
        logging.info('Setting up announce topic')
        tp_future = adm_client.create_topics([NewTopic('announce', 1, 1)])
        tp_future['announce'].result()  # Wait for the future
        logging.info('Topic created!')
    except KafkaException as ex:
        logging.warning(ex)

    logging.info('Connecting to topic: %s', topic)
    consumer.subscribe([topic])

    # Main consumer loop
    while True:
        msg = consumer.poll(0.1)

        # Validate the message is good
        if msg is None:
            continue
        if msg.error():
            logging.error('Topic Consumer Error: %s', msg.error())
            continue

        logging.info('Processing Message')
        process_message(msg.value(), db, data_directory, api_key, port, adm_client)
class Kafka():
    def __init__(self, topic_name, group_id, auto_offset_reset, kafka_id):
        with open(config_file_path) as kafka_conf:
            self.conf = yaml.load(kafka_conf, Loader=yaml.FullLoader)
        self.mysql = Mysql()
        self.elasticsearch_instance = Elastic()
        self.group_id = group_id
        self.topic_name = topic_name
        self.auto_offset_reset = auto_offset_reset
        self.running_consumer = True
        self.kafka_id = kafka_id
        self.c = Consumer({
            'bootstrap.servers': self.conf['bootstrap_servers'],
            'group.id': self.group_id,
            'auto.offset.reset': self.auto_offset_reset
        })
        self.c.subscribe([self.topic_name])
        self.batch_size = float(self.conf['batch_size'])
        print(self.c.list_topics())
        print("{}th kafka_object has been created!".format(self.kafka_id))

    def consume(self, index, consumer_id):
        a = 0
        data = []
        self.index = index
        self.consumer_id = consumer_id
        self.old_consumer_record = self.mysql.get_list(consumer_id=self.consumer_id)
        while self.running_consumer:
            msg = self.c.poll(self.batch_size)
            if msg is None:
                a += 1
                print("empty message!")
            else:
                a += 1
                msg = msg.value().decode('utf-8')
                data.append(msg)
            if a % 10 == 0:
                self.running_consumer = False
                if len(data) > 5:
                    self.elasticsearch_instance.post(data=data, index=self.index)
                    print("elk_consume for index : {}".format(self.index))
                # Check for changes in MySQL
                data = []
                a = 0
                consumer_record = self.mysql.get_list(consumer_id=self.consumer_id)
                if consumer_record != self.old_consumer_record:
                    self.old_consumer_record = consumer_record
                    print("record has changed in database!")
                    self.c.close()
                    break
                else:
                    self.c.commit()
                    self.running_consumer = True
        return None
consumers = []
for group, (topicslist, partition, offset) in groupsTopics.items():
    topicFilter = [re.compile(pat) for pat in topicslist]
    con = Consumer({
        'bootstrap.servers': ",".join(bsServers),
        'group.id': group,
        'default.topic.config': {
            'auto.offset.reset': 'earliest'
        }
    })
    thesetopics = [
        tpmat.group(0)
        for tpmat in [
            pat.match(topic)
            for pat in topicFilter
            for topic in con.list_topics().topics
        ]
        if tpmat
    ]
    if thesetopics:
        con.assign(
            [TopicPartition(tp, partition, offset) for tp in thesetopics])
        didAssign = {tpp.topic for tpp in con.assignment()}
        diffAssign = set(thesetopics).difference(didAssign)
        if diffAssign:
            pe_log(
                f"Error, something awry: attempt to assign topics to consumer group '{group}' "
                f"did not assign topics: {diffAssign}"
            )
    consumers.append((group, con))
pi_log(
class ConsoleConsumer:
    def __init__(self, brokers, topic, offset, key_decoder, value_decoder,
                 registry_url, additional_properties):
        config = {
            'bootstrap.servers': brokers,
            'enable.partition.eof': 'true',
            'group.id': 'not-used',
            'auto.offset.reset': 'earliest',
            'enable.auto.commit': 'false'
        }
        self.consumer = Consumer({**additional_properties, **config})
        self.topic = topic
        self.offset = offset.lower()
        self.key_decoder = key_decoder.lower()
        self.value_decoder = value_decoder.lower()
        self.avro_serializer = None
        if registry_url:
            client = CachedSchemaRegistryClient(registry_url)
            self.avro_serializer = MessageSerializer(client)

    def run(self):
        try:
            partition_ends = 0
            total_parts, partitions = self._partitions()
            self.consumer.assign(partitions)

            while True:
                msg = self.consumer.poll(timeout=0.5)
                if msg is None:
                    continue

                if msg.error():
                    if msg.error().code() == KafkaError._PARTITION_EOF:
                        eprint(f'{msg.topic()} reached end of partition [{msg.partition()}] at offset {msg.offset()}')
                        partition_ends += 1
                        if partition_ends == total_parts:
                            break
                    else:
                        raise KafkaException(msg.error())
                else:
                    record = {
                        'key': self._decode(self.key_decoder, msg.key()),
                        'payload': self._decode(self.value_decoder, msg.value()),
                        'topic': msg.topic(),
                        'partition': msg.partition(),
                        'offset': msg.offset(),
                        'timestamp': msg.timestamp()[1]
                    }
                    print(json.dumps(record))
        finally:
            self.consumer.close()

    def _partitions(self):
        parts = []
        topic_data = self.consumer.list_topics(topic=self.topic)
        total_parts = len(topic_data.topics[self.topic].partitions)
        for i in range(0, total_parts):
            partition = TopicPartition(self.topic, i, offset=OFFSET_BEGINNING)
            if self.offset == 'earliest':
                parts.append(partition)
            else:
                try:
                    start, end = self.consumer.get_watermark_offsets(
                        partition, timeout=0.5)
                    real_offset = int(self.offset)
                    ass_offset = (end + real_offset) if (real_offset < 0) else (start + real_offset)
                    parts.append(TopicPartition(self.topic, i, offset=ass_offset))
                except ValueError:
                    eprint(f"Could not parse offset: {self.offset}")
                    exit(1)
        return total_parts, parts

    def _decode(self, data_type, payload):
        if data_type == "avro":
            return self.avro_serializer.decode_message(payload)

        payload_str = payload.decode('utf-8')
        try:
            return json.loads(payload_str)
        except (JSONDecodeError, TypeError):
            return payload_str
def get_last_n_messages(self, n: int) -> Optional[List[Tuple[datetime.datetime, Dict]]]:
    '''
    Returns the last n published timestamps and messages, or None if no
    message has been published yet. If the configured topic has more than
    one partition, you may receive more messages than requested (at most
    partitions * n). You may receive fewer messages than requested if the
    broker has already cleared messages.

    :return: List of tuples with timestamp and message, or None if no
        message has been published yet
    '''
    consumer = Consumer({
        'bootstrap.servers': self.__kafka_bootstrap,
        'group.id': self.__import_id
    })
    partitions = consumer.list_topics(
        topic=self.__kafka_topic).topics[self.__kafka_topic].partitions.keys()
    self.__logger.debug("Found " + str(len(partitions)) +
                        " partition(s) of topic " + self.__kafka_topic)

    num_messages = 0
    topic_partitions = []
    for partition in partitions:
        high_low_offset = consumer.get_watermark_offsets(
            cimpl.TopicPartition(self.__kafka_topic, partition=partition))
        high_offset = high_low_offset[1]
        low_offset = high_low_offset[0]
        available_messages = high_offset - low_offset
        self.__logger.debug("Low/High offset of partition " + str(partition) +
                            " is " + str(low_offset) + "/" + str(high_offset))
        if high_offset > 0:  # Ignore partitions without data
            if available_messages >= n:
                offset = high_offset - n
                num_messages += n
            else:
                offset = low_offset
                num_messages += available_messages
            partition = cimpl.TopicPartition(self.__kafka_topic,
                                             partition=partition,
                                             offset=offset)
            topic_partitions.append(partition)
            self.__logger.debug("Setting offset of partition " + str(partition))

    if len(topic_partitions) == 0:  # No partition has any data
        return None

    consumer.assign(topic_partitions)
    consumer.commit(offsets=topic_partitions)

    tuples = []
    consumed_messages = 0
    batch_size = 10000
    self.__logger.debug("Consuming last " + str(num_messages) + " message(s)")
    while consumed_messages < num_messages:
        if consumed_messages + batch_size <= num_messages:
            to_consume = batch_size
        else:
            to_consume = num_messages - consumed_messages
        consumed_messages += to_consume
        self.__logger.debug("Consuming batch of " + str(to_consume) + " messages")
        msgs = consumer.consume(num_messages=to_consume, timeout=30)
        for msg in msgs:
            value = json.loads(msg.value())
            if 'time' not in value:
                self.__logger.warning(
                    "time field missing in message, is someone else using this topic? "
                    "Ignoring message")
                continue
            if 'value' not in value or not isinstance(value['value'], Dict):
                self.__logger.warning(
                    "value field missing or malformed in message, is someone else "
                    "using this topic? Ignoring message")
                continue
            try:
                date_time = datetime.datetime.strptime(value["time"],
                                                       "%Y-%m-%dT%H:%M:%SZ")
            except ValueError:
                self.__logger.warning(
                    "time field not in rfc3339 format, is someone else using this "
                    "topic? Ignoring message")
                continue
            tuples.append((date_time, value["value"]))

    consumer.close()
    return tuples
def test_basic_api():
    """ Basic API tests; these won't really do anything since there is no
        broker configured. """

    try:
        kc = Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({'group.id': 'test', 'socket.timeout.ms': '100',
                   'session.timeout.ms': 1000,  # Avoid close() blocking too long
                   'on_commit': dummy_commit_cb})

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"], on_assign=dummy_assign_revoke, on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    if msg is not None:
        assert msg.timestamp() == (TIMESTAMP_NOT_AVAILABLE, -1)

    msglist = kc.consume(num_messages=10, timeout=0.001)
    assert len(msglist) == 0, "expected 0 messages, not %d" % len(msglist)

    with pytest.raises(ValueError) as ex:
        kc.consume(-100)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    with pytest.raises(ValueError) as ex:
        kc.consume(1000001)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    partitions = list(map(lambda part: TopicPartition("test", part), range(0, 100, 3)))
    kc.assign(partitions)

    with pytest.raises(KafkaException) as ex:
        kc.seek(TopicPartition("test", 0, 123))
    assert 'Erroneous state' in str(ex.value)

    # Verify assignment
    assignment = kc.assignment()
    assert partitions == assignment

    # Pause partitions
    kc.pause(partitions)

    # Resume partitions
    kc.resume(partitions)

    # Get cached watermarks, should all be invalid.
    lo, hi = kc.get_watermark_offsets(partitions[0], cached=True)
    assert lo == -1001 and hi == -1001
    assert lo == OFFSET_INVALID and hi == OFFSET_INVALID

    # Query broker for watermarks, should raise an exception.
    try:
        lo, hi = kc.get_watermark_offsets(partitions[0], timeout=0.5, cached=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._WAIT_COORD,
                                    KafkaError.LEADER_NOT_AVAILABLE), str(e.args[0])

    kc.unassign()

    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions if p.offset == OFFSET_INVALID]) == len(partitions)

    try:
        kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    try:
        kc.list_topics(timeout=0.2)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)

    try:
        kc.list_topics(topic="hi", timeout=0.1)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)

    kc.close()
class ConfluentKafkaMsgQAPI:
    """
    This class provides APIs to interact with the Kafka queue.
    """

    def __init__(self, is_producer=False, is_consumer=False,
                 perform_subscription=False, thread_identifier=None):
        if not is_producer and not is_consumer:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: You need to pick either producer or consumer.")
        self.producer_instance = None
        self.consumer_instance = None
        self.broker_name = None
        self.topic = None
        self.producer_conf = None
        self.consumer_conf = None
        self.is_topic_created = False
        self.perform_subscription = perform_subscription
        self.thread_identifier = thread_identifier
        self.__read_environment_variables()
        if is_producer:
            self.__producer_connect()
        if is_consumer:
            self.__consumer_connect()

    def __read_environment_variables(self):
        """
        This method is used to read the environment variables defined in the OS.
        :return:
        """
        while self.broker_name is None or self.topic is None:
            time.sleep(2)
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: Trying to read the environment variables...")
            self.broker_name = os.getenv("broker_name_key", default=None)
            self.topic = os.getenv("topic_key", default=None)
        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: broker_name={}".format(self.broker_name))
        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: topic={}".format(self.topic))

    # Optional per-message delivery callback (triggered by poll() or flush())
    # when a message has been successfully delivered or permanently
    # failed delivery (after retries).
    @staticmethod
    def delivery_callback(err, msg):
        if err:
            logging_to_console_and_syslog('%% Message failed delivery: %s\n' % err)
        else:
            logging_to_console_and_syslog(
                '%% Message delivered to %s [%d] @ %s\n' %
                (msg.topic(), msg.partition(), str(msg.offset())))

    def __producer_connect(self):
        """
        This method tries to connect to the kafka broker based upon the type of kafka.
        :return:
        """
        while self.producer_instance is None:
            try:
                self.producer_conf = {'bootstrap.servers': self.broker_name}
                # Create Producer instance
                self.producer_instance = Producer(**self.producer_conf)
            except Exception:
                print("Exception in user code:")
                print("-" * 60)
                traceback.print_exc(file=sys.stdout)
                print("-" * 60)
                time.sleep(5)
            else:
                logging_to_console_and_syslog(
                    "ConfluentKafkaMsgQAPI: Successfully connected to "
                    "broker_name={}".format(self.broker_name))

    def __consumer_connect(self):
        status = False
        try:
            if self.perform_subscription:
                self.__consumer_connect_to_broker()
                self.__subscribe_to_a_topic()
            else:
                self.__consumer_connect_to_kafka_broker_and_to_a_topic()
            status = True
        except Exception:
            logging_to_console_and_syslog(
                "{}:Exception occurred while polling for "
                "a message from kafka Queue. {} ".format(
                    self.thread_identifier, sys.exc_info()[0]))
            print("{}:Exception in user code:".format(self.thread_identifier))
            print("-" * 60)
            traceback.print_exc(file=sys.stdout)
            print("-" * 60)
        return status

    def enqueue(self, filename):
        """
        This method tries to post a message to the pre-defined kafka topic.
        :param filename:
        :return status False or True:
        """
        status = False
        if filename is None or len(filename) == 0:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: filename is None or invalid")
            return status
        if self.producer_instance is None:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI: instance is None")
            return status

        if not self.is_topic_created:
            try:
                if self.producer_instance.list_topics(self.topic, timeout=1.0):
                    logging_to_console_and_syslog(
                        "Found topic name = {} in the zookeeper.".format(self.topic))
                    self.is_topic_created = True
            except KafkaException:
                self.kafka_admin_client = admin.AdminClient(self.producer_conf)
                logging_to_console_and_syslog("Creating topic {}.".format(self.topic))
                ret = self.kafka_admin_client.create_topics(
                    new_topics=[admin.NewTopic(topic=self.topic, num_partitions=1)],
                    operation_timeout=1.0)
                logging_to_console_and_syslog("ret = {}".format(ret))

        # Asynchronously produce a message; the delivery report callback will
        # be triggered from poll() or flush() below, when the message has been
        # successfully delivered or has failed permanently.
        logging_to_console_and_syslog(
            "ConfluentKafkaMsgQAPI: Posting filename={} into "
            "kafka broker={}, topic={}".format(filename, self.broker_name, self.topic))
        value = filename.encode('utf-8')
        try:
            # Produce line (without newline)
            self.producer_instance.produce(
                self.topic, value, callback=ConfluentKafkaMsgQAPI.delivery_callback)
            status = True
        except BufferError:
            sys.stderr.write('%% Local producer queue is full '
                             '(%d messages awaiting delivery): try again\n' %
                             len(self.producer_instance))
            status = False
        except Exception:
            print("ConfluentKafkaMsgQAPI: Exception in user code:")
            print("-" * 60)
            traceback.print_exc(file=sys.stdout)
            print("-" * 60)
            status = False
        else:
            event = "ConfluentKafkaMsgQAPI: Posting filename={} into " \
                    "kafka broker={}, topic={}.".format(filename, self.broker_name, self.topic)
            logging_to_console_and_syslog(event)

        # Wait for any outstanding messages to be delivered and delivery report
        # callbacks to be triggered (serve the delivery callback queue).
        # NOTE: Since produce() is an asynchronous API this poll() call
        # will most likely not serve the delivery callback for the
        # last produce()d message.
        self.producer_instance.poll(timeout=0.1)
        # Wait until all messages have been delivered
        self.producer_instance.flush(timeout=0.1)
        return status

    def __consumer_connect_to_kafka_broker_and_to_a_topic(self):
        """
        This method tries to connect to the kafka broker.
        :return:
        """
        pass

    def __consumer_poll_for_new_messages(self):
        logging_to_console_and_syslog(
            "{}: Polling the kafka consumer instance for "
            "new messages in the topic {}.".format(self.thread_identifier, self.topic))
        # Read messages from Kafka, print to stdout
        try:
            while True:
                msg = self.consumer_instance.poll(timeout=1.0)
                if msg is None:
                    continue
                if msg.error():
                    raise KafkaException(msg.error())
                else:
                    # Proper message
                    sys.stderr.write('%% %s [%d] at offset %d with key %s:\n' %
                                     (msg.topic(), msg.partition(), msg.offset(),
                                      str(msg.key())))
                    print(msg.value())
        except KeyboardInterrupt:
            sys.stderr.write('%% Aborted by user\n')
        finally:
            # Close down consumer to commit final offsets.
            self.consumer_instance.close()
        return None

    def __consumer_connect_to_broker(self):
        """
        This method tries to connect to the kafka broker.
        :return:
        """
        if self.consumer_instance:
            return
        # Consumer configuration
        # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
        consumer_conf = {
            'bootstrap.servers': self.broker_name,
            'group.id': 'group',
            'session.timeout.ms': 6000,
            'auto.offset.reset': 'earliest'
        }
        consumer_conf['stats_cb'] = stats_cb
        consumer_conf['statistics.interval.ms'] = 0

        # Create logger for consumer (logs will be emitted when poll() is called)
        logger = logging.getLogger('consumer')
        logger.setLevel(logging.DEBUG)
        handler = logging.StreamHandler()
        handler.setFormatter(
            logging.Formatter('%(asctime)-15s %(levelname)-8s %(message)s'))
        logger.addHandler(handler)

        while self.consumer_instance is None:
            try:
                logging_to_console_and_syslog(
                    "Consumer:{}:Trying to connect to broker_name={}".format(
                        self.thread_identifier, self.broker_name))
                # Create Consumer instance
                # Hint: try debug='fetch' to generate some log messages
                self.consumer_instance = Consumer(consumer_conf, logger=logger)
            except Exception:
                logging_to_console_and_syslog(
                    "Consumer:{}:Exception in user code:".format(self.thread_identifier))
                logging_to_console_and_syslog("-" * 60)
                traceback.print_exc(file=sys.stdout)
                logging_to_console_and_syslog("-" * 60)
                time.sleep(5)

        logging_to_console_and_syslog(
            "Consumer:{}:Consumer successfully connected to broker_name={}".format(
                self.thread_identifier, self.broker_name))

    @staticmethod
    def print_assignment(consumer, partitions):
        logging_to_console_and_syslog('consumer = {}, Assignment {}:',
                                      repr(consumer), partitions)

    def __subscribe_to_a_topic(self):
        try:
            cluster_meta_data = self.consumer_instance.list_topics(self.topic, timeout=0.3)
            logging_to_console_and_syslog("ClusterMetaData={}".format(repr(cluster_meta_data)))
            if self.topic not in cluster_meta_data.topics.keys():
                logging_to_console_and_syslog(
                    "Topic {} is not found in the ClusterMetaData {}".format(
                        self.topic, repr(cluster_meta_data.topics.keys())))
                raise KafkaException

            def print_assignment(consumer, partitions):
                print('Assignment:', partitions)

            # Subscribe to the topic
            self.consumer_instance.subscribe([self.topic], on_assign=print_assignment)
        except Exception:
            logging_to_console_and_syslog(
                "Consumer:{}: Subscribed to topic {}.".format(
                    self.thread_identifier, self.topic))
        return True

    def __iterate_over_kafka_consumer_instance_messages(self):
        pass

    def dequeue(self):
        try:
            if self.perform_subscription:
                return self.__consumer_poll_for_new_messages()
            else:
                return self.__iterate_over_kafka_consumer_instance_messages()
        except Exception:
            logging_to_console_and_syslog(
                "ConfluentKafkaMsgQAPI:Exception occurred while polling for "
                "a message from kafka Queue. {} ".format(sys.exc_info()[0]))
            logging_to_console_and_syslog("ConfluentKafkaMsgQAPI:Exception in user code:")
            logging_to_console_and_syslog("-" * 60)
            traceback.print_exc(file=sys.stdout)
            logging_to_console_and_syslog("-" * 60)
        return None

    def cleanup(self):
        pass
    if check and arr[0] >= arr[1] // 2:
        fw.write(f"{line}")
    line = f.readline()

f.close()
fw.close()

consumer = Consumer({
    'bootstrap.servers': config['DEFAULT']['KafkaServer'],
    'group.id': 'mygroup',
    'client.id': 'client-1',
    'enable.auto.commit': True,
    'session.timeout.ms': 6000,
    'default.topic.config': {
        'auto.offset.reset': 'smallest'
    },
})
admin_client = AdminClient(
    {'bootstrap.servers': config["DEFAULT"]["KafkaServer"]})
clusterMetaData = consumer.list_topics()

# Gather topic names
topics_delete = []
for key in clusterMetaData.topics:
    if topic_header in key:
        topics_delete.append(key)

# Delete finished topics
admin_client.delete_topics(topics_delete)
def list_topics(c: Consumer, filter_by="ztf"):
    topics = c.list_topics().topics.keys()
    return list(filter(lambda x: filter_by in x, list(topics)))
class KafkaHandler(BaseHandler[KafkaHandlerConfig]):
    config_cls = KafkaHandlerConfig
    _eof_reached: Dict[int, bool]

    OFFSET_AT_FIRST_MESSAGE = OFFSET_BEGINNING
    OFFSET_AFTER_LAST_MESSAGE = OFFSET_END
    # hopefully this number won't get assigned any semantics by the Kafka devs any time soon
    OFFSET_AT_LAST_MESSAGE = -101

    def __init__(self, config: KafkaHandlerConfig):
        super().__init__(config)
        self._assignment_created = False
        self._seek = OFFSET_BEGINNING
        self._high_watermarks: Dict[int, int] = {}
        self._consumer: Optional[Consumer] = None
        self._producer: Optional[Producer] = None
        self._errors: List[KafkaError] = []

    def _get_producer(self) -> Producer:
        if self._producer is not None:
            return self._producer
        config_instance = esque_config.Config()
        with config_instance.temporary_context(self.config.esque_context):
            self._producer = Producer(
                config_instance.create_confluent_config(include_schema_registry=False))
        return self._producer

    def _get_consumer(self) -> Consumer:
        if self._consumer is not None:
            return self._consumer
        config_instance = esque_config.Config()
        with config_instance.temporary_context(self.config.esque_context):
            group_id = self.config.consumer_group_id
            self._consumer = Consumer({
                "group.id": group_id,
                "enable.partition.eof": True,
                "enable.auto.commit": False,
                **config_instance.create_confluent_config(include_schema_registry=False),
            })
        topic_metadata: TopicMetadata = self._consumer.list_topics(
            self.config.topic_name).topics[self.config.topic_name]
        if topic_metadata.error is not None:
            raise EsqueIOHandlerReadException(
                f"Topic {self.config.topic_name!r} not found.")
        self._eof_reached = {
            partition_id: False for partition_id in topic_metadata.partitions.keys()
        }
        for partition_id in topic_metadata.partitions.keys():
            self._high_watermarks[partition_id] = self._consumer.get_watermark_offsets(
                TopicPartition(topic=self.config.topic_name, partition=partition_id))[1]
        return self._consumer

    def get_serializer_configs(self) -> Tuple[Dict[str, Any], Dict[str, Any]]:
        raise EsqueIOSerializerConfigNotSupported

    def put_serializer_configs(self, config: Tuple[Dict[str, Any], Dict[str, Any]]) -> None:
        raise EsqueIOSerializerConfigNotSupported

    def write_message(self, binary_message: Union[BinaryMessage, StreamEvent]) -> None:
        self._produce_single_message(binary_message=binary_message)
        self._flush()

    def write_many_messages(self, message_stream: Iterable[Union[BinaryMessage, StreamEvent]]) -> None:
        for binary_message in message_stream:
            self._produce_single_message(binary_message=binary_message)
        self._flush()

    def _produce_single_message(self, binary_message: BinaryMessage) -> None:
        if isinstance(binary_message, StreamEvent):
            return
        partition_arg = {}
        partition = self._io_to_confluent_partition(binary_message.partition)
        if partition is not None:
            partition_arg["partition"] = partition
        self._get_producer().produce(
            topic=self.config.topic_name,
            value=binary_message.value,
            key=binary_message.key,
            headers=self._io_to_confluent_headers(binary_message.headers),
            timestamp=self._io_to_confluent_timestamp(binary_message.timestamp),
            on_delivery=self._delivery_callback,
            **partition_arg,
        )

    def _delivery_callback(self, err: Optional[KafkaError], msg: str):
        if err is None:
            return
        self._errors.append(err)

    def _flush(self):
        self._get_producer().flush()
        if self._errors:
            exception = EsqueIOHandlerWriteException(
                "The following exception(s) occurred while writing to Kafka:\n  "
                + "\n  ".join(map(str, self._errors)))
            self._errors.clear()
            raise exception

    @staticmethod
    def _io_to_confluent_partition(partition: int) -> Optional[int]:
        # TODO: introduce something like the config.send_timestamp flag to make it
        # possible to always return None here. This would allow for moving messages
        # between topics with different amounts of partitions without making them
        # unbalanced.
        if partition < 0:
            return None
        return partition

    def _io_to_confluent_timestamp(self, message_ts: datetime.datetime):
        return int(message_ts.timestamp() * 1000) if self.config.send_timestamp else 0

    @staticmethod
    def _io_to_confluent_headers(
        headers: List[MessageHeader],
    ) -> Optional[List[Tuple[str, Optional[bytes]]]]:
        if not headers:
            return None
        confluent_headers: List[Tuple[str, Optional[bytes]]] = []
        for header in headers:
            key = header.key
            if header.value is not None:
                value = header.value.encode("utf-8")
            else:
                value = None
            confluent_headers.append((key, value))
        return confluent_headers

    def read_message(self) -> Union[BinaryMessage, StreamEvent]:
        if not self._assignment_created:
            self._assign()

        consumed_message: Optional[Message] = None
        while consumed_message is None:
            consumed_message = self._get_consumer().poll(timeout=0.1)
            if consumed_message is None and all(self._eof_reached.values()):
                return TemporaryEndOfPartition(
                    "Reached end of all partitions",
                    partition=EndOfStream.ALL_PARTITIONS)

        # TODO: process other error cases (connection issues etc.)
        if consumed_message.error() is not None and \
                consumed_message.error().code() == KafkaError._PARTITION_EOF:
            self._eof_reached[consumed_message.partition()] = True
            return TemporaryEndOfPartition(
                "Reached end of partition", partition=consumed_message.partition())
        else:
            self._eof_reached[consumed_message.partition()] = False
            binary_message = self._confluent_to_binary_message(consumed_message)
            return binary_message

    def _confluent_to_binary_message(self, consumed_message: Message) -> BinaryMessage:
        binary_message = BinaryMessage(
            key=consumed_message.key(),
            value=consumed_message.value(),
            partition=consumed_message.partition(),
            offset=consumed_message.offset(),
            timestamp=self._confluent_to_io_timestamp(consumed_message),
            headers=self._confluent_to_io_headers(consumed_message.headers()),
        )
        return binary_message

    @staticmethod
    def _confluent_to_io_timestamp(consumed_message: Message) -> datetime.datetime:
        return datetime.datetime.fromtimestamp(
            consumed_message.timestamp()[1] / 1000, tz=datetime.timezone.utc)

    @staticmethod
    def _confluent_to_io_headers(
        confluent_headers: Optional[List[Tuple[str, Optional[bytes]]]],
    ) -> List[MessageHeader]:
        io_headers: List[MessageHeader] = []
        if confluent_headers is None:
            return io_headers
        for confluent_header in confluent_headers:
            key, value = confluent_header
            if value is not None:
                value = value.decode("utf-8")
            io_headers.append(MessageHeader(key, value))
        return io_headers

    def message_stream(self) -> Iterable[Union[BinaryMessage, StreamEvent]]:
        while True:
            yield self.read_message()

    def seek(self, position: int) -> None:
        self._seek = position

    def _assign(self) -> None:
        self._assignment_created = True
        if self._seek == self.OFFSET_AT_LAST_MESSAGE:
            self._get_consumer().assign([
                TopicPartition(topic=self.config.topic_name,
                               partition=partition_id,
                               offset=high_watermark - 1)
                for partition_id, high_watermark in self._high_watermarks.items()
            ])
        else:
            self._get_consumer().assign([
                TopicPartition(topic=self.config.topic_name,
                               partition=partition_id,
                               offset=self._seek)
                for partition_id in self._eof_reached.keys()
            ])

    def close(self) -> None:
        if self._consumer is not None:
            self._consumer.close()
            self._consumer = None
        if self._producer is not None:
            self._producer.flush()
            self._producer = None