Example #1
 def __init__(self, kafka_host, zk_host, log):
     self.log = log
     try:
         self.client = KafkaClient(hosts=kafka_host, zookeeper_hosts=zk_host)
     except Exception as e:
         self.log.error("consumer failed to connect to kafka: %s\n%s" % (e, traceback.format_exc()))
         exit(1)
Example #2
 def __init__(self, url: str, ca_file: str, cert_file: str, key_file: str):
     config = SslConfig(ca_file, cert_file,
                        key_file) if ca_file is not None else None
     self.url = url
     self.config = config
     self.client = KafkaClient(hosts=self.url, ssl_config=self.config)
     self.__producers = {}
Example #3
 def __init__(self, host, log):
     self.log = log
     try:
         self.client = KafkaClient(hosts=host)
     except Exception as e:
         self.log.error("producer failed to connect to kafka: %s\n%s" %
                        (e, traceback.format_exc()))
         exit(1)
Example #4
 def __init__(self, settings, stats):
     from pykafka.client import KafkaClient
     self.stats = stats
     self.settings = settings
     self.encoder = ScrapyJSONEncoder()
     self.kafka = KafkaClient(hosts=self.settings.get('KAFKA_HOST') + ":" +
                              str(self.settings.get('KAFKA_PORT')))
     self.producer = self.kafka.topics[
         self.settings['KAFKA_TOPIC']].get_sync_producer(
             min_queued_messages=1)
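
Example #4 stops at the constructor; a hedged sketch of the matching pipeline step follows, assuming the conventional Scrapy process_item signature (the method below is illustrative, not taken from the source):

 def process_item(self, item, spider):
     # serialize the scraped item; pykafka producers expect bytes
     payload = self.encoder.encode(item).encode("utf-8")
     self.producer.produce(payload)
     return item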
Example #5
def connect_to_kafka_client():
    kafka_client_address = os.getenv("KAFKA_ADVERTISED_HOST_NAME")
    kafka_port = os.getenv("KAFKA_PORT")
    client_address = kafka_client_address + ":" + kafka_port
    while True:
        time.sleep(5)
        try:
            client = KafkaClient(hosts=client_address)
            print("Connected to Kafka!")
            return client
        except NoBrokersAvailableError:
            print("Failed to connect to kafka")
Example #6
    def subscribe(self,
                  listener: BrokerSubscriber,
                  topic_name: str,
                  consumer_group=None):

        topic = self.client.topics[topic_name]

        while True:
            try:
                prvk8_logger.info(
                    f'Broker.subscribe: subscribing to {topic_name}@{self.url}'
                )
                consumer = topic.get_balanced_consumer(
                    consumer_group=consumer_group, managed=True)
                #                 consumer = topic.get_simple_consumer(consumer_group=consumer_group)
                for message in consumer:
                    if message is not None:
                        try:
                            obj = None
                            body = None
                            try:
                                obj = json.loads(message.value)
                            except ValueError:
                                # not valid JSON; fall back to the raw decoded body
                                body = message.value.decode()

                            # process the message
                            if listener.on_message(message.offset, obj, body):
                                # commit the offset only if the message was processed successfully
                                if consumer_group is not None:
                                    consumer.commit_offsets()

                        except Exception as e:
                            prvk8_logger.critical(
                                f"Broker.subscribe: unexpected error reading message: OFFSET: {message.offset} VALUE: {message.value}"
                            )
                            prvk8_logger.exception('', exc_info=e)

            except KafkaException:
                traceback.print_exc()
                prvk8_logger.warning(
                    f'Broker.subscribe: connection to {topic_name}@{self.url} lost... trying to reconnect in 5 seconds'
                )
                time.sleep(5)

                self.close()
                self.client = KafkaClient(hosts=self.url,
                                          ssl_config=self.config)
                topic = self.client.topics[topic_name]
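
The subscribe loop above commits offsets only after listener.on_message returns a truthy value, which gives at-least-once delivery. A minimal listener sketch; the BrokerSubscriber interface and its on_message contract are inferred from the call site, not from a published API:

class LoggingSubscriber(BrokerSubscriber):
    """Hypothetical listener: log each message and acknowledge it."""

    def on_message(self, offset, obj, body):
        # obj is the parsed JSON payload, body the raw decoded fallback
        print(f"offset={offset} payload={obj if obj is not None else body}")
        return True  # returning True lets subscribe() commit the offset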
Example #7
    def consume(self, max_consume_count: int):
        start_time = int(time.time())

        client = KafkaClient(hosts=self.kafka_address)
        reset_offset_on_start_status = False
        topic = client.topics[self.topic_name.encode()]
        partitions = topic.partitions

        if self.offset_type.lower() == 'earliest':
            start_offset = OffsetType.EARLIEST
        elif self.offset_type.lower() == 'latest':
            start_offset = OffsetType.LATEST
        else:
            offsets = topic.fetch_offset_limits(int(self.offset_type))
            if len(offsets[self.partition_id].offset) == 0:
                start_offset = OffsetType.LATEST
            else:
                start_offset = offsets[self.partition_id].offset[0]
                reset_offset_on_start_status = True

        self.consumer = topic.get_simple_consumer(
            consumer_group=self.group_id,
            partitions={partitions.get(self.partition_id)},
            consumer_timeout_ms=self.consumer_timeout_ms,
            auto_commit_enable=False,
            auto_offset_reset=start_offset,
            reset_offset_on_start=reset_offset_on_start_status,
        )

        try:
            while True:
                msg = self.consumer.consume()
                if msg:
                    self.msg_consumed_count += 1
                    self.sio.write(str(msg.value, encoding="utf-8"))
                    self.sio.write('\n')
                if msg is None:
                    logger.info("reached kafka consumer timeout, no more messages")
                    break
                if self.msg_consumed_count >= max_consume_count:
                    logger.info("reached max_consume_count [%d], will consume the rest next time", max_consume_count)
                    break

            logger.info("consumer success, consume msg num: [%d], cost time: [%ds]", self.msg_consumed_count, int(time.time()) - start_time)
        except ConsumerStoppedException as e:
            logger.info("consumer fail, cost time: [%ds], error msg:[%s]", int(time.time()) - int(start_time), e) 
        finally:
            # writing advances the file position; seek back to the start so the buffer can be read
            self.sio.seek(0)
Example #8
    def worker(self):
        local_path = '/tmp/local_file.txt'
        # local_path = os.getcwd() + '/local_file.txt'
        if os.path.exists(local_path):
            os.remove(local_path)
        os.mknod(local_path)
        f = open(local_path, 'w')
        max_to_cos_time = self.calculation_max_to_cos_time()
        start_time = int(time.time())
        logger.info("start time:%s", str(start_time))
        client = KafkaClient(hosts=self.kafka_address)
        msg_consumed_count = 0
        reset_offset_on_start_status = False
        topic = client.topics[self.topic_name.encode()]
        partitions = topic.partitions

        if self.offset_type.lower() == 'earliest':
            start_offset = OffsetType.EARLIEST
        elif self.offset_type.lower() == 'latest':
            start_offset = OffsetType.LATEST
        else:
            offsets = topic.fetch_offset_limits(int(self.offset_type))
            if len(offsets[self.partition_id].offset) == 0:
                start_offset = OffsetType.LATEST
            else:
                start_offset = offsets[self.partition_id].offset[0]
                reset_offset_on_start_status = True

        consumer = topic.get_simple_consumer(consumer_group=self.group_id,
                                             partitions={partitions.get(self.partition_id)},
                                             consumer_timeout_ms=self.consumer_timeout_ms,
                                             auto_commit_enable=False,
                                             auto_offset_reset=start_offset,
                                             reset_offset_on_start=reset_offset_on_start_status,
                                             )

        try:
            while True:
                msg = consumer.consume()
                if msg:
                    msg_consumed_count += 1
                    # msg.value is bytes; decode before writing to the text-mode file
                    f.write(msg.value.decode("utf-8"))
                    f.write("\n")
                if os.path.getsize(local_path) >= self.partition_max_to_cos_bytes:
                    logger.info("reached partition_max_to_cos_bytes, file length: %s",
                                str(os.path.getsize(local_path)))
                    status = self.upload_local_file(local_path)
                    if status is False:
                        print("partition_max_to_cos_bytes failed to cos, time: " + str(int(time.time())))
                        return "partition_max_to_cos_bytes failed to cos"
                    consumer.commit_offsets()
                    f.seek(0)
                    f.truncate()
                if int(time.time()) - start_time >= self.partition_max_timeout_ms / 1000 - max_to_cos_time:
                    logger.info("reached partition_max_timeout, cost time: %s",
                                str(int(time.time()) - start_time))
                    break
                if msg is None:
                    logger.info("reached kafka consumer timeout, cost_time: %s",
                                str(int(time.time()) - start_time))
                    break

            f.close()
            logger.info("consumer finished, cost time: %s", str(int(time.time()) - start_time))
            logger.info("msg num: %s", str(msg_consumed_count))
            if msg_consumed_count > 0:
                status = self.upload_local_file(local_path)
                if status is False:
                    logger.error("failed to cos  time: %s", str(int(time.time())))
                    return "failed to cos"
            consumer.commit_offsets()
            consumer.stop()
            self.delete_local_file(local_path)
            logger.info("end time:%s", str(int(time.time())))
            return "success"
        except ConsumerStoppedException as err:
            logger.error("error:", str(err))
            logger.error("KafkaError failed consumer cost time: %s", str(int(time.time()) - int(start_time)))
            return "failed"
Example #9
# coding:utf-8

from pykafka.client import KafkaClient
import logging
from pykafka.protocol import PartitionOffsetFetchRequest

logging.basicConfig(level=logging.INFO)

offset_check_logger = logging.getLogger('offset_check')

client = KafkaClient('localhost:8990,localhost:8991,localhost:8992')

nmq = client.topics['nmq']

offsets = nmq.latest_available_offsets()

offset_check_logger.info('Total available offsets per partition:')

for partition, item in offsets.items():
    offset_check_logger.info('[partition={} offset={}]'.format(partition, item.offset[0]))
    
partitions = offsets.keys()
    
offset_check_logger.info('Committed consumer offsets per partition:')

offset_manager = client.cluster.get_offset_manager('balance-consumer')

requests = [PartitionOffsetFetchRequest(topic_name='nmq', partition_id=part_id) for part_id in partitions]

response = offset_manager.fetch_consumer_group_offsets('balance-consumer', requests)
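
The snippet stops after fetching the committed offsets and never reads the response. A short continuation sketch, assuming the OffsetFetchResponse exposes a topics mapping of topic name to per-partition responses (verify the exact attribute layout against your pykafka version):

for part_id, part_response in response.topics[b'nmq'].items():
    offset_check_logger.info('[partition={} committed_offset={}]'.format(
        part_id, part_response.offset))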
Example #10
from pykafka.client import KafkaClient
from pykafka.common import OffsetType

import json
import logging
import urllib
import threading
import sys
import SimpleHTTPServer  # Python 2 module; http.server in Python 3
from kazoo.client import KazooClient
from time import sleep
from _socket import gethostname

logging.basicConfig(level=logging.INFO)

consumer_logger = logging.getLogger('consumer')

# 2. Connect to the Kafka cluster
client = KafkaClient('49.4.90.247:6667')

nmq = client.topics['nmq']

consumer = nmq.get_balanced_consumer(
    'balance-consumer',
    zookeeper_connect='localhost:3000,localhost:3001,localhost:3002/kafka',
    auto_offset_reset=OffsetType.LATEST,
    auto_commit_enable=True,
    num_consumer_fetchers=3)


# 3. Start the HTTP service
def httpd_main(consumer):
    class ResetOffsetRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
        def __init__(self, request, client_addr, server):
Example #11
    def worker(self):
        local_path = '/tmp/local_file.txt'
        # local_path = os.getcwd() + '/local_file.txt'
        if os.path.exists(local_path):
            os.remove(local_path)
        os.mknod(local_path)
        f = open(local_path, 'w')
        max_to_cos_time = self.calculation_max_to_cos_time()
        start_time = int(time.time())
        logger.info("start time:%s", str(start_time))
        client = KafkaClient(hosts=self.kafka_address)
        msg_consumed_count = 0
        reset_offset_on_start_status = False
        topic = client.topics[self.topic_name.encode()]
        partitions = topic.partitions

        if self.offset_type.lower() == 'earliest':
            start_offset = OffsetType.EARLIEST
        elif self.offset_type.lower() == 'latest':
            start_offset = OffsetType.LATEST
        else:
            # use the kafka-python library here because pykafka's fetch_offset_limits
            # cannot return the correct offset for a given timestamp
            start_offset = OffsetType.LATEST
            consumer = KafkaConsumer(self.topic_name,
                                     group_id=self.group_id,
                                     bootstrap_servers=[self.kafka_address])
            tp = TopicPartition(self.topic_name, self.partition_id)
            offsets = consumer.offsets_for_times({tp: int(self.offset_type)})
            if offsets[tp]:
                if offsets[tp].offset == 0:
                    start_offset = OffsetType.EARLIEST
                else:
                    # note: _coordinator is a private kafka-python API
                    committed = consumer._coordinator.fetch_committed_offsets([tp])
                    if not committed or (committed[tp] and committed[tp].offset
                                         < offsets[tp].offset):
                        start_offset = offsets[tp].offset - 1
                        reset_offset_on_start_status = True

        logger.info("consumer start offset on partition {} is {}".format(
            self.partition_id, start_offset))

        consumer = topic.get_simple_consumer(
            consumer_group=self.group_id,
            partitions={partitions.get(self.partition_id)},
            consumer_timeout_ms=self.consumer_timeout_ms,
            auto_commit_enable=False,
            auto_offset_reset=start_offset,
            reset_offset_on_start=reset_offset_on_start_status,
        )

        try:
            while True:
                msg = consumer.consume()
                if msg:
                    msg_consumed_count += 1
                    # msg.value is bytes; decode before writing to the text-mode file
                    f.write(msg.value.decode("utf-8"))
                    f.write("\n")
                if os.path.getsize(local_path) >= self.partition_max_to_cos_bytes:
                    logger.info(
                        "reached partition_max_to_cos_bytes, file length: %s",
                        str(os.path.getsize(local_path)))
                    status = self.upload_local_file(local_path)
                    if status is False:
                        print("partition_max_to_cos_bytes failed to cos, time: " +
                              str(int(time.time())))
                        return "partition_max_to_cos_bytes failed to cos"
                    consumer.commit_offsets()
                    f.seek(0)
                    f.truncate()
                if int(time.time()) - start_time >= self.partition_max_timeout_ms / 1000 - max_to_cos_time:
                    logger.info(
                        "reached partition_max_timeout, cost time: %s",
                        str(int(time.time()) - start_time))
                    break
                if msg is None:
                    logger.info(
                        "reached kafka consumer timeout, cost_time: %s",
                        str(int(time.time()) - start_time))
                    break

            f.close()
            logger.info("consumer finished, cost time: %s",
                        str(int(time.time()) - start_time))
            logger.info("msg num: %s", str(msg_consumed_count))
            if msg_consumed_count > 0:
                status = self.upload_local_file(local_path)
                if status is False:
                    logger.error("failed to cos  time: %s",
                                 str(int(time.time())))
                    return "failed to cos"
            consumer.commit_offsets()
            consumer.stop()
            self.delete_local_file(local_path)
            logger.info("end time:%s", str(int(time.time())))
            return "success"
        except ConsumerStoppedException as err:
            logger.error("error:", str(err))
            logger.error("KafkaError failed consumer cost time: %s",
                         str(int(time.time()) - int(start_time)))
            return "failed"
Example #12
 def client(self) -> KafkaClient:
     if self._client is None:
         self._client = KafkaClient(hosts=self._config.kafka_hosts,
                                    ssl_config=self.ssl_config)
     return self._client
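
Example #12 reads like the body of a lazily cached property. A minimal sketch of the surrounding class under that assumption; the KafkaConnection name and the config attributes are hypothetical, and the SslConfig wiring mirrors Example #2:

from pykafka import KafkaClient
from pykafka.connection import SslConfig


class KafkaConnection:
    """Hypothetical wrapper illustrating the lazy-client pattern above."""

    def __init__(self, config):
        self._config = config  # assumed to expose kafka_hosts, ca_file, cert_file, key_file
        self._client = None

    @property
    def ssl_config(self):
        # build an SslConfig only when a CA file is configured, as in Example #2
        if self._config.ca_file is None:
            return None
        return SslConfig(self._config.ca_file,
                         certfile=self._config.cert_file,
                         keyfile=self._config.key_file)

    @property
    def client(self) -> KafkaClient:
        # connect on first use and cache the client afterwards
        if self._client is None:
            self._client = KafkaClient(hosts=self._config.kafka_hosts,
                                       ssl_config=self.ssl_config)
        return self._client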
Example #13
 def __init__(self):
     self.client = KafkaClient(KAFKA_CONFIG["hosts"])
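
Most examples above only construct the client. A minimal end-to-end sketch for reference, assuming a broker on 127.0.0.1:9092 and a topic named test (both placeholders):

from pykafka import KafkaClient

client = KafkaClient(hosts="127.0.0.1:9092")  # placeholder broker address
topic = client.topics[b"test"]                # placeholder topic name

# pykafka producers and consumers deal in bytes
with topic.get_sync_producer() as producer:
    producer.produce(b"hello kafka")

# with consumer_timeout_ms set, iteration stops once no message arrives in time
consumer = topic.get_simple_consumer(consumer_timeout_ms=5000)
for message in consumer:
    if message is not None:
        print(message.offset, message.value.decode("utf-8"))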