def __init__(self, kafka_host, zk_host, log):
    self.log = log
    try:
        self.client = KafkaClient(hosts=kafka_host, zookeeper_hosts=zk_host)
    except Exception as e:
        self.log.error("consumer connect kafka failed:%s\n%s" % (str(e), traceback.format_exc()))
        exit(1)
def __init__(self, url: str, ca_file: str, cert_file: str, key_file: str):
    config = SslConfig(ca_file, cert_file, key_file) if ca_file is not None else None
    self.url = url
    self.config = config
    self.client = KafkaClient(hosts=self.url, ssl_config=self.config)
    self.__producers = {}
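# Hedged sketch: the SslConfig built above maps onto pykafka's documented SSL setup
# (see the pykafka README). The certificate paths and the broker address below are
# placeholders, not values from the original code.
from pykafka import KafkaClient, SslConfig

ssl_config = SslConfig(cafile='/your/ca.cert',
                       certfile='/your/client.cert',   # optional
                       keyfile='/your/client.key')     # optional
client = KafkaClient(hosts='127.0.0.1:9093', ssl_config=ssl_config)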
def __init__(self, host, log):
    self.log = log
    try:
        self.client = KafkaClient(hosts=host)
    except Exception as e:
        self.log.error("producer connect kafka failed:%s\n%s" % (str(e), traceback.format_exc()))
        exit(1)
def __init__(self, settings, stats):
    from pykafka.client import KafkaClient
    self.stats = stats
    self.settings = settings
    self.encoder = ScrapyJSONEncoder()
    self.kafka = KafkaClient(
        hosts=self.settings.get('KAFKA_HOST') + ":" + str(self.settings.get('KAFKA_PORT')))
    self.producer = self.kafka.topics[
        self.settings['KAFKA_TOPIC']].get_sync_producer(min_queued_messages=1)
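# Hedged sketch of the Scrapy settings this pipeline expects (e.g. in settings.py).
# Only the setting names come from the code above; the values are placeholders.
KAFKA_HOST = 'localhost'
KAFKA_PORT = 9092
KAFKA_TOPIC = 'scraped_items'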
def connect_to_kafka_client():
    kafka_client_address = os.getenv("KAFKA_ADVERTISED_HOST_NAME")
    kafka_port = os.getenv("KAFKA_PORT")
    client_address = kafka_client_address + ":" + kafka_port
    while True:
        time.sleep(5)
        try:
            client = KafkaClient(hosts=client_address)
            print("Connected to Kafka!")
            return client
        except NoBrokersAvailableError:
            print("Failed to connect to kafka")
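# Hedged usage sketch: once connect_to_kafka_client() returns, the client can be used
# as in the pykafka quickstart. The topic name and message are placeholders.
client = connect_to_kafka_client()
topic = client.topics['test']
with topic.get_sync_producer() as producer:
    producer.produce(b'hello from the retry loop')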
def subscribe(self, listener: BrokerSubscriber, topic_name: str, consumer_group=None):
    topic = self.client.topics[topic_name]
    while True:
        try:
            prvk8_logger.info(
                f'Broker.subscribe: subscribing to {topic_name}@{self.url}')
            consumer = topic.get_balanced_consumer(
                consumer_group=consumer_group, managed=True)
            # consumer = topic.get_simple_consumer(consumer_group=consumer_group)
            for message in consumer:
                if message is not None:
                    try:
                        obj = None
                        body = None
                        try:
                            obj = json.loads(message.value)
                        except ValueError:
                            body = message.value.decode()
                        # process the message
                        if listener.on_message(message.offset, obj, body):
                            # prvk8_logger.debug(f'Broker.subscribe: dispatching message {body}')
                            # commit the offset only if the message was processed successfully
                            if consumer_group is not None:
                                consumer.commit_offsets()
                    except Exception as e:
                        prvk8_logger.critical(
                            f"Broker.subscribe: unexpected error reading message: "
                            f"OFFSET: {message.offset} VALUE: {message.value}")
                        prvk8_logger.exception('', exc_info=e)
        except KafkaException:
            traceback.print_exc()
            prvk8_logger.warning(
                f'Broker.subscribe: connection to {topic_name}@{self.url} lost... '
                f'trying to reconnect in 5 seconds')
            time.sleep(5)
            self.close()
            self.client = KafkaClient(hosts=self.url, ssl_config=self.config)
            topic = self.client.topics[topic_name]
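# Hedged sketch of a listener compatible with the subscribe loop above: on_message
# receives the offset, the JSON-decoded object (or None) and the raw text body
# (or None), and returns True so the loop commits the offset. The class name is an
# assumption; only the BrokerSubscriber interface is inferred from the call site.
class PrintingSubscriber(BrokerSubscriber):
    def on_message(self, offset, obj, body):
        print(f'offset={offset} obj={obj!r} body={body!r}')
        return True  # commit only after successful handling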
def consume(self, max_consume_count: int):
    start_time = int(time.time())
    client = KafkaClient(hosts=self.kafka_address)
    reset_offset_on_start_status = False
    topic = client.topics[self.topic_name.encode()]
    partitions = topic.partitions
    if self.offset_type.lower() == 'earliest':
        start_offset = OffsetType.EARLIEST
    elif self.offset_type.lower() == 'latest':
        start_offset = OffsetType.LATEST
    else:
        offsets = topic.fetch_offset_limits(int(self.offset_type))
        if len(offsets[self.partition_id].offset) == 0:
            start_offset = OffsetType.LATEST
        else:
            start_offset = offsets[self.partition_id].offset[0]
            reset_offset_on_start_status = True
    self.consumer = topic.get_simple_consumer(
        consumer_group=self.group_id,
        partitions={partitions.get(self.partition_id)},
        consumer_timeout_ms=self.consumer_timeout_ms,
        auto_commit_enable=False,
        auto_offset_reset=start_offset,
        reset_offset_on_start=reset_offset_on_start_status,
    )
    try:
        while True:
            msg = self.consumer.consume()
            if msg:
                self.msg_consumed_count += 1
                self.sio.write(str(msg.value, encoding="utf-8"))
                self.sio.write('\n')
            if msg is None:
                logger.info("already reach kafka consumer timeout, should be no msg")
                break
            if self.msg_consumed_count >= max_consume_count:
                logger.info("already reach max_consume_count:[%d], consume next time",
                            max_consume_count)
                break
        logger.info("consumer success, consume msg num: [%d], cost time: [%ds]",
                    self.msg_consumed_count, int(time.time()) - start_time)
    except ConsumerStoppedException as e:
        logger.info("consumer fail, cost time: [%ds], error msg:[%s]",
                    int(time.time()) - int(start_time), e)
    finally:
        # file writes advance the stream position; rewind to 0 so the data can be read back
        self.sio.seek(0)
def worker(self):
    local_path = '/tmp/local_file.txt'
    # local_path = os.getcwd() + '/local_file.txt'
    if os.path.exists(local_path):
        os.remove(local_path)
    os.mknod(local_path)
    f = open(local_path, 'w')
    max_to_cos_time = self.calculation_max_to_cos_time()
    start_time = int(time.time())
    logger.info("start time:%s", str(start_time))
    client = KafkaClient(hosts=self.kafka_address)
    msg_consumed_count = 0
    reset_offset_on_start_status = False
    topic = client.topics[self.topic_name.encode()]
    partitions = topic.partitions
    if self.offset_type.lower() == 'earliest':
        start_offset = OffsetType.EARLIEST
    elif self.offset_type.lower() == 'latest':
        start_offset = OffsetType.LATEST
    else:
        offsets = topic.fetch_offset_limits(int(self.offset_type))
        if len(offsets[self.partition_id].offset) == 0:
            start_offset = OffsetType.LATEST
        else:
            start_offset = offsets[self.partition_id].offset[0]
            reset_offset_on_start_status = True
    consumer = topic.get_simple_consumer(
        consumer_group=self.group_id,
        partitions={partitions.get(self.partition_id)},
        consumer_timeout_ms=self.consumer_timeout_ms,
        auto_commit_enable=False,
        auto_offset_reset=start_offset,
        reset_offset_on_start=reset_offset_on_start_status,
    )
    try:
        while True:
            msg = consumer.consume()
            if msg:
                msg_consumed_count += 1
                f.write(msg.value)
                f.write("\n")
            if os.path.getsize(local_path) >= self.partition_max_to_cos_bytes:
                logger.info("already reach partition_max_to_cos_bytes, file length: %s",
                            str(os.path.getsize(local_path)))
                status = self.upload_local_file(local_path)
                if status is False:
                    print("partition_max_to_cos_bytes failed to cos time:" + str(int(time.time())))
                    return "partition_max_to_cos_bytes failed to cos"
                consumer.commit_offsets()
                f.seek(0)
                f.truncate()
            if int(time.time()) - start_time >= self.partition_max_timeout_ms / 1000 - max_to_cos_time:
                logger.info("already reach partition_max_timeout, cost time: %s",
                            str(int(time.time()) - start_time))
                break
            if msg is None:
                logger.info("already reach kafka consumer timeout, cost_time: %s",
                            str(int(time.time()) - start_time))
                break
        f.close()
        logger.info("consumer finished, cost time: %s", str(int(time.time()) - start_time))
        logger.info("msg num: %s", str(msg_consumed_count))
        if msg_consumed_count > 0:
            status = self.upload_local_file(local_path)
            if status is False:
                logger.error("failed to cos time: %s", str(int(time.time())))
                return "failed to cos"
        consumer.commit_offsets()
        consumer.stop()
        self.delete_local_file(local_path)
        logger.info("end time:%s", str(int(time.time())))
        return "success"
    except ConsumerStoppedException as err:
        logger.error("error: %s", str(err))
        logger.error("KafkaError failed consumer cost time: %s",
                     str(int(time.time()) - int(start_time)))
        return "failed"
# coding:utf-8
import logging

from pykafka.client import KafkaClient
from pykafka.protocol import PartitionOffsetFetchRequest

logging.basicConfig(level=logging.INFO)
offset_check_logger = logging.getLogger('offset_check')

client = KafkaClient('localhost:8990,localhost:8991,localhost:8992')
nmq = client.topics['nmq']
offsets = nmq.latest_available_offsets()
offset_check_logger.info('Latest available offsets per partition:')
for partition, item in offsets.items():
    offset_check_logger.info('[partition={} offset={}]'.format(partition, item.offset[0]))
partitions = offsets.keys()

offset_check_logger.info('Committed consumer offsets per partition:')
offset_manager = client.cluster.get_offset_manager('balance-consumer')
requests = [PartitionOffsetFetchRequest(topic_name='nmq', partition_id=part_id)
            for part_id in partitions]
response = offset_manager.fetch_consumer_group_offsets('balance-consumer', requests)
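# Hedged follow-up sketch: reading the committed offsets out of `response` and logging
# a rough per-partition lag. The response layout (a `topics` dict mapping partition id
# to a namedtuple with an `offset` field) is an assumption based on
# pykafka.protocol.OffsetFetchResponse; verify it against your pykafka version.
for part_id, latest in offsets.items():
    committed = response.topics['nmq'][part_id].offset
    lag = latest.offset[0] - committed if committed >= 0 else None
    offset_check_logger.info('[partition={} committed={} lag={}]'.format(part_id, committed, lag))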
import json
import logging
import sys
import threading
import urllib
import SimpleHTTPServer
from _socket import gethostname
from time import sleep

from kazoo.client import KazooClient
from pykafka.client import KafkaClient
from pykafka.common import OffsetType

logging.basicConfig(level=logging.INFO)
consumer_logger = logging.getLogger('consumer')

# 2. Connect to the Kafka cluster
client = KafkaClient('49.4.90.247:6667')
nmq = client.topics['nmq']
consumer = nmq.get_balanced_consumer(
    'balance-consumer',
    zookeeper_connect='localhost:3000,localhost:3001,localhost:3002/kafka',
    auto_offset_reset=OffsetType.LATEST,
    auto_commit_enable=True,
    num_consumer_fetchers=3)


# 3. Start the HTTP server
def httpd_main(consumer):
    class ResetOffsetRequestHandler(SimpleHTTPServer.SimpleHTTPRequestHandler):
        def __init__(self, request, client_addr, server):
def worker(self):
    local_path = '/tmp/local_file.txt'
    # local_path = os.getcwd() + '/local_file.txt'
    if os.path.exists(local_path):
        os.remove(local_path)
    os.mknod(local_path)
    f = open(local_path, 'w')
    max_to_cos_time = self.calculation_max_to_cos_time()
    start_time = int(time.time())
    logger.info("start time:%s", str(start_time))
    client = KafkaClient(hosts=self.kafka_address)
    msg_consumed_count = 0
    reset_offset_on_start_status = False
    topic = client.topics[self.topic_name.encode()]
    partitions = topic.partitions
    if self.offset_type.lower() == 'earliest':
        start_offset = OffsetType.EARLIEST
    elif self.offset_type.lower() == 'latest':
        start_offset = OffsetType.LATEST
    else:
        # Use the kafka-python library to work around pykafka's fetch_offset_limits
        # not returning the correct offset for a given timestamp
        start_offset = OffsetType.LATEST
        consumer = KafkaConsumer(self.topic_name,
                                 group_id=self.group_id,
                                 bootstrap_servers=[self.kafka_address])
        tp = TopicPartition(self.topic_name, self.partition_id)
        offsets = consumer.offsets_for_times({tp: int(self.offset_type)})
        if offsets[tp]:
            if offsets[tp].offset == 0:
                start_offset = OffsetType.EARLIEST
            else:
                committed = consumer._coordinator.fetch_committed_offsets([tp])
                if not committed or (committed[tp] and committed[tp].offset < offsets[tp].offset):
                    start_offset = offsets[tp].offset - 1
                    reset_offset_on_start_status = True
    logger.info("consumer start offset on partition {} is {}".format(
        self.partition_id, start_offset))
    consumer = topic.get_simple_consumer(
        consumer_group=self.group_id,
        partitions={partitions.get(self.partition_id)},
        consumer_timeout_ms=self.consumer_timeout_ms,
        auto_commit_enable=False,
        auto_offset_reset=start_offset,
        reset_offset_on_start=reset_offset_on_start_status,
    )
    try:
        while True:
            msg = consumer.consume()
            if msg:
                msg_consumed_count += 1
                f.write(msg.value)
                f.write("\n")
            if os.path.getsize(local_path) >= self.partition_max_to_cos_bytes:
                logger.info("already reach partition_max_to_cos_bytes, file length: %s",
                            str(os.path.getsize(local_path)))
                status = self.upload_local_file(local_path)
                if status is False:
                    print("partition_max_to_cos_bytes failed to cos time:" + str(int(time.time())))
                    return "partition_max_to_cos_bytes failed to cos"
                consumer.commit_offsets()
                f.seek(0)
                f.truncate()
            if int(time.time()) - start_time >= self.partition_max_timeout_ms / 1000 - max_to_cos_time:
                logger.info("already reach partition_max_timeout, cost time: %s",
                            str(int(time.time()) - start_time))
                break
            if msg is None:
                logger.info("already reach kafka consumer timeout, cost_time: %s",
                            str(int(time.time()) - start_time))
                break
        f.close()
        logger.info("consumer finished, cost time: %s", str(int(time.time()) - start_time))
        logger.info("msg num: %s", str(msg_consumed_count))
        if msg_consumed_count > 0:
            status = self.upload_local_file(local_path)
            if status is False:
                logger.error("failed to cos time: %s", str(int(time.time())))
                return "failed to cos"
        consumer.commit_offsets()
        consumer.stop()
        self.delete_local_file(local_path)
        logger.info("end time:%s", str(int(time.time())))
        return "success"
    except ConsumerStoppedException as err:
        logger.error("error: %s", str(err))
        logger.error("KafkaError failed consumer cost time: %s",
                     str(int(time.time()) - int(start_time)))
        return "failed"
def client(self) -> KafkaClient:
    if self._client is None:
        self._client = KafkaClient(hosts=self._config.kafka_hosts, ssl_config=self.ssl_config)
    return self._client
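# Hedged sketch of the lazy-initialization pattern the property above appears to come
# from: the client is created once, on first access, from a config object. Everything
# except the property body is an assumption (class and attribute names are invented).
from pykafka import KafkaClient

class KafkaGateway:
    def __init__(self, config):
        self._config = config        # expected to expose .kafka_hosts
        self._client = None
        self.ssl_config = None       # or a pykafka SslConfig instance

    @property
    def client(self) -> KafkaClient:
        if self._client is None:
            self._client = KafkaClient(hosts=self._config.kafka_hosts,
                                       ssl_config=self.ssl_config)
        return self._client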
def __init__(self):
    self.client = KafkaClient(KAFKA_CONFIG["hosts"])
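# Hedged sketch: the shape of KAFKA_CONFIG assumed by the constructor above is just a
# mapping with a "hosts" connection string; the broker addresses are placeholders.
KAFKA_CONFIG = {
    "hosts": "127.0.0.1:9092,127.0.0.1:9093",
}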