Example n. 1
def main():
    consumer = KafkaConsumer(bootstrap_servers=[
        'localhost:9092', 'localhost:9093', 'localhost:9094'
    ],
                             auto_offset_reset='earliest',
                             client_id='test_fetcher',
                             consumer_timeout_ms=1000)

    consumer.subscribe(['Person'])

    print(consumer.topics())

    print(consumer.assignment())
    print(consumer.end_offsets(consumer.assignment()))

    for msg in consumer:
        print(msg.topic)
        print(msg.partition)
        print(msg.offset)
        print(msg.key)
        print(msg.value)

    #resp = consumer.poll(max_records=1000)

    #print(resp)

    # for message in consumer:
    #     print(message)
    #print(str(int.from_bytes(message, byteorder='little')))

    consumer.close()
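Note that in this example assignment() is queried immediately after subscribe(), so it typically returns an empty set: kafka-python only assigns partitions once the consumer actually starts fetching. A minimal sketch (same localhost broker and 'Person' topic as above; the group id is a made-up placeholder) showing how a first poll() populates the assignment:

from kafka import KafkaConsumer

consumer = KafkaConsumer(bootstrap_servers=['localhost:9092'],
                         group_id='assignment-demo',    # placeholder group id
                         auto_offset_reset='earliest')
consumer.subscribe(['Person'])

print(consumer.assignment())    # usually set(): no partitions assigned yet

consumer.poll(timeout_ms=1000)  # triggers the group join / partition assignment
print(consumer.assignment())    # e.g. {TopicPartition(topic='Person', partition=0)}

consumer.close()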
Example n. 2
def create_consumer(args, policy):
    """
    Create a high-level Kafka message consumer using the kafka-python package.
    The consumer iterator returns ConsumerRecords, which expose basic message
    attributes: topic, partition, offset, key, and value.

    :param args: Input arguments
    :param policy: Object to store Network Policy for processing
    :return: KafkaConsumer object, messages from the message bus for processing
    """
    consumer = KafkaConsumer(args.get('topic'),
                             api_version=API_VERSION,
                             bootstrap_servers=args.get('broker'),
                             client_id=CLIENT_ID,                       # name passed to servers for identification
                             auto_offset_reset=args.get('start_at'),    # consume earliest or latest available msgs
                             enable_auto_commit=AUTOCOMMIT,             # autocommit offsets?
                             consumer_timeout_ms=args.get('timeout'),   # raise StopIteration if no message arrives within this many ms
                             security_protocol=SSL,
                             ssl_context=create_ssl_context(args)
                             )

    # Returned values are of type Set
    msg = ["All the topics available :{}".format(consumer.topics()),
           "Subscription:{}".format(consumer.subscription()),
           "Partitions for topic:{}".format(consumer.partitions_for_topic(args.get('topic'))),
           "TopicPartitions:{}".format(consumer.assignment())
           ]
    policy.add_fact('consumer_debug', msg)
    # Offsets are type Int
    policy.add_fact('beginning_offsets', str(consumer.beginning_offsets(consumer.assignment())))
    policy.add_fact('end_offsets', str(consumer.end_offsets(consumer.assignment())))

    policy.start_at_offset = args.get('start_at_offset')
    policy.add_fact('start_at_offset', policy.start_at_offset)
    return consumer
Example n. 3
def consume(self):
    consumer = KafkaConsumer(self.topic,
                             bootstrap_servers=self.bootstrap_servers)
    print(consumer.partitions_for_topic(self.topic))  # partition info for the topic
    print(consumer.topics())  # list of all topics on the broker
    print(consumer.subscription())  # topics this consumer is subscribed to
    print(consumer.assignment())  # topic/partition assignments of this consumer
    print(consumer.beginning_offsets(consumer.assignment()))  # earliest offsets for the assigned partitions
    consumer.seek(TopicPartition(topic=self.topic, partition=0),
                  1)  # reset the offset: start consuming from offset 1
    for message in consumer:
        print("%s:%d:%d: key=%s value=%s" %
              (message.topic, message.partition, message.offset, message.key,
               message.value))
Example n. 4
def test():
    from kafka import KafkaConsumer

    consumer = KafkaConsumer(
        TOPIC,
        bootstrap_servers="localhost:9092",
        auto_offset_reset="earliest",
        enable_auto_commit=True,
    )
    consumer.assignment()
    print(get_record_by_offset(consumer, TOPIC, 0))
    print(get_record_by_offset(consumer, TOPIC, 1))
    print(get_record_by_offset(consumer, TOPIC, 0))
    print(get_record_by_offset(consumer, TOPIC, -1))
    def consume(self):
        consumer = KafkaConsumer(self.topic,
                                 bootstrap_servers=self.bootstrap_servers)
        print(consumer.partitions_for_topic(self.topic))
        print(consumer.topics())
        print(consumer.subscription())
        print(consumer.assignment())
        print(consumer.beginning_offsets(consumer.assignment()))

        consumer.seek(TopicPartition(topic=self.topic, partition=0), 1)
        for message in consumer:
            print("%s:%d:%d: key=%s value=%s" %
                  (message.topic, message.partition, message.offset,
                   message.key, message.value))
Example n. 6
def main():
    consumer = KafkaConsumer('topic_test_cluster',
                             bootstrap_servers=['master:9092'])

    print consumer.partitions_for_topic('topic_test_cluster')
    print consumer.topics()
    print consumer.subscription()
    print consumer.assignment()
    print consumer.beginning_offsets(consumer.assignment())

    # read data from partition 2 starting at offset 5
    consumer.seek(TopicPartition(topic=u'topic_test_cluster', partition=2), 5)

    for msg in consumer:
        print('%s:%d:%d: key=%s value=%s' %
              (msg.topic, msg.partition, msg.offset, msg.key, msg.value))
Example n. 7
def main():
    consumer = KafkaConsumer(bootstrap_servers=["worker2.hengan.shop:9092"],group_id='me',
                             auto_offset_reset = 'earliest',
                             #value_deserializer = lambda m: json.loads(m.decode('utf-8')),
                             enable_auto_commit=False)
    #consumer.assign([TopicPartition('foobar2',0)])
    consumer.subscribe(['foobar2'])
    #consumer.seek(TopicPartition('foobar2',0),100)
    
    print(consumer.topics())
    print(consumer.subscription())
    ret = consumer.poll()
    print(ret)
    print(consumer.assignment())
    #consumer.seek_to_beginning()
    try:
        for message in consumer:
            print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,message.offset, message.key,message.value))
            print(consumer.partitions_for_topic('foobar2'))
            print("offset is %d" % message.offset)
            tp1 = TopicPartition(topic="foobar2",partition=0)
            om = OffsetAndMetadata(offset=message.offset+1,metadata=1)
            consumer.commit({tp1:om})
            break
    except KeyboardInterrupt:
        sys.exit()    
Example n. 8
class XactionConsumer:
    def __init__(self, partition_id):
        self.consumer = KafkaConsumer(bootstrap_servers=['localhost:9092'],
                                      value_deserializer=lambda m: loads(m.decode('ascii')))
        partitions = self.consumer.partitions_for_topic('bank-customer-new')
        print(partitions)
        # partition = TopicPartition('bank-customer-new', partition_id)
        partition = TopicPartition('bank-customer-new', partition_id)
        print(partition)
        self.ledger = {}
        self.custBalances = {}
        self.consumer.assign([partition])

    def handleMessages(self):
        print(self.consumer.assignment())
        for msg in self.consumer:
            message = msg.value
            print('{} received'.format(message))
            if message['custid'] not in self.custBalances:
                self.custBalances[message['custid']] = 0
            if message['type'] == 'dep':
                self.custBalances[message['custid']] += message['amt']
            else:
                self.custBalances[message['custid']] -= message['amt']
            print(self.custBalances)
def all_messages_consumed(consumer: KafkaConsumer):
    partitions = list(consumer.assignment())
    if len(partitions) < 1:
        return False
    last_commit = consumer.committed(partitions[0])
    last_offset = consumer.end_offsets(partitions=partitions)
    last_offset = list(last_offset.values())[0]
    return last_commit == last_offset
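all_messages_consumed() above compares the committed offset of the first assigned partition with its end offset. A hedged sketch of how it might drive a poll loop; the broker, topic and group id are placeholders, and with enable_auto_commit=False the committed offset only advances on the explicit commit():

from kafka import KafkaConsumer

consumer = KafkaConsumer('bank-customer-new',
                         bootstrap_servers=['localhost:9092'],
                         group_id='balance-checker',
                         enable_auto_commit=False)

while not all_messages_consumed(consumer):
    records = consumer.poll(timeout_ms=500)
    for tp, batch in records.items():
        for record in batch:
            print(record.partition, record.offset, record.value)
    consumer.commit()   # advance the committed offsets checked by all_messages_consumed()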
Example n. 10
def offset_manage_manually_consume():
    """
    手动设置offset
    :return:
    """
    consumer = KafkaConsumer(TOPIC, bootstrap_servers=BOOTSTRAP_SERVERS)
    print(consumer.partitions_for_topic(TOPIC))  # 获取topic的分区信息
    print(consumer.topics())  # 获取topic列表  当前kafka server有哪些topic
    print(consumer.subscription())  # 获取当前消费者订阅的topic
    print(consumer.assignment())  # 获取当前消费者topic、分区信息
    print(consumer.beginning_offsets(consumer.assignment()))  # 获取当前消费者可消费的偏移量
    print(consumer.assignment())  # 获取当前消费者可消费的偏移量
    consumer.seek(TopicPartition(topic=u'%s' % TOPIC, partition=0),
                  235000)  # 重置偏移量,从第235000个偏移量消费
    for message in consumer:
        print("%s:%d:%d: key=%s value=%s" %
              (message.topic, message.partition, message.offset, message.key,
               message.value))
Example n. 11
 def doTest(self):
     print(self.className + " - " +
           pyUtils.getCurrentRunningFunctionName() + "------------------")
     _topicName = "pro_bilog"
     local_url = ['hostname:port']
     real_url = ['ip:port']
     _consumer = KafkaConsumer(_topicName,
                               bootstrap_servers=local_url,
                               group_id='test',
                               request_timeout_ms=3000,
                               session_timeout_ms=5000)
     # partition info for the topic
     print(_consumer.partitions_for_topic(_topicName))
     # list of all topics on the broker
     print(_consumer.topics())
     # topics this consumer is subscribed to
     print(_consumer.subscription())
     # topic/partition assignments of this consumer
     print(_consumer.assignment())
     # earliest offsets for the assigned partitions
     print(_consumer.beginning_offsets(_consumer.assignment()))
Example n. 12
def test_kafka_consumer(i):
    consumer = KafkaConsumer('kkll3',
                             bootstrap_servers='localhost:9092',
                             auto_offset_reset='earliest',
                             group_id='123',
                             partition_assignment_strategy=[RoundRobinPartitionAssignor])
# consumer2 = KafkaConsumer('kkll3', bootstrap_servers='localhost:9092',auto_offset_reset='earliest',group_id='123', partition_assignment_strategy=[RoundRobinPartitionAssignor])
    time.sleep(3)
    for msg in consumer:
        recv = "%s:%d:%d: key=%s value=%s" % (msg.topic, msg.partition, msg.offset, msg.key, msg.value)
        consumer.commit_async()
        print(recv)
        ps = consumer.assignment()
        print(ps)
        print(i)
        time.sleep(5)
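The RoundRobinPartitionAssignor used above is not imported in the snippet; in kafka-python it lives under kafka.coordinator.assignors.roundrobin (to the best of my knowledge), so a more complete construction would look roughly like this:

from kafka import KafkaConsumer
from kafka.coordinator.assignors.roundrobin import RoundRobinPartitionAssignor

consumer = KafkaConsumer(
    'kkll3',
    bootstrap_servers='localhost:9092',
    auto_offset_reset='earliest',
    group_id='123',
    # spread partitions evenly across the members of group '123'
    partition_assignment_strategy=[RoundRobinPartitionAssignor],
)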
Example n. 13
class TestMetricsProducer(unittest.TestCase):
    def setUp(self):
        self.settings = Settings()
        self.test_consumer = KafkaConsumer(
            self.settings.metrics_topic,
            bootstrap_servers=self.settings.kafka_addr,
            client_id="test-metrics-consumer-1",
            group_id="test-metrics-group",
        )
        # make sure the consumer has partition assignments
        self.test_consumer.poll()
        # and all messages have been consumed
        self.test_consumer.poll()
        self.test_consumer.commit()

    def tearDown(self):
        self.test_consumer.commit()

    def test_initialization(self):
        init_success = False
        try:
            producer = MetricsProducer(self.settings)
            init_success = True
        except:
            self.assertFalse(True)  # should never happen

        self.assertTrue(init_success)

    def test_writing_to_kafka(self):
        assignment = self.test_consumer.assignment()
        partition = assignment.pop()

        # get the offset before publishing
        old_offset = self.test_consumer.position(partition)

        # produce a message
        producer = MetricsProducer(self.settings)
        msg = producer.collect_metrics()
        producer.publish_metrics(msg)

        raw_result = self.test_consumer.poll()
        for _, items in raw_result.items():
            self.assertEqual(len(items), 1)
            item = items.pop()
            # check that the consumed item is the same we published
            self.assertEqual(item.value.decode('utf-8'), json.dumps(msg))

        self.test_consumer.commit()

        # get the offset after consuming and check it's +1
        new_offset = self.test_consumer.position(partition)
        self.assertEqual(new_offset - old_offset, 1)
Example n. 14
 def consumer_from_offset(self, process_msg, offset):
     consumer = KafkaConsumer(group_id=self.group_name,
                              bootstrap_servers=self.broker_list,
                              enable_auto_commit=self.enable_auto_commit,
                              auto_offset_reset=self.auto_offset_reset)
     consumer.subscribe(self.topic)
     consumer.poll(0)
     for topic_partition in consumer.assignment():
         consumer.seek(topic_partition, offset)
     while True:
         consumer_records = consumer.poll(100)
         for partition_info, records in consumer_records.items():
             for record in records:
                 process_msg(record)
Example n. 15
def main(args):
    kafka_brokers_sasl = [
        "kafka02-prod01.messagehub.services.us-south.bluemix.net:9093"
    ]
    sasl_plain_username = "******"
    sasl_plain_password = "******"

    sasl_mechanism = 'PLAIN'  # <-- changed from 'SASL_PLAINTEXT'
    security_protocol = 'SASL_SSL'

    oldoffset = args.get("offset", "0")
    oldoffset = int(oldoffset)

    # Create a new context using system defaults, disable all but TLS1.2
    context = ssl.create_default_context()
    context.options |= ssl.OP_NO_TLSv1      # |= adds the flag; &= would clear the other options
    context.options |= ssl.OP_NO_TLSv1_1
    try:
        consumer = KafkaConsumer('kar2',
                                 bootstrap_servers=kafka_brokers_sasl,
                                 sasl_plain_username=sasl_plain_username,
                                 sasl_plain_password=sasl_plain_password,
                                 security_protocol=security_protocol,
                                 ssl_context=context,
                                 sasl_mechanism=sasl_mechanism)
        consumer.topics()
        part = consumer.assignment()

        # print part
        part = part.pop()
        offset = consumer.position(partition=part)
        # print offset
        consumer.seek(part, oldoffset)
        # print consumer.position(partition=part)
        message_queue = consumer.poll(timeout_ms=2000, max_records=20)
        message_queue = message_queue[message_queue.keys()[0]]
        message_dict = {}
        for i in message_queue:
            message_dict[i.offset] = i.value
        print message_dict
        return message_dict
    except:
        return {"-1": oldoffset}
Example n. 16
def cusumer():
    start = time.time()
    n = 0
    _consumer = KafkaConsumer('4.1.1.1.python-test',
                              group_id='test1',
                              bootstrap_servers='192.168.18.134:9092',
                              consumer_timeout_ms=1000)
    print(_consumer.partitions_for_topic('4.1.1.1.python-test'))
    #TopicPartition('4.1.1.1.python-test','0')
    #a = namedtuple("_TopicPartition",["_4.1.1.1.python-test", "_0"])
    offset = _consumer.committed(
        TopicPartition(topic='4.1.1.1.python-test', partition=0))
    #_consumer.seek_to_beginning()
    #_consumer.seek_to_beginning(TopicPartition(topic = '4.1.1.1.python-test',partition = 0))
    #_consumer.assign(TopicPartition(topic = '4.1.1.1.python-test',partition = 0))
    print(_consumer.assignment())
    print(_consumer.subscription())
    print(
        _consumer.beginning_offsets(
            [TopicPartition(topic='4.1.1.1.python-test', partition=0)]))
    #_consumer.seek(TopicPartition(topic = '4.1.1.1.python-test',partition = 0),offset-1)
    #print(_consumer.position(TopicPartition(topic = '4.1.1.1.python-test',partition = 0)))
    #_consumer.commit()
    return
    while 1:
        try:
            for message in _consumer:
                #yield message
                print(message.value)
                n = n + 1
                stop = time.time()
                if stop - start > 1:
                    print(n / (stop - start))
                    start = time.time()
                    n = 0
            #print('time out')
        except KafkaTimeoutError as e:
            print(e)
        except KafkaError as e:
            print(e)
        finally:
            pass
def process_sec_filings():
    consumer = KafkaConsumer(bootstrap_servers=kafka_url,
                             enable_auto_commit=False,
                             group_id='sec-processor')
    topics = consumer.topics()
    assignments = consumer.assignment()
    metrics = consumer.metrics()
    print metrics
    print assignments
    print topics
    consumer.subscribe(topics)

    while True:
        i = 0
        a = consumer.poll(100, 5)
        for msg in a:
            i = i + 1
            print msg
            print i
        print "---------------------"
Example n. 18
class KafkaPythonConsumer(BaseConsumer, ConsumerRebalanceListener):
    '''KafkaPythonConsumer'''
    def __init__(self, consumer_id, config, logger):
        BaseConsumer.__init__(self, consumer_id, config, logger)

    def run_consumer(self):
        '''core consumer code'''
        bootstrap_server = self.config.get('consumer', 'kafka_bootstrap')
        consumer_group = self.config.get('consumer', 'kafka_consumer_group')

        offset_reset = self.config.get('consumer', 'kafka_auto_offset_reset')
        self.consumer = KafkaConsumer(bootstrap_servers=bootstrap_server,\
                                        consumer_timeout_ms=60000,\
                                        group_id=consumer_group,\
                                        auto_offset_reset=offset_reset)
        topic_whitelist = self.config.get('consumer', 'topic_whitelist')
        self.logger.info("Topic list is " + topic_whitelist)

        self.consumer.subscribe(topic_whitelist.split(","), None, self)

        self.logger.info("Consumer " + self.consumer_id + " starting.... " +
                         str(self.consumer.assignment()))

        signal.signal(signal.SIGINT, self.exit_gracefully)
        signal.signal(signal.SIGTERM, self.exit_gracefully)

        while not self.shutting_down:
            for message in self.consumer:

                consumer_message = MessageInfo(message.topic, message.partition, message.key,\
                                               message.value, message.offset)
                self.process_message(consumer_message)
                if self.shutting_down:
                    break
            self.check_for_rotation()

        for part in self.partitions:
            self.partitions[part].writer.close()

        self.logger.info("Graceful shutdown of consumer " +
                         str(self.consumer_id) + " successful")
Example n. 19
def consume_exact_once(host='192.168.11.137:9092',
                       topic='first_topic',
                       only_new=True):
    consumer = KafkaConsumer(group_id='1', bootstrap_servers=host)
    consumer.subscribe(topic, listener=CRL(consumer))
    consumer.poll()

    for tp in consumer.assignment():
        offset = 0
        if tp.partition in CRL.mem_db:
            offset = CRL.mem_db[tp.partition]
            consumer.seek(tp, offset)
        else:
            consumer.seek_to_end(tp)
    while True:
        message_batch = consumer.poll()

        for topic_partition, partition_batch in message_batch.items():
            for message in partition_batch:
                print(message)
                CRL.mem_db[topic_partition.partition] = message.offset
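The CRL listener used above is not shown in the snippet. A minimal sketch of what such a ConsumerRebalanceListener might look like; the mem_db class attribute and the save/restore behaviour are assumptions made to match the surrounding code, not the original implementation:

from kafka import ConsumerRebalanceListener

class CRL(ConsumerRebalanceListener):
    mem_db = {}   # partition -> last processed offset (in-memory stand-in for a real store)

    def __init__(self, consumer):
        self.consumer = consumer

    def on_partitions_revoked(self, revoked):
        # remember where we were before the partitions are taken away
        for tp in revoked:
            CRL.mem_db[tp.partition] = self.consumer.position(tp)

    def on_partitions_assigned(self, assigned):
        # resume from the remembered positions after a rebalance
        for tp in assigned:
            if tp.partition in CRL.mem_db:
                self.consumer.seek(tp, CRL.mem_db[tp.partition])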
Example n. 20
    def run(self):
        group_id = "device-manager.monitor#" + str(uuid.uuid4())
        start = time.time()
        LOGGER.debug(
            f' will create consumer {CONFIG.get_kafka_url()} {group_id} {self.topic}'
        )
        consumer = KafkaConsumer(bootstrap_servers=CONFIG.get_kafka_url(),
                                 group_id=group_id)
        consumer.subscribe(topics=[self.topic], listener=Listener(self))
        StatusMonitor.wait_init(consumer)
        LOGGER.debug(
            f' kafka consumer created {self.topic} - {time.time() - start}')
        LOGGER.debug(consumer.assignment())
        for message in consumer:
            LOGGER.debug(f" Got kafka event [{self.topic}] {message}")
            data = None
            try:
                data = json.loads(message.value)
            except Exception as error:
                LOGGER.error(f" Received message is not valid json {error}")
                continue

            metadata = data.get('metadata', None)
            if metadata is None:
                LOGGER.error(
                    f' Invalid kafka event detected - no metadata included')
                continue

            reason = metadata.get('reason', None)
            if reason == 'statusUpdate':
                continue

            deviceid = metadata.get('deviceid', None)
            tenant = metadata.get('tenant', None)
            if (deviceid is None) or (tenant is None):
                LOGGER.warning(f" Missing device identification from event")
                continue

            self.set_online(tenant, deviceid, message.partition,
                            metadata.get('exp', None))
Example n. 21
def kafka_load_dau(seconds=1800):
    dau = {}

    consumer = KafkaConsumer('posthistory',
                             bootstrap_servers=['54.189.125.21:9092'],
                             auto_offset_reset='earliest',
                             enable_auto_commit=True,
                             auto_commit_interval_ms=1000)

    # Finding end offset so that we could stop the loop.
    next(consumer)
    partition = consumer.assignment().pop()
    end_offset = consumer.end_offsets([partition])[partition] - 1

    for raw_msg in consumer:
        msg = raw_msg.value.decode('utf-8')
        msg_data = json.loads(msg)
        ts = msg_data['_CreationDate'][:16]
        dau[ts] = dau.get(ts, 0) + 1
        if raw_msg.offset == end_offset:
            break
    return dau
    def __run_listen_for_config_changes_forever(self):
        """
        Polls for config update messages.

        When a new message is received, locks instance state and applies the update.
        """
        # c = KafkaConsumer(topics=[self.scores_config_update_topic_name],
        configs = {
            'bootstrap_servers': [
                'ec2-100-20-18-195.us-west-2.compute.amazonaws.com:9092',
                'ec2-100-20-8-59.us-west-2.compute.amazonaws.com:9092',
                'ec2-100-20-75-14.us-west-2.compute.amazonaws.com:9092'
            ],
            'group_id': 1,
            'auto_offset_reset': 'latest',
            'enable_auto_commit': True,
            'value_deserializer': lambda x: json.loads(x.decode('utf-8'))
        }
        c = KafkaConsumer(**configs)
        c.subscribe(self.scores_config_update_topic_name)

        while True:
            msgs = c.poll(float("Inf"))
            if len(msgs) == 0:
                continue
            p = list(c.assignment())[-1]
            m = list(msgs.values())[-1][-1]
            print(dir(m))
            print("===================================")
            print("Config change received: %s" % m.value)
            self.__update_scoring_function(m.value)
            print("Config change affected")
            print("===================================")
Example n. 23
    def get_kafka_data(self):
        result = []
        try:
            consumer = KafkaConsumer(KAFKA_TOPICS,
                                     bootstrap_servers=KAFKA_HOSTS,
                                     consumer_timeout_ms=1000)
            topics = consumer.topics()  # list of all topics on the broker
            assignment = consumer.assignment()  # topic/partition assignments of this consumer
            # latest offsets for the assigned partitions
            end_offsets = consumer.end_offsets(assignment)
            offset = end_offsets[list(end_offsets.keys())[0]]
            offset = offset - 5 if offset >= 5 else offset
            consumer.seek(TopicPartition(topic=KAFKA_TOPICS, partition=0),
                          offset)  # rewind the offset so the last few messages are consumed again

            for message in consumer:
                content = json.loads(message.value)
                for item in content:
                    result.append({
                        "name":
                        item['event_top_type'],
                        "app":
                        item['event_type'],
                        "ip":
                        item['dev_ip'],
                        "time":
                        datetime.strptime(
                            item['event_time'],
                            "%Y%m%d%H%M%S").strftime("%Y-%m-%d %H:%M:%S")
                    })
                if message.offset == offset - 1:
                    break
            consumer.close()
        except Exception as e:
            logger.error(e)

        return result
Example n. 24
def kafka_consumer_test():
    topic_name = 'topic_test'
    bootstrap_servers = ['localhost:9092']

    # consumer = KafkaConsumer(topic_name, bootstrap_servers=bootstrap_servers, group_id='test_group', auto_offset_reset='earliest')
    consumer = KafkaConsumer(bootstrap_servers=bootstrap_servers, group_id='test_group', auto_offset_reset='earliest')
    # enable_auto_commit=True (the default) is needed to resume where the group left off; the broker stores this group_id's offsets
    # auto_offset_reset='earliest' (default is 'latest') only takes effect when there is no valid committed offset

    partition_set = consumer.partitions_for_topic(topic_name)
    partitions = [TopicPartition(topic_name, partition_idx) for partition_idx in partition_set]
    consumer.assign(partitions)
    topic_partition_set = consumer.assignment()

    # consumer.seek_to_beginning()    # move to the earliest offset stored in the cluster (not necessarily 0); with no arguments it applies to every assigned partition
    # consumer.seek_to_end()    # move to the first not-yet-consumed offset; with no arguments it applies to every assigned partition
    for topic_partition in topic_partition_set:
        offset = consumer.position(topic_partition)
        print("partition: %d, offset: %d" % (topic_partition.partition, offset))
        # consumer.seek(topic_partition, offset)     # avoid setting this manually

    for msg in consumer:
        print("topic:%s, partition:%d, offset:%d: key=%s value=%s" % (
            msg.topic, msg.partition, msg.offset, msg.key, msg.value.decode("utf-8")))
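The commented-out seek_to_beginning() / seek_to_end() calls above apply to every assigned partition when called without arguments. A short sketch (same placeholder topic, group and broker) that rewinds all manually assigned partitions and prints the resulting positions:

from kafka import KafkaConsumer, TopicPartition

consumer = KafkaConsumer(bootstrap_servers=['localhost:9092'], group_id='test_group')
partitions = [TopicPartition('topic_test', p)
              for p in consumer.partitions_for_topic('topic_test')]
consumer.assign(partitions)

consumer.seek_to_beginning()          # no arguments: rewind every assigned partition
for tp in consumer.assignment():
    print(tp.partition, consumer.position(tp))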
Example n. 25
def kafka_consumer_test():
    topic_name = 'topic_test'
    bootstrap_servers = ['localhost:9092']

    # consumer = KafkaConsumer(topic_name, bootstrap_servers=bootstrap_servers, group_id='test_group', auto_offset_reset='earliest')
    consumer = KafkaConsumer(bootstrap_servers=bootstrap_servers, group_id='test_group', auto_offset_reset='earliest')
    # enable_auto_commit=True (the default) is needed to resume where the group left off; the broker stores this group_id's offsets
    # auto_offset_reset='earliest' (default is 'latest') only takes effect when there is no valid committed offset
    
    partition_set = consumer.partitions_for_topic(topic_name)
    partitions = [ TopicPartition(topic_name, partition_idx) for partition_idx in partition_set ]
    consumer.assign(partitions)
    topic_partition_set = consumer.assignment()

    #consumer.seek_to_beginning()    # move to the earliest offset stored in the cluster (not necessarily 0); with no arguments it applies to every assigned partition
    #consumer.seek_to_end()    # move to the first not-yet-consumed offset; with no arguments it applies to every assigned partition
    for topic_partition in topic_partition_set:
        offset = consumer.position(topic_partition)
        print "partition: %d, offset: %d" % (topic_partition.partition, offset)
        #consumer.seek(topic_partition, offset)     # avoid setting this manually


    for msg in consumer:
        print ("topic:%s, partition:%d, offset:%d: key=%s value=%s" % (msg.topic, msg.partition, msg.offset, msg.key, msg.value.decode("utf-8")))
    consumer.close()
# (save_jsondata(), used by the __main__ block below, is not included in this excerpt)


if __name__ == '__main__':
    client = InsecureClient('http://lg-11-152.ko.cn:50070', user='******')

    consumer = KafkaConsumer('kzmg_hunter_login', bootstrap_servers=['172.23.11.150:9092'])
    consumer.topics()
    for key, value in consumer.end_offsets(consumer.assignment()).items():
        print (key, value)
        name_mid_num = str(key)[53:54]
        # name_mid_num=str(key)[51:52]
        print(name_mid_num)

        locals()['consumer' + name_mid_num]=KafkaConsumer('kzmg_hunter_login', bootstrap_servers=['172.23.11.150:9092'])
        locals()['consumer' + name_mid_num].topics()
        locals()['consumer' + name_mid_num].seek(key, 0)
        datanum = value
        try:
            lock = thread.allocate_lock()
            lock.acquire()
            locks.append(lock)
            thread.start_new_thread(save_jsondata, (client, locals()['consumer'+name_mid_num], datanum, name_mid_num,lock))
        except:
Example n. 27
class CheckKafka(PubSubNagiosPlugin):
    def __init__(self):
        # Python 2.x
        super(CheckKafka, self).__init__()
        # Python 3.x
        # super().__init__()
        self.name = 'Kafka'
        self.default_port = 9092
        self.producer = None
        self.consumer = None
        self.topic = None
        self.client_id = 'Hari Sekhon ' + os.path.basename(
            get_topfile()) + ' ' + __version__
        self.group_id = self.client_id + ' ' + str(
            os.getpid()) + ' ' + random_alnum(10)
        self.acks = '1'
        self.retries = 0
        self.partition = None
        self.topic_partition = None
        self.brokers = None
        self.timeout_ms = None
        self.start_offset = None

    def add_options(self):
        # super(CheckKafka, self).add_options()
        # TODO: (host_envs, default_host) = getenvs2('HOST', default_host, name)
        # TODO: env support for Kafka brokers
        self.add_opt('-H', '--host', \
                     '-B', '--brokers', \
                     dest='brokers', metavar='broker_list', default='localhost:9092',
                     help='Kafka Broker seed list in form host[:port],host2[:port2]... (default: localhost:9092)')
        self.add_opt('-T', '--topic', help='Kafka Topic')
        self.add_opt('-p',
                     '--partition',
                     type=int,
                     help='Kafka Partition (default: 0)',
                     default=0)
        self.add_opt(
            '-a',
            '--acks',
            default=1,
            choices=[1, 'all'],
            help=
            'Acks to require from Kafka. Valid options are \'1\' for Kafka ' +
            'partition leader, or \'all\' for all In-Sync Replicas (may block causing '
            + 'timeout if replicas aren\'t available, default: 1)')
        self.add_opt(
            '-s',
            '--sleep',
            metavar='secs',
            help=
            'Sleep in seconds between producing and consuming from given topic (default: 0.5)'
        )
        self.add_opt('--list-topics',
                     action='store_true',
                     help='List Kafka topics from broker(s) and exit')
        self.add_opt('--list-partitions',
                     action='store_true',
                     help='List Kafka topic partitions from broker(s) and exit')
        self.add_thresholds(default_warning=1, default_critical=2)

    def run(self):
        try:
            super(CheckKafka, self).run()
        #except KafkaError as _:
        #raise CriticalError(_)
        except KafkaError:
            raise CriticalError(self.exception_msg())

    @staticmethod
    def exception_msg():
        return traceback.format_exc().split('\n')[-2]

    def get_topics(self):
        self.consumer = KafkaConsumer(bootstrap_servers=self.brokers,
                                      client_id=self.client_id,
                                      request_timeout_ms=self.timeout_ms)
        return self.consumer.topics()

    def print_topics(self):
        print('Kafka Topics:\n')
        for topic in self.get_topics():
            print(topic)

    def get_topic_partitions(self, topic):
        self.consumer = KafkaConsumer(topic,
                                      bootstrap_servers=self.brokers,
                                      client_id=self.client_id,
                                      request_timeout_ms=self.timeout_ms)
        if topic not in self.get_topics():
            raise CriticalError(
                "topic '{0}' does not exist on Kafka broker".format(topic))
        partitions = self.consumer.partitions_for_topic(topic)
        assert isSet(partitions)
        return partitions

    def print_topic_partitions(self, topic):
        print('Kafka topic \'{0}\' partitions:\n'.format(topic))
        #for partition in self.get_topic_partitions(topic):
        #    print(partition)
        print(list(self.get_topic_partitions(topic)))
        print()

    def process_args(self):
        self.brokers = self.get_opt('brokers')
        # TODO: add broker list validation back in
        # validate_hostport(self.brokers)
        log_option('brokers', self.brokers)
        self.timeout_ms = max((self.timeout * 1000 - 1000) / 2, 1000)

        try:
            list_topics = self.get_opt('list_topics')
            list_partitions = self.get_opt('list_partitions')
            if list_topics:
                self.print_topics()
                sys.exit(ERRORS['UNKNOWN'])
            self.topic = self.get_opt('topic')
        except KafkaError:
            raise CriticalError(self.exception_msg())

        if self.topic:
            validate_chars(self.topic, 'topic', 'A-Za-z-')
        elif list_topics or list_partitions:
            pass
        else:
            self.usage('--topic not specified')

        try:
            if list_partitions:
                if self.topic:
                    self.print_topic_partitions(self.topic)
                else:
                    for topic in self.get_topics():
                        self.print_topic_partitions(topic)
                sys.exit(ERRORS['UNKNOWN'])
        except KafkaError:
            raise CriticalError(self.exception_msg())

        self.partition = self.get_opt('partition')
        # technically optional, will hash to a random partition, but need to know which partition to get offset
        # if self.partition is not None:
        validate_int(self.partition, "partition", 0, 10000)
        self.topic_partition = TopicPartition(self.topic, self.partition)
        self.acks = self.get_opt('acks')
        log_option('acks', self.acks)
        self.validate_thresholds()

    def subscribe(self):
        self.consumer = KafkaConsumer(
            #self.topic,
            bootstrap_servers=self.brokers,
            # client_id=self.client_id,
            # group_id=self.group_id,
            request_timeout_ms=self.timeout_ms)
        #key_serializer
        #value_serializer
        log.debug('partition assignments: {0}'.format(
            self.consumer.assignment()))

        # log.debug('subscribing to topic \'{0}\' parition \'{1}\''.format(self.topic, self.partition))
        # self.consumer.subscribe(TopicPartition(self.topic, self.partition))
        # log.debug('partition assignments: {0}'.format(self.consumer.assignment()))

        log.debug('assigning partition {0} to consumer'.format(self.partition))
        # self.consumer.assign([self.partition])
        self.consumer.assign([self.topic_partition])
        log.debug('partition assignments: {0}'.format(
            self.consumer.assignment()))

        log.debug('getting current offset')
        # see also highwater, committed, seek_to_end
        self.start_offset = self.consumer.position(self.topic_partition)
        if self.start_offset is None:
            # don't do this, I've seen scenario where None is returned and all messages are read again, better to fail
            # log.warn('consumer position returned None, resetting to zero')
            # self.start_offset = 0
            raise UnknownError(
                'Kafka Consumer reported current starting offset = {0}'.format(
                    self.start_offset))
        log.debug('recorded starting offset \'{0}\''.format(self.start_offset))
        # self.consumer.pause()

    def publish(self):
        log.debug('creating producer')
        self.producer = KafkaProducer(bootstrap_servers=self.brokers,
                                      client_id=self.client_id,
                                      acks=self.acks,
                                      batch_size=0,
                                      max_block_ms=self.timeout_ms,
                                      request_timeout_ms=self.timeout_ms)
        #key_serializer
        #value_serializer
        log.debug('producer.send()')
        self.producer.send(self.topic,
                           key=self.key,
                           partition=self.partition,
                           value=self.publish_message)
        log.debug('producer.flush()')
        self.producer.flush()

    def consume(self):
        self.consumer.assign([self.topic_partition])
        log.debug('consumer.seek({0})'.format(self.start_offset))
        self.consumer.seek(self.topic_partition, self.start_offset)
        # self.consumer.resume()
        log.debug('consumer.poll(timeout_ms={0})'.format(self.timeout_ms))
        obj = self.consumer.poll(timeout_ms=self.timeout_ms)
        log.debug('msg object returned: %s', obj)
        msg = None
        try:
            for consumer_record in obj[self.topic_partition]:
                if consumer_record.key == self.key:
                    msg = consumer_record.value
                    break
        except KeyError:
            raise UnknownError('TopicPartition key was not found in response')
        if msg is None:
            raise UnknownError(
                "failed to find matching consumer record with key '{0}'".
                format(self.key))
        return msg
class WarriorKafkaConsumer():
    """
    This class contains all kafka consumer methods
    """
    def __init__(self, *topics, **configs):
        """
        Create Kafka Consumer object
        """
        print_info("creating kafka consumer")
        try:
            self.kafka_consumer = KafkaConsumer(*topics, **configs)
        except KafkaError as exc:
            print_error("Kafka consumer - Exception during connecting to broker - {}".format(exc))

    def subscribe_to_topics(self, topics, **kwargs):
        """
        Subscribe to list of specified topics.
        Arguments:
          topics(list): list of topic names to subscribe
          pattern(list): list of topic name patterns to subscribe
          listener(func): callback function
        Returns:
          result(bool) : False if an exception occurs, True otherwise
        """
        pattern = kwargs.get("pattern", None)
        listener = kwargs.get("listener", None)
        print_info("subscribe to topics {}".format(topics))
        try:
            self.kafka_consumer.subscribe(topics=topics,
                                          pattern=pattern,
                                          listener=listener)
            result = True
        except KafkaError as exc:
            print_error("Exception during subscribing to topics - {}".format(exc))
            result = False
        return result

    def unsubscribe_to_topics(self):
        """
        Unsubscribe from all topics.
        Arguments: None.
        Returns:
          result(bool) : False if an exception occurs, True otherwise
        """
        print_info("unsubscribing from all topics")
        try:
            self.kafka_consumer.unsubscribe()
            result = True
        except KafkaError as exc:
            print_error("Exception during unsubscibing to topics - {}".format(exc))
            result = False
        return result

    def assign_partitions(self, partitions):
        """
        Assign partitions to consumer.
        Arguments:
          partitions(list) : list of [topic, partition] lists
            example : [[topic1,1], [topic2,1]]
        Returns:
            None.
        """
        print_info("assigning partitions to consumer {}".format(partitions))
        topic_partitions = [TopicPartition(topic=tup[0], partition=tup[1]) for tup in partitions]
        try:
            self.kafka_consumer.assign(topic_partitions)
            result = True
        except KafkaError as exc:
            print_error("Exception during assiging partitions - {}".format(exc))
            result = False
        return result

    def seek_to_position(self, topic, partition, offset):
        """
        Seek to the given offset.
        Arguments:
          topic(str): topic name
          partition(int): partition number
          offset(int): offset number
        Returns:
          result(bool) : False if an exception occurs, True otherwise
        """
        print_info("seeking to position {}:{}:{}".format(topic, partition, offset))
        topic_partition = TopicPartition(topic=topic, partition=partition)
        try:
            self.kafka_consumer.seek(partition=topic_partition, offset=offset)
            result = True
        except KafkaError as exc:
            print_error("Exception during seek - {}".format(exc))
            result = False
        return result

    def get_messages(self, get_all_messages=False, **kwargs):
        """
        Get messages from consumer.
        Arguments:
          get_all_messages(bool): set this to True to get all the messages (seeks to the
                                   beginning first). Defaults to False.
          timeout(int): timeout in milliseconds
          max_records(int): maximum messages to fetch
        Returns:
          messages(list): messages from the consumer
        """
        timeout_ms = kwargs.get("timeout", 0)
        max_records = kwargs.get("max_records", None)
        messages = []
        msg_pack = {}
        print_info("get messages published to subscribed topics")
        try:
            if get_all_messages:
                self.kafka_consumer.seek_to_beginning()
            msg_pack = self.kafka_consumer.poll(timeout_ms, max_records)
        except KafkaError as exc:
            print_error("Exception occured in get_messages - {}".format(exc))

        for topic_partition, message_list in msg_pack.items():
            for message in message_list:
                messages.append(message.value)
        return messages

    def get_topics(self):
        """
        Get subscribed topics of the consumer.
        Arguments:
          None.
        Returns:
          topic_list(list of lists): list of [topic, partition] lists
            example : [[topic1,1], [topic2,2]]
        """
        print_info("get all the topics consumer is subscribed to")
        try:
            topic_partitions = self.kafka_consumer.assignment()
            topic_list = [[topic_partition.topic, topic_partition.partition] \
                       for topic_partition in topic_partitions]
        except KafkaError as exc:
            print_error("Exception during getting assigned partitions - {}".format(exc))
            topic_list = None
        return topic_list
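A hedged usage sketch of the WarriorKafkaConsumer wrapper above; the broker address, topic names and group id are placeholders, and the constructor kwargs are forwarded directly to KafkaConsumer:

consumer = WarriorKafkaConsumer(bootstrap_servers='localhost:9092',
                                group_id='warrior-group',
                                auto_offset_reset='earliest')

if consumer.subscribe_to_topics(['orders', 'payments']):
    values = consumer.get_messages(timeout=1000, max_records=50)
    for value in values:
        print(value)
    print(consumer.get_topics())   # [[topic, partition], ...] for the assigned partitions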
Example n. 29
class ConsumerTimeStampWindow:
    def __init__(self,
                 broker_list,
                 group_name,
                 topic,
                 enable_auto_commit=True,
                 auto_offset_reset='latest'):
        self.topic = topic
        self.consumer = KafkaConsumer(group_id=group_name,
                                      bootstrap_servers=broker_list,
                                      enable_auto_commit=enable_auto_commit,
                                      auto_offset_reset=auto_offset_reset)

    def consumer_from_offset_window(self, process_msg, begin_time, end_time):
        begin_offset_dic, end_offset_dic = self.get_offset_time_window(
            begin_time, end_time)
        for topic_partition, offset_and_timestamp in begin_offset_dic.items():
            self.consumer.seek(topic_partition, offset_and_timestamp[0])

        self.consumer.subscribe(self.topic)
        self.consumer.poll(0)

        topic_partition_info = self.consumer.assignment()
        partition_consumer_finish_flag = dict(
            zip(topic_partition_info, [False] * len(topic_partition_info)))

        while True:
            if False not in partition_consumer_finish_flag.values():
                return
            consumer_records = self.consumer.poll(100)
            for partition_info, records in consumer_records.items():
                if partition_consumer_finish_flag[partition_info]:
                    print('-------------- {0} consumer finish --------------'.
                          format(partition_info))
                    break
                for record in records:
                    if record.offset <= end_offset_dic[partition_info][0]:
                        process_msg(record)
                    else:
                        partition_consumer_finish_flag[partition_info] = True

    def get_offset_time_window(self, begin_time, end_time):
        partitions_structs = []

        for partition_id in self.consumer.partitions_for_topic(self.topic):
            partitions_structs.append(TopicPartition(self.topic, partition_id))

        begin_search = {}
        for partition in partitions_structs:
            begin_search[partition] = begin_time if isinstance(
                begin_time, int) else self.__str_to_timestamp(begin_time)
        begin_offset = self.consumer.offsets_for_times(begin_search)

        end_search = {}
        for partition in partitions_structs:
            end_search[partition] = end_time if isinstance(
                end_time, int) else self.__str_to_timestamp(end_time)
        end_offset = self.consumer.offsets_for_times(end_search)

        for topic_partition, offset_and_timestamp in begin_offset.items():
            b_offset = 'null' if offset_and_timestamp is None else offset_and_timestamp[0]
            e_offset = ('null' if end_offset[topic_partition] is None
                        else end_offset[topic_partition][0])
            print('Between {0} and {1}, {2} offset range = [{3}, {4}]'.format(
                begin_time, end_time, topic_partition, b_offset, e_offset))
        return begin_offset, end_offset

    @staticmethod
    def __str_to_timestamp(str_time, format_type='%Y-%m-%d %H:%M:%S'):
        time_array = time.strptime(str_time, format_type)
        return int(time.mktime(time_array)) * 1000
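A short usage sketch of ConsumerTimeStampWindow; broker, group and topic names are placeholders, and the time strings use the '%Y-%m-%d %H:%M:%S' format expected by __str_to_timestamp():

def print_record(record):
    print(record.partition, record.offset, record.value)

window_consumer = ConsumerTimeStampWindow(broker_list='localhost:9092',
                                          group_name='window-reader',
                                          topic='events')
# replay every record whose timestamp falls between the two instants
window_consumer.consumer_from_offset_window(print_record,
                                            '2018-05-10 00:00:00',
                                            '2018-05-10 12:00:00')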
Example n. 30
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("topic")
    parser.add_argument("-H", "--host", type=str, help="Kafka server and port.",
                        default="localhost:9092")
    parser.add_argument("-r", "--replay", action="store_true",
                        help="Display all available log entries.",
                        default=False)
    parser.add_argument("-m", "--match", type=str, help="Initial match pattern.",
                        default=None)
    args = parser.parse_args()

    pattern = args.match

    if args.replay:
        auto_offset_reset = 'earliest'
    else:
        auto_offset_reset = 'latest'

    if args.topic[-5:] == '.json':
        value_deserializer = json_value_deserializer
    else:
        value_deserializer = None

    consumer = KafkaConsumer(args.topic,
                             group_id=None,
                             bootstrap_servers=args.host,
                             value_deserializer=value_deserializer,
                             auto_offset_reset=auto_offset_reset)

    while True:
        messages = consumer.poll(250)
        for tp in six.itervalues(messages):
            for message in tp:
                if isinstance(message.value, dict):
                    if message.value['klog_level'] in colors:
                        c = colors[message.value['klog_level']]
                    else:
                        c = attr(0)

                    params = {'topic': message.topic,
                              'offset': message.offset,
                              'level': message.value['klog_level'].upper()}
                    params['time'] = str(datetime.datetime.fromtimestamp(float(message.value['klog_time'])))

                    params['msg'] = message.value['klog_message']

                    if pattern and re.search(pattern, params['msg']) is not None:
                        c += match_color

                    msg = msg_format.format(**params)
                else:
                    c = attr(0)
                    msg = message.value

                print(c+msg+attr(0))

        po = select.poll()
        po.register(sys.stdin, select.POLLIN)
        if po.poll(0):
            ch = sys.stdin.read(1)
            if ch == 'm':
                pattern = sys.stdin.readline().rstrip('\n')
            elif ch == 'r':
                offset = sys.stdin.readline().rstrip('\n').encode('utf-8')
                offset = int(offset)
                for tp in consumer.assignment():
                    position = consumer.position(tp)
                    consumer.seek(tp, max(0, position-offset))
            elif ch == 'R':
                for tp in consumer.assignment():
                    consumer.seek_to_beginning(tp)
            elif ch == 'p':
                for tp in consumer.assignment():
                    consumer.pause(tp)
            elif ch == 'P':
                for tp in consumer.assignment():
                    consumer.resume(tp)
            elif ch == 'q':
                # FIXME: kafka currently (1.0.1) raises an exception on close
                #consumer.close()
                exit()
class LiveTable:
    """
    LiveTable uses event sourcing on a "KTable" topic to reconstitute
    a full table for taking "snapshots" as reports.

    The constructor requires a KafkaConsumer to read "table updates" from.

    The windowing on the table is configurable.
    A user defined function can be supplied/updated that uses attributes
    of the post to create a new, derived column.
    """
    def __init__(self,
                 input_topic_name: str,
                 bootstrap_servers: Sequence[str],
                 time_window_size=datetime.timedelta(days=3),
                 scoring_function=ScoringFunction()):
        """

        :param input_topic_name:
        :param bootstrap_servers:
        :param time_window_size:
        :param scoring_function:
        """
        self.producer = KafkaProducer(
            bootstrap_servers=bootstrap_servers,
            value_serializer=lambda x: json.dumps(x).encode('utf-8'))
        self.consumer = KafkaConsumer(
            input_topic_name,
            bootstrap_servers=list(bootstrap_servers),
            auto_offset_reset='earliest',
            enable_auto_commit=True,
            group_id=random.randint(0, 999999),
            value_deserializer=lambda x: json.loads(x.decode('utf-8')))
        self.scoring_function = scoring_function
        self.time_window_size = time_window_size
        self.rolling_events_processed = 0
        self.rolling_sum_ingest_latency = 0
        self.rolling_sum_click_latency = 0
        self.time_window_start = None
        self.time_window_start_epoch = None
        self.topic_partition = None
        # initializes time_window_start, time_window_start_epoch, and topic_partition
        self.__seek_to_window_start()
        self.posts = {}
        self.__bulk_consume_new_events()
        self.scoring_function_lock = threading.Lock()

    def update(self):
        """
        Plays updates to the table from kafka topic.
        Purges table entries that are past their expiration date.
        Enriches all entries by applying scoring function

        :return:
        """
        self.__garbage_collect_old()
        self.__bulk_consume_new_events()

    def get_snapshot(self):
        """

        :return: return a copy of current state of table
        """
        self.__apply_score()
        return self.posts.copy()

    def update_scoring_function(self, scoring_function: ScoringFunction):
        """
        Updates scoring function for this table.
        Applies scoring function on all current entries in table

        :param scoring_function:
        :return:
        """
        print("updating scoring fn")
        self.scoring_function_lock.acquire()
        self.scoring_function = scoring_function
        self.scoring_function_lock.release()
        print("updated")
        # self.__apply_score()

    def __apply_score(self):
        """
        Applies scoring function on all entries in table
        """
        self.scoring_function_lock.acquire()
        for key, json_dict in self.posts.items():
            json_dict['score'] = self.scoring_function.score(
                json_dict['PREVIEW'], json_dict['FULL_VIEW'])
            json_dict['coldness_score'] = self.scoring_function.coldness_score(
                json_dict['PREVIEW'])
            json_dict['hotness_score'] = self.scoring_function.hotness_score(
                json_dict['PREVIEW'], json_dict['FULL_VIEW'])
        self.scoring_function_lock.release()

    def __bulk_consume_new_events(self):
        """
        Reads kafka topic as an event source to reconstitute a "snapshot" of
        scores for all posts by replaying them into a dictionary.

        """
        # end_offset = self.consumer.end_offsets([self.topic_partition])[self.topic_partition] - 1
        end_offsets = {}
        partitions = {}
        topics_consumed = 0
        for p in self.assignments:
            partitions[p.partition] = p
            end_offsets[p.partition] = self.consumer.end_offsets([p])[p] - 1
            if self.consumer.committed(
                    p) is not None and self.consumer.committed(
                        p) >= end_offsets[p.partition]:
                topics_consumed += 1
            else:
                self.consumer.resume(p)

        if topics_consumed >= len(partitions):
            print("no new data")
            return

        print("updates in prog")

        for m in self.consumer:
            if m is not None and m.value[
                    'POST_TIMESTAMP'] > self.time_window_start_epoch:
                self.posts[m.value['PROPERTIES_SHOPPABLE_POST_ID']] = m.value
                # self.__track_latency(m)
            if m.offset >= end_offsets[m.partition]:
                self.consumer.pause(partitions[m.partition])
                topics_consumed += 1
                print(topics_consumed)
                if topics_consumed >= len(partitions):
                    break
        self.consumer.commit()

    def __track_latency(self, m):
        if 'LAST_CLICK_TIMESTAMP' not in m.value or 'INGEST_TIMESTAMP' not in m.value:
            return
        click_timestamp = m.value['LAST_CLICK_TIMESTAMP']
        ingest_timestamp = m.value['INGEST_TIMESTAMP']
        if click_timestamp is None or ingest_timestamp is None:
            return
        now = round(time.time() * 1000)
        self.rolling_events_processed += 1
        self.rolling_sum_ingest_latency += now - ingest_timestamp
        self.rolling_sum_click_latency += now - click_timestamp
        if self.rolling_events_processed >= 1000:
            metrics = {
                'average_latency_ingest':
                self.rolling_sum_ingest_latency /
                self.rolling_events_processed,
                'average_latency_click':
                self.rolling_sum_click_latency / self.rolling_events_processed
            }
            self.producer.send(topic="average_latency", value=metrics)
            #self.producer.flush()
            self.rolling_events_processed = 0
            self.rolling_sum_ingest_latency = 0
            self.rolling_sum_click_latency = 0
            #print("===================================")
            #print("         PUSH LATENCY METRICS      ")
            #print(metrics)
            #print("===================================")

    def __garbage_collect_old(self):
        """
        Removes all expired table entries

        """
        for post_id in list(self.posts.keys()):
            if self.posts[post_id][
                    'POST_TIMESTAMP'] < self.time_window_start_epoch:
                self.posts.pop(post_id)

    def __seek_to_window_start(self):
        """
        This function mutates the consumer to "seek" the kafka topic offset to that of the earliest event that
        is inside the time_window.
        """
        self.__update_time_window_start()
        if len(self.consumer.assignment()) == 0:
            # poll consumer to generate a topic partition assignment
            message = self.consumer.poll(1, 1)
            while len(message) == 0:
                message = self.consumer.poll(1, 1)
        self.topic_partition = self.consumer.assignment().pop()
        self.assignments = self.consumer.assignment()
        time_window_start_epoch = int(self.time_window_start.timestamp() *
                                      1000)

        # get first offset that is in the time window
        start_offset = self.consumer.offsets_for_times(
            {self.topic_partition:
             time_window_start_epoch})[self.topic_partition].offset
        # set the consumer to consume from this offset
        self.consumer.seek(self.topic_partition, start_offset)

    def __update_time_window_start(self):
        """
        Sets the start of the time window to now - self.time_window_size,
        both as a datetime and as epoch milliseconds.
        """
        self.time_window_start = datetime.datetime.now(
        ) - self.time_window_size
        self.time_window_start_epoch = int(self.time_window_start.timestamp() *
                                           1000)
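
The class above rewinds its consumer to the first offset inside a sliding time window via offsets_for_times before replaying messages. Below is a minimal, self-contained sketch of that pattern, assuming a hypothetical single-partition topic 'posts' on localhost:9092 and a one-hour window (none of these names come from the example above):

import datetime
from kafka import KafkaConsumer, TopicPartition

# Sketch only: topic name, broker address and window size are assumptions.
consumer = KafkaConsumer(bootstrap_servers='localhost:9092',
                         enable_auto_commit=False,
                         consumer_timeout_ms=1000)
tp = TopicPartition('posts', 0)
consumer.assign([tp])  # explicit assignment avoids waiting for a group rebalance

window_start = datetime.datetime.now() - datetime.timedelta(hours=1)
window_start_ms = int(window_start.timestamp() * 1000)

# offsets_for_times takes milliseconds; the value is None if nothing is newer
offsets = consumer.offsets_for_times({tp: window_start_ms})
if offsets[tp] is not None:
    consumer.seek(tp, offsets[tp].offset)
    for msg in consumer:
        print(msg.offset, msg.timestamp)
else:
    print("no messages inside the time window")

consumer.close()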
Esempio n. 32
0
import re
import time
import pandas as pd
import json
from kafka import KafkaConsumer, TopicPartition


datalist = []
i = 0

"""消费者(手动设置偏移量)"""
consumer = KafkaConsumer('phone-game-userinfo', bootstrap_servers=['172.23.11.150:9092'])
print (consumer.partitions_for_topic("phone-game-userinfo"))  # 获取phone-game-userinfo主题的分区信息
print (consumer.topics())  # 获取主题列表
print (consumer.subscription())  # 获取当前消费者订阅的主题
print (consumer.assignment())  # 获取当前消费者topic、分区信息
print (consumer.beginning_offsets(consumer.assignment()))  # 获取当前消费者可消费的偏移量
consumer.seek(TopicPartition(topic=u'phone-game-userinfo', partition=0), 202025)  # 重置偏移量,从第50个偏移量消费
print(consumer.end_offsets(consumer.assignment()))    # Get the last offset for the given partitions
print(consumer.end_offsets([TopicPartition(topic='phone-game-userinfo', partition=0)])) # 同上一句等价
t= '2018-05-10'
timeArray =time.strptime(t,'%Y-%m-%d')
timeStamp=int(time.mktime(timeArray))
print(consumer.offsets_for_times({TopicPartition(topic='phone-game-userinfo', partition=0):timeStamp}))
for message in consumer:
    print ("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                          message.offset, message.key,
                                          message.value.decode('utf-8')))
#     # print (message.value.decode('utf-8'))
#     # print (message.offset)
#     data = message.value.split(',')
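
Example 32 looks up the offset for a calendar date with offsets_for_times (note the unit must be milliseconds). As a follow-up, a small sketch reusing the topic and broker from that example (which may not exist in your environment) shows how offsets_for_times, seek and end_offsets can be combined to replay only the messages published since a given date and then stop:

import time
from kafka import KafkaConsumer, TopicPartition

# Sketch: replay everything published since 2018-05-10, then stop at the current end.
tp = TopicPartition('phone-game-userinfo', 0)
consumer = KafkaConsumer(bootstrap_servers=['172.23.11.150:9092'],
                         consumer_timeout_ms=5000)
consumer.assign([tp])

since_ms = int(time.mktime(time.strptime('2018-05-10', '%Y-%m-%d'))) * 1000
start = consumer.offsets_for_times({tp: since_ms})[tp]
end_offset = consumer.end_offsets([tp])[tp]

if start is not None and start.offset < end_offset:
    consumer.seek(tp, start.offset)
    for message in consumer:
        print(message.offset, message.value.decode('utf-8'))
        if message.offset >= end_offset - 1:
            break

consumer.close()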
Esempio n. 33
0
class CheckKafka(PubSubNagiosPlugin):

    def __init__(self):
        # Python 2.x
        super(CheckKafka, self).__init__()
        # Python 3.x
        # super().__init__()
        self.name = 'Kafka'
        self.default_port = 9092
        self.producer = None
        self.consumer = None
        self.topic = None
        self.client_id = 'Hari Sekhon ' + os.path.basename(get_topfile()) + ' ' + __version__
        self.group_id = self.client_id + ' ' + str(os.getpid()) + ' ' + random_alnum(10)
        self.acks = '1'
        self.retries = 0
        self.partition = None
        self.topic_partition = None
        self.brokers = None
        self.timeout_ms = None
        self.start_offset = None

    def add_options(self):
        # super(CheckKafka, self).add_options()
        # TODO: (host_envs, default_host) = getenvs2('HOST', default_host, name)
        # TODO: env support for Kafka brokers
        self.add_opt('-H', '--host', \
                     '-B', '--brokers', \
                     dest='brokers', metavar='broker_list', default='localhost:9092',
                     help='Kafka Broker seed list in form host[:port],host2[:port2]... (default: localhost:9092)')
        self.add_opt('-T', '--topic', help='Kafka Topic')
        self.add_opt('-p', '--partition', type=int, help='Kafka Partition (default: 0)', default=0)
        self.add_opt('-a', '--acks', default=1, choices=['1', 'all'],
                     help='Acks to require from Kafka. Valid options are \'1\' for Kafka ' +
                     'partition leader, or \'all\' for all In-Sync Replicas (may block causing ' +
                     'timeout if replicas aren\'t available, default: 1)')
        self.add_opt('-s', '--sleep', metavar='secs',
                     help='Sleep in seconds between producing and consuming from given topic (default: 0.5)')
        self.add_opt('--list-topics', action='store_true', help='List Kafka topics from broker(s) and exit')
        self.add_opt('--list-partitions', action='store_true',
                     help='List Kafka topic partitions from broker(s) and exit')
        self.add_thresholds(default_warning=1, default_critical=2)

    def run(self):
        try:
            super(CheckKafka, self).run()
        #except KafkaError as _:
            #raise CriticalError(_)
        except KafkaError:
            err = self.exception_msg()
            if 'NoBrokersAvailable' in err:
                err += ' ({0})'.format(self.brokers)
            raise CriticalError(err)

    @staticmethod
    def exception_msg():
        return traceback.format_exc().split('\n')[-2]

    def get_topics(self):
        self.consumer = KafkaConsumer(
            bootstrap_servers=self.brokers,
            client_id=self.client_id,
            request_timeout_ms=self.timeout_ms
            )
        return self.consumer.topics()

    def print_topics(self):
        print('Kafka Topics:\n')
        for topic in self.get_topics():
            print(topic)

    def get_topic_partitions(self, topic):
        self.consumer = KafkaConsumer(
            topic,
            bootstrap_servers=self.brokers,
            client_id=self.client_id,
            request_timeout_ms=self.timeout_ms
            )
        if topic not in self.get_topics():
            raise CriticalError("topic '{0}' does not exist on Kafka broker".format(topic))
        partitions = self.consumer.partitions_for_topic(topic)
        assert isSet(partitions)
        return partitions

    def print_topic_partitions(self, topic):
        print('Kafka topic \'{0}\' partitions:\n'.format(topic))
        #for partition in self.get_topic_partitions(topic):
        #    print(partition)
        print(list(self.get_topic_partitions(topic)))
        print()

    def process_args(self):
        self.brokers = self.get_opt('brokers')
        # TODO: add broker list validation back in
        # validate_hostport(self.brokers)
        log_option('brokers', self.brokers)
        self.timeout_ms = max((self.timeout * 1000 - 1000) / 2, 1000)

        try:
            list_topics = self.get_opt('list_topics')
            list_partitions = self.get_opt('list_partitions')
            if list_topics:
                self.print_topics()
                sys.exit(ERRORS['UNKNOWN'])
            self.topic = self.get_opt('topic')
        except KafkaError:
            raise CriticalError(self.exception_msg())

        if self.topic:
            validate_chars(self.topic, 'topic', 'A-Za-z-')
        elif list_topics or list_partitions:
            pass
        else:
            self.usage('--topic not specified')

        try:
            if list_partitions:
                if self.topic:
                    self.print_topic_partitions(self.topic)
                else:
                    for topic in self.get_topics():
                        self.print_topic_partitions(topic)
                sys.exit(ERRORS['UNKNOWN'])
        except KafkaError:
            raise CriticalError(self.exception_msg())

        self.partition = self.get_opt('partition')
        # technically optional, will hash to a random partition, but we need to know which partition to read the offset from
        # if self.partition is not None:
        validate_int(self.partition, "partition", 0, 10000)
        self.topic_partition = TopicPartition(self.topic, self.partition)
        self.acks = self.get_opt('acks')
        try:
            self.acks = int(self.acks)
        except ValueError:
            pass
        log_option('acks', self.acks)
        self.validate_thresholds()

    def subscribe(self):
        self.consumer = KafkaConsumer(
            #self.topic,
            bootstrap_servers=self.brokers,
            # client_id=self.client_id,
            # group_id=self.group_id,
            request_timeout_ms=self.timeout_ms
            )
            #key_serializer
            #value_serializer
        # this is only a guess as Kafka doesn't expose its API version
        #log.debug('kafka api version: %s', self.consumer.config['api_version'])
        log.debug('partition assignments: {0}'.format(self.consumer.assignment()))

        # log.debug('subscribing to topic \'{0}\' partition \'{1}\''.format(self.topic, self.partition))
        # self.consumer.subscribe(TopicPartition(self.topic, self.partition))
        # log.debug('partition assignments: {0}'.format(self.consumer.assignment()))

        log.debug('assigning partition {0} to consumer'.format(self.partition))
        # self.consumer.assign([self.partition])
        self.consumer.assign([self.topic_partition])
        log.debug('partition assignments: {0}'.format(self.consumer.assignment()))

        log.debug('getting current offset')
        # see also highwater, committed, seek_to_end
        self.start_offset = self.consumer.position(self.topic_partition)
        if self.start_offset is None:
            # don't do this, I've seen a scenario where None is returned and all messages are read again; better to fail
            # log.warn('consumer position returned None, resetting to zero')
            # self.start_offset = 0
            raise UnknownError('Kafka Consumer reported current starting offset = {0}'.format(self.start_offset))
        log.debug('recorded starting offset \'{0}\''.format(self.start_offset))
        # self.consumer.pause()

    def publish(self):
        log.debug('creating producer')
        self.producer = KafkaProducer(
            bootstrap_servers=self.brokers,
            client_id=self.client_id,
            acks=self.acks,
            batch_size=0,
            max_block_ms=self.timeout_ms,
            request_timeout_ms=self.timeout_ms
            )
            #key_serializer
            #value_serializer
        log.debug('producer.send()')
        self.producer.send(
            self.topic,
            key=self.key,
            partition=self.partition,
            value=self.publish_message
            )
        log.debug('producer.flush()')
        self.producer.flush()

    def consume(self):
        self.consumer.assign([self.topic_partition])
        log.debug('consumer.seek({0})'.format(self.start_offset))
        self.consumer.seek(self.topic_partition, self.start_offset)
        # self.consumer.resume()
        log.debug('consumer.poll(timeout_ms={0})'.format(self.timeout_ms))
        obj = self.consumer.poll(timeout_ms=self.timeout_ms)
        log.debug('msg object returned: %s', obj)
        msg = None
        try:
            for consumer_record in obj[self.topic_partition]:
                if consumer_record.key == self.key:
                    msg = consumer_record.value
                    break
        except KeyError:
            raise UnknownError('TopicPartition key was not found in response')
        if msg is None:
            raise UnknownError("failed to find matching consumer record with key '{0}'".format(self.key))
        return msg
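
CheckKafka's check amounts to a produce-then-consume round trip: record the current position on one partition, publish a uniquely keyed message, seek back and confirm the same key is read back within the timeout. The following is a stripped-down sketch of that round trip without the Nagios plugin framework; the 'healthcheck' topic and localhost broker are placeholders, not taken from the plugin:

import time
import uuid
from kafka import KafkaConsumer, KafkaProducer, TopicPartition

# Sketch of a produce/consume health check; topic and broker are assumptions.
broker = 'localhost:9092'
tp = TopicPartition('healthcheck', 0)
key = uuid.uuid4().bytes
payload = b'kafka round-trip test'

consumer = KafkaConsumer(bootstrap_servers=broker, consumer_timeout_ms=5000)
consumer.assign([tp])
start_offset = consumer.position(tp)   # remember where to start reading back from

producer = KafkaProducer(bootstrap_servers=broker, acks=1, max_block_ms=5000)
start_time = time.time()
producer.send('healthcheck', key=key, value=payload, partition=0)
producer.flush()

consumer.seek(tp, start_offset)
records = consumer.poll(timeout_ms=5000)
found = any(r.key == key for r in records.get(tp, []))

if found:
    print('OK: Kafka round trip took %.3fs' % (time.time() - start_time))
else:
    print('CRITICAL: test message was not read back')

producer.close()
consumer.close()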