Example #1
def consume_topic(topic, group, output_dir, frequency):
    global timestamp, tempfile_path, tempfile
    print "Consumer Loading topic '%s' in consumer group %s into %s..." % (topic, group, output_dir)
    timestamp = standardized_timestamp(frequency)
    kafka_consumer = SimpleConsumer(kafka, group, topic, max_buffer_size=1310720000)

    #open file for writing
    tempfile_path = "/tmp/kafka_%s_%s_%s_%s.txt" % (topic, group, timestamp, batch_counter)
    tempfile = open(tempfile_path, "w")
    #log_has_at_least_one = False #did we log at least one entry?
    while True:
        # get 100 messages at a time, non blocking
        messages = kafka_consumer.get_messages(count=100, block=False)
        if not messages:
            #print "no messages to read"
            continue   # If no messages are received, wait until there are more
        for message in messages:
            #log_has_at_least_one = True
            #print(message.message.value)
            #tempfile.write(message.message.value + "\n")    # lose the '\n'?
            tempfile.write(message.message.value)
        if tempfile.tell() > 120000000:  # file size > 120MB
            print "Note: file is large enough to write to hdfs. Writing now..."
            flush_to_hdfs(output_dir, topic)
        kafka_consumer.commit()  # inform zookeeper of position in the kafka queue
Example #2
def consume_topic(topic, group, output_dir, frequency):
    global timestamp, tempfile_path, tempfile
    print "Consumer Loading topic '%s' in consumer group %s into %s..." % (
        topic, group, output_dir)
    timestamp = standardized_timestamp(frequency)
    kafka_consumer = SimpleConsumer(kafka,
                                    group,
                                    topic,
                                    max_buffer_size=1310720000)

    #open file for writing
    tempfile_path = "/tmp/kafka_%s_%s_%s_%s.txt" % (topic, group, timestamp,
                                                    batch_counter)
    tempfile = open(tempfile_path, "w")
    #log_has_at_least_one = False #did we log at least one entry?
    while True:
        # get 100 messages at a time, non blocking
        messages = kafka_consumer.get_messages(count=100, block=False)
        if not messages:
            #print "no messages to read"
            continue  # If no messages are received, wait until there are more
        for message in messages:
            #log_has_at_least_one = True
            print(message.message.value)
            #tempfile.write(message.message.value + "\n")    # lose the '\n'?
            tempfile.write(message.message.value)
            tempfile.write("\n")
        if tempfile.tell() > 12000:  # file size > 12 KB
            print "Note: file is large enough to write to hdfs. Writing now..."
            flush_to_hdfs(output_dir, topic)
        kafka_consumer.commit()  # inform zookeeper of position in the kafka queue
Example #3
def consume_topic(topic, group, output_dir, frequency):
    global timestamp, tempfile_path, tempfile
    print "Consuming from topic '%s' in consumer group %s into %s..." % (topic, group, output_dir)

    #get timestamp
    timestamp = standardized_timestamp(frequency)
    kafka_consumer = SimpleConsumer(kafka, group, topic, max_buffer_size=1310720000)
    
    #open file for writing
    tempfile_path = "/tmp/kafka_stockTwits_%s_%s_%s_%s.dat" % (topic, group, timestamp, batch_counter)
    tempfile = open(tempfile_path,"w")
    log_has_at_least_one = False #did we log at least one entry?
    while True:
        messages = kafka_consumer.get_messages(count=1000, block=False) #get 1000 messages at a time, non blocking
        if not messages:
	       os.system("sleep 300s") # sleep 5mins
	       continue
           
        for message in messages: #OffsetAndMessage(offset=43, message=Message(magic=0, attributes=0, key=None, value='some message'))
            log_has_at_least_one = True
            #print(message.message.value)
            tempfile.write(message.message.value + "\n")
        if tempfile.tell() > 10000000:  #file size > 10MB
            flush_to_hdfs(output_dir, topic)
        kafka_consumer.commit() #save position in the kafka queue
    #exit loop
    if log_has_at_least_one:
        flush_to_hdfs(output_dir, topic)
    kafka_consumer.commit() #save position in the kafka queue
    return 0
Example #4
def consume_topic(topic, group, output_dir, frequency):
    global timestamp, tempfile_path, tempfile
    print "Consuming from topic '%s' in consumer group %s into %s..." % (
        topic, group, output_dir)
    #get timestamp
    kafka_consumer = SimpleConsumer(kafka,
                                    group,
                                    topic,
                                    max_buffer_size=1310720000)

    while True:
        messages = kafka_consumer.get_messages(
            count=1000,
            block=False)  #get 1000 messages at a time, non blocking
        if not messages:
            os.system("sleep 30s")
            continue
            #break
        for message in messages:  #OffsetAndMessage(offset=43, message=Message(magic=0, attributes=0, key=None, value='some message'))
            print message
        kafka_consumer.commit()  #save position in the kafka queue
    #exit loop
    if log_has_at_least_one:
        flush_to_hdfs(output_dir, topic)
    kafka_consumer.commit()  #save position in the kafka queue
    return 0
Example #5
def consume_topic(topic, group, output_dir, frequency):
    global timestamp, tempfile_path, tempfile
    print "Consuming from topic '%s' in consumer group %s into %s..." % (
        topic, group, output_dir)
    #get timestamp
    timestamp = standardized_timestamp(frequency)
    kafka_consumer = SimpleConsumer(kafka,
                                    group,
                                    topic,
                                    max_buffer_size=1310720000)

    #open file for writing
    tempfile_path = "/tmp/kafka_%s_%s_%s_%s.dat" % (topic, group, timestamp,
                                                    batch_counter)
    tempfile = open(tempfile_path, "w")
    log_has_at_least_one = False  #did we log at least one entry?
    while True:
        messages = kafka_consumer.get_messages(
            count=1000,
            block=False)  #get 1000 messages at a time, non blocking
        if not messages:
            break
        for message in messages:  #OffsetAndMessage(offset=43, message=Message(magic=0, attributes=0, key=None, value='some message'))
            log_has_at_least_one = True
            #print(message.message.value)
            tempfile.write(message.message.value + "\n")
        if tempfile.tell() > 10000000:  #file size > 10MB
            flush_to_hdfs(output_dir, topic)
        kafka_consumer.commit()
    #exit loop
    if log_has_at_least_one:
        flush_to_hdfs(output_dir, topic)
    kafka_consumer.commit()  #save position in the kafka queue
    return 0
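Examples #1 through #5 all call a flush_to_hdfs helper that is not shown. The sketch below is only a guess at what it might do, modeled on the inline "hdfs dfs -put" logic in Examples #7 and #8; the path layout, the batch_counter bookkeeping, and reusing the same local path are assumptions, not the original code.

import os

# Hypothetical helper (not the original implementation): close the current
# batch file, upload it to HDFS, and start a fresh local batch.
def flush_to_hdfs(output_dir, topic):
    global tempfile, tempfile_path, batch_counter
    tempfile.close()
    hdfs_dir = "%s/%s" % (output_dir, topic)
    os.system("hdfs dfs -mkdir -p %s" % hdfs_dir)
    os.system("hdfs dfs -put -f %s %s/batch_%d.txt" % (tempfile_path, hdfs_dir, batch_counter))
    os.remove(tempfile_path)
    batch_counter += 1
    tempfile = open(tempfile_path, "w")  # reuse the same local path for the next batch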
Example #6
        def consume_topic(callback_url, consumer_group, topic):
            consumer = None
            try:
                consumer = SimpleConsumer(self.kafka,
                                          consumer_group,
                                          topic,
                                          auto_commit=False)
                messages_read = 0

                # we can't read messages infinitely here as we have
                # a lot of topics/subscribers (much more than threadpool size)
                while messages_read < self.max_read_messages_per_cycle:

                    # get one message and monitor the time
                    start = monitoring.start_time_measure()
                    message = consumer.get_message(block=False)
                    ms_elapsed = monitoring.stop_time_measure(start)
                    self.metrics['kafka_read'].add({'topic': topic},
                                                   ms_elapsed)

                    # if we don't have messages for this topic/subscriber - quit and give chance to others
                    if message is None:
                        logging.info(
                            'No messages for topic: %s and callback: %s, quitting the thread',
                            topic, callback_url)
                        break

                    try:
                        event = json.loads(
                            message.message.value.decode('utf-8'))
                        response_status = self.forward_event(
                            callback_url, event, topic)

                        # if status is success - mark message as consumed by this subscriber
                        if 200 <= response_status < 300:
                            consumer.commit()
                        else:
                            logging.info(
                                'Received error response from consumer: %s',
                                response_status)
                    except:
                        logging.error(
                            "Exception while sending event to consumer")
                        logging.error(traceback.format_exc())
                    finally:
                        messages_read += 1
                return messages_read

            except UnknownTopicOrPartitionError:
                logging.error('Adding %s to skip list', topic)
            except:
                logging.exception('failed to create kafka client')
            finally:
                if consumer is not None:
                    consumer.stop()
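Example #6 (and its duplicate, Example #13) hands delivery off to a forward_event method that is not shown. A minimal sketch of what such a method might look like, assuming delivery is a JSON POST made with the requests library (both assumptions, not part of the original):

import requests  # assumption: plain HTTP delivery of events

def forward_event(self, callback_url, event, topic):
    # POST the decoded event to the subscriber's callback and return the HTTP
    # status code, so the caller can decide whether to commit the Kafka offset.
    response = requests.post(callback_url,
                             json={'topic': topic, 'event': event},
                             timeout=5)
    return response.status_code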
Example #7
    def consume_topic(self, topic, group, temp_dir):
        '''
        Receive messages from the Friendsquare topic, save them to a temporary
        file under temp_dir, then transfer the file to HDFS.
        '''

        # Create a kafka receiver to grab messages
        kafka_receiver = SimpleConsumer(kafka,
                                        group,
                                        topic,
                                        max_buffer_size=1310720000)

        # Create a temp file to store messages
        self.temp_file_path = "%s/%s.txt" % (temp_dir, str(self.count))

        temp_file = open(self.temp_file_path, 'w')

        hdfs_output_dir = "%s/%s" % (self.hdfs_dir, topic)

        # Create a hdfs directory to store output files
        os.system("hdfs dfs -mkdir -p %s" % hdfs_output_dir)

        while self.count < self.max_count:

            # Get 1000 messages each time
            messages = kafka_receiver.get_messages(count=1000, block=False)

            if not messages:
                continue

            # Write the messages to a file, one message per line
            for message in messages:
                temp_file.write(message.message.value + '\n')

            # Set each file size at 20 M
            if temp_file.tell() > 20000000:
                temp_file.close()

                # Put the file to hdfs
                hdfs_path = "%s/%s.txt" % (hdfs_output_dir, self.count)
                os.system("hdfs dfs -put -f %s %s" %
                          (self.temp_file_path, hdfs_path))

                #remove the old file
                os.remove(self.temp_file_path)

                #  Create a new temp file to store messages
                self.count += 1
                self.temp_file_path = "%s/%s.txt" % (temp_dir, str(self.count))
                temp_file = open(self.temp_file_path, 'w')

            # Inform zookeeper of position in the kafka queue
            kafka_receiver.commit()

        temp_file.close()
Example #8
    def consume_topic(self, topic, group, temp_dir):
        '''
        Receive messages from Kafka, save them to a temporary file first,
        then transfer the file to HDFS.
        '''
        # Create a kafka receiver to grab messages
        kafka_receiver = SimpleConsumer(kafka,
                                        group,
                                        topic,
                                        max_buffer_size=1310720000)

        self.timestamp = self.getTimestamp()
        # Create a temp file to store messages
        self.temp_file_path = "%s/%s_%s.txt" % (temp_dir, self.timestamp,
                                                str(self.count))

        temp_file = open(self.temp_file_path, 'w')

        while self.count < self.max_count:
            # Get 100 messages each time
            messages = kafka_receiver.get_messages(count=100, block=False)
            if not messages:
                continue

            # Write the messages to a file, one message per line
            for message in messages:
                temp_file.write(message.message.value + '\n')

            # For structured streaming, files need to be small at this point, set the size at 2 M
            if temp_file.tell() > 2000000:
                temp_file.close()

                # Copy the file to hdfs
                output_dir = "%s/%s" % (self.hdfs_dir, topic)
                os.system("hdfs dfs -mkdir %s" % output_dir)
                hdfs_path = "%s/%s_%s.txt" % (output_dir, self.timestamp,
                                              self.count)
                os.system("hdfs dfs -put -f %s %s" %
                          (self.temp_file_path, hdfs_path))

                #remove the old file
                os.remove(self.temp_file_path)

                #  Create a new temp file to store messages
                self.count += 1
                self.timestamp = self.getTimestamp()
                self.temp_file_path = "%s/%s_%s.txt" % (
                    temp_dir, self.timestamp, str(self.count))
                temp_file = open(self.temp_file_path, 'w')

            # Inform zookeeper of position in the kafka queue
            kafka_receiver.commit()

        temp_file.close()
Example #9
def consume_save(group, topic):
    tmp_save = open(tmp_file_path, "w")
    kafka_consumer = SimpleConsumer(kafka, group, topic)
    messages = kafka_consumer.get_messages(count=1000, block=False)
    if not messages:
        print "Consumer didn't read any messages"
    for message in messages:
        tmp_save.write(message.message.value + "\n")
        #print message.message.value + "\n"
    kafka_consumer.commit()  # inform zookeeper of position in the kafka queue
    print ".... ... .. .."
    print "Message from topic \"%s\" consumed \n" % topic
Example #10
def consume_save(group, topic):
    i = 0
    tmp_save = open(tmp_file_path, "w")
    while True:
        kafka_consumer = SimpleConsumer(kafka, group, topic)
        messages = kafka_consumer.get_messages(count=1000, block=False)
        #if not messages:
        #    print "Consumer didn't read any messages"
        for message in messages:
            tmp_save.write(message.message.value + "\n")
            print message.message.value + "\n"
        # file size > 20MB
        if tmp_save.tell() > 20000000:
            push_to_hdfs(tmp_file_path)
        kafka_consumer.commit()  # inform zookeeper of position in the kafka queue
Example #11
    def test_simple_consumer_commit_does_not_raise(self):
        client = MagicMock()
        client.get_partition_ids_for_topic.return_value = [0, 1]

        def mock_offset_fetch_request(group, payloads, **kwargs):
            return [
                OffsetFetchResponsePayload(p.topic, p.partition, 0, b'', 0)
                for p in payloads
            ]

        client.send_offset_fetch_request.side_effect = mock_offset_fetch_request

        def mock_offset_commit_request(group, payloads, **kwargs):
            raise FailedPayloadsError(payloads[0])

        client.send_offset_commit_request.side_effect = mock_offset_commit_request

        consumer = SimpleConsumer(client,
                                  group='foobar',
                                  topic='topic',
                                  partitions=[0, 1],
                                  auto_commit=False)

        # Mock internal commit check
        consumer.count_since_commit = 10

        # This should not raise an exception
        self.assertFalse(consumer.commit(partitions=[0, 1]))
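The test above pins down a useful property of SimpleConsumer.commit(): a FailedPayloadsError from the offset commit request is swallowed and reported through the return value rather than raised. Calling code can therefore check the result, roughly like this (the log message is just an illustration):

import logging

# commit() returns False when the offset commit fails, so the caller can log
# and move on instead of wrapping the call in try/except.
if not consumer.commit(partitions=[0, 1]):
    logging.warning("offset commit failed; offsets will be retried on the next commit")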
Example #12
class KafkaSpout(Spout):

	def initialize(self, stormconf, context):
		# self.words = itertools.cycle(['dog', 'cat',
		# 								'zebra', 'elephant'])
		self.kafka = KafkaClient("cloud.soumet.com:9092")
		self.consumer = SimpleConsumer(self.kafka, "storm", "realtime", max_buffer_size=1310720000)
		



	def next_tuple(self):
		for message in self.consumer.get_messages(count=500, block=False):#, timeout=1):
			#transaction_data = TransactionFull()
			#transaction_data.ParseFromString(base64.b64decode(message.message.value))
			#self.emit([transaction_data])
			self.emit([message.message.value])
		self.consumer.commit()
Example #13
        def consume_topic(callback_url, consumer_group, topic):
            consumer = None
            try:
                consumer = SimpleConsumer(self.kafka, consumer_group, topic, auto_commit=False)
                messages_read = 0

                # we can't read messages infinitely here as we have
                # a lot of topics/subscribers (much more than threadpool size)
                while messages_read < self.max_read_messages_per_cycle:

                    # get one message and monitor the time
                    start = monitoring.start_time_measure()
                    message = consumer.get_message(block=False)
                    ms_elapsed = monitoring.stop_time_measure(start)
                    self.metrics['kafka_read'].add({'topic': topic}, ms_elapsed)

                    # if we don't have messages for this topic/subscriber - quit and give chance to others
                    if message is None:
                        logging.info('No messages for topic: %s and callback: %s, quitting the thread', topic, callback_url)
                        break

                    try:
                        event = json.loads(message.message.value.decode('utf-8'))
                        response_status = self.forward_event(callback_url, event, topic)

                        # if status is success - mark message as consumed by this subscriber
                        if 200 <= response_status < 300:
                            consumer.commit()
                        else:
                            logging.info('Received error response from consumer: %s', response_status)
                    except:
                        logging.error("Exception while sending event to consumer")
                        logging.error(traceback.format_exc())
                    finally:
                        messages_read += 1
                return messages_read

            except UnknownTopicOrPartitionError:
                logging.error('Adding %s to skip list', topic)
            except:
                logging.exception('failed to create kafka client')
            finally:
                if consumer is not None:
                    consumer.stop()
Example #14
class QueueKafka(QueueBase.QueueBase):
    @QueueBase.catch
    def __init__(self, name, host='web14', port=51092, **kwargs):
        QueueBase.QueueBase.__init__(self, name, host, port)
        self.__queue = []
        self.__kafka = KafkaClient('%s:%d' % (host, port))
        self.__producer = SimpleProducer(self.__kafka, async=kwargs.get('async', False))
        self.__producer.client.ensure_topic_exists(self.name)
        self.__consumer = SimpleConsumer(self.__kafka, self.name + '_consumer', self.name, auto_commit_every_n=1)

    def __del__(self):
        if self.__kafka:
            [self.put(x.message.value) for x in self.__queue]
            self.__kafka.close()

    @QueueBase.catch
    def put(self, value, *args, **kwargs):
        if isinstance(value, dict) or isinstance(value, list):
            self.__producer.send_messages(self.name, json.dumps(value))
        else:
            self.__producer.send_messages(self.name, value.encode('utf-8') if isinstance(value, unicode) else value)

    @QueueBase.catch
    def get(self, *args, **kwargs):
        if not self.__queue:
            self.__consumer._fetch()
            kq = self.__consumer.queue
            while not kq.empty():
                partition, result = kq.get_nowait()
                self.__queue.append(result)
                self.__consumer.offsets[partition] += 1
                self.__consumer.count_since_commit += 1
            self.__consumer.queue = Queue()
            self.__consumer.commit()
        return self.__queue.pop().message.value if self.__queue else None

    @QueueBase.catch
    def size(self, *args, **kwargs):
        count = 0
        for k, v in self.__consumer.offsets.items():
            reqs = [common.OffsetRequest(self.name, k, -1, 1)]
            (resp, ) = self.__consumer.client.send_offset_request(reqs)
            count += (resp.offsets[0] - v)
        return count + len(self.__queue)
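A rough usage sketch for the QueueKafka wrapper above, using only the methods shown; the queue name and payloads are made up, and QueueBase is assumed to be importable from the surrounding project:

# Illustrative only: push a couple of records through the Kafka-backed queue.
q = QueueKafka('crawl_tasks', host='web14', port=51092)
q.put({'url': 'http://example.com', 'retry': 0})  # dicts/lists are JSON-encoded
q.put(u'plain unicode payload')                   # unicode is UTF-8 encoded
print q.size()  # pending messages: broker tail offset minus consumer offset
print q.get()   # pops one message value, or None when nothing is buffered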
Example #15
def consume_topic(topic, group, output_dir, frequency):
    global timestamp, tempfile_path, tempfile
    print "Consuming from topic '%s' in consumer group %s into %s..." % (topic, group, output_dir)
    #get timestamp
    kafka_consumer = SimpleConsumer(kafka, group, topic, max_buffer_size=1310720000)
    
    while True:
        messages = kafka_consumer.get_messages(count=1000, block=False) #get 1000 messages at a time, non blocking
        if not messages:
            os.system("sleep 30s")
            continue
            #break
        for message in messages: #OffsetAndMessage(offset=43, message=Message(magic=0, attributes=0, key=None, value='some message'))
            print message
        kafka_consumer.commit() #save position in the kafka queue
    #exit loop
    if log_has_at_least_one:
        flush_to_hdfs(output_dir, topic)
    kafka_consumer.commit() #save position in the kafka queue
    return 0
Example #16
    def test_simple_consumer_commit_does_not_raise(self):
        client = MagicMock()
        client.get_partition_ids_for_topic.return_value = [0, 1]

        def mock_offset_fetch_request(group, payloads, **kwargs):
            return [OffsetFetchResponsePayload(p.topic, p.partition, 0, b'', 0) for p in payloads]

        client.send_offset_fetch_request.side_effect = mock_offset_fetch_request

        def mock_offset_commit_request(group, payloads, **kwargs):
            raise FailedPayloadsError(payloads[0])

        client.send_offset_commit_request.side_effect = mock_offset_commit_request

        consumer = SimpleConsumer(client, group='foobar',
                                  topic='topic', partitions=[0, 1],
                                  auto_commit=False)

        # Mock internal commit check
        consumer.count_since_commit = 10

        # This should not raise an exception
        self.assertFalse(consumer.commit(partitions=[0, 1]))
Example #17
    def _run(self):
        pcount = 0
        while True:
            try:
                self._logger.info("New KafkaClient %d" % self._partition)
                kafka = KafkaClient(self._brokers, str(os.getpid()))
                try:
                    consumer = SimpleConsumer(kafka,
                                              self._group,
                                              self._topic,
                                              buffer_size=4096 * 4,
                                              max_buffer_size=4096 * 32)
                    #except:
                except Exception as ex:
                    template = "Consumer Failure {0} occured. Arguments:\n{1!r}"
                    messag = template.format(type(ex).__name__, ex.args)
                    self._logger.info("%s" % messag)
                    raise gevent.GreenletExit

                self._logger.info("Starting %d" % self._partition)

                # Find the offset of the last message that has been queued
                consumer.seek(0, 2)
                try:
                    mi = consumer.get_message(timeout=0.1)
                except common.OffsetOutOfRangeError:
                    mi = None
                #import pdb; pdb.set_trace()
                self._logger.info("Last Queued for %d is %s" % \
                                  (self._partition,str(mi)))

                # start reading from last previously processed message
                consumer.seek(0, 1)

                if mi != None:
                    count = 0
                    self._logger.info("Catching Up %d" % self._partition)
                    loff = mi.offset
                    coff = 0
                    while True:
                        try:
                            mm = consumer.get_message(timeout=None)
                            count += 1
                            if not self.msg_handler(mm):
                                self._logger.info("%d could not process %s" %
                                                  (self._partition, str(mm)))
                                raise gevent.GreenletExit
                            consumer.commit()
                            coff = mm.offset
                            self._logger.info("Syncing offset %d" % coff)
                            if coff == loff:
                                break
                        except Exception as ex:
                            self._logger.info("Sync Error %s" % str(ex))
                            break
                    if coff != loff:
                        self._logger.info("Sync Failed for %d count %d" %
                                          (self._partition, count))
                        continue
                    else:
                        self._logger.info("Sync Completed for %d count %d" %
                                          (self._partition, count))

                if self._limit:
                    raise gevent.GreenletExit

                while True:
                    try:
                        mm = consumer.get_message(timeout=None)
                        if mm is None:
                            continue
                        consumer.commit()
                        pcount += 1
                        if not self.msg_handler(mm):
                            self._logger.info("%d could not handle %s" %
                                              (self._partition, str(mm)))
                            raise gevent.GreenletExit
                    except TypeError:
                        gevent.sleep(0.1)
                    except common.FailedPayloadsError as ex:
                        self._logger.info("Payload Error: %s" % str(ex.args))
                        gevent.sleep(0.1)
            except gevent.GreenletExit:
                break
            except Exception as ex:
                template = "An exception of type {0} occured. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.info("%s" % messag)
                gevent.sleep(1)
        self._logger.info("Stopping %d pcount %d" % (self._partition, pcount))
        return self._partoffset, self._partdb
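The seek() calls in the loop above use kafka-python's SimpleConsumer.seek(offset, whence), where whence=0 is relative to the earliest available offset, whence=1 to the current position, and whence=2 to the latest (tail) offset. Restating the two calls from the example:

consumer.seek(0, 2)                      # jump to the tail to learn the last queued offset
mi = consumer.get_message(timeout=0.1)   # peek at that last message, if any
consumer.seek(0, 1)                      # then resume from the current (committed) position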
Example #18
from __future__ import absolute_import, print_function  #, unicode_literals

import itertools
from streamparse.spout import Spout

import base64
import sys

from kafka import KafkaClient, SimpleProducer, SimpleConsumer
#from kafka.client import KafkaClient
#from kafka.consumer import SimpleConsumer

kafka = KafkaClient("cloud.soumet.com:9092")
kafka_consumer = SimpleConsumer(
    kafka, "storm", "realtime",
    max_buffer_size=1310720000)  #, max_buffer_size=1310720000)

for message in kafka_consumer.get_messages(
        count=5000, block=False):  #, block=True, timeout=4):
    print(message.message.value)

kafka_consumer.commit()
Example #19
    def _run(self):
        pcount = 0
        while True:
            try:
                self._logger.error("New KafkaClient %d" % self._partition)
                self._kfk = KafkaClient(self._brokers, str(os.getpid()))
                try:
                    consumer = SimpleConsumer(self._kfk,
                                              self._group,
                                              self._topic,
                                              buffer_size=4096 * 4,
                                              max_buffer_size=4096 * 32)
                    #except:
                except Exception as ex:
                    template = "Consumer Failure {0} occured. Arguments:\n{1!r}"
                    messag = template.format(type(ex).__name__, ex.args)
                    self._logger.info("%s" % messag)
                    raise RuntimeError(messag)

                self._logger.error("Starting %d" % self._partition)

                # Find the offset of the last message that has been queued
                consumer.seek(0, 2)
                try:
                    mi = consumer.get_message(timeout=0.1)
                    consumer.commit()
                except common.OffsetOutOfRangeError:
                    mi = None
                #import pdb; pdb.set_trace()
                self._logger.info("Last Queued for %d is %s" % \
                                  (self._partition,str(mi)))

                # start reading from last previously processed message
                if mi != None:
                    consumer.seek(0, 1)
                else:
                    consumer.seek(0, 0)

                if self._limit:
                    raise gevent.GreenletExit

                while True:
                    try:
                        self.resource_check()
                        mlist = consumer.get_messages(10, timeout=0.2)
                        for mm in mlist:
                            if mm is None:
                                continue
                            self._logger.debug("%d Reading offset %d" % \
                                    (self._partition, mm.offset))
                            consumer.commit()
                            pcount += 1
                            if not self.msg_handler(mm):
                                self._logger.info("%d could not handle %s" %
                                                  (self._partition, str(mm)))
                                raise gevent.GreenletExit
                    except TypeError as ex:
                        self._logger.error("Type Error: %s trace %s" % \
                                (str(ex.args), traceback.format_exc()))
                        gevent.sleep(0.1)
                    except common.FailedPayloadsError as ex:
                        self._logger.error("Payload Error: %s" % str(ex.args))
                        gevent.sleep(0.1)
            except gevent.GreenletExit:
                break
            except AssertionError as ex:
                self._partoffset = ex
                break
            except Exception as ex:
                template = "An exception of type {0} occured. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.error("%s : traceback %s" % \
                                  (messag, traceback.format_exc()))
                self.stop_partition()
                gevent.sleep(2)

        partdb = {}
        for coll in self._uvedb.keys():
            partdb[coll] = {}
            for gen in self._uvedb[coll].keys():
                partdb[coll][gen] = {}
                for tab in self._uvedb[coll][gen].keys():
                    for rkey in self._uvedb[coll][gen][tab].keys():
                        uk = tab + ":" + rkey
                        partdb[coll][gen][uk] = \
                            set(self._uvedb[coll][gen][tab][rkey].keys())

        self._logger.error("Stopping %d pcount %d" % (self._partition, pcount))
        self.stop_partition()
        return self._partoffset, partdb
Example #20
                        ph.start()
                        workers[int(mm.key)] = ph
                elif mm.value == "stop":
                    #import pdb; pdb.set_trace()
                    if workers.has_key(int(mm.key)):
                        ph = workers[int(mm.key)]
                        gevent.kill(ph)
                        res, db = ph.get()
                        print "Returned " + str(res)
                        print "State :"
                        for k, v in db.iteritems():
                            print "%s -> %s" % (k, str(v))
                        del workers[int(mm.key)]
                else:
                    end_ready = True
                    cons.commit()
                    gevent.sleep(2)
                    break
        except TypeError:
            gevent.sleep(0.1)
        except common.FailedPayloadsError as ex:
            print "Payload Error: " + str(ex.args)
            gevent.sleep(0.1)
    lw = []
    for key, value in workers.iteritems():
        gevent.kill(value)
        lw.append(value)

    gevent.joinall(lw)
    print "Ending Consumers"
Example #21
    def _run(self):
        pcount = 0
        pause = False
        while True:
            try:
                if pause:
                    gevent.sleep(2)
                    pause = False
                self._logger.error("New KafkaClient %s" % self._topic)
                self._kfk = KafkaClient(self._brokers , "kc-" + self._topic)
                try:
                    consumer = SimpleConsumer(self._kfk, self._group, self._topic, buffer_size = 4096*4, max_buffer_size=4096*32)
                    #except:
                except Exception as ex:
                    template = "Consumer Failure {0} occured. Arguments:\n{1!r}"
                    messag = template.format(type(ex).__name__, ex.args)
                    self._logger.error("Error: %s trace %s" % \
                        (messag, traceback.format_exc()))
                    raise RuntimeError(messag)

                self._logger.error("Starting %s" % self._topic)

                # Find the offset of the last message that has been queued
                consumer.seek(-1,2)
                try:
                    mi = consumer.get_message(timeout=0.1)
                    consumer.commit()
                except common.OffsetOutOfRangeError:
                    mi = None
                #import pdb; pdb.set_trace()
                self._logger.info("Last Queued for %s is %s" % \
                                  (self._topic,str(mi)))

                # start reading from last previously processed message
                if mi != None:
                    consumer.seek(-1,1)
                else:
                    consumer.seek(0,0)

                if self._limit:
                    raise gevent.GreenletExit

                while True:
                    try:
                        mlist = consumer.get_messages(10,timeout=0.5)
                        if not self.msg_handler(mlist):
                            raise gevent.GreenletExit
                        consumer.commit()
                        pcount += len(mlist) 
                    except TypeError as ex:
                        self._logger.error("Type Error: %s trace %s" % \
                                (str(ex.args), traceback.format_exc()))
                        gevent.sleep(0.1)
                    except common.FailedPayloadsError as ex:
                        self._logger.error("Payload Error: %s" %  str(ex.args))
                        gevent.sleep(0.1)
            except gevent.GreenletExit:
                break
            except AssertionError as ex:
                self._partoffset = ex
                break
            except Exception as ex:
                template = "An exception of type {0} occured. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.error("%s : traceback %s" % \
                                  (messag, traceback.format_exc()))
                self.stop_partition()
                pause = True

        self._logger.error("Stopping %s pcount %d" % (self._topic, pcount))
        partdb = self.stop_partition()
        return self._partoffset, partdb
Example #22
                        ph.start()
                        workers[int(mm.key)] = ph
                elif mm.value == "stop":
                    #import pdb; pdb.set_trace()
                    if workers.has_key(int(mm.key)):
                        ph = workers[int(mm.key)]
                        gevent.kill(ph)
                        res,db = ph.get()
                        print "Returned " + str(res)
                        print "State :"
                        for k,v in db.iteritems():
                            print "%s -> %s" % (k,str(v)) 
                        del workers[int(mm.key)]
                else:
                    end_ready = True
                    cons.commit()
                    gevent.sleep(2)
                    break
        except TypeError:
            gevent.sleep(0.1)
        except common.FailedPayloadsError as ex:
            print "Payload Error: " + str(ex.args)
            gevent.sleep(0.1)
    lw = []
    for key, value in workers.iteritems():
        gevent.kill(value)
        lw.append(value)

    gevent.joinall(lw)
    print "Ending Consumers"
Example #23
from __future__ import absolute_import, print_function#, unicode_literals

import itertools
from streamparse.spout import Spout

import base64
import sys



from kafka import KafkaClient, SimpleProducer, SimpleConsumer
#from kafka.client import KafkaClient
#from kafka.consumer import SimpleConsumer


kafka = KafkaClient("cloud.soumet.com:9092")
kafka_consumer = SimpleConsumer(kafka, "storm", "realtime", max_buffer_size=1310720000)#, max_buffer_size=1310720000)
		
for message in kafka_consumer.get_messages(count=5000, block=False):#, block=True, timeout=4):
	print(message.message.value)

kafka_consumer.commit()
Example #24
class KafkaSimpleConsumer(object):
    """ Base class for consuming from kafka.
    Implement the logic to connect to kafka and consume messages.
    KafkaSimpleConsumer is a wrapper around kafka-python SimpleConsumer.
    KafkaSimpleConsumer relies on it in order to consume messages from kafka.
    KafkaSimpleConsumer does not catch exceptions raised by kafka-python.

    An instance of this class can be used as iterator
    to consume messages from kafka.

    .. warning:: This class is considered deprecated in favor of
                 :py:class:`yelp_kafka.consumer_group.KafkaConsumerGroup`.

    :param topic: topic to consume from.
    :type topic: string.
    :param config: consumer configuration.
    :type config: dict.
    :param partitions: topic partitions to consume from.
    :type partitions: list.
    """
    def __init__(self, topic, config, partitions=None):
        self.log = logging.getLogger(self.__class__.__name__)
        if not isinstance(topic, six.string_types):
            raise TypeError("Topic must be a string")
        self.topic = kafka_bytestring(topic)
        if partitions and not isinstance(partitions, list):
            raise TypeError("Partitions must be a list")
        self.partitions = partitions
        self.kafka_consumer = None
        self.config = config

    def connect(self):
        """ Connect to kafka and create a consumer.
        It uses config parameters to create a kafka-python
        KafkaClient and SimpleConsumer.
        """
        # Instantiate a kafka client connected to kafka.
        self.client = KafkaClient(self.config.broker_list,
                                  client_id=self.config.client_id)

        # Create a kafka SimpleConsumer.
        self.kafka_consumer = SimpleConsumer(
            client=self.client,
            topic=self.topic,
            partitions=self.partitions,
            **self.config.get_simple_consumer_args())
        self.log.debug(
            "Connected to kafka. Topic %s, partitions %s, %s", self.topic,
            self.partitions, ','.join([
                '{0} {1}'.format(k, v) for k, v in six.iteritems(
                    self.config.get_simple_consumer_args())
            ]))
        self.kafka_consumer.provide_partition_info()

    def __iter__(self):
        for partition, kafka_message in self.kafka_consumer:
            yield Message(
                partition=partition,
                offset=kafka_message[0],
                key=kafka_message[1].key,
                value=kafka_message[1].value,
            )

    def __enter__(self):
        self.connect()
        return self

    def __exit__(self, type, value, tb):
        self.close()

    def close(self):
        """Disconnect from kafka.
        If auto_commit is enabled commit offsets before disconnecting.
        """
        if self.kafka_consumer.auto_commit is True:
            try:
                self.commit()
            except:
                self.log.exception("Commit error. "
                                   "Offsets may not have been committed")
        # Close all the connections to kafka brokers. KafkaClient open
        # connections to all the partition leaders.
        self.client.close()

    def get_message(self, block=True, timeout=0.1):
        """Get message from kafka. It supports the same arguments of get_message
        in kafka-python SimpleConsumer.

        :param block: If True, the API will block till at least a message is fetched.
        :type block: boolean
        :param timeout: If block is True, the function will block for the specified
                        time (in seconds).
                        If None, it will block forever.

        :returns: a Kafka message
        :rtype: Message namedtuple, which consists of: partition number,
                offset, key, and message value
        """
        fetched_message = self.kafka_consumer.get_message(block, timeout)
        if fetched_message is None:
            # get_message timed out and returned None
            return None
        else:
            partition, kafka_message = fetched_message
            return Message(
                partition=partition,
                offset=kafka_message[0],
                key=kafka_message[1].key,
                value=kafka_message[1].value,
            )

    def commit(self, partitions=None):
        """Commit offset for this consumer group
        :param partitions: list of partitions to commit, default commits to all
        partitions.
        :return: True on success, False on failure.
        """
        if partitions:
            return self.kafka_consumer.commit(partitions)
        else:
            return self.kafka_consumer.commit()

    def commit_message(self, message):
        """Commit the message offset for this consumer group. This function does not
        take care of the consumer offset tracking. It should only be used if
        auto_commit is disabled and the commit function never called.

        .. note:: all the messages received before message itself will be committed
                  as consequence.

        :param message: message to commit.
        :type message: Message namedtuple, which consists of: partition number,
                       offset, key, and message value
        :return: True on success, False on failure.
        """
        reqs = [
            OffsetCommitRequest(
                self.topic,
                message.partition,
                message.offset,
                None,
            )
        ]

        try:
            if self.config.offset_storage in [None, 'zookeeper', 'dual']:
                self.client.send_offset_commit_request(self.config.group_id,
                                                       reqs)
            if self.config.offset_storage in ['kafka', 'dual']:
                self.client.send_offset_commit_request_kafka(
                    self.config.group_id, reqs)
        except KafkaError as e:
            self.log.error("%s saving offsets: %s", e.__class__.__name__, e)
            return False
        else:
            return True
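Per its docstring, the wrapper above is meant to be used as an iterator. A rough usage sketch, assuming a config object that provides broker_list, client_id, group_id and get_simple_consumer_args() as the class expects; the topic name and process() are placeholders:

# Illustrative only: iterate messages through the (deprecated) wrapper.
consumer = KafkaSimpleConsumer('test_topic', config)
consumer.connect()
try:
    for message in consumer:
        # message is the Message namedtuple: partition, offset, key, value
        process(message.value)            # process() stands in for real work
        consumer.commit_message(message)  # only if auto_commit is disabled
finally:
    consumer.close()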
Example #25
    def _run(self):
        pcount = 0
        pause = False
        while True:
            try:
                if pause:
                    gevent.sleep(2)
                    pause = False
                self._logger.error("New KafkaClient %s" % self._topic)
                self._kfk = KafkaClient(self._brokers , "kc-" + self._topic)
                self._failed = False
                try:
                    consumer = SimpleConsumer(self._kfk, self._group, self._topic, buffer_size = 4096*4, max_buffer_size=4096*32)
                    #except:
                except Exception as ex:
                    template = "Consumer Failure {0} occured. Arguments:\n{1!r}"
                    messag = template.format(type(ex).__name__, ex.args)
                    self._logger.error("Error: %s trace %s" % \
                        (messag, traceback.format_exc()))
		    self._failed = True
                    raise RuntimeError(messag)

                self._logger.error("Starting %s" % self._topic)

                # Find the offset of the last message that has been queued
                consumer.seek(-1,2)
                try:
                    mi = consumer.get_message(timeout=0.1)
                    consumer.commit()
                except common.OffsetOutOfRangeError:
                    mi = None
                #import pdb; pdb.set_trace()
                self._logger.info("Last Queued for %s is %s" % \
                                  (self._topic,str(mi)))

                # start reading from last previously processed message
                if mi != None:
                    consumer.seek(-1,1)
                else:
                    consumer.seek(0,0)

                if self._limit:
                    raise gevent.GreenletExit

                while True:
                    try:
                        mlist = consumer.get_messages(10,timeout=0.5)
                        if not self.msg_handler(mlist):
                            raise gevent.GreenletExit
                        consumer.commit()
                        pcount += len(mlist) 
                    except TypeError as ex:
                        self._logger.error("Type Error: %s trace %s" % \
                                (str(ex.args), traceback.format_exc()))
                        gevent.sleep(0.1)
                    except common.FailedPayloadsError as ex:
                        self._logger.error("Payload Error: %s" %  str(ex.args))
                        gevent.sleep(0.1)
            except gevent.GreenletExit:
                break
            except AssertionError as ex:
                self._partoffset = ex
                break
            except Exception as ex:
                template = "An exception of type {0} occured. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.error("%s : traceback %s" % \
                                  (messag, traceback.format_exc()))
                self.stop_partition()
		self._failed = True
                pause = True

        self._logger.error("Stopping %s pcount %d" % (self._topic, pcount))
        partdb = self.stop_partition()
        return self._partoffset, partdb
Example #26
    def _run(self):
        pcount = 0
        while True:
            try:
                self._logger.info("New KafkaClient %d" % self._partition)
                kafka = KafkaClient(self._brokers ,str(os.getpid()))
                try:
                    consumer = SimpleConsumer(kafka, self._group, self._topic, buffer_size = 4096*4, max_buffer_size=4096*32)
                    #except:
                except Exception as ex:
                    template = "Consumer Failure {0} occured. Arguments:\n{1!r}"
                    messag = template.format(type(ex).__name__, ex.args)
                    self._logger.info("%s" % messag)
                    raise RuntimeError(messag)

                self._logger.info("Starting %d" % self._partition)

                # Find the offset of the last message that has been queued
                consumer.seek(0,2)
                try:
                    mi = consumer.get_message(timeout=0.1)
                    consumer.commit()
                except common.OffsetOutOfRangeError:
                    mi = None
                #import pdb; pdb.set_trace()
                self._logger.info("Last Queued for %d is %s" % \
                                  (self._partition,str(mi)))
                self.start_partition()

                # start reading from last previously processed message
                consumer.seek(0,1)

                if self._limit:
                    raise gevent.GreenletExit

                while True:
                    try:
                        mm = consumer.get_message(timeout=None)
                        if mm is None:
                            continue
                        self._logger.debug("%d Reading offset %d" % (self._partition, mm.offset))
                        consumer.commit()
                        pcount += 1
                        if not self.msg_handler(mm):
                            self._logger.info("%d could not handle %s" % (self._partition, str(mm)))
                            raise gevent.GreenletExit
                    except TypeError:
                        gevent.sleep(0.1)
                    except common.FailedPayloadsError as ex:
                        self._logger.info("Payload Error: %s" %  str(ex.args))
                        gevent.sleep(0.1)
            except gevent.GreenletExit:
                break
            except Exception as ex:
                template = "An exception of type {0} occured. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.info("%s : traceback %s" % \
                                  (messag, traceback.format_exc()))
                self.stop_partition()
                gevent.sleep(2)
        self._logger.info("Stopping %d pcount %d" % (self._partition, pcount))
        return self._partoffset, self._partdb
Example #27
    def _run(self):
        pcount = 0
        while True:
            try:
                self._logger.error("New KafkaClient %d" % self._partition)
                self._kfk = KafkaClient(self._brokers ,str(os.getpid()))
                try:
                    consumer = SimpleConsumer(self._kfk, self._group, self._topic, buffer_size = 4096*4, max_buffer_size=4096*32)
                    #except:
                except Exception as ex:
                    template = "Consumer Failure {0} occured. Arguments:\n{1!r}"
                    messag = template.format(type(ex).__name__, ex.args)
                    self._logger.info("%s" % messag)
                    raise RuntimeError(messag)

                self._logger.error("Starting %d" % self._partition)

                # Find the offset of the last message that has been queued
                consumer.seek(0,2)
                try:
                    mi = consumer.get_message(timeout=0.1)
                    consumer.commit()
                except common.OffsetOutOfRangeError:
                    mi = None
                #import pdb; pdb.set_trace()
                self._logger.info("Last Queued for %d is %s" % \
                                  (self._partition,str(mi)))

                # start reading from last previously processed message
                if mi != None:
                    consumer.seek(0,1)
                else:
                    consumer.seek(0,0)

                if self._limit:
                    raise gevent.GreenletExit

                while True:
                    try:
                        self.resource_check()
                        mlist = consumer.get_messages(10,timeout=0.2)
                        for mm in mlist:
                            if mm is None:
                                continue
                            self._logger.debug("%d Reading offset %d" % \
                                    (self._partition, mm.offset))
                            consumer.commit()
                            pcount += 1
                            if not self.msg_handler(mm):
                                self._logger.info("%d could not handle %s" % (self._partition, str(mm)))
                                raise gevent.GreenletExit
                    except TypeError as ex:
                        self._logger.error("Type Error: %s trace %s" % \
                                (str(ex.args), traceback.format_exc()))
                        gevent.sleep(0.1)
                    except common.FailedPayloadsError as ex:
                        self._logger.error("Payload Error: %s" %  str(ex.args))
                        gevent.sleep(0.1)
            except gevent.GreenletExit:
                break
            except AssertionError as ex:
                self._partoffset = ex
                break
            except Exception as ex:
                template = "An exception of type {0} occured. Arguments:\n{1!r}"
                messag = template.format(type(ex).__name__, ex.args)
                self._logger.error("%s : traceback %s" % \
                                  (messag, traceback.format_exc()))
                self.stop_partition()
                gevent.sleep(2)

        partdb = {}
        for coll in self._uvedb.keys():
            partdb[coll] = {}
            for gen in self._uvedb[coll].keys():
                partdb[coll][gen] = {}
                for tab in self._uvedb[coll][gen].keys():
                    for rkey in self._uvedb[coll][gen][tab].keys():
                        uk = tab + ":" + rkey
                        partdb[coll][gen][uk] = \
                            set(self._uvedb[coll][gen][tab][rkey].keys())

        self._logger.error("Stopping %d pcount %d" % (self._partition, pcount))
        self.stop_partition()
        return self._partoffset, partdb
Example #28
# stdlib
from collections import defaultdict

# 3p
from kafka import SimpleClient, SimpleConsumer

kafka_conn = SimpleClient("192.168.208.2:9092")
consumer = SimpleConsumer(kafka_conn,
                          "sample_check",
                          "test-topic",
                          auto_commit=True)

for message in consumer.get_messages(count=10):
    print message.offset
    consumer.commit()
Example #29
class AbstractPersister(threading.Thread):
    def __init__(self, kafka_conf, influxdb_conf):

        super(AbstractPersister, self).__init__()

        kafka = KafkaClient(kafka_conf.uri)
        self._consumer = SimpleConsumer(
            kafka,
            kafka_conf.group_id,
            kafka_conf.topic,
            # Set to true even though we actually do
            # the commits manually. Needed to
            # initialize
            # offsets correctly.
            auto_commit=True,
            # Make these values None so that the
            # manual commit will do the actual
            # commit.
            # Needed so that offsets are initialized
            # correctly. If not done, then restarts
            # will reread messages from beginning of
            # the queue.
            auto_commit_every_n=None,
            auto_commit_every_t=None,
            iter_timeout=1)

        self._influxdb_client = InfluxDBClient(influxdb_conf.ip_address,
                                               influxdb_conf.port,
                                               influxdb_conf.user,
                                               influxdb_conf.password,
                                               influxdb_conf.database_name)

        self._max_wait_time_secs = kafka_conf.max_wait_time_seconds
        self._batch_size = kafka_conf.batch_size
        self._kafka_topic = kafka_conf.topic

        self._json_body = []
        self._last_flush = datetime.now()

    @abc.abstractmethod
    def process_message(self, message):
        pass

    def _flush(self):

        if self._json_body:
            self._influxdb_client.write_points(self._json_body)
            self._consumer.commit()
            LOG.info("processed {} messages from topic '{}'".format(
                len(self._json_body), self._kafka_topic))
            self._json_body = []
        self._last_flush = datetime.now()

    def run(self):

        try:

            while True:

                delta_time = datetime.now() - self._last_flush
                if delta_time.seconds > self._max_wait_time_secs:
                    self._flush()

                for message in self._consumer:
                    try:
                        self._json_body.append(self.process_message(message))
                    except Exception:
                        LOG.exception('Error processing message. Message is '
                                      'being dropped. {}'.format(message))
                    if len(self._json_body) >= self._batch_size:
                        self._flush()

        except:
            LOG.exception(
                'Persister encountered fatal exception processing messages. '
                'Shutting down all threads and exiting')
            os._exit(1)
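AbstractPersister leaves process_message abstract. A minimal concrete subclass might look like the sketch below; the field names, the assumption that messages carry JSON-encoded metrics, and the InfluxDB point layout are illustrative, not taken from the original project:

import json

class MetricPersister(AbstractPersister):
    # Hypothetical subclass: turn one Kafka message into an InfluxDB point dict
    # that write_points() in _flush() can accept.
    def process_message(self, message):
        metric = json.loads(message.message.value)
        return {'measurement': metric['name'],
                'tags': metric.get('dimensions', {}),
                'fields': {'value': metric['value']},
                'time': metric['timestamp']}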
Example #30
class kafka:
    def __init__(self, host, port, table_name, **args):
        """
        :param host
        :param port
        :param table_name
        :return: kafka
        """
        self.queue = []
        self.queue_name = table_name.replace(":", "_")
        # self.kafka = KafkaClient('%s:%d' % (host, port))
        self.kafka = KafkaClient(hosts=host, client_id=self.queue_name)
        self.producer = SimpleProducer(
            self.kafka,
            async=args['async'] if args.has_key('async') else False)
        self.producer.client.ensure_topic_exists(self.queue_name)
        self.consumer = SimpleConsumer(self.kafka,
                                       self.queue_name + "_consumer",
                                       self.queue_name,
                                       auto_commit_every_n=1,
                                       max_buffer_size=None)

        print 'successfully initialized kafka connection'

    def __del__(self):
        if self.kafka:
            [self.save(x.message.value) for x in self.queue]
            self.kafka.close()

    def save(self, data, **args):
        try:
            if isinstance(data, dict) or isinstance(data, list):
                self.producer.send_messages(self.queue_name, json.dumps(data))
            elif isinstance(data, unicode):
                self.producer.send_messages(self.queue_name,
                                            data.encode('utf-8'))
            else:
                self.producer.send_messages(self.queue_name, data)
        except Exception as e:
            print e
            time.sleep(60)

    def get(self, **args):
        #         self.consumer.seek(369600, 0)
        if not self.queue:
            try:
                self.consumer._fetch()
            except Exception as e:
                print e
            kq = self.consumer.queue
            while not kq.empty():
                partition, result = kq.get_nowait()
                self.queue.append(result)
                self.consumer.offsets[partition] += 1
                self.consumer.count_since_commit += 1

            self.consumer.queue = Queue()
            self.consumer.commit()

        if self.queue:
            return self.queue.pop().message.value
        else:
            return None

    def size(self, **args):
        count = 0
        for k, v in self.consumer.offsets.items():
            reqs = [common.OffsetRequest(self.queue_name, k, -1, 1)]
            (resp, ) = self.consumer.client.send_offset_request(reqs)
            count += (resp.offsets[0] - v)
        return count + len(self.queue)

    # Switch to a different queue (topic)
    def select_queue(self, name):
        self.queue_name = name.replace(":", "_")
        self.consumer = SimpleConsumer(self.kafka,
                                       self.queue_name + "_consumer",
                                       self.queue_name,
                                       max_buffer_size=None)
Example #31
class AbstractPersister(threading.Thread):
    def __init__(self, kafka_conf, influxdb_conf):

        super(AbstractPersister, self).__init__()

        kafka = KafkaClient(kafka_conf.uri)
        self._consumer = SimpleConsumer(kafka,
                                        kafka_conf.group_id,
                                        kafka_conf.topic,
                                        # Set to true even though we actually do
                                        # the commits manually. Needed to
                                        # initialize
                                        # offsets correctly.
                                        auto_commit=True,
                                        # Make these values None so that the
                                        # manual commit will do the actual
                                        # commit.
                                        # Needed so that offsets are initialized
                                        # correctly. If not done, then restarts
                                        # will reread messages from beginning of
                                        # the queue.
                                        auto_commit_every_n=None,
                                        auto_commit_every_t=None,
                                        iter_timeout=1)

        self._influxdb_client = InfluxDBClient(influxdb_conf.ip_address,
                                               influxdb_conf.port,
                                               influxdb_conf.user,
                                               influxdb_conf.password,
                                               influxdb_conf.database_name)

        self._max_wait_time_secs = kafka_conf.max_wait_time_seconds
        self._batch_size = kafka_conf.batch_size
        self._kafka_topic = kafka_conf.topic

        self._json_body = []
        self._last_flush = datetime.now()

    @abc.abstractmethod
    def process_message(self, message):
        pass

    def _flush(self):

        if self._json_body:
            self._influxdb_client.write_points(self._json_body)
            self._consumer.commit()
            LOG.info("processed {} messages from topic '{}'".format(
                len(self._json_body), self._kafka_topic))
            self._json_body = []
        self._last_flush = datetime.now()

    def run(self):

        try:

            while True:

                delta_time = datetime.now() - self._last_flush
                if delta_time.seconds > self._max_wait_time_secs:
                    self._flush()

                for message in self._consumer:
                    try:
                        self._json_body.append(self.process_message(message))
                    except Exception:
                        LOG.exception('Error processing message. Message is '
                                      'being dropped. {}'.format(message))
                    if len(self._json_body) >= self._batch_size:
                        self._flush()

        except:
            LOG.exception(
                'Persister encountered fatal exception processing messages. '
                'Shutting down all threads and exiting')
            os._exit(1)