Example 1
    def __init__(self, *args, **kwargs):
        import kafka
        import zlib  # used below for self.decompress_fun
        super(KafkaRandomReader, self).__init__(*args, **kwargs)
        brokers = self.read_option('brokers')
        group = self.read_option('group')
        topic = self.read_option('topic')

        # Coerce broker addresses to bytes for the legacy kafka-python client
        client = kafka.KafkaClient(map(bytes, brokers))

        # TODO: Remove these comments when the next steps are decided.
        # If resume is set to true, the child should not load initial offsets:
        # child_loads_initial_offsets = False if settings.get('RESUME') else True

        # self.consumer = kafka.MultiProcessConsumer(client, group, topic, num_procs=1,
        #                                            child_loads_initial_offsets=child_loads_initial_offsets,
        #                                            auto_commit=False)

        self.consumer = kafka.SimpleConsumer(client,
                                             group,
                                             topic,
                                             auto_commit=False)

        self.decompress_fun = zlib.decompress
        self.processor = self.create_processor()
        self.partitions = client.get_partition_ids_for_topic(topic)

        self.logger.info('KafkaRandomReader has been initiated. '
                         'Topic: {}. Group: {}'.format(topic, group))

        self.logger.info('Running random sampling')
        self._reservoir = self.fill_reservoir()
        self.logger.info('Random sampling completed, ready to process batches')
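
The fill_reservoir step is not shown in this example. As a rough illustration only, classic reservoir sampling (Algorithm R) over the consumed messages could look like the sketch below; the 'sample_size' option is an assumption, not part of the original reader:

import random

def fill_reservoir(self):
    # Sketch under assumptions: a 'sample_size' option exists and the
    # consumer's iteration eventually ends (e.g. iter_timeout is set).
    size = self.read_option('sample_size')
    reservoir = []
    for index, raw in enumerate(self.consumer):
        message = self.decompress_fun(raw.message.value)
        if index < size:
            reservoir.append(message)
        else:
            # Replace an existing sample with decreasing probability so
            # every message is equally likely to remain in the sample.
            slot = random.randint(0, index)
            if slot < size:
                reservoir[slot] = message
    return reservoir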
Example 2
    def consumer(self, topic):
        try:
            return kafka.SimpleConsumer(self.client, self.group, str(topic))
        except KeyError:
            # Topic does not exist yet: back off briefly and return an
            # empty list as a sentinel so the caller can retry.
            time.sleep(5)
            return []
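
Because consumer() returns an empty list as a sentinel after backing off, callers are expected to retry. A hedged usage sketch (wait_for_consumer and attempts are illustrative names, not from the original code):

def wait_for_consumer(reader, topic, attempts=5):
    # Each failed attempt already sleeps 5 seconds inside consumer()
    for _ in range(attempts):
        consumer = reader.consumer(topic)
        if consumer:  # an empty list means the topic did not exist yet
            return consumer
    raise RuntimeError("topic %r never appeared" % topic)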
Example 3

    def testStart(self):
        self.thread.start()
        time.sleep(15)
        self.producer.stop()

        message = kafka.SimpleConsumer(kafka.KafkaClient(self.url), "group1",
                                       self.topic).get_message()
        assert message is not None
Example 4
    def initialize(self):
        # `leaders` is defined in the surrounding module (not shown here)
        cluster = kafka.KafkaClient(leaders[0])
        self.consumer = kafka.SimpleConsumer(cluster,
                                             "default_group",
                                             "WikiTest",
                                             buffer_size=16384,
                                             max_buffer_size=(10 * 1024 *
                                                              1024))
        self.consumer.seek(0)
        self.counter = 0
Example 5
    def initialize(self):
        # `leaders` is defined in the surrounding module (not shown here)
        cluster = kafka.KafkaClient(leaders[0])
        self.consumer = kafka.SimpleConsumer(cluster,
                                             "default_group",
                                             "WikiTest",
                                             buffer_size=8192,
                                             max_buffer_size=(10 * 1024 *
                                                              1024))
        self.consumer.seek(0)
        self.counter = 0
        log.debug("Starting Kafka Consumer")
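
Examples 4 and 5 differ only in the initial buffer_size (16384 vs. 8192 bytes). In the legacy kafka-python SimpleConsumer, buffer_size is the starting fetch-buffer size and max_buffer_size caps how far the consumer may grow it when a fetched message does not fit; a message larger than max_buffer_size cannot be consumed at all, so the generous 10 MB ceiling here guards against oversized messages.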
Example 6
    def consume_forever(self, logger):
        """ consumer process receiving messages from the brokers """
        # get Kafka connection
        consumer_group = 'default_group'
        self.consumer = kafka.SimpleConsumer(self.con, consumer_group,
                                             self.topic)

        # read from Kafka
        for raw in self.consumer:
            consumed_at = datetime.datetime.now()
            msg = Message.from_string(raw.message.value)

            # log
            logger.log(msg, consumed_at)
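
Message.from_string above is project-specific and not shown. A minimal hypothetical stand-in, only to make the snippet self-contained:

class Message(object):
    # Hypothetical sketch; the real Message class is defined elsewhere.
    def __init__(self, body):
        self.body = body

    @classmethod
    def from_string(cls, raw):
        # `raw` is the payload bytes of the fetched Kafka message
        return cls(raw.decode('utf-8'))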
Example 7
import io
from datetime import datetime
from collections import Counter

import kafka
import avro.schema
from avro.datafile import DataFileWriter
from avro.io import DatumWriter

kafka_endpoint = "ip-172-31-23-112:9092"
topics = ["test02"]
consumer_group = "test_kafka_consumer"
kafka_client = kafka.KafkaClient(kafka_endpoint)

topic = topics[0]
consumer = kafka.SimpleConsumer(kafka_client, consumer_group, topic)

# Rewind to the beginning of the topic if a full replay is needed:
# consumer.seek(0, 0)


def dump_message(message):
    print("****")
    print(message)
    print("Message length: %s" % len(message))
    print("* Offset *")
    print(message[0])
    # Get the value back out of the Kafka consumer's fetched message
    print("* Message *")
    print(message[1].value)
    print(len(message[1].value))
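
SimpleConsumer yields (offset, message) pairs, which is why dump_message indexes message[0] and message[1]. Driving it is a plain loop over the consumer:

for message in consumer:
    dump_message(message)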
Example 8
def get_kafka_consumer(group, topic):
    client = get_kafka_client()  # helper defined elsewhere in the app
    return kafka.SimpleConsumer(
        client, group, topic,
        iter_timeout=app.config["CONSUMER_TIMEOUT"]
    )
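
With iter_timeout set, iterating the returned consumer stops once no message arrives within the timeout, instead of blocking forever. A hypothetical usage sketch (the group and topic names are made up):

consumer = get_kafka_consumer("group1", "test02")
for message in consumer:
    # message.message.value holds the raw payload bytes
    print(message.message.value)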