def __init__(self, *args, **kwargs):
    import kafka
    import zlib  # used below for decompress_fun; missing from the original snippet

    super(KafkaRandomReader, self).__init__(*args, **kwargs)
    brokers = self.read_option('brokers')
    group = self.read_option('group')
    topic = self.read_option('topic')

    client = kafka.KafkaClient(map(bytes, brokers))

    # TODO: Remove these comments when next steps are decided.
    # If resume is set to true, then the child should not load initial offsets:
    # child_loads_initial_offsets = False if settings.get('RESUME') else True
    # self.consumer = kafka.MultiProcessConsumer(client, group, topic, num_procs=1,
    #                                            child_loads_initial_offsets=child_loads_initial_offsets,
    #                                            auto_commit=False)

    self.consumer = kafka.SimpleConsumer(client, group, topic,
                                         auto_commit=False)
    self.decompress_fun = zlib.decompress
    self.processor = self.create_processor()
    self.partitions = client.get_partition_ids_for_topic(topic)

    self.logger.info('KafkaRandomReader has been initiated. '
                     'Topic: {}. Group: {}'.format(
                         self.read_option('topic'), self.read_option('group')))

    self.logger.info('Running random sampling')
    self._reservoir = self.fill_reservoir()
    self.logger.info('Random sampling completed, ready to process batches')
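# A minimal sketch of what the fill_reservoir() step above could look like,
# assuming classic reservoir sampling (Algorithm R) over consumed messages.
# The names `fetch_messages` and `sample_size` are hypothetical helpers, not
# part of the original reader.
import random

def fill_reservoir(fetch_messages, sample_size):
    """Keep a uniform random sample of `sample_size` messages from a stream."""
    reservoir = []
    for index, message in enumerate(fetch_messages()):
        if index < sample_size:
            reservoir.append(message)
        else:
            # Replace an existing element with probability sample_size / (index + 1)
            slot = random.randint(0, index)
            if slot < sample_size:
                reservoir[slot] = message
    return reservoir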
def consumer(self, topic):
    try:
        return kafka.SimpleConsumer(self.client, self.group, str(topic))
    except KeyError:
        # topic does not exist, hack in a back-off period
        time.sleep(5)
        return []
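# A hedged usage sketch for the back-off factory above: retry until the topic
# exists and a real consumer comes back. An empty list signals "topic not
# there yet"; `reader` and the retry bound are assumptions for illustration.
def consumer_with_retry(reader, topic, attempts=3):
    for _ in range(attempts):
        result = reader.consumer(topic)
        if result:
            return result
    raise RuntimeError("topic %r never appeared" % topic)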
def testStart(self):
    self.thread.start()
    time.sleep(15)

    self.producer.stop()

    message = kafka.SimpleConsumer(
        kafka.KafkaClient(self.url), "group1", self.topic
    ).get_message()
    assert message is not None
def initialize(self):
    # `leaders` is assumed to be defined elsewhere in the module
    # (a list of broker "host:port" strings)
    cluster = kafka.KafkaClient(leaders[0])
    self.consumer = kafka.SimpleConsumer(cluster, "default_group", "WikiTest",
                                         buffer_size=16384,
                                         max_buffer_size=(10 * 1024 * 1024))
    self.consumer.seek(0)
    self.counter = 0
def initialize(self):
    cluster = kafka.KafkaClient(leaders[0])
    self.consumer = kafka.SimpleConsumer(cluster, "default_group", "WikiTest",
                                         buffer_size=8192,
                                         max_buffer_size=(10 * 1024 * 1024))
    self.consumer.seek(0)
    self.counter = 0
    log.debug("Starting Kafka Consumer")
def consume_forever(self, logger):
    """Consumer process receiving messages from the brokers."""
    # get Kafka connection
    consumer_group = 'default_group'
    self.consumer = kafka.SimpleConsumer(self.con, consumer_group, self.topic)

    # read from Kafka
    for raw in self.consumer:
        consumed_at = datetime.datetime.now()
        msg = Message.from_string(raw.message.value)

        # log
        logger.log(msg, consumed_at)
import io
from datetime import datetime
from collections import Counter

import kafka
import avro.schema
from avro.datafile import DataFileWriter
from avro.io import DatumWriter

kafka_endpoint = "ip-172-31-23-112:9092"
topics = ["test02"]
consumer_group = "test_kafka_consumer"

kafka_client = kafka.KafkaClient(kafka_endpoint)
topic = topics[0]
consumer = kafka.SimpleConsumer(kafka_client, consumer_group, topic)

# reeeeeewiiiiiiind
#consumer.seek(0, 0)

def dump_message(message):
    print("****")
    print(message)
    print("Message length: %s" % len(message))
    print("* Offset *")
    print(message[0])
    # get the value back out of the kafka consumer's fetched message
    print("* Message *")
    print(message[1].value)
    print(len(message[1].value))
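# A hedged usage sketch for dump_message() above: drain a batch of buffered
# messages and print each (offset, message) pair. get_messages() is part of
# the old kafka-python SimpleConsumer API; the count and timeout values here
# are arbitrary choices for illustration.
for fetched in consumer.get_messages(count=10, timeout=5):
    dump_message(fetched)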
def get_kafka_consumer(group, topic):
    client = get_kafka_client()
    return kafka.SimpleConsumer(
        client, group, topic,
        iter_timeout=app.config["CONSUMER_TIMEOUT"]
    )
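# A minimal usage sketch for the factory above. With iter_timeout set,
# iterating the old kafka-python SimpleConsumer stops once no message arrives
# within the timeout, so this loop drains the topic and returns instead of
# blocking forever. process() is a hypothetical callback, not part of the
# original snippet.
def drain_topic(group, topic, process):
    consumer = get_kafka_consumer(group, topic)
    for offset_and_message in consumer:
        process(offset_and_message.message.value)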