def _consume_messages(cls, host, port):
    kafka = KafkaClient(cls.server.host + ":" + str(cls.server.port))
    consumer = MultiProcessConsumer(kafka, None,
                                    cls.beaver_config.get('kafka_topic'),
                                    num_procs=5)
    return consumer.get_messages(count=100, block=True, timeout=5)

def cluster(self):
    rc = 1
    # Test cluster as a whole
    self._client = DevopsSimpleClient(self._broker)

    # Use multiprocessing for parallel consumers
    from kafka import MultiProcessConsumer

    # This will split the number of partitions among two processes
    consumer = MultiProcessConsumer(self._client, "devops-group",
                                    "devopstest1", num_procs=2)

    # This will spawn processes such that each handles 2 partitions max
    consumer = MultiProcessConsumer(self._client, "devops-group",
                                    "devopstest1", partitions_per_proc=2)

    for message in consumer:
        if self._pattern.match(message.message.value):
            if self._debug:
                print(message.message.value)
            rc = 0

    for message in consumer.get_messages(count=5, block=True, timeout=4):
        if self._pattern.match(message.message.value):
            if self._debug:
                print(message.message.value)
            rc = 0
        else:
            rc = 2

    self._client.close()
    return (rc, None)

def test_multi_proc_pending(self):
    self.send_messages(0, range(0, 10))
    self.send_messages(1, range(10, 20))

    # set group to None and auto_commit to False to avoid interactions w/
    # offset commit/fetch apis
    consumer = MultiProcessConsumer(self.client, None, self.topic,
                                    auto_commit=False, iter_timeout=0)

    self.assertEqual(consumer.pending(), 20)
    self.assertEqual(consumer.pending(partitions=[0]), 10)
    self.assertEqual(consumer.pending(partitions=[1]), 10)

    consumer.stop()

def test_partition_list(self):
    client = MagicMock()
    partitions = (0,)
    with patch.object(MultiProcessConsumer, 'fetch_last_known_offsets') as fetch_last_known_offsets:
        MultiProcessConsumer(client, 'testing-group', 'testing-topic',
                             partitions=partitions)
        self.assertEqual(fetch_last_known_offsets.call_args[0], (partitions,))
        self.assertEqual(client.get_partition_ids_for_topic.call_count, 0)  # pylint: disable=no-member

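# A hedged sketch of what the test above exercises: passing an explicit
# partition tuple so the consumer reads only those partitions instead of
# fetching the full partition list from the broker. The broker address,
# group, and topic below are placeholder assumptions.
from kafka import KafkaClient, MultiProcessConsumer

client = KafkaClient('localhost:9092')
consumer = MultiProcessConsumer(client, b'my-group', b'my-topic',
                                partitions=(0,))

for message in consumer.get_messages(count=10, block=True, timeout=5):
    print(message)

consumer.stop()
client.close()
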
def multiprocess_consumer():
    '''multiprocess consumer'''
    from kafka import KafkaClient, MultiProcessConsumer

    kafka = KafkaClient(KAFKA_SERVER)

    # This will split the number of partitions among two processes
    consumer = MultiProcessConsumer(kafka, b'my-group', b'topic1',
                                    num_procs=2)

    # This will spawn processes such that each handles 2 partitions max
    consumer = MultiProcessConsumer(kafka, b'my-group', b'topic1',
                                    partitions_per_proc=2)

    for message in consumer:
        print(message)

    for message in consumer.get_messages(count=5, block=True, timeout=4):
        print(message)

def __init__(self, client, topic, mode, num_procs=1, group=None):
    super(GeoWatchChannelKafka, self).__init__(client, topic, mode,
                                               num_procs=num_procs)
    self.group = group

    if mode in ("duplex", "consumer"):
        self._kafka_consumer = MultiProcessConsumer(
            self._client._client,
            self.group,
            self._client.topic_prefix + self.topic,
            num_procs=self.num_procs)

    if mode in ("duplex", "producer"):
        self._kafka_producer = SimpleProducer(self._client._client)

def test_multi_proc_pending(self):
    self.send_messages(0, range(0, 10))
    self.send_messages(1, range(10, 20))

    consumer = MultiProcessConsumer(self.client, "group1", self.topic,
                                    auto_commit=False)

    self.assertEqual(consumer.pending(), 20)
    self.assertEqual(consumer.pending(partitions=[0]), 10)
    self.assertEqual(consumer.pending(partitions=[1]), 10)

    consumer.stop()

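# Outside of tests, pending() can be used the same way to gauge backlog
# before or while consuming. A minimal sketch, assuming a broker at
# localhost:9092 and a two-partition topic (both placeholders):
from kafka import KafkaClient, MultiProcessConsumer

client = KafkaClient('localhost:9092')
consumer = MultiProcessConsumer(client, b'my-group', b'my-topic',
                                auto_commit=False)

print("total pending: %d" % consumer.pending())
print("partition 0 pending: %d" % consumer.pending(partitions=[0]))

consumer.stop()
client.close()
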
def __init__(self, options):
    hosts = options.kafka_hosts
    consumer_group = options.consumer_group
    topic = options.topic
    consumer_type = options.consumer_type
    ipmap_file = ipmap_path if options.enable_kafka_ipmap else None

    self.client = KafkaClient(hosts, ip_mapping_file=ipmap_file)

    logger.info("kafka hosts: %s" % hosts)
    logger.info("consumer group: %s" % consumer_group)
    logger.info("topic: %s" % topic)
    logger.info("consumer type: %s" % consumer_type)
    logger.info("ip mapping file: %s" % ipmap_file)

    if consumer_type == "multiprocess":
        partitions_per_proc = options.partitions_per_proc
        partition_num = len(self.client.topic_partitions[topic])
        # when partitions_per_proc is set, kafka-python recomputes the
        # number of processes internally, so this value is only a default
        num_procs = partition_num * partitions_per_proc
        auto_commit_every_n = options.auto_commit_msg_count
        auto_commit_every_t = options.auto_commit_interval
        # pass these as keyword arguments: positionally they would bind
        # to the wrong parameters of MultiProcessConsumer
        self.consumer = MultiProcessConsumer(
            self.client, consumer_group, topic,
            auto_commit_every_n=auto_commit_every_n,
            auto_commit_every_t=auto_commit_every_t,
            num_procs=num_procs,
            partitions_per_proc=partitions_per_proc)
    elif consumer_type == "simple":
        self.consumer = SimpleConsumer(self.client, consumer_group, topic,
                                       auto_commit=False)
    else:
        raise Exception("unsupported consumer type: %s" % consumer_type)

#!/usr/bin/env python
from kafka import KafkaConsumer, KafkaClient, MultiProcessConsumer

kafka = KafkaClient("hadoop-m.c.onefold-1.internal:6667")

# To consume messages
consumer = MultiProcessConsumer(kafka, "my-group", "truckevent2", num_procs=1)

# consumer = KafkaConsumer("truckevent2",
#                          group_id="my_group",
#                          bootstrap_servers=["130.211.146.208:6667"])

for message in consumer:
    # MultiProcessConsumer yields OffsetAndMessage tuples; the payload lives
    # in message.message. Values are raw byte strings -- decode if necessary!
    # e.g., for unicode: `message.message.value.decode('utf-8')`
    print("%d: key=%s value=%s" % (message.offset,
                                   message.message.key,
                                   message.message.value))

kafka.close()

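# For comparison, a hedged sketch of the commented-out KafkaConsumer
# alternative above: unlike MultiProcessConsumer's OffsetAndMessage tuples,
# KafkaConsumer yields records that carry topic/partition/offset/key/value
# directly. Broker address and topic are taken from the snippet above.
from kafka import KafkaConsumer

consumer = KafkaConsumer("truckevent2",
                         group_id="my_group",
                         bootstrap_servers=["130.211.146.208:6667"])

for message in consumer:
    print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                         message.offset, message.key,
                                         message.value))
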
#!/usr/bin/env python
#### Takes two arguments: topic n
#### n is the expected number of messages after which the app will stop reading
#### Reads the messages from the topic and prints each message on a new line
import uuid
import sys

from kafka import KafkaClient, MultiProcessConsumer

topic = sys.argv[1]
n = int(sys.argv[2])

kafka = KafkaClient("192.168.4.40:9092")
consumer = MultiProcessConsumer(kafka, str(uuid.uuid4()), topic)

# note: get_messages' timeout is in seconds, so 60000 waits a very long time
for msg in consumer.get_messages(count=n, block=True, timeout=60000):
    print(msg)

kafka.close()

from kafka import SimpleProducer, SimpleClient, SimpleConsumer

# To consume messages
client = SimpleClient('localhost:9092')
consumer = SimpleConsumer(client, "my-group", "my-topic")
for message in consumer:
    # message is raw byte string -- decode if necessary!
    # e.g., for unicode: `message.decode('utf-8')`
    print(message)

# Use multiprocessing for parallel consumers
from kafka import MultiProcessConsumer

# This will split the number of partitions among two processes
consumer = MultiProcessConsumer(client, "my-group", "my-topic", num_procs=2)

# This will spawn processes such that each handles 2 partitions max
consumer = MultiProcessConsumer(client, "my-group", "my-topic",
                                partitions_per_proc=2)

for message in consumer:
    print(message)

for message in consumer.get_messages(count=5, block=True, timeout=4):
    print(message)

client.close()

from kafka import KafkaClient, MultiProcessConsumer

kafka = KafkaClient('localhost:9092')

# This will split the number of partitions among two processes
consumer = MultiProcessConsumer(kafka, b'my-group', b'my-topic', num_procs=2)

# This will spawn processes such that each handles 2 partitions max
consumer = MultiProcessConsumer(kafka, b'my-group', b'my-topic',
                                partitions_per_proc=2)

for message in consumer:
    print(message)

for message in consumer.get_messages(count=5, block=True, timeout=4):
    print(message)

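# None of the iteration examples above shut the consumer down.
# MultiProcessConsumer spawns child processes that keep polling until
# stop() is called, so long-running code should release them explicitly.
# A minimal sketch, assuming the same placeholder broker and topic:
from kafka import KafkaClient, MultiProcessConsumer

kafka = KafkaClient('localhost:9092')
consumer = MultiProcessConsumer(kafka, b'my-group', b'my-topic', num_procs=2)

try:
    # drain up to 100 messages, waiting at most 10 seconds
    for message in consumer.get_messages(count=100, block=True, timeout=10):
        print(message)
finally:
    consumer.stop()   # terminates the child consumer processes
    kafka.close()     # closes connections to the brokers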