def __init__(self, kafkaHost=None, kafkaGroup=None, kafkaTopic=None,
             consumerType=NON_CONSUMER, consumerPartitions=None,
             producerType=NON_PRODUCER, producerPartitions=None):
    self.kafkaHost = kafkaHost
    self.kafkaGroup = kafkaGroup
    self.kafkaTopic = kafkaTopic
    # Default to fresh lists instead of mutable default arguments, which
    # would be shared across all instances.
    self.consumerPartitions = consumerPartitions or []
    self.producerPartitions = producerPartitions or []
    self.connect(kafkaHost)
    try:
        # Producers are synchronous and fire-and-forget (no broker acks).
        # NOTE: 'async' is the legacy kafka-python keyword argument (renamed
        # to 'async_send' in 1.4.5); this code targets the old API.
        if producerType == self.SIMPLE_PRODUCER:
            self.producer = SimpleProducer(
                self.kafkaClient, async=False,
                req_acks=KeyedProducer.ACK_NOT_REQUIRED)
        elif producerType == self.FIXED_PRODUCER:
            self.producer = FixedProducer(
                self.kafkaClient, self.producerPartitions[0], async=False,
                req_acks=KeyedProducer.ACK_NOT_REQUIRED)
        elif producerType == self.USER_PRODUCER:
            self.producer = UserProducer(
                self.kafkaClient, async=False,
                req_acks=KeyedProducer.ACK_NOT_REQUIRED)
        elif producerType == self.NON_PRODUCER:
            self.producer = None
        else:
            raise Exception("wrong producer type {}".format(producerType))

        # Consume from all partitions unless specific ones were requested.
        if consumerType == self.SIMPLE_CONSUMER:
            if not self.consumerPartitions:
                self.consumer = SimpleConsumer(self.kafkaClient,
                                               self.kafkaGroup,
                                               self.kafkaTopic)
            else:
                self.consumer = SimpleConsumer(
                    self.kafkaClient, self.kafkaGroup, self.kafkaTopic,
                    partitions=self.consumerPartitions)
            logger.debug('consumer is listening on {}@{}'.format(
                self.kafkaTopic, self.consumerPartitions))
        elif consumerType == self.NON_CONSUMER:
            self.consumer = None
        else:
            raise Exception("wrong consumer type {}".format(consumerType))
    except Exception as e:
        # On any setup failure, tear everything down to a clean state.
        logger.warning('Exception {}'.format(e))
        logger.debug(traceback.format_exc())
        self.consumer = None
        self.producer = None
        self.kafkaClient = None
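# Usage sketch (hypothetical, not from the original source): the constructor
# above is assumed to belong to a wrapper class, here called KafkaMessenger,
# that defines the *_PRODUCER/*_CONSUMER constants and the connect() helper.
def example_build_messenger():
    messenger = KafkaMessenger(kafkaHost='localhost:9092',
                               kafkaGroup='workers',
                               kafkaTopic='tasks',
                               consumerType=KafkaMessenger.SIMPLE_CONSUMER,
                               producerType=KafkaMessenger.SIMPLE_PRODUCER)
    if messenger.producer:
        # SimpleProducer.send_messages() takes the topic first, then one or
        # more message payloads as bytes.
        messenger.producer.send_messages(b'tasks', b'hello', b'world')
    return messenger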
def assert_kafka(self, expected_file_name):
    #print("reading server "+config.KAFKA_SERVER+" on topic:"+config.KAFKA_TOPIC)
    kafka_client = KafkaClient(config.KAFKA_SERVER)
    # SimpleConsumer takes its iterator timeout in seconds... hence 1,
    # allowing all messages to appear but not hanging too long.
    consumer = SimpleConsumer(kafka_client, b"my_group",
                              config.KAFKA_TOPIC.encode("utf8"),
                              iter_timeout=1)
    # seek(1, 0) means: start from the beginning (the 0) but skip 1 message
    # from that index (the first msg). We bypass the first message since it
    # is only used to autostart the topic.
    consumer.seek(1, 0)
    actual = ""
    for msg in consumer:
        # The trailing linefeed is not strictly needed, but it makes for
        # more readable error reports.
        actual += msg.message.value.decode('utf8') + "\n"
    expected = pkg_resources.resource_string(__name__,
                                             expected_file_name).decode('utf8')
    t_assert.equal(actual, expected)
def set_consumer_partition(self, consumerPartitions):
    if not consumerPartitions:
        logger.warning('consumer partitions cannot be empty')
        return
    # Flush offsets and stop the old consumer before re-subscribing.
    if self.consumer:
        self.consumer.commit()
        self.consumer.stop()
        self.consumer = None
    self.consumerPartitions = consumerPartitions
    try:
        self.consumer = SimpleConsumer(self.kafkaClient, self.kafkaGroup,
                                       self.kafkaTopic,
                                       partitions=self.consumerPartitions)
    except KafkaError as e:
        # Kafka-level failures may mean the connection is bad; rebuild it.
        logger.warning('Exception {}'.format(e))
        logger.debug(traceback.format_exc())
        self.reconnect()
    except Exception as e:
        logger.warning('Exception {}'.format(e))
        logger.debug(traceback.format_exc())
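# Usage sketch (hypothetical names, continuing the example above): moving a
# live instance onto an explicit partition set. Offsets for the previous
# subscription are committed before the new SimpleConsumer is created.
def example_repartition(messenger):
    messenger.set_consumer_partition([0, 1])
    if messenger.consumer:
        # get_messages(count, block, timeout) is the legacy SimpleConsumer
        # batch-fetch API; timeout is in seconds.
        for msg in messenger.consumer.get_messages(count=10, timeout=1.0):
            print(msg.message.value)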
def test_handler():
    topic = '%s-mutations' % (uuid.uuid1().hex,)
    codec = BinaryCodec(Message)

    client = KafkaClient('kafka')
    producer = SimpleProducer(client)
    writer = KafkaWriter(producer, topic, codec)

    inputs = list(transaction)
    writer.push(inputs)

    # Read everything back from the start of the topic and decode it. A list
    # comprehension is used instead of nested map() calls so the comparison
    # below also works on Python 3, where map() returns an iterator.
    consumer = SimpleConsumer(client, 'test', topic,
                              auto_offset_reset='smallest')
    outputs = [
        codec.decode(message.message.value)
        for message in consumer.get_messages(count=3)
    ]

    assert outputs == inputs
def _mp_consume(client, group, topic, message_queue, size, events, **consumer_options):
    """
    A child process worker which consumes messages based on the
    notifications given by the controller process

    NOTE: Ideally, this should have been a method inside the Consumer
    class. However, the multiprocessing module has issues on Windows. The
    functionality breaks unless this function is kept outside of a class
    """

    # Initial interval for retries in seconds.
    interval = 1
    while not events.exit.is_set():
        try:
            # Make the child processes open separate socket connections
            client.reinit()

            # We will start consumers without auto-commit. Auto-commit will be
            # done by the master controller process.
            consumer = SimpleConsumer(client, group, topic,
                                      auto_commit=False,
                                      auto_commit_every_n=None,
                                      auto_commit_every_t=None,
                                      **consumer_options)

            # Ensure that the consumer provides the partition information
            consumer.provide_partition_info()

            while True:
                # Wait till the controller tells us to start consuming
                events.start.wait()

                # If we are asked to quit, do so
                if events.exit.is_set():
                    break

                # Consume messages and add them to the queue. If the controller
                # indicates a specific number of messages, follow that advice
                count = 0

                message = consumer.get_message()
                if message:
                    # Block until there is room in the queue, bailing out
                    # early if we are asked to exit.
                    while True:
                        try:
                            message_queue.put(message,
                                              timeout=FULL_QUEUE_WAIT_TIME_SECONDS)
                            break
                        except queue.Full:
                            if events.exit.is_set():
                                break

                    count += 1

                    # We have reached the required size. The controller might
                    # have more than it needs. Wait for a while.
                    # Without this logic, it is possible that we run into a big
                    # loop consuming all available messages before the
                    # controller can reset the 'start' event
                    if count == size.value:
                        events.pause.wait()

                else:
                    # In case we did not receive any message, give up the CPU
                    # for a while before we try again
                    time.sleep(NO_MESSAGES_WAIT_TIME_SECONDS)

            consumer.stop()

        except KafkaError as e:
            # Retry with exponential backoff, capped at MAX_BACKOFF_SECONDS
            log.error(
                "Problem communicating with Kafka (%s), retrying in %d seconds..."
                % (e, interval))
            time.sleep(interval)
            interval = min(interval * 2, MAX_BACKOFF_SECONDS)
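# Controller-side sketch (hypothetical, simplified): how a master process
# might spawn the worker above and drive it through the shared 'events'
# object it expects (start/pause/exit) plus a shared batch-size counter.
# The names Events and example_spawn_worker are illustrative only.
def example_spawn_worker(client, group, topic):
    import multiprocessing as mp
    from collections import namedtuple

    Events = namedtuple('Events', ['start', 'pause', 'exit'])
    events = Events(start=mp.Event(), pause=mp.Event(), exit=mp.Event())
    message_queue = mp.Queue(1024)    # bounded, so queue.Full can trigger
    size = mp.Value('i', 64)          # messages per batch, shared with worker

    proc = mp.Process(target=_mp_consume,
                      args=(client, group, topic, message_queue, size, events))
    proc.start()

    events.start.set()                # let the worker begin consuming
    # ... drain message_queue here, then shut down:
    events.exit.set()
    events.start.set()                # unblock a worker waiting on start
    proc.join()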