def _setup_fetch_workers(self):
    """Configure and start the rdkafka consumer, then spawn its poller.

    The broker list is assembled from ``self._cluster.brokers`` and the
    start offsets from the ``next_offset`` of every partition in
    ``self._partitions_by_id``. The consumer handle is stored on
    ``self._rdk_consumer`` and the spawned poller on
    ``self._poller_thread``.
    """
    broker_addrs = [
        broker.host + b":" + get_bytes(str(broker.port))
        for broker in self._cluster.brokers.values()
    ]
    brokers = b','.join(broker_addrs)

    partition_ids = list(self._partitions_by_id.keys())
    start_offsets = []
    for partition_id in partition_ids:
        start_offsets.append(self._partitions_by_id[partition_id].next_offset)

    conf, topic_conf = self._mk_rdkafka_config_lists()

    consumer = _rd_kafka.Consumer()
    self._rdk_consumer = consumer
    log.debug("Configuring _rdk_consumer...")
    consumer.configure(conf=conf)
    consumer.configure(topic_conf=topic_conf)

    start_kwargs = {
        "brokers": brokers,
        "topic_name": self._topic.name,
        "partition_ids": partition_ids,
        "start_offsets": start_offsets,
    }
    log.debug("Starting _rdk_consumer with {}".format(start_kwargs))
    consumer.start(**start_kwargs)

    def poll(handle, stopped):
        # On the consumer side, polling the handle mainly serves to
        # de-queue error-level log messages that could otherwise stay
        # held up inside librdkafka.
        while not stopped.is_set():
            try:
                handle.poll(timeout_ms=1000)
            except RdKafkaStoppedException:
                # The handle was stopped underneath us: leave the loop.
                break
        log.debug("Exiting RdKafkaSimpleConsumer poller thread cleanly.")

    self._stop_poller_thread.clear()
    poller_args = (self._rdk_consumer, self._stop_poller_thread)
    self._poller_thread = self._cluster.handler.spawn(poll, args=poller_args)
def test_stopped_exception(self):
    """Check Consumer_consume raises exception if handle was stopped"""
    # Build the handle the same way the other consumer code in this file
    # does: bare constructor, then configure(), then start() with the
    # broker/topic/partition arguments.
    consumer = _rd_kafka.Consumer()
    consumer.configure(conf=[])
    consumer.configure(topic_conf=[])
    consumer.start(brokers=get_bytes(self.kafka.brokers),
                   topic_name=self.topic_name,
                   partition_ids=self.partition_ids,
                   start_offsets=self.start_offsets)
    consumer.stop()
    # Consuming from a stopped handle must raise rather than block.
    with self.assertRaises(RdKafkaStoppedException):
        consumer.consume(1)
def produce_messages(self, topic_name, messages, batch_size=200):
    """Produce some messages to a topic.

    The messages are joined with newlines and piped to the stdin of
    ``bin/kafka-console-producer.sh`` located under ``self._bin_dir``.

    :param topic_name: value passed to the script's ``--topic`` option
    :param messages: iterable of strings, sent one per line
    :param batch_size: value passed to the script's ``--batch-size`` option
    """
    binfile = os.path.join(self._bin_dir, 'bin/kafka-console-producer.sh')
    cmd = [binfile,
           '--broker-list', self.brokers,
           '--topic', topic_name,
           '--batch-size', batch_size]
    cmd = [get_string(c) for c in cmd]  # execv needs only strings
    log.debug('running: %s', ' '.join(cmd))
    # Build the payload before spawning, so that unusable ``messages``
    # raise without leaving a child process behind.
    payload = get_bytes('\n'.join(messages))
    proc = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    try:
        proc.communicate(input=payload)
    finally:
        # communicate() returns only after the child has exited, so the
        # child can still be alive here only if communicate() raised.
        if proc.poll() is None:
            proc.kill()
            proc.wait()  # reap the killed child
def produce_messages(self, topic_name, messages, batch_size=200):
    """Produce some messages to a topic.

    Feeds ``messages``, newline-separated, to the stdin of
    ``bin/kafka-console-producer.sh`` found under ``self._bin_dir``.

    :param topic_name: topic handed to the script via ``--topic``
    :param messages: iterable of strings; each becomes one input line
    :param batch_size: number handed to the script via ``--batch-size``
    """
    binfile = os.path.join(self._bin_dir, 'bin/kafka-console-producer.sh')
    cmd = [
        binfile,
        '--broker-list', self.brokers,
        '--topic', topic_name,
        '--batch-size', batch_size
    ]
    cmd = [get_string(c) for c in cmd]  # execv needs only strings
    log.debug('running: %s', ' '.join(cmd))
    # Encode the input up front: a failure here must not strand a child
    # process that was already started.
    stdin_data = get_bytes('\n'.join(messages))
    proc = subprocess.Popen(cmd, stdin=subprocess.PIPE)
    try:
        proc.communicate(input=stdin_data)
    finally:
        # After a normal communicate() the child has already exited; this
        # cleanup only fires when communicate() was cut short by an error.
        if proc.poll() is None:
            proc.kill()
            proc.wait()  # collect the exit status of the killed child
def make_topic(self, topic, partitions=3, replication=1):
    """Use kafka-topics.sh to create a topic.

    :param topic: name of the topic to create
    :param partitions: value passed as ``--partitions``
    :param replication: value passed as ``--replication-factor``
    :returns: ``'already_exists'`` if the topic was present beforehand,
        ``'created'`` if it is present after the create call, and ``None``
        if it still is not visible after the short wait.
    """
    if self.is_topic(topic):
        self.log.info("{} topic already exists".format(topic))
        return 'already_exists'

    self.log.info("Creating topic {}".format(topic))
    create_args = ['--create']
    create_args += ['--topic', get_bytes(topic)]
    create_args += ['--partitions', partitions]
    create_args += ['--replication-factor', replication]
    self._run_sh('topics_sh', create_args)

    # Brief pause before checking whether the topic is now listed.
    sleep(0.5)
    if not self.is_topic(topic):
        return None
    self.log.info('Topic {} created'.format(topic))
    return 'created'
def start(self):
    """Start the rdkafka producer and its poller unless already running.

    Configures a fresh ``_rd_kafka.Producer`` with the lists returned by
    ``self._mk_rdkafka_config_lists()``, starts it with
    ``self._delivery_reports.put`` as its third argument, marks the
    instance as running and spawns a poller through the cluster handler.
    """
    if self._running:
        return

    broker_addrs = [
        broker.host + b":" + get_bytes(str(broker.port))
        for broker in self._cluster.brokers.values()
    ]
    brokers = b",".join(broker_addrs)
    conf, topic_conf = self._mk_rdkafka_config_lists()

    producer = _rd_kafka.Producer()
    self._rdk_producer = producer
    producer.configure(conf=conf)
    producer.configure(topic_conf=topic_conf)
    producer.start(brokers, self._topic.name, self._delivery_reports.put)
    self._running = True

    def poll(handle, stopped):
        # Keep polling until a stop was requested AND the handle reports
        # an empty out-queue.
        while True:
            if stopped.is_set() and not handle.outq_len() > 0:
                break
            handle.poll(timeout_ms=1000)
        log.debug("Exiting RdKafkaProducer poller thread cleanly.")

    self._stop_poller_thread.clear()
    poller_args = (self._rdk_producer, self._stop_poller_thread)
    self._poller_thread = self._cluster.handler.spawn(poll, args=poller_args)
def test_stop(self):
    """Check Consumer_stop really shuts down the librdkafka consumer

    librdkafka's _destroy functions are all async, so there is no direct
    feedback when cleanup happens in the wrong order, and the underlying
    consumer may stay up even after the python object is long gone.
    Checking the thread count around the start/stop cycle gives some
    reassurance that no loose ends were left behind.
    """
    with self.assert_thread_cnt_non_increasing():
        start_kwargs = {
            "brokers": get_bytes(self.kafka.brokers),
            "topic_name": self.topic_name,
            "partition_ids": self.partition_ids,
            "start_offsets": self.start_offsets,
        }
        consumer = _rd_kafka.Consumer()
        consumer.configure(conf=[])
        consumer.configure(topic_conf=[])
        consumer.start(**start_kwargs)
        # Consume once, just to reliably get some threads going.
        consumer.consume(100)
        consumer.stop()
def start(self):
    """Bring up the rdkafka producer and its poller; no-op when running.

    A new ``_rd_kafka.Producer`` is configured from
    ``self._mk_rdkafka_config_lists()`` and started against the cluster's
    brokers for ``self._topic.name``, with ``self._delivery_reports.put``
    passed as the final argument. A poller is then spawned via
    ``self._cluster.handler``.
    """
    if self._running:
        return

    host_ports = []
    for broker in self._cluster.brokers.values():
        host_ports.append(broker.host + b":" + get_bytes(str(broker.port)))
    brokers = b','.join(host_ports)
    conf, topic_conf = self._mk_rdkafka_config_lists()

    rdk_producer = _rd_kafka.Producer()
    self._rdk_producer = rdk_producer
    rdk_producer.configure(conf=conf)
    rdk_producer.configure(topic_conf=topic_conf)
    rdk_producer.start(
        brokers, self._topic.name, self._delivery_reports.put)
    self._running = True

    def poll(rdk_handle, stop_event):
        # The loop outlives the stop request for as long as the handle
        # still reports a non-empty out-queue.
        keep_polling = not stop_event.is_set() or rdk_handle.outq_len() > 0
        while keep_polling:
            rdk_handle.poll(timeout_ms=1000)
            keep_polling = (not stop_event.is_set()
                            or rdk_handle.outq_len() > 0)
        log.debug("Exiting RdKafkaProducer poller thread cleanly.")

    self._stop_poller_thread.clear()
    self._poller_thread = self._cluster.handler.spawn(
        poll,
        args=(self._rdk_producer, self._stop_poller_thread),
    )