from kafka import KafkaConsumer
# Service, ExceptContext, call_later and self.logger come from the
# surrounding project's own toolkit and are assumed to be importable here.


class ItemConsumer(Service):
    """
    Kafka consumer.
    """
    name = "item_consumer"

    def __init__(self):
        super(ItemConsumer, self).__init__()
        self.topic_in = self.args.topic_in
        self.consumer_id = self.args.consumer or self.name
        with ExceptContext(Exception, errback=lambda *args: self.stop()):
            self.consumer = KafkaConsumer(
                self.topic_in,
                group_id=self.consumer_id,
                bootstrap_servers=self.settings.get("KAFKA_HOSTS").split(","),
                enable_auto_commit=False,
                consumer_timeout_ms=1000)

    # Callbacks for the async offset commit issued in commit().
    def consume_errback(self, exception):
        self.logger.error("Commit future failed: %s. " % str(exception))

    def consume_callback(self, value):
        self.logger.debug("Commit future success: %s. " % str(value))

    @call_later("commit", interval=10, immediately=False)
    def consume(self, callback=lambda value: value, errback=lambda message: message):
        with ExceptContext(errback=self.log_err):
            # consumer_timeout_ms=1000 makes __next__ raise StopIteration when
            # no record arrives within a second; the inner context swallows it.
            with ExceptContext((StopIteration, OSError), errback=lambda *args: True):
                message = self.consumer.__next__()
                while message:
                    value = message.value
                    self.logger.debug(
                        'Consume message from %s, message is %s' % (self.topic_in, value))
                    if isinstance(value, bytes):
                        value = value.decode("utf-8")
                    value = callback(value)
                    if value:
                        return value
                    else:
                        message = errback(message)

    def commit(self):
        future = self.consumer.commit_async()
        future.add_callback(self.consume_callback)
        future.add_errback(self.consume_errback)

    def enrich_parser_arguments(self):
        super(ItemConsumer, self).enrich_parser_arguments()
        self.parser.add_argument("-ti", "--topic-in", help="Topic in. ")
        self.parser.add_argument("-c", "--consumer", help="Consumer id. ")

    def stop(self, *args):
        super(ItemConsumer, self).stop(*args)
        with ExceptContext(Exception, errback=lambda *args: True):
            if self.consumer:
                self.consumer.close()
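# A minimal usage sketch for ItemConsumer, assuming the Service base class
# wires up argument parsing for "-ti"/"-c" and that call_later leaves
# consume() directly callable; the `handle` callback name is hypothetical.
if __name__ == "__main__":
    item_consumer = ItemConsumer()

    def handle(value):
        # Return a truthy value to stop the consume loop and hand the item back.
        return value

    item = item_consumer.consume(callback=handle)
    if item:
        print(item)
    item_consumer.stop()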
from ssl import SSLError

from kafka import KafkaConsumer, KafkaProducer
from kafka.errors import KafkaError
# BaseAdapter, Message, TuberLogger and the Tuber* exceptions come from
# the surrounding project.


class KafkaAdapter(BaseAdapter):
    """
    Adapter for communicating with Kafka brokers.
    """
    def __init__(self, direction, bootstrap_servers, topic, **kwargs):
        super().__init__(direction)
        self.topic = topic
        self.bootstrap_servers = bootstrap_servers
        self.extra_opts = kwargs
        # If SASL credentials are supplied, enable PLAIN authentication over SSL.
        if 'sasl_plain_username' in self.extra_opts and 'sasl_plain_password' in self.extra_opts:
            self.extra_opts['sasl_mechanism'] = 'PLAIN'
            self.extra_opts['security_protocol'] = 'SASL_SSL'
        self.url = 'kafka://{}/{}'.format(self.bootstrap_servers[0], self.topic)
        self._connect()

    def _connect(self):
        try:
            if self.direction == 'input':
                self._consumer = KafkaConsumer(self.topic,
                                               bootstrap_servers=self.bootstrap_servers,
                                               **self.extra_opts)
            else:
                self._producer = KafkaProducer(bootstrap_servers=self.bootstrap_servers,
                                               retries=6,
                                               **self.extra_opts)
            TuberLogger.info('Connected to {}'.format(self.url))
        except KafkaError as e:
            raise TuberIOError('Unable to connect to {}. Reason: {}'.format(self.url, str(e))) from e
        except SSLError as e:
            raise TuberException('Error creating adapter {}: {}'.format(self.url, str(e))) from e

    def _send(self, message):
        try:
            record = self._producer.send(self.topic, message.serialize())
            record.get(timeout=10)  # block until the broker acknowledges the record
        except KafkaError as e:
            raise TuberIOError('Unable to send message to {}. Reason: {}'.format(self.url, str(e))) from e

    def receive(self):
        try:
            record = self._consumer.__next__()
            return Message(record.value)
        except KafkaError as e:
            raise TuberIOError('Error receiving from {}. Reason: {}'.format(self.url, str(e))) from e
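# A hedged round-trip sketch for KafkaAdapter: one output adapter publishing a
# Message and one input adapter reading it back. The broker address, topic
# name, and the assumption that Message wraps a bytes payload are illustrative,
# not part of the adapter itself.
servers = ['localhost:9092']

producer_adapter = KafkaAdapter('output', servers, 'tuber-demo')
producer_adapter._send(Message(b'{"status": "ok"}'))

consumer_adapter = KafkaAdapter('input', servers, 'tuber-demo')
message = consumer_adapter.receive()  # blocks until a record arrives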
import pandas as pd
from pandas import DataFrame

from kafka import KafkaConsumer
# AbstractInput, instance_from_config and append_value_to_dict_list come
# from the ml4iiot package this class belongs to.


class KafkaInput(AbstractInput):
    def __init__(self, config):
        super().__init__(config)
        self.consumer = None
        self.topics_mapping = self.get_config('kafka_topics_mapping')
        self.input_mapper = instance_from_config(
            self.get_config(
                'kafka_input_mapper',
                default={'class': 'ml4iiot.input.kafka.JsonInputMapper'}))

    def init(self):
        super().init()
        self.consumer = KafkaConsumer(
            *self.topics_mapping.keys(),
            bootstrap_servers=[
                self.get_config('kafka_server', default='localhost:9092')
            ],
            enable_auto_commit=True)

    def next_data_frame(self, batch_size: int = 1) -> DataFrame:
        pandas_dict = {}
        record_count = 0

        # Block until batch_size records have been consumed and mapped into
        # column lists keyed by the topic's target column name.
        while record_count < batch_size:
            record = self.consumer.__next__()
            record_dict = self.input_mapper.from_kafka_record_to_dict(record)

            for key, value in record_dict.items():
                if key == self.index_column:
                    append_value_to_dict_list(pandas_dict, self.index_column, value)
                else:
                    append_value_to_dict_list(
                        pandas_dict, self.topics_mapping[record.topic], value)

            record_count = record_count + 1

        data_frame = pd.DataFrame.from_dict(pandas_dict)
        data_frame.drop_duplicates(subset=self.index_column, inplace=True)
        data_frame.set_index(self.index_column, inplace=True)

        return data_frame

    def destroy(self) -> None:
        super().destroy()
        self.consumer.close()
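# A configuration sketch for KafkaInput, assuming get_config reads keys from
# the dict passed to the constructor and that index_column is supplied by
# AbstractInput; the topic and column names here are made up.
config = {
    'kafka_server': 'localhost:9092',
    'kafka_topics_mapping': {'sensor.temperature': 'temperature'},
    # 'kafka_input_mapper' is omitted, so the JsonInputMapper default applies.
}

kafka_input = KafkaInput(config)
kafka_input.init()
frame = kafka_input.next_data_frame(batch_size=10)  # blocks for 10 records
kafka_input.destroy()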
import threading

from kafka import KafkaConsumer
# EasyKafkaConfig and EasyKafkaLog are part of the same package.


class EasyKafkaConsumer:
    __config_dic = {}
    logger = EasyKafkaLog.logger()

    def __init__(self, config_or_yml_path):
        """
        Init; requires a config.

        :param config_or_yml_path: yml path or EasyKafkaConfig
        """
        if isinstance(config_or_yml_path, EasyKafkaConfig):
            self.__config_dic = config_or_yml_path.__dict__
        elif isinstance(config_or_yml_path, str):
            self.__config_dic = EasyKafkaConfig(config_or_yml_path).__dict__
        else:
            raise TypeError('config_or_yml_path: need str or EasyKafkaConfig')
        self.consumer = KafkaConsumer(
            bootstrap_servers=self.__config_dic['bootstrap_servers'],
            group_id=self.__config_dic['group_id'])
        self.consumer.subscribe(self.__config_dic['topic_subscribe'])
        self.logger.info('consumer started [topic: {}, group_id: {}]'.format(
            self.__config_dic['topic_subscribe'],
            self.__config_dic['group_id']))

    def __iter__(self):
        return self.consumer

    def __next__(self):
        return self.consumer.__next__()

    def subscribe(self, fn, thread=False):
        """
        Subscribe with a callback fn(record); blocks by default (thread=False).

        def task(record):
            ...
        kafka_consumer.subscribe(task)

        :param fn: callable taking a single record parameter
        :param thread: handle each record in a new thread if thread=True
        """
        self.logger.info('consumer task started, mode={}'.format(
            'async' if thread else 'blocked'))
        for record in self:
            self.logger.info('received topic: {}, msg: {}'.format(
                record.topic, record.value))
            if thread:
                threading.Thread(target=fn, args=(record,)).start()
            else:
                fn(record)
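# A short usage sketch for EasyKafkaConsumer, assuming 'consumer.yml' provides
# the bootstrap_servers, group_id and topic_subscribe keys the class expects.
easy_consumer = EasyKafkaConsumer('consumer.yml')

def task(record):
    print(record.topic, record.value)

# Blocks the current thread; pass thread=True to dispatch each record to a
# new worker thread instead.
easy_consumer.subscribe(task)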