def __init__(self, configuration: Mapping[str, Any]) -> None:
    auto_offset_reset = configuration.get("auto.offset.reset", "largest")
    if auto_offset_reset in {"smallest", "earliest", "beginning"}:
        self.__resolve_partition_starting_offset = (
            self.__resolve_partition_offset_earliest
        )
    elif auto_offset_reset in {"largest", "latest", "end"}:
        self.__resolve_partition_starting_offset = (
            self.__resolve_partition_offset_latest
        )
    elif auto_offset_reset == "error":
        self.__resolve_partition_starting_offset = (
            self.__resolve_partition_offset_error
        )
    else:
        raise ValueError("invalid value for 'auto.offset.reset' configuration")

    # NOTE: Offsets are explicitly managed as part of the assignment
    # callback, so preemptively resetting offsets is not enabled.
    self.__consumer = ConfluentConsumer(
        {**configuration, "auto.offset.reset": "error"}
    )

    self.__offsets: MutableMapping[TopicPartition, int] = {}

    self.__state = KafkaConsumerState.CONSUMING
def __init__(
    self,
    configuration: Mapping[str, Any],
    *,
    commit_retry_policy: Optional[RetryPolicy] = None,
) -> None:
    if commit_retry_policy is None:
        commit_retry_policy = NoRetryPolicy()

    auto_offset_reset = configuration.get("auto.offset.reset", "largest")
    if auto_offset_reset in {"smallest", "earliest", "beginning"}:
        self.__resolve_partition_starting_offset = (
            self.__resolve_partition_offset_earliest
        )
    elif auto_offset_reset in {"largest", "latest", "end"}:
        self.__resolve_partition_starting_offset = (
            self.__resolve_partition_offset_latest
        )
    elif auto_offset_reset == "error":
        self.__resolve_partition_starting_offset = (
            self.__resolve_partition_offset_error
        )
    else:
        raise ValueError("invalid value for 'auto.offset.reset' configuration")

    if (
        as_kafka_configuration_bool(configuration.get("enable.auto.commit", "true"))
        is not False
    ):
        raise ValueError("invalid value for 'enable.auto.commit' configuration")

    if (
        as_kafka_configuration_bool(
            configuration.get("enable.auto.offset.store", "true")
        )
        is not False
    ):
        raise ValueError(
            "invalid value for 'enable.auto.offset.store' configuration"
        )

    # NOTE: Offsets are explicitly managed as part of the assignment
    # callback, so preemptively resetting offsets is not enabled.
    self.__consumer = ConfluentConsumer(
        {**configuration, "auto.offset.reset": "error"}
    )

    self.__offsets: MutableMapping[Partition, int] = {}
    self.__staged_offsets: MutableMapping[Partition, int] = {}
    self.__paused: Set[Partition] = set()

    self.__commit_retry_policy = commit_retry_policy

    self.__state = KafkaConsumerState.CONSUMING
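# The two constructors above install one of three private offset-resolver
# callbacks but leave their bodies out of the snippet. Below is a minimal
# sketch of how such resolvers could be written against the public
# confluent_kafka API; the function names and the ConsumerError type are
# illustrative assumptions, not the originals.
from confluent_kafka import Consumer as ConfluentConsumer, TopicPartition


class ConsumerError(Exception):
    """Raised when a starting offset cannot be resolved (assumed type)."""


def resolve_partition_offset_earliest(
    consumer: ConfluentConsumer, partition: TopicPartition
) -> TopicPartition:
    # get_watermark_offsets returns the (low, high) watermarks; seeking to
    # the low watermark replays the partition from the beginning.
    low, high = consumer.get_watermark_offsets(partition)
    return TopicPartition(partition.topic, partition.partition, low)


def resolve_partition_offset_latest(
    consumer: ConfluentConsumer, partition: TopicPartition
) -> TopicPartition:
    low, high = consumer.get_watermark_offsets(partition)
    return TopicPartition(partition.topic, partition.partition, high)


def resolve_partition_offset_error(
    consumer: ConfluentConsumer, partition: TopicPartition
) -> TopicPartition:
    # The "error" policy refuses to guess where consumption should start.
    raise ConsumerError("unable to resolve starting offset")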
def _shedual_task(self):
    # This package can be awkward to install; users of this middleware
    # should figure out how to install it themselves when they need it.
    from confluent_kafka import Consumer as ConfluentConsumer

    try:
        admin_client = KafkaAdminClient(
            bootstrap_servers=frame_config.KAFKA_BOOTSTRAP_SERVERS)
        admin_client.create_topics([NewTopic(self._queue_name, 10, 1)])
        # admin_client.create_partitions({self._queue_name: NewPartitions(total_count=16)})
    except TopicAlreadyExistsError:
        pass

    self._producer = KafkaProducer(
        bootstrap_servers=frame_config.KAFKA_BOOTSTRAP_SERVERS)
    # Consumer configuration reference:
    # https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
    self._confluent_consumer = ConfluentConsumer({
        'bootstrap.servers': ','.join(frame_config.KAFKA_BOOTSTRAP_SERVERS),
        'group.id': 'frame_group',
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': False,
    })
    self._confluent_consumer.subscribe([self._queue_name])

    self._recent_commit_time = time.time()
    self._partion__offset_consume_status_map = defaultdict(OrderedDict)
    while True:
        msg = self._confluent_consumer.poll(timeout=10)
        self._manually_commit()
        if msg is None:
            continue
        if msg.error():
            print("Consumer error: {}".format(msg.error()))
            continue
        # Message API (value() / offset() / partition()):
        # https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#message
        # Record the offset as fetched but not yet acknowledged.
        self._partion__offset_consume_status_map[msg.partition()][msg.offset()] = 0
        kw = {
            'partition': msg.partition(),
            'offset': msg.offset(),
            'body': json.loads(msg.value()),
        }  # noqa
        if self._is_show_message_get_from_broker:
            self.logger.debug(
                f'Message fetched from kafka topic [{self._queue_name}], '
                f'partition {msg.partition()}, offset {msg.offset()}: {msg.value()}'
            )  # noqa
        self._submit_task(kw)
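# _manually_commit() is called in the loop above but not shown. Below is a
# hedged sketch of what it could look like: the 10-second throttle and the
# convention that a status of 1 marks an offset as acknowledged (0 as still
# in flight) are assumptions, not taken from the original.
def _manually_commit(self):
    from confluent_kafka import TopicPartition
    # Commit at most every 10 seconds to limit broker round-trips (assumed interval).
    if time.time() - self._recent_commit_time < 10:
        return
    offsets_to_commit = []
    for partition, offset_status_map in self._partion__offset_consume_status_map.items():
        committable = None
        # Walk offsets in ascending insertion order and stop at the first
        # one that has not been acknowledged yet.
        for offset, status in offset_status_map.items():
            if status == 0:
                break
            committable = offset
        if committable is None:
            continue
        # A Kafka commit records the *next* offset to be consumed.
        offsets_to_commit.append(
            TopicPartition(self._queue_name, partition, committable + 1))
        # Drop everything up to and including the committed offset.
        for offset in list(offset_status_map):
            if offset > committable:
                break
            del offset_status_map[offset]
    if offsets_to_commit:
        self._confluent_consumer.commit(offsets=offsets_to_commit, asynchronous=False)
    self._recent_commit_time = time.time()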
def __init__(self, publisher, downloader, engine,
             incoming_topic, group_id, bootstrap_servers, **kwargs):
    super().__init__(publisher, downloader, engine)

    config = kwargs.copy()
    config["group.id"] = group_id
    config["bootstrap.servers"] = ",".join(bootstrap_servers)
    log.info("config", extra={"config": config})

    self.auto_commit = kwargs.get("enable.auto.commit", True)

    self.consumer = ConfluentConsumer(config)
    self.consumer.subscribe([incoming_topic])
    log.info("subscribing to %s: %s", incoming_topic, self.consumer)
def __init__(self, publisher, downloader, engine,
             incoming_topic, group_id, bootstrap_servers, requeuer=None, **kwargs):
    super().__init__(publisher, downloader, engine)

    config = kwargs.copy()
    config["group.id"] = group_id
    config["bootstrap.servers"] = ",".join(bootstrap_servers)
    config["group.instance.id"] = kwargs.get("group.instance.id",
                                             os.environ.get("HOSTNAME"))
    self.auto_commit = kwargs.get("enable.auto.commit", True)

    self.consumer = ConfluentConsumer(config)
    self.consumer.subscribe([incoming_topic])
    log.info("subscribing to %s: %s", incoming_topic, self.consumer)

    self.requeuer = requeuer
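# A minimal sketch of the consume loop these two constructors feed; the
# run() and process() names are illustrative assumptions, not part of the
# snippets above.
def run(self):
    while True:
        msg = self.consumer.poll(timeout=1.0)
        if msg is None:
            continue
        if msg.error():
            log.warning("consumer error: %s", msg.error())
            continue
        self.process(msg)
        if not self.auto_commit:
            # With enable.auto.commit disabled, acknowledge explicitly once
            # the message has been handled.
            self.consumer.commit(msg)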
def __init__(self, bootstrap_servers: str, topic: str, group_id: str) -> None:
    config = {
        'bootstrap.servers': bootstrap_servers,
        # Where to consume from after a reset:
        # "latest" is the end of the topic, "earliest" is the beginning
        'default.topic.config': {
            'auto.offset.reset': 'latest'
        },
        'metadata.request.timeout.ms': 20000,
        'enable.auto.commit': False,
        'group.id': group_id,
        'api.version.request': True,
        'fetch.wait.max.ms': 100,
        'log.connection.close': False,
        # This logger will log messages originating from non-Python code
        'logger': get_logger('librdkafka'),
        # Max number of bytes per partition returned by the server
        'max.partition.fetch.bytes': MEBIBYTE * 5,
        'statistics.interval.ms': 15000,
        'queued.max.messages.kbytes': 1024 * 64,
    }
    self._consumer = ConfluentConsumer(config)
    self._consumer.subscribe([topic])
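# Two caveats for the config above: newer librdkafka releases deprecate the
# nested 'default.topic.config' dict in favour of setting topic-level
# properties such as 'auto.offset.reset' at the top level, and
# 'statistics.interval.ms' only yields data in the Python client when a
# stats_cb is also registered. A sketch of both fixes; make_config and the
# logging of the 'ts' field are illustrative assumptions.
import json
import logging


def on_stats(stats_json: str) -> None:
    # stats_json is a JSON document of librdkafka metrics, emitted every
    # statistics.interval.ms milliseconds.
    stats = json.loads(stats_json)
    logging.getLogger('librdkafka').info('librdkafka stats ts=%s', stats.get('ts'))


def make_config(bootstrap_servers: str, group_id: str) -> dict:
    return {
        'bootstrap.servers': bootstrap_servers,
        'auto.offset.reset': 'latest',  # top level, not default.topic.config
        'enable.auto.commit': False,
        'group.id': group_id,
        'statistics.interval.ms': 15000,
        'stats_cb': on_stats,
    }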
def __init__(self, configuration: Mapping[str, Any]) -> None: self.__consumer = ConfluentConsumer(configuration)
def create_consumer(self):
    from confluent_kafka import Consumer as ConfluentConsumer
    self.consumer = ConfluentConsumer(self.consumer_settings)
    self.consumer.subscribe(topics=self.topics)
    return self.consumer
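# A short usage sketch for the factory above; KafkaWorker and handle() are
# hypothetical stand-ins for whatever class carries the consumer_settings
# and topics attributes and for the caller's processing hook.
worker = KafkaWorker()
consumer = worker.create_consumer()
try:
    while True:
        msg = consumer.poll(timeout=1.0)
        if msg is None:
            continue
        if msg.error():
            continue
        handle(msg.value())
finally:
    # close() leaves the consumer group cleanly so partitions rebalance promptly.
    consumer.close()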