Example #1
File: kafka.py Project: Appva/snuba
    def __init__(self, configuration: Mapping[str, Any]) -> None:
        auto_offset_reset = configuration.get("auto.offset.reset", "largest")
        if auto_offset_reset in {"smallest", "earliest", "beginning"}:
            self.__resolve_partition_starting_offset = (
                self.__resolve_partition_offset_earliest)
        elif auto_offset_reset in {"largest", "latest", "end"}:
            self.__resolve_partition_starting_offset = (
                self.__resolve_partition_offset_latest)
        elif auto_offset_reset == "error":
            self.__resolve_partition_starting_offset = (
                self.__resolve_partition_offset_error)
        else:
            raise ValueError(
                "invalid value for 'auto.offset.reset' configuration")

        # NOTE: Offsets are explicitly managed as part of the assignment
        # callback, so preemptively resetting offsets is not enabled.
        self.__consumer = ConfluentConsumer(
            {**configuration, "auto.offset.reset": "error"}
        )

        self.__offsets: MutableMapping[TopicPartition, int] = {}

        self.__state = KafkaConsumerState.CONSUMING
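Example #1 maps every spelling librdkafka accepts for each `auto.offset.reset` policy onto a private resolver method. A minimal standalone sketch of the same dispatch pattern; the function and resolver names here are illustrative, not snuba's, while the OFFSET_* sentinels are librdkafka's real logical offsets:

    from typing import Any, Callable, Mapping
    from confluent_kafka import OFFSET_BEGINNING, OFFSET_END

    def pick_offset_resolver(configuration: Mapping[str, Any]) -> Callable[[], int]:
        # Dispatch on the configured reset policy, accepting every
        # spelling librdkafka itself allows.
        value = configuration.get("auto.offset.reset", "largest")
        if value in {"smallest", "earliest", "beginning"}:
            return lambda: OFFSET_BEGINNING
        if value in {"largest", "latest", "end"}:
            return lambda: OFFSET_END
        if value == "error":
            def resolve_error() -> int:
                raise RuntimeError("no committed offset for partition")
            return resolve_error
        raise ValueError("invalid value for 'auto.offset.reset' configuration")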
Example #2
    def __init__(
        self,
        configuration: Mapping[str, Any],
        *,
        commit_retry_policy: Optional[RetryPolicy] = None,
    ) -> None:
        if commit_retry_policy is None:
            commit_retry_policy = NoRetryPolicy()

        auto_offset_reset = configuration.get("auto.offset.reset", "largest")
        if auto_offset_reset in {"smallest", "earliest", "beginning"}:
            self.__resolve_partition_starting_offset = (
                self.__resolve_partition_offset_earliest
            )
        elif auto_offset_reset in {"largest", "latest", "end"}:
            self.__resolve_partition_starting_offset = (
                self.__resolve_partition_offset_latest
            )
        elif auto_offset_reset == "error":
            self.__resolve_partition_starting_offset = (
                self.__resolve_partition_offset_error
            )
        else:
            raise ValueError("invalid value for 'auto.offset.reset' configuration")

        if (
            as_kafka_configuration_bool(configuration.get("enable.auto.commit", "true"))
            is not False
        ):
            raise ValueError("invalid value for 'enable.auto.commit' configuration")

        if (
            as_kafka_configuration_bool(
                configuration.get("enable.auto.offset.store", "true")
            )
            is not False
        ):
            raise ValueError(
                "invalid value for 'enable.auto.offset.store' configuration"
            )

        # NOTE: Offsets are explicitly managed as part of the assignment
        # callback, so preemptively resetting offsets is not enabled.
        self.__consumer = ConfluentConsumer(
            {**configuration, "auto.offset.reset": "error"}
        )

        self.__offsets: MutableMapping[Partition, int] = {}
        self.__staged_offsets: MutableMapping[Partition, int] = {}
        self.__paused: Set[Partition] = set()

        self.__commit_retry_policy = commit_retry_policy

        self.__state = KafkaConsumerState.CONSUMING
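Example #2 additionally requires `enable.auto.commit` and `enable.auto.offset.store` to be explicitly disabled, normalizing them with `as_kafka_configuration_bool`. That helper is not shown; a sketch of the normalization it presumably performs (an assumption about its behavior, not snuba's exact code):

    from typing import Any

    def as_kafka_configuration_bool(value: Any) -> bool:
        # librdkafka tolerates several boolean spellings ("true"/"false"
        # strings as well as native bools); normalize them for comparison.
        if isinstance(value, bool):
            return value
        if isinstance(value, str):
            if value.lower() == "true":
                return True
            if value.lower() == "false":
                return False
        raise TypeError(f"cannot interpret {value!r} as a boolean")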
Example #3
    def _shedual_task(self):
        # This package can be tricky to install; users of this middleware
        # will need to sort out installation themselves.
        from confluent_kafka import Consumer as ConfluentConsumer
        try:
            admin_client = KafkaAdminClient(
                bootstrap_servers=frame_config.KAFKA_BOOTSTRAP_SERVERS)
            admin_client.create_topics([NewTopic(self._queue_name, 10, 1)])
            # admin_client.create_partitions({self._queue_name: NewPartitions(total_count=16)})
        except TopicAlreadyExistsError:
            pass

        self._producer = KafkaProducer(
            bootstrap_servers=frame_config.KAFKA_BOOTSTRAP_SERVERS)
        # Consumer configuration reference:
        # https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
        self._confluent_consumer = ConfluentConsumer({
            'bootstrap.servers': ','.join(frame_config.KAFKA_BOOTSTRAP_SERVERS),
            'group.id': 'frame_group',
            'auto.offset.reset': 'earliest',
            'enable.auto.commit': False,
        })
        self._confluent_consumer.subscribe([self._queue_name])

        self._recent_commit_time = time.time()
        self._partion__offset_consume_status_map = defaultdict(OrderedDict)
        while True:
            msg = self._confluent_consumer.poll(timeout=10)
            self._manually_commit()
            if msg is None:
                continue
            if msg.error():
                print("Consumer error: {}".format(msg.error()))
                continue
            # Message object reference:
            # https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#message
            # value()  offset()  partition()
            # print('Received message: {}'.format(msg.value().decode('utf-8'))) # noqa
            self._partion__offset_consume_status_map[msg.partition()][msg.offset()] = 0
            kw = {
                'partition': msg.partition(),
                'offset': msg.offset(),
                'body': json.loads(msg.value())
            }  # noqa
            if self._is_show_message_get_from_broker:
                self.logger.debug(
                    f'Message fetched from kafka topic [{self._queue_name}], '
                    f'partition {msg.partition()}, offset {msg.offset()}: {msg.value()}'
                )  # noqa
            self._submit_task(kw)
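The loop above records each delivered offset as 0 (pending) in _partion__offset_consume_status_map and periodically calls self._manually_commit(), which is not shown here. A hypothetical sketch of such a method, assuming finished offsets are flipped to 1 elsewhere and commits happen at most every 10 seconds:

    import time
    from confluent_kafka import TopicPartition

    def _manually_commit(self):
        # Hypothetical: commit the longest finished prefix per partition.
        if time.time() - self._recent_commit_time < 10:
            return
        to_commit = []
        for partition, offset_status in self._partion__offset_consume_status_map.items():
            committable = None
            for offset, status in offset_status.items():  # insertion order
                if status == 1:
                    committable = offset
                else:
                    break
            if committable is not None:
                # Kafka expects the *next* offset to be consumed.
                to_commit.append(
                    TopicPartition(self._queue_name, partition, committable + 1))
                for off in [o for o in offset_status if o <= committable]:
                    del offset_status[off]
        if to_commit:
            self._confluent_consumer.commit(offsets=to_commit, asynchronous=False)
        self._recent_commit_time = time.time()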
Example #4
    def __init__(self, publisher, downloader, engine, incoming_topic, group_id,
                 bootstrap_servers, **kwargs):

        super().__init__(publisher, downloader, engine)
        config = kwargs.copy()
        config["group.id"] = group_id
        config["bootstrap.servers"] = ",".join(bootstrap_servers)
        log.info("config", extra={"config": config})

        self.auto_commit = kwargs.get("enable.auto.commit", True)
        self.consumer = ConfluentConsumer(config)

        self.consumer.subscribe([incoming_topic])
        log.info("subscribing to %s: %s", incoming_topic, self.consumer)
Example #5
    def __init__(self,
                 publisher,
                 downloader,
                 engine,
                 incoming_topic,
                 group_id,
                 bootstrap_servers,
                 requeuer=None,
                 **kwargs):

        super().__init__(publisher, downloader, engine)
        config = kwargs.copy()
        config["group.id"] = group_id
        config["bootstrap.servers"] = ",".join(bootstrap_servers)
        config["group.instance.id"] = kwargs.get("group.instance.id",
                                                 os.environ.get("HOSTNAME"))

        self.auto_commit = kwargs.get("enable.auto.commit", True)
        self.consumer = ConfluentConsumer(config)

        self.consumer.subscribe([incoming_topic])
        log.info("subscribing to %s: %s", incoming_topic, self.consumer)
        self.requeuer = requeuer
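Example #5 extends #4 with `group.instance.id`, defaulting to $HOSTNAME: static group membership (librdkafka >= 1.1, brokers >= 2.3), so a restarted pod rejoins under the same identity without triggering a rebalance. A minimal construction sketch with placeholder values:

    import os
    from confluent_kafka import Consumer as ConfluentConsumer

    consumer = ConfluentConsumer({
        "bootstrap.servers": "kafka-1:9092,kafka-2:9092",  # placeholder
        "group.id": "insights",                            # placeholder
        "group.instance.id": os.environ.get("HOSTNAME", "consumer-0"),
    })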
Example #6
    def __init__(self, bootstrap_servers: str, topic: str,
                 group_id: str) -> None:
        config = {
            'bootstrap.servers': bootstrap_servers,
            # Where to consume from after a reset
            # "latest" is the end of the topic, "earliest" is the beginning
            'default.topic.config': {
                'auto.offset.reset': 'latest'
            },
            'metadata.request.timeout.ms': 20000,
            'enable.auto.commit': False,
            'group.id': group_id,
            'api.version.request': True,
            'fetch.wait.max.ms': 100,
            'log.connection.close': False,
            # This logger will log messages originating from non-Python code
            'logger': get_logger('librdkafka'),
            # Max number of bytes per partition returned by the server
            'max.partition.fetch.bytes': MEBIBYTE * 5,
            'statistics.interval.ms': 15000,
            'queued.max.messages.kbytes': 1024 * 64,
        }
        self._consumer = ConfluentConsumer(config)
        self._consumer.subscribe([topic])
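Example #6 nests `auto.offset.reset` under `default.topic.config`; newer librdkafka releases (>= 1.0) deprecate that wrapper, and topic-level properties can be given at the top level instead. A sketch with placeholder connection values:

    config = {
        'bootstrap.servers': 'localhost:9092',  # placeholder
        'group.id': 'my-group',                 # placeholder
        'auto.offset.reset': 'latest',
        'enable.auto.commit': False,
    }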
Example #7
    def __init__(self, configuration: Mapping[str, Any]) -> None:
        self.__consumer = ConfluentConsumer(configuration)
Example #8
    def create_consumer(self):
        from confluent_kafka import Consumer as ConfluentConsumer
        self.consumer = ConfluentConsumer(self.consumer_settings)
        self.consumer.subscribe(topics=self.topics)
        return self.consumer
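A hedged driver for Example #8's create_consumer(); the settings and topic below are placeholders:

    from confluent_kafka import Consumer as ConfluentConsumer

    consumer = ConfluentConsumer({
        "bootstrap.servers": "localhost:9092",  # placeholder
        "group.id": "scrapy-group",             # placeholder
        "auto.offset.reset": "earliest",
    })
    consumer.subscribe(topics=["my-topic"])     # placeholder topic
    try:
        while True:
            msg = consumer.poll(timeout=1.0)
            if msg is None:
                continue
            if not msg.error():
                print(msg.value())
    finally:
        consumer.close()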