Ejemplo n.º 1
0
    def subscribe(self,
                  topic,
                  subscription_name,
                  consumer_type=ConsumerType.Exclusive,
                  schema=schema.BytesSchema(),
                  message_listener=None,
                  receiver_queue_size=1000,
                  max_total_receiver_queue_size_across_partitions=50000,
                  consumer_name=None,
                  unacked_messages_timeout_ms=None,
                  broker_consumer_stats_cache_time_ms=30000,
                  negative_ack_redelivery_delay_ms=60000,
                  is_read_compacted=False,
                  properties=None,
                  pattern_auto_discovery_period=60,
                  initial_position=InitialPosition.Latest):
        """
        Subscribe to the given topic and subscription combination.

        **Args**

        * `topic`: The name of the topic, list of topics or regex pattern.
                  This method will accept these forms:
                    - `topic='my-topic'`
                    - `topic=['topic-1', 'topic-2', 'topic-3']`
                    - `topic=re.compile('persistent://public/default/topic-*')`
        * `subscription`: The name of the subscription.

        **Options**

        * `consumer_type`:
          Select the subscription type to be used when subscribing to the topic.
        * `schema`:
           Define the schema of the data that will be received by this consumer.
        * `message_listener`:
          Sets a message listener for the consumer. When the listener is set,
          the application will receive messages through it. Calls to
          `consumer.receive()` will not be allowed. The listener function needs
          to accept (consumer, message), for example:

                #!python
                def my_listener(consumer, message):
                    # process message
                    consumer.acknowledge(message)

        * `receiver_queue_size`:
          Sets the size of the consumer receive queue. The consumer receive
          queue controls how many messages can be accumulated by the consumer
          before the application calls `receive()`. Using a higher value could
          potentially increase the consumer throughput at the expense of higher
          memory utilization. Setting the consumer queue size to zero decreases
          the throughput of the consumer by disabling pre-fetching of messages.
          This approach improves the message distribution on shared subscription
          by pushing messages only to those consumers that are ready to process
          them. Neither receive with timeout nor partitioned topics can be used
          if the consumer queue size is zero. The `receive()` function call
          should not be interrupted when the consumer queue size is zero. The
          default value is 1000 messages and should work well for most use
          cases.
        * `max_total_receiver_queue_size_across_partitions`
          Set the max total receiver queue size across partitions.
          This setting will be used to reduce the receiver queue size for individual partitions
        * `consumer_name`:
          Sets the consumer name.
        * `unacked_messages_timeout_ms`:
          Sets the timeout in milliseconds for unacknowledged messages. The
          timeout needs to be greater than 10 seconds. An exception is thrown if
          the given value is less than 10 seconds. If a successful
          acknowledgement is not sent within the timeout, all the unacknowledged
          messages are redelivered.
        * `negative_ack_redelivery_delay_ms`:
           The delay after which to redeliver the messages that failed to be
           processed (with the `consumer.negative_acknowledge()`)
        * `broker_consumer_stats_cache_time_ms`:
          Sets the time duration for which the broker-side consumer stats will
          be cached in the client.
        * `is_read_compacted`:
          Selects whether to read the compacted version of the topic
        * `properties`:
          Sets the properties for the consumer. The properties associated with a consumer
          can be used for identify a consumer at broker side.
        * `pattern_auto_discovery_period`:
          Periods of seconds for consumer to auto discover match topics.
        * `initial_position`:
          Set the initial position of a consumer  when subscribing to the topic.
          It could be either: `InitialPosition.Earliest` or `InitialPosition.Latest`.
          Default: `Latest`.
        """
        _check_type(str, subscription_name, 'subscription_name')
        _check_type(ConsumerType, consumer_type, 'consumer_type')
        _check_type(_schema.Schema, schema, 'schema')
        _check_type(int, receiver_queue_size, 'receiver_queue_size')
        _check_type(int, max_total_receiver_queue_size_across_partitions,
                    'max_total_receiver_queue_size_across_partitions')
        _check_type_or_none(str, consumer_name, 'consumer_name')
        _check_type_or_none(int, unacked_messages_timeout_ms,
                            'unacked_messages_timeout_ms')
        _check_type(int, broker_consumer_stats_cache_time_ms,
                    'broker_consumer_stats_cache_time_ms')
        _check_type(int, negative_ack_redelivery_delay_ms,
                    'negative_ack_redelivery_delay_ms')
        _check_type(int, pattern_auto_discovery_period,
                    'pattern_auto_discovery_period')
        _check_type(bool, is_read_compacted, 'is_read_compacted')
        _check_type_or_none(dict, properties, 'properties')
        _check_type(InitialPosition, initial_position, 'initial_position')

        conf = _pulsar.ConsumerConfiguration()
        conf.consumer_type(consumer_type)
        conf.read_compacted(is_read_compacted)
        if message_listener:
            conf.message_listener(_listener_wrapper(message_listener, schema))
        conf.receiver_queue_size(receiver_queue_size)
        conf.max_total_receiver_queue_size_across_partitions(
            max_total_receiver_queue_size_across_partitions)
        if consumer_name:
            conf.consumer_name(consumer_name)
        if unacked_messages_timeout_ms:
            conf.unacked_messages_timeout_ms(unacked_messages_timeout_ms)

        conf.negative_ack_redelivery_delay_ms(negative_ack_redelivery_delay_ms)
        conf.broker_consumer_stats_cache_time_ms(
            broker_consumer_stats_cache_time_ms)
        if properties:
            for k, v in properties.items():
                conf.property(k, v)
        conf.subscription_initial_position(initial_position)

        conf.schema(schema.schema_info())

        c = Consumer()
        if isinstance(topic, str):
            # Single topic
            c._consumer = self._client.subscribe(topic, subscription_name,
                                                 conf)
        elif isinstance(topic, list):
            # List of topics
            c._consumer = self._client.subscribe_topics(
                topic, subscription_name, conf)
        elif isinstance(topic, _retype):
            # Regex pattern
            c._consumer = self._client.subscribe_pattern(
                topic.pattern, subscription_name, conf)
        else:
            raise ValueError(
                "Argument 'topic' is expected to be of a type between (str, list, re.pattern)"
            )

        c._client = self
        c._schema = schema
        self._consumers.append(c)
        return c
Ejemplo n.º 2
0
    def create_reader(self,
                      topic,
                      start_message_id,
                      schema=schema.BytesSchema(),
                      reader_listener=None,
                      receiver_queue_size=1000,
                      reader_name=None,
                      subscription_role_prefix=None,
                      is_read_compacted=False):
        """
        Create a reader on a particular topic

        **Args**

        * `topic`: The name of the topic.
        * `start_message_id`: The initial reader positioning is done by specifying a message id.
           The options are:
            * `MessageId.earliest`: Start reading from the earliest message available in the topic
            * `MessageId.latest`: Start reading from the end topic, only getting messages published
               after the reader was created
            * `MessageId`: When passing a particular message id, the reader will position itself on
               that specific position. The first message to be read will be the message next to the
               specified messageId. Message id can be serialized into a string and deserialized
               back into a `MessageId` object:

                   # Serialize to string
                   s = msg.message_id().serialize()

                   # Deserialize from string
                   msg_id = MessageId.deserialize(s)

        **Options**

        * `schema`:
           Define the schema of the data that will be received by this reader.
        * `reader_listener`:
          Sets a message listener for the reader. When the listener is set,
          the application will receive messages through it. Calls to
          `reader.read_next()` will not be allowed. The listener function needs
          to accept (reader, message), for example:

                def my_listener(reader, message):
                    # process message
                    pass

        * `receiver_queue_size`:
          Sets the size of the reader receive queue. The reader receive
          queue controls how many messages can be accumulated by the reader
          before the application calls `read_next()`. Using a higher value could
          potentially increase the reader throughput at the expense of higher
          memory utilization.
        * `reader_name`:
          Sets the reader name.
        * `subscription_role_prefix`:
          Sets the subscription role prefix.
        * `is_read_compacted`:
          Selects whether to read the compacted version of the topic
        """
        _check_type(str, topic, 'topic')
        _check_type(_pulsar.MessageId, start_message_id, 'start_message_id')
        _check_type(_schema.Schema, schema, 'schema')
        _check_type(int, receiver_queue_size, 'receiver_queue_size')
        _check_type_or_none(str, reader_name, 'reader_name')
        _check_type_or_none(str, subscription_role_prefix,
                            'subscription_role_prefix')
        _check_type(bool, is_read_compacted, 'is_read_compacted')

        conf = _pulsar.ReaderConfiguration()
        if reader_listener:
            conf.reader_listener(_listener_wrapper(reader_listener, schema))
        conf.receiver_queue_size(receiver_queue_size)
        if reader_name:
            conf.reader_name(reader_name)
        if subscription_role_prefix:
            conf.subscription_role_prefix(subscription_role_prefix)
        conf.schema(schema.schema_info())
        conf.read_compacted(is_read_compacted)

        c = Reader()
        c._reader = self._client.create_reader(topic, start_message_id, conf)
        c._client = self
        c._schema = schema
        self._consumers.append(c)
        return c
Ejemplo n.º 3
0
    def create_producer(
        self,
        topic,
        producer_name=None,
        schema=schema.BytesSchema(),
        initial_sequence_id=None,
        send_timeout_millis=30000,
        compression_type=CompressionType.NONE,
        max_pending_messages=1000,
        max_pending_messages_across_partitions=50000,
        block_if_queue_full=False,
        batching_enabled=False,
        batching_max_messages=1000,
        batching_max_allowed_size_in_bytes=128 * 1024,
        batching_max_publish_delay_ms=10,
        message_routing_mode=PartitionsRoutingMode.RoundRobinDistribution,
        properties=None,
    ):
        """
        Create a new producer on a given topic.

        **Args**

        * `topic`:
          The topic name

        **Options**

        * `producer_name`:
           Specify a name for the producer. If not assigned,
           the system will generate a globally unique name which can be accessed
           with `Producer.producer_name()`. When specifying a name, it is app to
           the user to ensure that, for a given topic, the producer name is unique
           across all Pulsar's clusters.
        * `schema`:
           Define the schema of the data that will be published by this producer.
           The schema will be used for two purposes:
             - Validate the data format against the topic defined schema
             - Perform serialization/deserialization between data and objects
           An example for this parameter would be to pass `schema=JsonSchema(MyRecordClass)`.
        * `initial_sequence_id`:
           Set the baseline for the sequence ids for messages
           published by the producer. First message will be using
           `(initialSequenceId + 1)`` as its sequence id and subsequent messages will
           be assigned incremental sequence ids, if not otherwise specified.
        * `send_timeout_seconds`:
          If a message is not acknowledged by the server before the
          `send_timeout` expires, an error will be reported.
        * `compression_type`:
          Set the compression type for the producer. By default, message
          payloads are not compressed. Supported compression types are
          `CompressionType.LZ4`, `CompressionType.ZLib`, `CompressionType.ZSTD` and `CompressionType.SNAPPY`.
          ZSTD is supported since Pulsar 2.3. Consumers will need to be at least at that
          release in order to be able to receive messages compressed with ZSTD.
          SNAPPY is supported since Pulsar 2.4. Consumers will need to be at least at that
          release in order to be able to receive messages compressed with SNAPPY.
        * `max_pending_messages`:
          Set the max size of the queue holding the messages pending to receive
          an acknowledgment from the broker.
        * `max_pending_messages_across_partitions`:
          Set the max size of the queue holding the messages pending to receive
          an acknowledgment across partitions from the broker.
        * `block_if_queue_full`: Set whether `send_async` operations should
          block when the outgoing message queue is full.
        * `message_routing_mode`:
          Set the message routing mode for the partitioned producer. Default is `PartitionsRoutingMode.RoundRobinDistribution`,
          other option is `PartitionsRoutingMode.UseSinglePartition`
        * `properties`:
          Sets the properties for the producer. The properties associated with a producer
          can be used for identify a producer at broker side.
        """
        _check_type(str, topic, 'topic')
        _check_type_or_none(str, producer_name, 'producer_name')
        _check_type(_schema.Schema, schema, 'schema')
        _check_type_or_none(int, initial_sequence_id, 'initial_sequence_id')
        _check_type(int, send_timeout_millis, 'send_timeout_millis')
        _check_type(CompressionType, compression_type, 'compression_type')
        _check_type(int, max_pending_messages, 'max_pending_messages')
        _check_type(int, max_pending_messages_across_partitions,
                    'max_pending_messages_across_partitions')
        _check_type(bool, block_if_queue_full, 'block_if_queue_full')
        _check_type(bool, batching_enabled, 'batching_enabled')
        _check_type(int, batching_max_messages, 'batching_max_messages')
        _check_type(int, batching_max_allowed_size_in_bytes,
                    'batching_max_allowed_size_in_bytes')
        _check_type(int, batching_max_publish_delay_ms,
                    'batching_max_publish_delay_ms')
        _check_type_or_none(dict, properties, 'properties')

        conf = _pulsar.ProducerConfiguration()
        conf.send_timeout_millis(send_timeout_millis)
        conf.compression_type(compression_type)
        conf.max_pending_messages(max_pending_messages)
        conf.max_pending_messages_across_partitions(
            max_pending_messages_across_partitions)
        conf.block_if_queue_full(block_if_queue_full)
        conf.batching_enabled(batching_enabled)
        conf.batching_max_messages(batching_max_messages)
        conf.batching_max_allowed_size_in_bytes(
            batching_max_allowed_size_in_bytes)
        conf.batching_max_publish_delay_ms(batching_max_publish_delay_ms)
        conf.partitions_routing_mode(message_routing_mode)
        if producer_name:
            conf.producer_name(producer_name)
        if initial_sequence_id:
            conf.initial_sequence_id(initial_sequence_id)
        if properties:
            for k, v in properties.items():
                conf.property(k, v)

        conf.schema(schema.schema_info())

        p = Producer()
        p._producer = self._client.create_producer(topic, conf)
        p._schema = schema
        return p