def subscribe(self, topic, subscription_name,
              consumer_type=ConsumerType.Exclusive,
              schema=schema.BytesSchema(),
              message_listener=None,
              receiver_queue_size=1000,
              max_total_receiver_queue_size_across_partitions=50000,
              consumer_name=None,
              unacked_messages_timeout_ms=None,
              broker_consumer_stats_cache_time_ms=30000,
              negative_ack_redelivery_delay_ms=60000,
              is_read_compacted=False,
              properties=None,
              pattern_auto_discovery_period=60,
              initial_position=InitialPosition.Latest
              ):
    """
    Subscribe to the given topic and subscription combination.

    **Args**

    * `topic`: The name of the topic, list of topics or regex pattern.
      This method will accept these forms:
      - `topic='my-topic'`
      - `topic=['topic-1', 'topic-2', 'topic-3']`
      - `topic=re.compile('persistent://public/default/topic-*')`
    * `subscription_name`: The name of the subscription.

    **Options**

    * `consumer_type`:
      Select the subscription type to be used when subscribing to the topic.
    * `schema`:
      Define the schema of the data that will be received by this consumer.
    * `message_listener`:
      Sets a message listener for the consumer. When the listener is set,
      the application will receive messages through it. Calls to
      `consumer.receive()` will not be allowed. The listener function needs
      to accept (consumer, message), for example:

            #!python
            def my_listener(consumer, message):
                # process message
                consumer.acknowledge(message)

    * `receiver_queue_size`:
      Sets the size of the consumer receive queue. The consumer receive
      queue controls how many messages can be accumulated by the consumer
      before the application calls `receive()`. Using a higher value could
      potentially increase the consumer throughput at the expense of higher
      memory utilization. Setting the consumer queue size to zero decreases
      the throughput of the consumer by disabling pre-fetching of messages.
      This approach improves the message distribution on shared subscription
      by pushing messages only to those consumers that are ready to process
      them. Neither receive with timeout nor partitioned topics can be used
      if the consumer queue size is zero. The `receive()` function call
      should not be interrupted when the consumer queue size is zero. The
      default value is 1000 messages and should work well for most use cases.
    * `max_total_receiver_queue_size_across_partitions`:
      Set the max total receiver queue size across partitions. This setting
      will be used to reduce the receiver queue size for individual partitions.
    * `consumer_name`:
      Sets the consumer name.
    * `unacked_messages_timeout_ms`:
      Sets the timeout in milliseconds for unacknowledged messages. The
      timeout needs to be greater than 10 seconds. An exception is thrown if
      the given value is less than 10 seconds. If a successful
      acknowledgement is not sent within the timeout, all the unacknowledged
      messages are redelivered.
    * `negative_ack_redelivery_delay_ms`:
      The delay after which to redeliver the messages that failed to be
      processed (with the `consumer.negative_acknowledge()`).
    * `broker_consumer_stats_cache_time_ms`:
      Sets the time duration for which the broker-side consumer stats will
      be cached in the client.
    * `is_read_compacted`:
      Selects whether to read the compacted version of the topic.
    * `properties`:
      Sets the properties for the consumer. The properties associated with
      a consumer can be used for identify a consumer at broker side.
    * `pattern_auto_discovery_period`:
      Periods of seconds for consumer to auto discover match topics.
    * `initial_position`:
      Set the initial position of a consumer when subscribing to the topic.
      It could be either: `InitialPosition.Earliest` or
      `InitialPosition.Latest`. Default: `Latest`.
    """
    _check_type(str, subscription_name, 'subscription_name')
    _check_type(ConsumerType, consumer_type, 'consumer_type')
    _check_type(_schema.Schema, schema, 'schema')
    _check_type(int, receiver_queue_size, 'receiver_queue_size')
    _check_type(int, max_total_receiver_queue_size_across_partitions,
                'max_total_receiver_queue_size_across_partitions')
    _check_type_or_none(str, consumer_name, 'consumer_name')
    _check_type_or_none(int, unacked_messages_timeout_ms, 'unacked_messages_timeout_ms')
    _check_type(int, broker_consumer_stats_cache_time_ms,
                'broker_consumer_stats_cache_time_ms')
    _check_type(int, negative_ack_redelivery_delay_ms,
                'negative_ack_redelivery_delay_ms')
    _check_type(int, pattern_auto_discovery_period, 'pattern_auto_discovery_period')
    _check_type(bool, is_read_compacted, 'is_read_compacted')
    _check_type_or_none(dict, properties, 'properties')
    _check_type(InitialPosition, initial_position, 'initial_position')

    conf = _pulsar.ConsumerConfiguration()
    conf.consumer_type(consumer_type)
    conf.read_compacted(is_read_compacted)
    if message_listener:
        conf.message_listener(_listener_wrapper(message_listener, schema))
    conf.receiver_queue_size(receiver_queue_size)
    conf.max_total_receiver_queue_size_across_partitions(
        max_total_receiver_queue_size_across_partitions)
    if consumer_name:
        conf.consumer_name(consumer_name)
    if unacked_messages_timeout_ms:
        conf.unacked_messages_timeout_ms(unacked_messages_timeout_ms)
    conf.negative_ack_redelivery_delay_ms(negative_ack_redelivery_delay_ms)
    conf.broker_consumer_stats_cache_time_ms(
        broker_consumer_stats_cache_time_ms)
    # Fix: previously this parameter was validated and documented but never
    # applied to the configuration, so it was silently ignored.
    conf.pattern_auto_discovery_period(pattern_auto_discovery_period)
    if properties:
        for k, v in properties.items():
            conf.property(k, v)
    conf.subscription_initial_position(initial_position)

    conf.schema(schema.schema_info())

    c = Consumer()
    if isinstance(topic, str):
        # Single topic
        c._consumer = self._client.subscribe(topic, subscription_name, conf)
    elif isinstance(topic, list):
        # List of topics
        c._consumer = self._client.subscribe_topics(topic, subscription_name, conf)
    elif isinstance(topic, _retype):
        # Regex pattern: the broker is given the pattern string and will
        # auto-discover matching topics.
        c._consumer = self._client.subscribe_pattern(topic.pattern, subscription_name, conf)
    else:
        raise ValueError("Argument 'topic' is expected to be of type str, "
                         "list or re.Pattern")

    c._client = self
    c._schema = schema
    self._consumers.append(c)
    return c
def create_reader(self, topic, start_message_id,
                  schema=schema.BytesSchema(),
                  reader_listener=None,
                  receiver_queue_size=1000,
                  reader_name=None,
                  subscription_role_prefix=None,
                  is_read_compacted=False):
    """
    Create a reader on a particular topic.

    **Args**

    * `topic`: The name of the topic.
    * `start_message_id`: The initial reader positioning is done by
      specifying a message id:
       * `MessageId.earliest`: start from the earliest message available
         in the topic.
       * `MessageId.latest`: start from the end of the topic, only getting
         messages published after the reader was created.
       * a specific `MessageId`: the reader positions itself on that
         message; the first message read will be the one right after it.
         A message id can round-trip through a string:

               # Serialize to string
               s = msg.message_id().serialize()

               # Deserialize from string
               msg_id = MessageId.deserialize(s)

    **Options**

    * `schema`: Define the schema of the data received by this reader.
    * `reader_listener`: Sets a message listener for the reader. When set,
      the application receives messages through it and calls to
      `reader.read_next()` are not allowed. The listener takes
      (reader, message), for example:

            def my_listener(reader, message):
                # process message
                pass

    * `receiver_queue_size`: Size of the reader receive queue, i.e. how
      many messages can be accumulated before the application calls
      `read_next()`. A higher value can increase throughput at the cost
      of memory.
    * `reader_name`: Sets the reader name.
    * `subscription_role_prefix`: Sets the subscription role prefix.
    * `is_read_compacted`: Selects whether to read the compacted version
      of the topic.
    """
    # Validate every argument type up front so misuse fails fast.
    _check_type(str, topic, 'topic')
    _check_type(_pulsar.MessageId, start_message_id, 'start_message_id')
    _check_type(_schema.Schema, schema, 'schema')
    _check_type(int, receiver_queue_size, 'receiver_queue_size')
    _check_type_or_none(str, reader_name, 'reader_name')
    _check_type_or_none(str, subscription_role_prefix, 'subscription_role_prefix')
    _check_type(bool, is_read_compacted, 'is_read_compacted')

    # Build the native reader configuration; optional settings are only
    # applied when the caller supplied them.
    reader_conf = _pulsar.ReaderConfiguration()
    if reader_listener:
        reader_conf.reader_listener(_listener_wrapper(reader_listener, schema))
    reader_conf.receiver_queue_size(receiver_queue_size)
    if reader_name:
        reader_conf.reader_name(reader_name)
    if subscription_role_prefix:
        reader_conf.subscription_role_prefix(subscription_role_prefix)
    reader_conf.schema(schema.schema_info())
    reader_conf.read_compacted(is_read_compacted)

    # Wrap the native reader and register it so the client can close it.
    reader = Reader()
    reader._reader = self._client.create_reader(topic, start_message_id, reader_conf)
    reader._client = self
    reader._schema = schema
    self._consumers.append(reader)
    return reader
def create_producer( self, topic, producer_name=None, schema=schema.BytesSchema(), initial_sequence_id=None, send_timeout_millis=30000, compression_type=CompressionType.NONE, max_pending_messages=1000, max_pending_messages_across_partitions=50000, block_if_queue_full=False, batching_enabled=False, batching_max_messages=1000, batching_max_allowed_size_in_bytes=128 * 1024, batching_max_publish_delay_ms=10, message_routing_mode=PartitionsRoutingMode.RoundRobinDistribution, properties=None, ): """ Create a new producer on a given topic. **Args** * `topic`: The topic name **Options** * `producer_name`: Specify a name for the producer. If not assigned, the system will generate a globally unique name which can be accessed with `Producer.producer_name()`. When specifying a name, it is app to the user to ensure that, for a given topic, the producer name is unique across all Pulsar's clusters. * `schema`: Define the schema of the data that will be published by this producer. The schema will be used for two purposes: - Validate the data format against the topic defined schema - Perform serialization/deserialization between data and objects An example for this parameter would be to pass `schema=JsonSchema(MyRecordClass)`. * `initial_sequence_id`: Set the baseline for the sequence ids for messages published by the producer. First message will be using `(initialSequenceId + 1)`` as its sequence id and subsequent messages will be assigned incremental sequence ids, if not otherwise specified. * `send_timeout_seconds`: If a message is not acknowledged by the server before the `send_timeout` expires, an error will be reported. * `compression_type`: Set the compression type for the producer. By default, message payloads are not compressed. Supported compression types are `CompressionType.LZ4`, `CompressionType.ZLib`, `CompressionType.ZSTD` and `CompressionType.SNAPPY`. ZSTD is supported since Pulsar 2.3. 
Consumers will need to be at least at that release in order to be able to receive messages compressed with ZSTD. SNAPPY is supported since Pulsar 2.4. Consumers will need to be at least at that release in order to be able to receive messages compressed with SNAPPY. * `max_pending_messages`: Set the max size of the queue holding the messages pending to receive an acknowledgment from the broker. * `max_pending_messages_across_partitions`: Set the max size of the queue holding the messages pending to receive an acknowledgment across partitions from the broker. * `block_if_queue_full`: Set whether `send_async` operations should block when the outgoing message queue is full. * `message_routing_mode`: Set the message routing mode for the partitioned producer. Default is `PartitionsRoutingMode.RoundRobinDistribution`, other option is `PartitionsRoutingMode.UseSinglePartition` * `properties`: Sets the properties for the producer. The properties associated with a producer can be used for identify a producer at broker side. 
""" _check_type(str, topic, 'topic') _check_type_or_none(str, producer_name, 'producer_name') _check_type(_schema.Schema, schema, 'schema') _check_type_or_none(int, initial_sequence_id, 'initial_sequence_id') _check_type(int, send_timeout_millis, 'send_timeout_millis') _check_type(CompressionType, compression_type, 'compression_type') _check_type(int, max_pending_messages, 'max_pending_messages') _check_type(int, max_pending_messages_across_partitions, 'max_pending_messages_across_partitions') _check_type(bool, block_if_queue_full, 'block_if_queue_full') _check_type(bool, batching_enabled, 'batching_enabled') _check_type(int, batching_max_messages, 'batching_max_messages') _check_type(int, batching_max_allowed_size_in_bytes, 'batching_max_allowed_size_in_bytes') _check_type(int, batching_max_publish_delay_ms, 'batching_max_publish_delay_ms') _check_type_or_none(dict, properties, 'properties') conf = _pulsar.ProducerConfiguration() conf.send_timeout_millis(send_timeout_millis) conf.compression_type(compression_type) conf.max_pending_messages(max_pending_messages) conf.max_pending_messages_across_partitions( max_pending_messages_across_partitions) conf.block_if_queue_full(block_if_queue_full) conf.batching_enabled(batching_enabled) conf.batching_max_messages(batching_max_messages) conf.batching_max_allowed_size_in_bytes( batching_max_allowed_size_in_bytes) conf.batching_max_publish_delay_ms(batching_max_publish_delay_ms) conf.partitions_routing_mode(message_routing_mode) if producer_name: conf.producer_name(producer_name) if initial_sequence_id: conf.initial_sequence_id(initial_sequence_id) if properties: for k, v in properties.items(): conf.property(k, v) conf.schema(schema.schema_info()) p = Producer() p._producer = self._client.create_producer(topic, conf) p._schema = schema return p