Esempio n. 1
0
    def test_streaming_pull_ack_deadline(self, publisher, subscriber, project,
                                         topic_path, subscription_path,
                                         cleanup):
        """Check that slow callbacks do not lead to duplicate deliveries.

        Messages are published to a subscription created with a 45 second
        ACK deadline and consumed one at a time by a callback configured
        with a processing time of 13. The test passes only if the callback's
        ``done_future`` (set to resolve at a third message) is still
        unresolved after 60 seconds.
        """
        # Register teardown up front so the topic and subscription get deleted.
        teardown_steps = (
            (publisher.delete_topic, topic_path),
            (subscriber.delete_subscription, subscription_path),
        )
        for step in teardown_steps:
            cleanup.append(step)

        # Both the topic and the subscription must exist before anything is
        # published.
        publisher.create_topic(topic_path)
        subscriber.create_subscription(
            subscription_path, topic_path, ack_deadline_seconds=45)

        # Publish some messages and wait for completion.
        self._publish_messages(publisher, topic_path, batch_sizes=[2])

        # Start consuming, a single message at a time.
        slow_callback = StreamingPullCallback(
            # More than the default stream ACK deadline (10s).
            processing_time=13,
            # One more than the published messages count.
            resolve_at_msg_count=3,
        )
        one_at_a_time = types.FlowControl(max_messages=1)
        streaming_future = subscriber.subscribe(
            subscription_path, slow_callback, flow_control=one_at_a_time)

        # Processing both messages takes 2 * 13 seconds, and a duplicate
        # re-sent by the backend would take another 13 seconds, i.e. 39
        # seconds plus overhead. If nothing extra showed up within 60 seconds
        # it is reasonable to assume that nothing will.
        try:
            slow_callback.done_future.result(timeout=60)
        except exceptions.TimeoutError:
            # Timing out is the success path: no excess message was received.
            assert sorted(slow_callback.seen_message_ids) == [1, 2]
        else:
            seen_count = len(slow_callback.seen_message_ids)
            pytest.fail(
                "Expected to receive 2 messages, but got at least {}.".format(
                    seen_count))
        finally:
            streaming_future.cancel()
Esempio n. 2
0
def test__maybe_release_messages_below_overload():
    """Messages on hold are released only until the message limit is hit.

    With 8 messages already leased and ``max_messages=10``, two of the three
    held messages must be leased and scheduled; the third stays on hold.
    """
    manager = make_manager(
        flow_control=types.FlowControl(max_messages=10, max_bytes=1000))
    manager._callback = mock.sentinel.callback

    # Start the leaser at 8 messages, leaving room for exactly 2 more.
    fake_leaser = mock.create_autospec(leaser.Leaser)
    manager._leaser = fake_leaser
    fake_leaser_add(fake_leaser, init_msg_count=8, assumed_msg_size=25)
    fake_leaser.add = mock.Mock(wraps=fake_leaser.add)  # spy on the calls

    held_specs = (("ack_foo", 11), ("ack_bar", 22), ("ack_baz", 33))
    held_messages = [
        mock.create_autospec(
            message.Message, instance=True, ack_id=ack_id, size=size)
        for ack_id, size in held_specs
    ]
    for held in held_messages:
        manager._messages_on_hold.put(held)

    # Exercise the method under test.
    manager._maybe_release_messages()

    # Only the last message is still waiting.
    assert manager._messages_on_hold.qsize() == 1
    leftover = manager._messages_on_hold.get_nowait()
    assert leftover.ack_id == "ack_baz"

    # The first two messages were added to lease management, in order.
    assert len(fake_leaser.add.mock_calls) == 2
    fake_leaser.add.assert_has_calls([
        mock.call([requests.LeaseRequest(ack_id="ack_foo", byte_size=11)]),
        mock.call([requests.LeaseRequest(ack_id="ack_bar", byte_size=22)]),
    ])

    # ... and were handed to the scheduler together with the user callback.
    scheduled = manager._scheduler.schedule.mock_calls
    assert len(scheduled) == 2
    for _name, positional, _keywords in scheduled:
        assert positional[0] == mock.sentinel.callback
        assert isinstance(positional[1], message.Message)
        assert positional[1].ack_id in ("ack_foo", "ack_bar")
    def __init__(self,
                 client,
                 subscription,
                 flow_control=types.FlowControl(),
                 executor=None,
                 queue=None):
        """Set up the policy together with its request queue and executor.

        Args:
            client (~.pubsub_v1.subscriber.client): The subscriber client
                this policy is created for.
            subscription (str): The subscription name, canonically
                ``projects/{project}/subscriptions/{subscription}``.
            flow_control (~google.cloud.pubsub_v1.types.FlowControl): The
                flow control settings.
            executor (~concurrent.futures.ThreadPoolExecutor): (Optional.)
                A ThreadPoolExecutor instance or a duck-type compatible
                object. A pool with 10 workers is created when omitted.
            queue (~queue.Queue): (Optional.) A Queue instance suitable for
                crossing the concurrency boundary implemented by
                ``executor``. A new ``Queue()`` is created when omitted.
        """
        # Until `.open` supplies the real callback, messages are ignored.
        self._callback = lambda message: None

        # The queue carries shared state between threads.
        self._request_queue = Queue() if queue is None else queue

        # Let the base class store the client, subscription and flow control.
        super(Policy, self).__init__(
            client=client,
            flow_control=flow_control,
            subscription=subscription,
        )

        # The callback thread is only created here, not started.
        logger.debug('Creating callback requests thread (not starting).')
        self._executor = (
            futures.ThreadPoolExecutor(max_workers=10)
            if executor is None else executor)
        self._callback_requests = _helper_threads.QueueCallbackThread(
            self._request_queue,
            self.on_callback_request,
        )
 def __init__(self,
              client,
              subscription,
              flow_control=types.FlowControl(),
              executor=None,
              queue=None):
     """Initialise the base policy, then the queue, executor and threads.

     Args:
         client: Passed through to the base class constructor.
         subscription: Passed through to the base class constructor.
         flow_control: Passed through to the base class constructor.
         executor: Handed to ``self._get_executor`` to obtain the executor.
         queue: Handed to ``self._get_queue`` to obtain the request queue.
     """
     super(Policy, self).__init__(
         client=client,
         flow_control=flow_control,
         subscription=subscription,
     )
     # A no-op stands in until ``.open()`` provides the **actual** callback.
     self._callback = _do_nothing_callback
     # Queue for keeping track of shared state, followed by the executor.
     self._request_queue = self._get_queue(queue)
     self._executor = self._get_executor(executor)
     # Both threads are created later, in ``.open()``.
     self._dispatch_thread = self._leases_thread = None
Esempio n. 5
0
def test__maybe_release_messages_on_overload():
    """Nothing is released from hold when the load is exactly at the limit."""
    limits = types.FlowControl(max_messages=10, max_bytes=1000)
    manager = make_manager(flow_control=limits)

    # A load of exactly 1.0 verifies that the check uses >= rather than >.
    saturated_leaser = mock.create_autospec(leaser.Leaser)
    manager._leaser = saturated_leaser
    saturated_leaser.message_count = 10
    saturated_leaser.bytes = 1000

    held = mock.create_autospec(
        message.Message, instance=True, ack_id="ack", size=11)
    manager._messages_on_hold.put(held)

    manager._maybe_release_messages()

    # The message stays on hold and nothing got leased or scheduled.
    assert manager._messages_on_hold.qsize() == 1
    manager._leaser.add.assert_not_called()
    manager._scheduler.schedule.assert_not_called()
def test_load():
    """``_load`` reports the higher of the message and byte utilisation."""
    policy = create_policy(
        flow_control=types.FlowControl(max_messages=10, max_bytes=1000))

    # Each step leases one more message and states the load expected after it.
    steps = (
        # 10% of the messages, 15% of the bytes --> bytes win.
        ('one', 150, 0.15),
        # 20% of the messages, 16% of the bytes --> messages win.
        ('two', 10, 0.2),
        # Going above 100% is fine.
        ('three', 1000, 1.16),
    )
    for ack_id, byte_size, expected_load in steps:
        policy.lease(ack_id=ack_id, byte_size=byte_size)
        assert policy._load == expected_load
Esempio n. 7
0
def test_constructor_and_default_state():
    """A freshly constructed manager exposes the expected defaults."""
    client = mock.sentinel.client
    subscription = mock.sentinel.subscription
    manager = streaming_pull_manager.StreamingPullManager(
        client, subscription)

    # What is visible through the public interface.
    assert manager.is_active is False
    assert manager.flow_control == types.FlowControl()
    assert manager.dispatcher is None
    assert manager.leaser is None
    assert manager.ack_histogram is not None
    assert manager.ack_deadline == 10
    assert manager.load == 0

    # What the constructor stored internally.
    assert manager._client == client
    assert manager._subscription == subscription
    assert manager._scheduler is not None
Esempio n. 8
0
def test_subscribe_options(manager_open):
    """``subscribe`` forwards its options to the streaming pull manager."""
    client = subscriber.Client(
        credentials=mock.Mock(spec=credentials.Credentials))
    limits = types.FlowControl(max_bytes=42)
    fake_scheduler = mock.sentinel.scheduler

    future = client.subscribe(
        "sub_name_a",
        callback=mock.sentinel.callback,
        flow_control=limits,
        scheduler=fake_scheduler,
    )
    assert isinstance(future, futures.StreamingPullFuture)

    # Every option must have ended up on the manager behind the future.
    manager = future._manager
    assert manager._subscription == "sub_name_a"
    assert manager.flow_control == limits
    assert manager._scheduler == fake_scheduler
    manager_open.assert_called_once_with(mock.ANY, mock.sentinel.callback)
def test_load_w_requests():
    """``_load`` takes the consumer's pending requests into account."""
    policy = create_policy(
        flow_control=types.FlowControl(max_bytes=100, max_requests=100))
    consumer = policy._consumer

    # ``pending_requests`` is patched on the class with a PropertyMock so
    # that its value can be controlled from the test.
    with mock.patch.object(
            consumer.__class__, 'pending_requests',
            new_callable=mock.PropertyMock) as pending_requests:
        for pending, expected_load in ((0, 0), (100, 1)):
            pending_requests.return_value = pending
            assert policy._load == expected_load

        # Once the byte utilisation is higher, that value is reported.
        policy._bytes = 110
        assert policy._load == 1.1
Esempio n. 10
0
    def __init__(self, client, subscription,
                 flow_control=types.FlowControl(), histogram_data=None):
        """Store the collaborators and reset the policy's bookkeeping.

        Args:
            client: The client object; stored as ``_client``.
            subscription: The subscription; stored as ``_subscription``.
            flow_control: The flow control settings; exposed publicly as
                ``flow_control``.
            histogram_data: Passed as ``data`` to the ``Histogram`` that is
                exposed as ``histogram``.
        """
        self._client, self._subscription = client, subscription
        self._consumer = _consumer.Consumer()
        self._ack_deadline = 10
        self._last_histogram_size = 0
        self._future = None
        self.flow_control = flow_control
        self.histogram = _histogram.Histogram(data=histogram_data)

        # Internal flow control tracking; subclasses should not need these.
        self._bytes, self._ack_on_resume = 0, set()
Esempio n. 11
0
    def __init__(self, client, subscription,
                 flow_control=types.FlowControl(), scheduler=None):
        """Initialise the manager's state without opening anything.

        Args:
            client: The client object; stored as ``_client``.
            subscription: The subscription; stored as ``_subscription``.
            flow_control: The flow control settings.
            scheduler: (Optional.) The scheduler to use. A new
                ``ThreadScheduler`` is created when this is ``None``.
        """
        self._client = client
        self._subscription = subscription
        self._flow_control = flow_control
        self._ack_histogram = histogram.Histogram()
        self._last_histogram_size = 0
        self._ack_deadline = 10
        self._rpc = None
        self._callback = None
        self._closing = threading.Lock()
        self._closed = False
        self._close_callbacks = []

        if scheduler is not None:
            self._scheduler = scheduler
        else:
            self._scheduler = (
                google.cloud.pubsub_v1.subscriber.scheduler.ThreadScheduler())

        # FIFO queue of messages that were received from the server but are
        # neither under lease management nor sent to the user callback yet,
        # because the FlowControl limits have been hit.
        self._messages_on_hold = queue.Queue()

        # Total number of bytes taken up by the messages currently on hold.
        self._on_hold_bytes = 0

        # Makes pausing and resuming the consumer atomic and mutually
        # exclusive, which is needed to keep them in sync with the current
        # leaser load. It also guards changes to the data that feeds the load
        # computation, i.e. the count and size of the messages on hold.
        self._pause_resume_lock = threading.Lock()

        # All of these are created in ``.open()``.
        self._dispatcher = None
        self._leaser = None
        self._consumer = None
        self._heartbeater = None
Esempio n. 12
0
    def subscribe(self, subscription, callback=None, flow_control=()):
        """Return an object representing an individual subscription.

        The returned object is built by the client's ``_policy_class``. This
        method itself does `not` create the subscription on the backend; it
        only returns an object bound to the given subscription name.

        When ``callback`` is provided, ``open`` is called on the returned
        object with that callback. Without a ``callback`` the object is
        returned unopened.

        .. note::
            It only makes sense to provide ``callback`` here if you have
            already created the subscription manually in the API.

        Args:
            subscription (str): The name of the subscription. The
                subscription should have already been created (for example,
                by using :meth:`create_subscription`).
            callback (function): The callback function. This function receives
                the :class:`~.pubsub_v1.types.PubsubMessage` as its only
                argument.
            flow_control (~.pubsub_v1.types.FlowControl): The flow control
                settings. Use this to prevent situations where you are
                inundated with too many messages at once.

        Returns:
            The object created by the client's ``_policy_class`` for this
            subscription.

        Raises:
            TypeError: If ``callback`` is neither ``None`` nor callable.
        """
        limits = types.FlowControl(*flow_control)
        policy = self._policy_class(self, subscription, limits)

        # Without a callback the policy is handed back unopened.
        if callback is None:
            return policy

        if not callable(callback):
            error = '{!r} is not callable, please check input'.format(callback)
            raise TypeError(error)

        policy.open(callback)
        return policy
Esempio n. 13
0
    def __init__(self, client, subscription,
                 flow_control=types.FlowControl(), histogram_data=None):
        """Store the collaborators and reset the policy's bookkeeping.

        Args:
            client: The client object; stored as ``_client``.
            subscription: The subscription; stored as ``_subscription``.
            flow_control: The flow control settings; exposed publicly as
                ``flow_control``.
            histogram_data: Passed as ``data`` to the ``Histogram`` that is
                exposed as ``histogram``.
        """
        self._client = client
        self._subscription = subscription
        self._consumer = _consumer.Consumer()
        # Initial ack deadline. NOTE(review): presumably seconds -- confirm.
        self._ack_deadline = 10
        self._last_histogram_size = 0
        self._future = None
        self.flow_control = flow_control
        self.histogram = _histogram.Histogram(data=histogram_data)
        """.Histogram: the histogram tracking ack latency."""
        # Starts out empty; entries are added elsewhere.
        self.leased_messages = {}
        """dict[str, float]: A mapping of ack IDs to the local time when the
            ack ID was initially leased in seconds since the epoch."""

        # These are for internal flow control tracking.
        # They should not need to be used by subclasses.
        self._bytes = 0
        self._ack_on_resume = set()
def test__maybe_release_messages_below_overload():
    """Messages on hold are released only until the message limit is hit.

    Three messages are put on hold while the leaser is set up so that there
    is room for just two of them; the third one must remain on hold.
    """
    manager = make_manager(
        flow_control=types.FlowControl(max_messages=10, max_bytes=1000))
    manager._callback = mock.sentinel.callback

    # Init leaser message count to 11, so that when subtracting the 3 messages
    # that are on hold, there is still room for another 2 messages before the
    # max load is hit.
    _leaser = manager._leaser = mock.create_autospec(leaser.Leaser)
    fake_leaser_add(_leaser, init_msg_count=11, assumed_msg_size=10)

    messages = [
        mock.create_autospec(message.Message,
                             instance=True,
                             ack_id="ack_foo",
                             size=10),
        mock.create_autospec(message.Message,
                             instance=True,
                             ack_id="ack_bar",
                             size=10),
        mock.create_autospec(message.Message,
                             instance=True,
                             ack_id="ack_baz",
                             size=10),
    ]
    for msg in messages:
        manager._messages_on_hold.put(msg)

    # The on-hold byte total covers all three messages, so it is set once
    # after the loop rather than being re-assigned on every iteration.
    manager._on_hold_bytes = 3 * 10

    # the actual call of MUT
    manager._maybe_release_messages()

    # Only the last message is still on hold.
    assert manager._messages_on_hold.size == 1
    msg = manager._messages_on_hold.get()
    assert msg.ack_id == "ack_baz"

    # The other two were scheduled together with the user callback.
    schedule_calls = manager._scheduler.schedule.mock_calls
    assert len(schedule_calls) == 2
    for _, call_args, _ in schedule_calls:
        assert call_args[0] == mock.sentinel.callback
        assert isinstance(call_args[1], message.Message)
        assert call_args[1].ack_id in ("ack_foo", "ack_bar")
def test_load():
    """``_load`` tracks utilisation; the consumer pauses once it overloads."""
    policy = create_policy(
        flow_control=types.FlowControl(max_messages=10, max_bytes=1000))
    consumer = policy._consumer

    # Each step leases one more message and states the load expected after
    # it, plus whether the consumer must have been paused by then.
    steps = (
        # 10% of the messages, 15% of the bytes --> bytes win.
        ('one', 150, 0.15, False),
        # 20% of the messages, 16% of the bytes --> messages win.
        ('two', 10, 0.2, False),
        # Going above 100% is fine.
        ('three', 1000, 1.16, True),
    )
    with mock.patch.object(consumer, 'pause') as pause:
        for ack_id, byte_size, expected_load, expect_pause in steps:
            policy.lease(ack_id=ack_id, byte_size=byte_size)
            assert policy._load == expected_load
            if expect_pause:
                pause.assert_called_once_with()
            else:
                pause.assert_not_called()
Esempio n. 16
0
def main():
    """Run the scenario end to end.

    Sets up logging and the helper patches, creates a topic and a
    subscription, consumes with ``max_messages=8`` flow control while
    messages are published asynchronously, and finally cleans everything up.
    """
    # Set-up: logging and helper patches.
    logger = utils.setup_logging(CURR_DIR)
    thread_names.monkey_patch()
    utils.make_lease_deterministic()

    # Timestamp-based names for the topic and the subscription.
    topic_name = 't-repro-{}'.format(int(1000 * time.time()))
    subscription_name = 's-repro-{}'.format(int(1000 * time.time()))
    publisher, topic_path, subscriber, subscription_path = (
        utils.get_client_info(topic_name,
                              subscription_name,
                              policy_class=utils.FlowControlPolicy))

    publisher.create_topic(topic_path)

    # The subscription is created and opened before publishing so that the
    # messages are received as they come in.
    subscriber.create_subscription(subscription_path, topic_path)
    logger.info('Listening for messages on %s', subscription_path)
    limits = types.FlowControl(max_messages=8)
    subscription = subscriber.subscribe(subscription_path,
                                        flow_control=limits)
    sub_future = subscription.open(utils.AckCallback(logger))

    # Kick off the asynchronous publishing job.
    publish_async(publisher, subscription._consumer, topic_path, logger)

    # The subscriber does not block, so the main thread is kept alive here
    # while messages are processed in the background.
    utils.heartbeats_block(logger, sub_future, max_time=MAX_TIME)

    # Clean-up: close, delete the resources, then undo the patches.
    subscription.close()
    publisher.delete_topic(topic_path)
    subscriber.delete_subscription(subscription_path)
    thread_names.save_tree(CURR_DIR, logger)
    thread_names.restore()
    utils.restore()
def test__obtain_ack_deadline_no_custom_flow_control_setting():
    """Without a lease extension cap, the deadline follows the histogram."""
    from google.cloud.pubsub_v1.subscriber._protocol import histogram

    minimum = histogram.MIN_ACK_DEADLINE
    manager = make_manager()

    # Disable max_duration_per_lease_extension.
    manager._flow_control = types.FlowControl(
        max_duration_per_lease_extension=0)

    # With no data yet, the minimum deadline is returned.
    assert manager._obtain_ack_deadline(maybe_update=True) == minimum

    # Historical data adjusts the deadline.
    manager.ack_histogram.add(minimum * 2)
    assert manager._obtain_ack_deadline(maybe_update=True) == minimum * 2

    # One additional data point is not yet enough to change it again.
    manager.ack_histogram.add(minimum)
    assert manager._obtain_ack_deadline(maybe_update=True) == minimum * 2
Esempio n. 18
0
    def __init__(self,
                 client,
                 subscription,
                 flow_control=types.FlowControl(),
                 histogram_data=None):
        """Instantiate the policy.

        Stores the client, subscription and flow control settings, creates
        the consumer and the histogram, and resets the internal flow control
        bookkeeping.

        Args:
            client (~.pubsub_v1.subscriber.client): The subscriber client used
                to create this instance.
            subscription (str): The name of the subscription. The canonical
                format for this is
                ``projects/{project}/subscriptions/{subscription}``.
            flow_control (~.pubsub_v1.types.FlowControl): The flow control
                settings.
            histogram_data (dict): Optional: A structure to store the histogram
                data for predicting appropriate ack times. If set, this should
                be a dictionary-like object.

                .. note::
                    Additionally, the histogram relies on the assumption
                    that the dictionary will properly sort keys provided
                    that all keys are positive integers. If you are sending
                    your own dictionary class, ensure this assumption holds
                    or you will get strange behavior.
        """
        self._client = client
        self._subscription = subscription
        # The consumer is handed this policy instance; note that none of the
        # attributes assigned below exist yet at this point.
        self._consumer = _consumer.Consumer(self)
        # Initial ack deadline. NOTE(review): presumably seconds -- confirm.
        self._ack_deadline = 10
        self._last_histogram_size = 0
        self._future = None
        self.flow_control = flow_control
        self.histogram = _histogram.Histogram(data=histogram_data)

        # These are for internal flow control tracking.
        # They should not need to be used by subclasses.
        self._bytes = 0
        self._ack_on_resume = set()
        self._paused = False
Esempio n. 19
0
def test_on_request_below_threshold():
    """Establish that we resume a paused subscription when the pending
    requests count is below threshold."""
    policy = create_policy(flow_control=types.FlowControl(max_requests=100))
    consumer = policy._consumer

    assert consumer.paused is True

    # Each step sets the pending request count and states whether the
    # consumer must still be paused after the policy has processed a request.
    steps = (
        # Not under the resume threshold yet --> stays paused.
        (90, True),
        # Under the resume threshold --> gets unpaused.
        (50, False),
    )
    with mock.patch.object(
            consumer.__class__, 'pending_requests',
            new_callable=mock.PropertyMock) as pending_requests:
        for pending, expect_paused in steps:
            pending_requests.return_value = pending
            policy.on_request(None)
            assert consumer.paused is expect_paused
def test_subscribe_options(manager_open, creds):
    """``subscribe`` forwards its options to the streaming pull manager."""
    client = subscriber.Client(credentials=creds)
    limits = types.FlowControl(max_bytes=42)
    fake_scheduler = mock.sentinel.scheduler

    future = client.subscribe(
        "sub_name_a",
        callback=mock.sentinel.callback,
        flow_control=limits,
        scheduler=fake_scheduler,
        await_callbacks_on_shutdown=mock.sentinel.await_callbacks,
    )
    assert isinstance(future, futures.StreamingPullFuture)

    # Every option must have ended up on the manager behind the future.
    manager = future._manager
    assert manager._subscription == "sub_name_a"
    assert manager.flow_control == limits
    assert manager._scheduler == fake_scheduler
    assert manager._await_callbacks_on_shutdown is mock.sentinel.await_callbacks

    # The manager is opened with the callback and the future's error hook.
    manager_open.assert_called_once_with(
        mock.ANY,
        callback=mock.sentinel.callback,
        on_callback_error=future.set_exception,
    )
def test__obtain_ack_deadline_no_value_update():
    """The deadline only changes when ``maybe_update`` is true."""
    manager = make_manager()

    # Disable max_duration_per_lease_extension.
    manager._flow_control = types.FlowControl(
        max_duration_per_lease_extension=0)

    manager.ack_histogram.add(21)
    assert manager._obtain_ack_deadline(maybe_update=True) == 21

    # Gather some new ACK data.
    for _ in range(5):
        manager.ack_histogram.add(35)

    # Without an update the previously computed value is still reported ...
    assert manager._obtain_ack_deadline(maybe_update=False) == 21

    # ... and reading the ack_deadline property has no side effects either.
    assert manager.ack_deadline == 21

    # An actual update is reflected on the ack_deadline wrapper, too.
    refreshed = manager._obtain_ack_deadline(maybe_update=True)
    assert manager.ack_deadline == refreshed == 35
    def __init__(self,
                 client,
                 subscription,
                 flow_control=types.FlowControl(),
                 scheduler=None):
        """Initialise the manager's state without opening anything.

        Args:
            client: The client object; stored as ``_client``.
            subscription: The subscription; stored as ``_subscription``.
            flow_control: The flow control settings.
            scheduler: (Optional.) The scheduler to use. A new
                ``ThreadScheduler`` is created when this is ``None``.
        """
        self._client = client
        self._subscription = subscription
        self._flow_control = flow_control
        self._ack_histogram = histogram.Histogram()
        self._last_histogram_size = 0
        self._ack_deadline = 10
        self._rpc = None
        self._callback = None
        self._closing = threading.Lock()
        self._closed = False
        self._close_callbacks = []

        # Fall back to a thread based scheduler unless one was supplied.
        if scheduler is not None:
            self._scheduler = scheduler
        else:
            self._scheduler = (
                google.cloud.pubsub_v1.subscriber.scheduler.ThreadScheduler())

        # All of these are created in ``.open()``.
        self._dispatcher = None
        self._leaser = None
        self._consumer = None
Esempio n. 23
0
 def __init__(self, client, subscription,
              flow_control=types.FlowControl(), scheduler_cls=None):
     """Refuse construction; this initialiser is not implemented.

     Raises:
         NotImplementedError: Always, regardless of the arguments.
     """
     raise NotImplementedError()
Esempio n. 24
0
    def subscribe_experimental(
            self, subscription, callback, flow_control=(),
            scheduler_=None):
        """Asynchronously start receiving messages on a given subscription.

        This method starts a background thread to begin pulling messages from
        a Pub/Sub subscription and scheduling them to be processed using the
        provided ``callback``.

        The ``callback`` will be called with an individual
        :class:`google.cloud.pubsub_v1.subscriber.message.Message`. It is the
        responsibility of the callback to either call ``ack()`` or ``nack()``
        on the message when it finished processing. If an exception occurs in
        the callback during processing, the exception is logged and the message
        is ``nack()`` ed.

        The ``flow_control`` argument can be used to control the rate at
        which messages are pulled. The settings are relatively conservative by
        default to prevent "message hoarding" - a situation where the client
        pulls a large number of messages but can not process them fast enough
        leading it to "starve" other clients of messages. Increasing these
        settings may lead to faster throughput for messages that do not take
        a long time to process.

        This method starts the receiver in the background and returns a
        *Future* representing its execution. Waiting on the future (calling
        ``result()``) will block forever or until a non-recoverable error
        is encountered (such as loss of network connectivity). Cancelling the
        future will signal the process to shutdown gracefully and exit.

        Example

        .. code-block:: python

            from google.cloud import pubsub_v1

            subscriber_client = pubsub_v1.SubscriberClient()

            # existing subscription
            subscription = subscriber_client.subscription_path(
                'my-project-id', 'my-subscription')

            def callback(message):
                print(message)
                message.ack()

            future = subscriber_client.subscribe_experimental(
                subscription, callback)

            try:
                future.result()
            except KeyboardInterrupt:
                future.cancel()

        Args:
            subscription (str): The name of the subscription. The
                subscription should have already been created (for example,
                by using :meth:`create_subscription`).
            callback (Callable[~.pubsub_v1.subscriber.message.Message]):
                The callback function. This function receives the message as
                its only argument and will be called from a different thread/
                process depending on the scheduling strategy.
            flow_control (~.pubsub_v1.types.FlowControl): The flow control
                settings. Use this to prevent situations where you are
                inundated with too many messages at once.
            scheduler_: (Optional.) A scheduler that is forwarded to the
                streaming pull manager as its ``scheduler`` argument. With
                ``None`` the manager picks its own scheduler.

        Returns:
            google.cloud.pubsub_v1.futures.StreamingPullFuture: A Future object
                that can be used to manage the background stream.
        """
        flow_control = types.FlowControl(*flow_control)

        # Forward the scheduler as well; it used to be accepted by this method
        # but silently dropped, so a caller-supplied scheduler had no effect.
        # NOTE(review): relies on StreamingPullManager accepting a
        # ``scheduler`` keyword -- confirm against the manager in this tree.
        manager = streaming_pull_manager.StreamingPullManager(
            self,
            subscription,
            flow_control=flow_control,
            scheduler=scheduler_,
        )

        future = futures.StreamingPullFuture(manager)

        manager.open(callback)

        return future
Esempio n. 25
0
    def subscribe(
            self,
            subscription,
            callback,
            flow_control=(),
            scheduler=None,
            use_legacy_flow_control=False,
    ):
        """Asynchronously start receiving messages on a given subscription.

        A background thread is started that pulls messages from a Pub/Sub
        subscription and schedules them to be processed by the provided
        ``callback``.

        The ``callback`` is invoked with an individual
        :class:`google.cloud.pubsub_v1.subscriber.message.Message`. The
        callback is responsible for calling either ``ack()`` or ``nack()`` on
        the message once it has finished processing. If an exception occurs
        in the callback during processing, the exception is logged and the
        message is ``nack()`` ed.

        The ``flow_control`` argument controls the rate at which messages are
        pulled. The default settings are relatively conservative in order to
        prevent "message hoarding" - a situation where the client pulls a
        large number of messages but cannot process them fast enough, leading
        it to "starve" other clients of messages. Increasing these settings
        may lead to faster throughput for messages that do not take a long
        time to process.

        The ``use_legacy_flow_control`` argument disables enforcing flow
        control settings at the Cloud PubSub server and uses the less accurate
        method of only enforcing flow control at the client side.

        The receiver is started in the background and a *Future* representing
        its execution is returned. Waiting on the future (calling
        ``result()``) blocks forever or until a non-recoverable error is
        encountered (such as loss of network connectivity). Cancelling the
        future signals the process to shut down gracefully and exit.

        .. note:: This uses Pub/Sub's *streaming pull* feature. This feature
            has properties that may be surprising. Please take a look at
            https://cloud.google.com/pubsub/docs/pull#streamingpull for
            more details on how streaming pull behaves compared to the
            synchronous pull method.

        Example:

        .. code-block:: python

            from google.cloud import pubsub_v1

            subscriber_client = pubsub_v1.SubscriberClient()

            # existing subscription
            subscription = subscriber_client.subscription_path(
                'my-project-id', 'my-subscription')

            def callback(message):
                print(message)
                message.ack()

            future = subscriber_client.subscribe(
                subscription, callback)

            try:
                future.result()
            except KeyboardInterrupt:
                future.cancel()

        Args:
            subscription (str): The name of the subscription. The
                subscription should have already been created (for example,
                by using :meth:`create_subscription`).
            callback (Callable[~google.cloud.pubsub_v1.subscriber.message.Message]):
                The callback function. This function receives the message as
                its only argument and will be called from a different thread/
                process depending on the scheduling strategy.
            flow_control (~google.cloud.pubsub_v1.types.FlowControl): The flow
                control settings. Use this to prevent situations where you
                are inundated with too many messages at once.
            scheduler (~google.cloud.pubsub_v1.subscriber.scheduler.Scheduler):
                An optional *scheduler* to use when executing the callback.
                This controls how callbacks are executed concurrently. This
                object must not be shared across multiple SubscriberClients.
            use_legacy_flow_control (bool): Passed through to the streaming
                pull manager; see the description above.

        Returns:
            A :class:`~google.cloud.pubsub_v1.subscriber.futures.StreamingPullFuture`
            instance that can be used to manage the background stream.
        """
        limits = types.FlowControl(*flow_control)

        pull_manager = streaming_pull_manager.StreamingPullManager(
            self,
            subscription,
            flow_control=limits,
            scheduler=scheduler,
            use_legacy_flow_control=use_legacy_flow_control,
        )
        stream_future = futures.StreamingPullFuture(pull_manager)

        # Errors raised by the callback are reported through the future.
        pull_manager.open(
            callback=callback,
            on_callback_error=stream_future.set_exception,
        )
        return stream_future
def test_maybe_resume_consumer_wo_consumer_set():
    """Check that ``maybe_resume_consumer()`` completes without raising.

    The manager comes straight from ``make_manager`` with explicit message
    and byte limits; per the test name, no consumer has been set on it.
    """
    limits = types.FlowControl(max_messages=10, max_bytes=1000)
    manager = make_manager(flow_control=limits)

    # There is nothing to assert on: the test passes as long as this call
    # returns normally.
    manager.maybe_resume_consumer()  # no raise
def create_policy(flow_control=types.FlowControl()):
    """Build a ``thread.Policy`` backed by a client with mocked credentials.

    Args:
        flow_control: Flow control settings forwarded to the policy.
            Defaults to a ``types.FlowControl()`` built with no arguments.

    Returns:
        A ``thread.Policy`` for the subscription name ``'sub_name_d'``.
    """
    # Spec the mock against the real credentials class and hand it to the
    # subscriber client in place of actual credentials.
    fake_credentials = mock.Mock(spec=credentials.Credentials)
    subscriber_client = subscriber.Client(credentials=fake_credentials)

    return thread.Policy(
        subscriber_client,
        'sub_name_d',
        flow_control=flow_control,
    )
Esempio n. 28
0
    def __init__(
        self,
        client,
        subscription,
        flow_control=types.FlowControl(),
        scheduler=None,
        use_legacy_flow_control=False,
        await_callbacks_on_shutdown=False,
    ):
        """Record the configuration and initialize internal bookkeeping.

        Args:
            client: The client object; kept as ``self._client``.
            subscription: The subscription; kept as ``self._subscription``.
            flow_control: The flow control settings; kept as
                ``self._flow_control``.
            scheduler: The scheduler to use. When ``None``, a new
                ``ThreadScheduler`` instance is created instead.
            use_legacy_flow_control: Flag stored as
                ``self._use_legacy_flow_control``.
            await_callbacks_on_shutdown: Flag stored as
                ``self._await_callbacks_on_shutdown``.
        """
        # --- Caller-supplied configuration -------------------------------
        self._client = client
        self._subscription = subscription
        self._flow_control = flow_control
        self._use_legacy_flow_control = use_legacy_flow_control
        self._await_callbacks_on_shutdown = await_callbacks_on_shutdown

        # --- ACK deadline tracking ---------------------------------------
        # The deadline starts out at ``histogram.MIN_ACK_DEADLINE``.
        self._ack_histogram = histogram.Histogram()
        self._last_histogram_size = 0
        self._ack_deadline = histogram.MIN_ACK_DEADLINE

        # --- RPC and user callback: not set at construction time ---------
        self._rpc = None
        self._callback = None

        # --- Shutdown state ----------------------------------------------
        self._closing = threading.Lock()
        self._closed = False
        self._close_callbacks = []
        self._regular_shutdown_thread = None  # Created on intentional shutdown.

        # A random client id bound to this object. Every streaming pull
        # connection (the initial one and all re-connects) reuses this id,
        # which lets the server establish affinity even across stream
        # disconnections.
        self._client_id = str(uuid.uuid4())

        # Honor an explicitly supplied scheduler; otherwise fall back to a
        # freshly created thread-based one.
        if scheduler is not None:
            self._scheduler = scheduler
        else:
            scheduler_module = google.cloud.pubsub_v1.subscriber.scheduler
            self._scheduler = scheduler_module.ThreadScheduler()

        # Messages that have been received from the server, but not yet
        # handed over to the user callback ...
        self._messages_on_hold = messages_on_hold.MessagesOnHold()

        # ... and the total number of bytes those on-hold messages consume.
        self._on_hold_bytes = 0

        # Makes pausing and resuming the consumer atomic and mutually
        # exclusive, which is needed to keep these operations in sync with
        # the current leaser load. The same lock also guards modifications of
        # the internal data that feeds the load computation, i.e. the count
        # and size of the messages currently on hold.
        self._pause_resume_lock = threading.Lock()

        # Guards the current ACK deadline used in the lease management. The
        # value can potentially be updated both by the leaser thread and by
        # the message consumer thread when the internal _on_response()
        # callback is invoked.
        self._ack_deadline_lock = threading.Lock()

        # The threads created in ``.open()``; none exist yet.
        self._dispatcher = self._leaser = None
        self._consumer = self._heartbeater = None
Esempio n. 29
0
def create_subscriber(flow_control=types.FlowControl()):
    """Return an autospec'd ``Subscriber`` instance mock set up for tests.

    Args:
        flow_control: Value assigned to the mock's ``flow_control``
            attribute. Defaults to a ``types.FlowControl()`` built with no
            arguments.

    Returns:
        The mock, with ``is_active`` set to ``True``, ``flow_control`` set to
        the given value, and ``ack_histogram`` set to a new ``Histogram``.
    """
    fake_subscriber = mock.create_autospec(
        subscriber.Subscriber,
        instance=True,
    )

    # Replace the auto-generated attributes with concrete values.
    fake_subscriber.is_active = True
    fake_subscriber.flow_control = flow_control
    fake_subscriber.ack_histogram = _histogram.Histogram()

    return fake_subscriber
Esempio n. 30
0
def test_streaming_flow_control():
    """The initial request must carry the configured flow control limits."""
    limits = types.FlowControl(max_messages=10, max_bytes=1000)
    manager = make_manager(flow_control=limits)

    initial_request = manager._get_initial_request(
        stream_ack_deadline_seconds=10,
    )

    # Both limits passed to the manager should show up on the request.
    assert initial_request.max_outstanding_messages == 10
    assert initial_request.max_outstanding_bytes == 1000