Ejemplo n.º 1
0
    def manage_lambdas(self, queues: Mapping[str, Queue], enable: bool):
        """
        Enable or disable the readers and writers of the given queues.
        """
        indexer = load_app_module('indexer', unit_test=True)
        # Map each handler's queue name to the Lambda function serving it
        functions_by_queue = {
            handler.queue: config.indexer_function_name(handler.name)
            for handler in indexer.app.handler_map.values()
            if hasattr(handler, 'queue')
        }
        with ThreadPoolExecutor(max_workers=len(queues)) as tpe:
            pending = []
            for queue_name, queue in queues.items():
                if queue_name not in functions_by_queue:
                    # Only fail (dead-letter) queues have no handler function
                    assert queue_name in config.fail_queue_names
                else:
                    function = functions_by_queue[queue_name]
                    if queue_name == config.notifications_queue_name():
                        # Prevent new notifications from being added
                        pending.append(tpe.submit(self._manage_lambda,
                                                  config.indexer_name, enable))
                    pending.append(tpe.submit(self._manage_sqs_push,
                                              function, queue, enable))
            self._handle_futures(pending)
            pending = [
                tpe.submit(self._wait_for_queue_idle, queue)
                for queue in queues.values()
            ]
            self._handle_futures(pending)
Ejemplo n.º 2
0
    def manage_lambdas(self, queues: Mapping[str, Queue], enable: bool):
        """
        Enable or disable the readers and writers of the given queues.
        """
        with ThreadPoolExecutor(max_workers=len(queues)) as tpe:
            pending = []

            def schedule(f, *args, **kwargs):
                # Submit f to the pool and remember its future
                pending.append(tpe.submit(f, *args, **kwargs))

            for queue_name, queue in queues.items():
                if queue_name == config.notifications_queue_name():
                    schedule(self._manage_lambda, config.indexer_name, enable)
                    schedule(self._manage_sqs_push,
                             config.indexer_name + '-contribute',
                             queue, enable)
                elif queue_name == config.tallies_queue_name():
                    schedule(self._manage_sqs_push,
                             config.indexer_name + '-aggregate',
                             queue, enable)
                elif queue_name == config.tallies_queue_name(retry=True):
                    # FIXME: Brittle coupling between the string literal below and
                    #        the handler function name in app.py
                    #        https://github.com/DataBiosphere/azul/issues/1848
                    schedule(self._manage_sqs_push,
                             config.indexer_name + '-aggregate_retry',
                             queue, enable)
            self._handle_futures(pending)
            self._handle_futures([
                tpe.submit(self._wait_for_queue_idle, queue)
                for queue in queues.values()
            ])
Ejemplo n.º 3
0
 def progress(self) -> JSON:
     """
     The number of Data Store bundles pending to be indexed and the number
     of index documents in need of updating.
     """
     notifications = self.queues[config.notifications_queue_name()]
     unindexed_bundles = sum(notifications.get('messages', {}).values())
     # Documents pending update live in the tallies queue and its retry twin
     unindexed_documents = 0
     for retry in (False, True):
         tallies = self.queues[config.tallies_queue_name(retry=retry)]
         unindexed_documents += sum(tallies.get('messages', {}).values())
     return {
         'up': True,
         'unindexed_bundles': unindexed_bundles,
         'unindexed_documents': unindexed_documents
     }
Ejemplo n.º 4
0
 def notifications_queue(self):
     """
     Look up and return the SQS queue resource for the notifications queue.
     """
     queue_name = config.notifications_queue_name()
     return self.sqs.get_queue_by_name(QueueName=queue_name)
Ejemplo n.º 5
0
    def wait_for_queue_level(self,
                             empty: bool = True,
                             num_expected_bundles: Optional[int] = None,
                             min_timeout: Optional[int] = None,
                             max_timeout: Optional[int] = None):
        """
        Wait for the work queues to reach a desired fill level.

        :param empty: If True, wait for the queues to drain (be empty).
                      If False, wait until they are not empty.

        :param num_expected_bundles: Number of bundles being indexed. If None,
                                     the number of bundles will be approximated
                                     dynamically based on the number of messages
                                     in the notifications queue.

        :param min_timeout: Minimum timeout in seconds. If specified, wait at
                            least this long for queues to reach the desired
                            level.

        :param max_timeout: Maximum timeout in seconds. If specified, wait at
                            most this long for queues to reach the desired
                            level.

        :raises Exception: if the queues do not reach the desired level before
                           the (possibly dynamically computed) timeout expires.
        """
        require(
            min_timeout is None or max_timeout is None
            or min_timeout <= max_timeout,
            'min_timeout must be less than or equal to max_timeout',
            min_timeout, max_timeout)

        def limit_timeout(timeout):
            # Clamp the given timeout into the [min_timeout, max_timeout]
            # interval, honoring whichever bounds were specified
            if max_timeout is not None and max_timeout < timeout:
                timeout = max_timeout
            if min_timeout is not None and timeout < min_timeout:
                timeout = min_timeout
            return timeout

        timeout = limit_timeout(2 * 60)
        sleep_time = 5
        queues = self.get_queues(config.work_queue_names)
        # When draining we require ten consecutive zero samples before
        # declaring victory (queue counts are approximate); when filling, a
        # single non-zero sample suffices
        total_lengths = deque(maxlen=10 if empty else 1)
        start_time = time.time()
        num_bundles = 0

        logger.info(
            'Waiting for %s queues to %s notifications about %s bundles ...',
            len(queues), 'be drained of' if empty else 'fill with',
            'an unknown number of'
            if num_expected_bundles is None else num_expected_bundles)

        while True:
            # Determine queue lengths
            total_length, queue_lengths = self._get_queue_lengths(queues)
            total_lengths.append(total_length)
            logger.info('Counting %i messages in %i queues.', total_length,
                        len(queue_lengths))
            logger.info('Message count history (most recent first) is %r.',
                        list(reversed(total_lengths)))

            # Stop once the sample window is full and its emptiness matches
            # the desired state (all-zero history iff we are draining)
            if len(total_lengths) == total_lengths.maxlen and all(
                    n == 0 for n in total_lengths) == empty:
                logger.info('The queues are at the desired level.')
                break

            # When draining, determine timeout dynamically
            if empty:
                # Estimate number of bundles first, if necessary
                if num_expected_bundles is None:
                    num_notifications = queue_lengths[
                        config.notifications_queue_name()]
                    # If more notifications showed up than previously seen,
                    # adopt the higher estimate and restart the clock
                    if num_bundles < num_notifications:
                        num_bundles = num_notifications
                        start_time = time.time()  # restart the timer
                else:
                    num_bundles = num_expected_bundles
                # It takes approx. 6 seconds per worker to process a bundle
                # FIXME: Temporarily doubling the time, but needs fine-tuning
                #        https://github.com/DataBiosphere/azul/issues/2147
                #        https://github.com/DataBiosphere/azul/issues/2189
                timeout = limit_timeout(12 * num_bundles /
                                        config.indexer_concurrency)

            # Do we have time left?
            remaining_time = start_time + timeout - time.time()
            if remaining_time <= 0:
                raise Exception(
                    'Timeout. The queues are NOT at the desired level.')
            else:
                logger.info(
                    'Waiting for up to %.2f seconds for %s queues to %s ...',
                    remaining_time, len(queues), 'drain' if empty else 'fill')
                time.sleep(sleep_time)
Ejemplo n.º 6
0
 def _create_mock_notifications_queue():
     """
     Create an empty mock SQS queue under the notifications queue name.
     """
     queue_name = config.notifications_queue_name()
     aws.resource('sqs').create_queue(QueueName=queue_name)
Ejemplo n.º 7
0
import json

from azul import (
    config, )
from azul.deployment import (
    emit_tf, )

emit_tf({
    "resource": [{
        "aws_sqs_queue": {
            config.unqual_notifications_queue_name(): {
                "name":
                config.notifications_queue_name(),
                "visibility_timeout_seconds":
                config.contribution_lambda_timeout + 10,
                "message_retention_seconds":
                24 * 60 * 60,
                "redrive_policy":
                json.dumps({
                    "maxReceiveCount":
                    10,
                    "deadLetterTargetArn":
                    "${aws_sqs_queue.%s.arn}" %
                    config.unqual_notifications_queue_name(fail=True)
                })
            },
            **{
                config.unqual_tallies_queue_name(retry=retry): {
                    "name":
                    config.tallies_queue_name(retry=retry),
                    "fifo_queue":
Ejemplo n.º 8
0
                                                    app.current_request)


# Work around https://github.com/aws/chalice/issues/856


def new_handler(self, event, context):
    # Stash the Lambda context on the app object so handlers can reach it,
    # then delegate to Chalice's original EventSourceHandler.__call__
    app.lambda_context = context
    return old_handler(self, event, context)


# Capture the original __call__ BEFORE patching, so new_handler can still
# delegate to it; then monkey-patch Chalice's event-source dispatch
old_handler = chalice.app.EventSourceHandler.__call__
chalice.app.EventSourceHandler.__call__ = new_handler


@app.on_sqs_message(queue=config.notifications_queue_name(), batch_size=1)
def contribute(event: chalice.app.SQSEvent):
    # SQS-triggered Lambda handler: forward each notification message
    # (one per invocation, batch_size=1) to the index controller
    app.index_controller.contribute(event)


@app.on_sqs_message(queue=config.tallies_queue_name(),
                    batch_size=IndexController.document_batch_size)
def aggregate(event: chalice.app.SQSEvent):
    # SQS-triggered Lambda handler: pass a batch of tally messages to the
    # index controller for aggregation
    app.index_controller.aggregate(event)


# Any message in the tallies queue that fails to be processed will be retried
# with more RAM in the tallies_retry queue.


@app.on_sqs_message(queue=config.tallies_queue_name(retry=True),
Ejemplo n.º 9
0
 def _notifications_queue(self):
     # Return the queue object for the notifications queue, resolved via
     # self._queue from the configured queue name
     return self._queue(config.notifications_queue_name())
Ejemplo n.º 10
0
 def _notifications_queue(self, retry=False):
     return self._queue(config.notifications_queue_name(retry=retry))