Example #1
0
            def mark_shard_complete():
                """Record one finished shard and defer ``finalize`` after the last one.

                Reloads the marker, increments ``shards_complete`` and saves it.
                When the completed count equals ``shard_count`` the marker is
                deleted (if ``delete_on_completion`` is set) and ``finalize`` is
                deferred with ``_transactional=True`` on the queue returned by
                ``task_queue_name()``.
                """
                try:
                    marker.refresh_from_db()
                except DeferIterationMarker.DoesNotExist:
                    # The marker row is gone, so there is nothing left to update.
                    logger.warning("TaskMarker with ID: %s has vanished, cancelling task", marker_id)
                    return

                marker.shards_complete += 1
                marker.save()

                all_shards_done = marker.shards_complete == marker.shard_count
                if not all_shards_done:
                    return

                # Delete the marker if we were asked to
                if marker.delete_on_completion:
                    marker.delete()

                defer(finalize, *args, _transactional=True, _queue=task_queue_name(), **kwargs)
Example #2
0
        def make_shard():
            """Count one more shard on the marker and defer the task that processes it.

            The marker is reloaded first, ``shard_count`` is incremented, and the
            marker is flagged ready when this is the last shard. The processing
            task is deferred with ``_transactional=True``.
            """
            marker.refresh_from_db()
            marker.shard_count += 1
            if is_last:
                # Shards re-defer themselves until this flag is set.
                marker.is_ready = True
            marker.save()

            shard_args = (marker.pk, qs.model, qs.query, callback, finalize)
            shard_options = {
                "args": args,
                "kwargs": kwargs,
                "buffer_time": buffer_time,
                "_queue": task_queue_name(),
                "_transactional": True,
            }
            defer(_process_shard, *shard_args, **shard_options)
Example #3
0
    def test_task_queue_name(self):
        """Check ``task_queue_name()`` outside a task and inside one.

        Outside a task the result is ``None`` whether or not the queue header is
        present. Inside a task (``is_in_task`` patched to return True) the
        header value is returned, falling back to ``"default"`` when it is unset.
        """
        queue_header = "HTTP_X_APPENGINE_QUEUENAME"

        # The test mutates os.environ; make sure a failing assertion below cannot
        # leave the header set for whichever test runs next.
        self.addCleanup(os.environ.pop, queue_header, None)

        # when not in task
        self.assertIsNone(task_queue_name())
        os.environ[queue_header] = "demo123"
        self.assertIsNone(task_queue_name())
        del os.environ[queue_header]
        self.assertIsNone(task_queue_name())

        # when in task, w/o queue set
        with sleuth.switch('djangae.environment.is_in_task', lambda: True):
            self.assertEqual(task_queue_name(), "default")

        # when in task, with queue set
        with sleuth.switch('djangae.environment.is_in_task', lambda: True):
            os.environ[queue_header] = "demo123"
            self.assertEqual(task_queue_name(), "demo123")
            del os.environ[queue_header]
            self.assertEqual(task_queue_name(), "default")
Example #4
0
def _process_shard(marker_id, model, query, callback, finalize, buffer_time, args, kwargs):
    """Run ``callback`` on each instance of one shard, re-deferring itself if needed.

    The task re-defers itself (with a one second countdown) when the marker is
    not ready yet, when the time budget runs out, or when processing raises.
    After the last instance it records the shard as complete on the marker; the
    shard that completes the set defers ``finalize``.

    Args:
        marker_id: Primary key of the ``DeferIterationMarker`` tracking the run.
        model: Model class; ``model.objects.all()`` is the base queryset.
        query: Query object assigned to that queryset to select this shard.
        callback: Called as ``callback(instance, *args, **kwargs)`` per instance.
        finalize: Deferred as ``finalize(*args, **kwargs)`` once all shards are done.
        buffer_time: Seconds held back from ``_TASK_TIME_LIMIT`` before giving up.
        args: Positional arguments for ``callback``/``finalize`` (may be falsy).
        kwargs: Keyword arguments for ``callback``/``finalize`` (may be falsy).
    """
    args = args or tuple()
    # Normalised like ``args`` so that ``**kwargs`` below cannot fail on None.
    kwargs = kwargs or {}

    start_time = timezone.now()

    try:
        marker = DeferIterationMarker.objects.get(pk=marker_id)
    except DeferIterationMarker.DoesNotExist:
        logger.warning("DeferIterationMarker with ID: %s has vanished, cancelling task", marker_id)
        return

    # Redefer if the task isn't ready to begin
    if not marker.is_ready:
        defer(
            _process_shard, marker_id, model, query, callback, finalize,
            buffer_time=buffer_time,
            args=args,
            kwargs=kwargs,
            _queue=task_queue_name(),
            _countdown=1
        )
        return

    # Bound before the try block so the except handler can always read them,
    # even when building the queryset is the step that failed.
    qs = None
    last_pk = None

    try:
        qs = model.objects.all()
        qs.query = query
        # order_by() returns a new queryset, so the result has to be kept.
        # Resuming with pk__gte=last_pk is only correct for pk-ordered iteration.
        qs = qs.order_by("pk")

        for instance in qs.all():
            # Recorded before the callback runs so that a retry starts with
            # this instance again rather than skipping it.
            last_pk = instance.pk

            if (timezone.now() - start_time).total_seconds() > _TASK_TIME_LIMIT - buffer_time:
                raise TimeoutException()

            callback(instance, *args, **kwargs)
        else:
            @transaction.atomic(xg=True)
            def mark_shard_complete():
                """Count this shard as done and defer ``finalize`` after the last one."""
                try:
                    marker.refresh_from_db()
                except DeferIterationMarker.DoesNotExist:
                    logger.warning("TaskMarker with ID: %s has vanished, cancelling task", marker_id)
                    return

                marker.shards_complete += 1
                marker.save()

                if marker.shards_complete == marker.shard_count:
                    # Delete the marker if we were asked to
                    if marker.delete_on_completion:
                        marker.delete()

                    defer(
                        finalize,
                        *args,
                        _transactional=True,
                        _queue=task_queue_name(),
                        **kwargs
                    )

            retry(mark_shard_complete, _attempts=6)

    except (Exception, TimeoutException) as e:
        # We intentionally don't catch DeadlineExceededError here. There's not enough time to redefer a task
        # and so the only option is to retry the current shard. It shouldn't happen though, 15 seconds should be
        # ample time... DeadlineExceededError doesn't subclass Exception, it subclasses BaseException so it'll
        # never enter here (if it does occur, somehow)

        if isinstance(e, TimeoutException):
            logger.debug("Ran out of time processing shard. Deferring new shard to continue from: %s", last_pk)
        else:
            logger.exception("Error processing shard. Retrying.")

        # Fall back to the original model/query when the queryset was never built.
        resume_model, resume_query = model, query
        if qs is not None:
            # Compared against None so a falsy primary key still narrows the range.
            if last_pk is not None:
                qs = qs.filter(pk__gte=last_pk)
            resume_model, resume_query = qs.model, qs.query

        defer(
            _process_shard, marker_id, resume_model, resume_query, callback, finalize,
            buffer_time=buffer_time,
            args=args,
            kwargs=kwargs,
            _queue=task_queue_name(),
            _countdown=1
        )
Example #5
0
def _process_shard(marker_id, shard_number, model, query, callback, finalize,
                   buffer_time, args, kwargs):
    """Run ``callback`` on each instance of one shard, re-deferring itself if needed.

    The task re-defers itself (with a one second countdown) when the marker is
    not ready yet, when the time budget runs out, or when processing raises.
    After the last instance it records the shard as complete on the marker; the
    shard that completes the set defers ``finalize``.

    Args:
        marker_id: Primary key of the ``DeferIterationMarker`` tracking the run.
        shard_number: Index of this shard; exported through ``os.environ``.
        model: Model class; ``model.objects.all()`` is the base queryset.
        query: Query object assigned to that queryset to select this shard.
        callback: Called as ``callback(instance, *args, **kwargs)`` per instance.
        finalize: Deferred as ``finalize(*args, **kwargs)`` once all shards are done.
        buffer_time: Seconds held back from ``_TASK_TIME_LIMIT`` before giving
            up. When ``None`` it is derived from the longest callback so far.
        args: Positional arguments for ``callback``/``finalize`` (may be falsy).
        kwargs: Keyword arguments for ``callback``/``finalize`` (may be falsy).
    """
    args = args or tuple()
    # Normalised like ``args`` so that ``**kwargs`` below cannot fail on None.
    kwargs = kwargs or {}

    # Set an index of the shard in the environment, which is useful for callbacks
    # to have access too so they can identify a task
    os.environ[DEFERRED_ITERATION_SHARD_INDEX_KEY] = str(shard_number)

    start_time = time.time()

    try:
        marker = DeferIterationMarker.objects.get(pk=marker_id)
    except DeferIterationMarker.DoesNotExist:
        logger.warning(
            "DeferIterationMarker with ID: %s has vanished, cancelling task",
            marker_id)
        return

    # Resolve the queue once. task_queue_name() may return None, in which case
    # there is nothing to split; otherwise only the last "/" segment is kept.
    queue = task_queue_name()
    if queue:
        queue = queue.rsplit("/", 1)[-1]

    # Redefer if the task isn't ready to begin
    if not marker.is_ready:
        defer(_process_shard,
              marker_id,
              shard_number,
              model,
              query,
              callback,
              finalize,
              buffer_time=buffer_time,
              args=args,
              kwargs=kwargs,
              _queue=queue,
              _countdown=1)
        return

    # Bound before the try block so the except handler can always read them,
    # even when building the queryset is the step that failed.
    qs = None
    last_pk = None

    try:
        qs = model.objects.all()
        qs.query = query
        # order_by() returns a new queryset, so the result has to be kept.
        # Resuming with pk__gte=last_pk is only correct for pk-ordered iteration.
        qs = qs.order_by("pk")

        calculate_buffer_time = buffer_time is None
        longest_iteration = 0
        longest_iteration_multiplier = 1.1

        for instance in qs.all():
            # Recorded before the callback runs so that a retry starts with
            # this instance again rather than skipping it.
            last_pk = instance.pk

            buffer_time_to_apply = (longest_iteration *
                                    longest_iteration_multiplier
                                    if calculate_buffer_time else buffer_time)

            # The first iteration, buffer_time_to_apply will be zero if buffer_time was None
            # that's not a problem.
            shard_time = (time.time() - start_time)
            if shard_time > _TASK_TIME_LIMIT - buffer_time_to_apply:
                raise TimeoutException()

            iteration_start = time.time()

            callback(instance, *args, **kwargs)

            iteration_end = time.time()
            iteration_time = iteration_end - iteration_start

            # Store the iteration time if it's the longest
            longest_iteration = max(longest_iteration, iteration_time)
        else:

            @transaction.atomic(xg=True)
            def mark_shard_complete():
                """Count this shard as done and defer ``finalize`` after the last one."""
                try:
                    marker.refresh_from_db()
                except DeferIterationMarker.DoesNotExist:
                    logger.warning(
                        "TaskMarker with ID: %s has vanished, cancelling task",
                        marker_id)
                    return

                marker.shards_complete += 1
                marker.save()

                if marker.shards_complete == marker.shard_count:
                    # Delete the marker if we were asked to
                    if marker.delete_on_completion:
                        marker.delete()

                    defer(finalize,
                          *args,
                          _transactional=True,
                          _queue=queue,
                          **kwargs)

            retry(mark_shard_complete, _attempts=6)

    except (Exception, TimeoutException) as e:
        # We intentionally don't catch DeadlineExceededError here. There's not enough time to redefer a task
        # and so the only option is to retry the current shard. It shouldn't happen though, 15 seconds should be
        # ample time... DeadlineExceededError doesn't subclass Exception, it subclasses BaseException so it'll
        # never enter here (if it does occur, somehow)

        if isinstance(e, TimeoutException):
            logger.debug(
                "Ran out of time processing shard. Deferring new shard to continue from: %s",
                last_pk)
        else:
            logger.exception("Error processing shard. Retrying.")

        # Fall back to the original model/query when the queryset was never built.
        resume_model, resume_query = model, query
        if qs is not None:
            # Compared against None so a falsy primary key still narrows the range.
            if last_pk is not None:
                qs = qs.filter(pk__gte=last_pk)
            resume_model, resume_query = qs.model, qs.query

        defer(_process_shard,
              marker_id,
              shard_number,
              resume_model,
              resume_query,
              callback,
              finalize,
              buffer_time=buffer_time,
              args=args,
              kwargs=kwargs,
              _queue=queue,
              _countdown=1)
Example #6
0
def _generate_shards(
    model, query, callback, finalize, args, kwargs, shards, delete_marker, buffer_time
):
    """Split the query into key ranges and defer one ``_process_shard`` per range.

    A ``DeferIterationMarker`` is created to track the run. For every key range
    the marker's ``shard_count`` is incremented and a processing task is
    deferred, both inside one transaction; the marker is flagged ready together
    with the last shard. If a shard cannot be created the marker is deleted and
    the error is re-raised.

    Args:
        model: Model class; ``model.objects.all()`` is the base queryset.
        query: Query object assigned to each queryset built here.
        callback: Passed through to ``_process_shard``; its ``__name__`` is
            stored on the marker.
        finalize: Passed through to ``_process_shard``; its ``__name__`` is
            stored on the marker.
        args: Forwarded to ``_process_shard`` as ``args``.
        kwargs: Forwarded to ``_process_shard`` as ``kwargs``.
        shards: Passed to ``find_key_ranges_for_queryset`` with the queryset.
        delete_marker: Stored on the marker as ``delete_on_completion``.
        buffer_time: Forwarded to ``_process_shard`` as ``buffer_time``.
    """
    base_queryset = model.objects.all()
    base_queryset.query = query
    key_ranges = find_key_ranges_for_queryset(base_queryset, shards)

    marker = DeferIterationMarker.objects.create(
        delete_on_completion=delete_marker,
        callback_name=callback.__name__,
        finalize_name=finalize.__name__,
    )

    # Keep only the last "/" segment of the queue name, when there is one.
    queue = task_queue_name()
    queue = queue.rsplit("/", 1)[-1] if queue else queue

    final_index = len(key_ranges) - 1
    for shard_number, (start, end) in enumerate(key_ranges):
        is_last = shard_number == final_index

        shard_qs = model.objects.all()
        shard_qs.query = query

        # A falsy bound leaves that side of the range open.
        bounds = {
            lookup: value
            for lookup, value in (("pk__gte", start), ("pk__lt", end))
            if value
        }

        # calling order_by with no args to clear any pre-existing ordering (e.g. from Meta.ordering)
        shard_qs = shard_qs.filter(**bounds).order_by()

        @transaction.atomic(xg=True)
        def make_shard():
            """Count this shard on the marker and defer its processing task."""
            marker.refresh_from_db()
            marker.shard_count += 1
            if is_last:
                marker.is_ready = True
            marker.save()

            shard_args = (marker.pk, shard_number, shard_qs.model, shard_qs.query, callback, finalize)
            defer(
                _process_shard,
                *shard_args,
                args=args,
                kwargs=kwargs,
                buffer_time=buffer_time,
                _queue=queue,
                _transactional=True
            )

        try:
            retry(make_shard, _attempts=5)
        except:  # noqa
            marker.delete()  # This will cause outstanding tasks to abort
            raise