Ejemplo n.º 1
0
    async def poll_dynamic_task(
        self,
        utc_now: datetime.datetime,
        task: Task,
        continuation_token: Optional[ContinuationToken] = None,
    ) -> QueryResponse:
        """Fetch one page of instances of ``task`` from the ``ready_at`` index.

        Args:
            utc_now: Timestamp used to build the upper bound of the
                ``ready_at`` key condition.
            task: Parent task; it determines the partition that is queried and
                is used to instantiate every returned task spec.
            continuation_token: Passed to the query as ``ExclusiveStartKey``
                when it is not ``None``.

        Returns:
            A ``QueryResponse`` whose ``tasks`` pair each instantiated task
            with a ``Lease`` built from its record, and whose
            ``continuation_token`` is the query's ``LastEvaluatedKey`` (or
            ``None`` when the response has none).
        """
        partition_condition = Key("partition_id").eq(
            self._get_partition_id(task))
        # NOTE(review): the trailing backtick presumably makes the bound sort
        # after every stored key that starts with this timestamp -- confirm
        # against the code that writes ``ready_at``.
        range_condition = Key("ready_at").lt(f"{utc_now.isoformat()}`")

        query_args = {
            "IndexName": "ready_at",
            "Select": "ALL_ATTRIBUTES",
            "Limit": self._batch_size,
            "KeyConditionExpression": partition_condition & range_condition,
        }
        # Only send a start key when the caller is resuming a previous page.
        if continuation_token is not None:
            query_args["ExclusiveStartKey"] = continuation_token

        response = await self._table.query(**query_args)

        leased_tasks = []
        for item in response["Items"]:
            instance = task.instantiate_from_spec(
                json.loads(item["task_spec"]))
            leased_tasks.append(
                (instance, Lease(_TaskRecord.from_dynamo_item(item))))

        return QueryResponse(
            tasks=leased_tasks,
            continuation_token=response.get("LastEvaluatedKey"),
        )
Ejemplo n.º 2
0
    async def poll_dynamic_task(
        self,
        utc_now: datetime.datetime,
        task: Task,
        continuation_token: Optional[ContinuationToken] = None,
    ) -> QueryResponse:
        """Select and lease up to ``_batch_size`` ready instances of ``task``.

        Inside one transaction the ready rows are selected with
        ``FOR UPDATE SKIP LOCKED`` and then stamped with a fresh lease id and
        an expiry of ``utc_now + task.lease_duration``.

        Args:
            utc_now: Rows whose later of ``locked_until`` / ``execute_after``
                is at or before this instant are considered ready.
            task: Parent task whose ``canonical_name`` is matched against
                ``parent_name``.
            continuation_token: Not read by this implementation.

        Returns:
            A ``QueryResponse`` with one ``(task, lease)`` pair per selected
            row; every pair shares the same lease id. The continuation token
            is set only when a full batch was returned.
        """
        async with self._transaction() as cursor:
            locked_by = str(uuid.uuid4())
            locked_until = utc_now + task.lease_duration

            select_sql = f"""
                SELECT name, task_spec FROM {self._table_name}
                WHERE parent_name = %s AND GREATEST(COALESCE(locked_until, 0), COALESCE(execute_after, 0)) <= %s
                ORDER BY GREATEST(COALESCE(locked_until, 0), COALESCE(execute_after, 0)) ASC
                LIMIT %s
                FOR UPDATE SKIP LOCKED
                """
            select_args = (
                task.canonical_name,
                _to_timestamp(utc_now),
                self._batch_size,
            )
            await cursor.execute(select_sql, select_args)
            ready_tasks = await cursor.fetchall()

            update_sql = f"""
                UPDATE {self._table_name}
                SET
                    locked_until = %s,
                    locked_by = %s
                WHERE name = %s
                """
            update_args = []
            for row in ready_tasks:
                update_args.append(
                    (_to_timestamp(locked_until), locked_by, row["name"]))
            await cursor.executemany(update_sql, update_args)

            logger.debug(f"poll_dynamic_task returned {ready_tasks}")

            leased_tasks = []
            for row in ready_tasks:
                instance = task.instantiate_from_spec(
                    json.loads(row["task_spec"]))
                leased_tasks.append((instance, Lease(locked_by)))

            # A full batch may mean more rows are waiting. This costs one
            # extra round-trip when exactly batch_size rows were available,
            # which is deemed an acceptable tradeoff.
            if len(ready_tasks) == self._batch_size:
                next_token = _CONTINUATION_TOKEN
            else:
                next_token = None

            return QueryResponse(tasks=leased_tasks,
                                 continuation_token=next_token)
Ejemplo n.º 3
0
    async def extend_lease(self, utc_now: datetime.datetime, task: Task,
                           lease: Lease) -> Optional[Lease]:
        """Ask the record-management call to extend the lease on ``task``.

        Args:
            utc_now: Base instant; the new expiry is
                ``utc_now + task.lease_duration``.
            task: Task whose lease is being extended.
            lease: Current lease; must be a ``_ManageScriptResponse``.

        Returns:
            A new ``Lease`` wrapping the response when its result is
            ``ResultType.READY``, otherwise ``None``.
        """
        assert isinstance(lease, _ManageScriptResponse)

        extended_until = utc_now + task.lease_duration
        outcome = await self._manage_record(
            task,
            "EXTEND",
            lease.version,
            lease.locked_by,
            extended_until.isoformat(),
        )
        # Remember the latest response on the task object, whatever the result.
        task._last_lease = outcome  # type: ignore

        if outcome.result != ResultType.READY:
            return None
        return Lease(outcome)
Ejemplo n.º 4
0
    async def poll_dynamic_task(
        self,
        utc_now: datetime.datetime,
        task: Task,
        continuation_token: Optional[ContinuationToken] = None,
    ) -> QueryResponse:
        """Lease up to ``_batch_size`` ready instances of ``task`` in one statement.

        A single ``UPDATE ... FROM (SELECT ... FOR UPDATE SKIP LOCKED)
        RETURNING *`` both picks the ready rows and stamps them with a fresh
        lease id and an expiry of ``utc_now + task.lease_duration``.

        Args:
            utc_now: Rows whose ``GREATEST(locked_until, execute_after)`` is
                at or before this instant are selected.
            task: Parent task whose ``canonical_name`` is matched against
                ``parent_name``.
            continuation_token: Not read by this implementation.

        Returns:
            A ``QueryResponse`` with one ``(task, lease)`` pair per returned
            row; every pair shares the same lease id. The continuation token
            is set only when a full batch was returned.
        """
        async with self._transaction() as connection:
            locked_by = uuid.uuid4()
            locked_until = utc_now + task.lease_duration

            lease_sql = f"""
                UPDATE {self._table_name} a
                SET
                    locked_until = $4,
                    locked_by = $5
                FROM (
                    SELECT name FROM {self._table_name}
                    WHERE parent_name = $1 AND GREATEST(locked_until, execute_after) <= $2
                    ORDER BY GREATEST(locked_until, execute_after) ASC
                    LIMIT $3
                    FOR UPDATE SKIP LOCKED
                ) b
                WHERE a.name = b.name
                RETURNING *
                """
            ready_tasks = await connection.fetch(
                lease_sql,
                task.canonical_name,
                utc_now,
                self._batch_size,
                locked_until,
                locked_by,
            )
            logger.debug(f"poll_dynamic_task returned {ready_tasks}")

            leased_tasks = []
            for row in ready_tasks:
                instance = task.instantiate_from_spec(
                    json.loads(row["task_spec"]))
                leased_tasks.append((instance, Lease(locked_by)))

            # A full batch may mean more rows are waiting. This costs one
            # extra round-trip when exactly batch_size rows were available,
            # which is deemed an acceptable tradeoff.
            if len(ready_tasks) == self._batch_size:
                next_token = _CONTINUATION_TOKEN
            else:
                next_token = None

            return QueryResponse(tasks=leased_tasks,
                                 continuation_token=next_token)
Ejemplo n.º 5
0
    async def poll_dynamic_task(
        self,
        utc_now: datetime.datetime,
        task: Task,
        continuation_token: Optional[ContinuationToken] = None,
    ) -> QueryResponse:
        """Select and lease up to ``_batch_size`` ready instances of ``task``.

        Inside one explicitly started transaction the ready rows are read,
        turned into concrete tasks, and then stamped with a fresh lease id
        and an expiry of ``utc_now + task.lease_duration``.

        Args:
            utc_now: Rows whose later of ``locked_until`` / ``execute_after``
                is at or before this instant are considered ready.
            task: Parent task whose ``canonical_name`` is matched against
                ``parent_name``.
            continuation_token: Not read by this implementation.

        Returns:
            A ``QueryResponse`` with one ``(task, lease)`` pair per selected
            row; every pair shares the same lease id. The continuation token
            is set only when a full batch was returned.
        """
        async with self._transaction(explicit_begin=True):
            locked_by = uuid.uuid4()
            locked_until = utc_now + task.lease_duration

            ready_tasks = await self._connection.execute_fetchall(
                f"""SELECT * FROM {self._table_name}
                WHERE parent_name = $1 AND MAX(COALESCE(locked_until, 0), COALESCE(execute_after, 0)) <= $2
                ORDER BY MAX(COALESCE(locked_until, 0), COALESCE(execute_after, 0)) ASC
                LIMIT $3
                """,
                (task.canonical_name, _to_timestamp(utc_now),
                 self._batch_size),
            )

            concrete_tasks = [
                task.instantiate_from_spec(json.loads(record["task_spec"]))
                for record in ready_tasks
            ]

            # SQLite treats ``$1`` as a *named* parameter and numbers
            # parameters by order of first appearance, while a tuple of
            # values is bound by that number. The placeholders therefore
            # have to appear in the same order as the tuple elements;
            # writing ``$2, $3 ... $1`` would bind the task name to
            # ``locked_until`` and the lease id to the WHERE clause, so no
            # row would ever be leased.
            await self._connection.executemany(
                f"""
                UPDATE {self._table_name}
                SET
                    locked_until = $1,
                    locked_by = $2
                WHERE name = $3
                """,
                [(
                    _to_timestamp(locked_until),
                    str(locked_by),
                    concrete_task.canonical_name,
                ) for concrete_task in concrete_tasks],
            )
            return QueryResponse(
                tasks=[(concrete_task, Lease(locked_by))
                       for concrete_task in concrete_tasks],
                # A full batch may mean more rows are waiting, so ask the
                # caller to poll again.
                continuation_token=_CONTINUATION_TOKEN
                if len(concrete_tasks) == self._batch_size else None,
            )
Ejemplo n.º 6
0
    async def extend_lease(self, utc_now: datetime.datetime, task: Task,
                           lease: Lease) -> Optional[Lease]:
        """Extend the lease on ``task`` with optimistic retries.

        Starting from a copy of ``lease``, the record's ``locked_until`` is
        moved to ``utc_now + task.lease_duration`` and written back. When the
        write is rejected the record is re-read with a consistent read and
        the attempt is repeated, up to ``MAX_OPTIMISTIC_RETRY_COUNT`` times.

        Args:
            utc_now: Base instant for the new lease expiry.
            task: Task whose lease is being extended.
            lease: Current lease; must be a ``_TaskRecord``.

        Returns:
            A ``Lease`` wrapping the updated record, or ``None`` when the
            record no longer exists or is now locked by someone else.

        Raises:
            PyncetteException: If every attempt was rejected.
        """
        assert isinstance(lease, _TaskRecord)
        current: Optional[_TaskRecord] = cast(_TaskRecord, copy.copy(lease))

        for _attempt in range(MAX_OPTIMISTIC_RETRY_COUNT):
            if not current:
                logger.warning(f"Task {task} no longer exists, skipping.")
                return None

            if current.locked_by != lease.locked_by:
                logger.warning(f"Lease lost on task {task}, skipping.")
                return None

            current.locked_until = utc_now + task.lease_duration

            stored = await self._update_item(task, current)
            if stored:
                return Lease(current)

            # The write was rejected (version mismatch): reload the record
            # and try again.
            current = await self._retreive_item(task, consistent_read=True)

        raise PyncetteException(
            "Unable to acquire the lock on the task due to contention")
Ejemplo n.º 7
0
def _create_dynamic_task(task: Task,
                         response_data: List[bytes]) -> Tuple[Task, Lease]:
    """Build a ``(task, lease)`` pair from a raw management-script response.

    Args:
        task: Parent task used to instantiate the concrete task from the spec
            carried in the response.
        response_data: Raw response, parsed with
            ``_ManageScriptResponse.from_response``.

    Returns:
        The instantiated task together with a ``Lease`` wrapping the parsed
        response.
    """
    parsed = _ManageScriptResponse.from_response(response_data)
    spec = parsed.task_spec
    assert spec is not None

    instance = task.instantiate_from_spec(spec)
    return instance, Lease(parsed)
Ejemplo n.º 8
0
    async def poll_task(self,
                        utc_now: datetime.datetime,
                        task: Task,
                        lease: Optional[Lease] = None) -> PollResponse:
        """Poll ``task`` through the ``POLL`` record-management command.

        Args:
            utc_now: Current time; also the base for the new lease expiry.
            task: Task to poll.
            lease: Lease from a previous call, if any. When it is a
                ``_ManageScriptResponse`` its version, ``execute_after`` and
                ``locked_by`` are reused.

        Returns:
            A ``PollResponse`` carrying the result reported by the command,
            the ``execute_after`` value the request was based on, and a
            ``Lease`` wrapping the response.

        Raises:
            PyncetteException: If the command reports the task as missing, or
                if five attempts all end in a lease mismatch.
        """
        # Nominally we need at least two round-trips to Redis, because the
        # next execute_after is calculated in Python for extra flexibility.
        # The optimistic locking below ensures the next execution time was
        # calculated from a correct base even if another process modified it
        # in between. In most cases the base time has not changed since the
        # last invocation, so by caching it we can poll a task in a single
        # round-trip (if we are wrong, the loop below still ensures
        # correctness, as the version will not match).
        cached = getattr(task, "_last_lease", None)
        if isinstance(lease, _ManageScriptResponse):
            version = lease.version
            execute_after = lease.execute_after
            locked_by = lease.locked_by
        elif cached is not None:
            logger.debug("Using cached values for execute_after")
            version = cached.version
            execute_after = cached.execute_after
            locked_by = str(uuid.uuid4())
        else:
            # By default we assume that the task is brand new.
            version = 0
            execute_after = None
            locked_by = str(uuid.uuid4())

        new_locked_until = utc_now + task.lease_duration
        for _attempt in range(5):
            next_execution = task.get_next_execution(utc_now, execute_after)
            mode_name = task.execution_mode.name
            task_kind = "REGULAR" if task.parent_task is None else "DYNAMIC"
            if next_execution is not None:
                next_execution_arg = next_execution.isoformat()
            else:
                next_execution_arg = ""

            response = await self._manage_record(
                task,
                "POLL",
                mode_name,
                task_kind,
                utc_now.isoformat(),
                version,
                next_execution_arg,
                new_locked_until.isoformat(),
                locked_by,
            )
            task._last_lease = response  # type: ignore

            if response.result == ResultType.MISSING:
                raise PyncetteException("Task not found")

            if response.result != ResultType.LEASE_MISMATCH:
                return PollResponse(
                    result=response.result,
                    scheduled_at=execute_after,
                    lease=Lease(response),
                )

            # Our base was stale: adopt the values from the response and retry.
            logger.debug("Lease mismatch, retrying.")
            execute_after = response.execute_after
            version = response.version

        raise PyncetteException(
            "Unable to acquire the lock on the task due to contention")
Ejemplo n.º 9
0
from typing import Any
from typing import AsyncIterator
from typing import Optional

from pyncette.model import ContinuationToken
from pyncette.model import Lease
from pyncette.model import PollResponse
from pyncette.model import QueryResponse
from pyncette.model import ResultType
from pyncette.repository import Repository
from pyncette.task import Task

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# A single module-wide Lease wrapping a bare ``object()``.
# NOTE(review): presumably a placeholder lease shared by the fake repository
# below -- confirm against the methods that use it.
_LEASE = Lease(object())
# Literal task specification with the name "fake"; only "schedule" and
# "extra_args" carry non-None values.
# NOTE(review): "* * * * * *" looks like a six-field cron expression
# (presumably "every second") -- confirm against the schedule parser.
_TASK_SPEC = {
    "name": "fake",
    "interval": None,
    "timezone": None,
    "execute_at": None,
    "extra_args": {},
    "schedule": "* * * * * *",
}


class FakeRepository(Repository):
    """Fake implementation of the Pyncette ``Repository`` interface.

    NOTE(review): the previous docstring described this class as a
    Redis-backed store, which looks like a copy-paste leftover -- nothing
    visible in this module touches Redis. Confirm against the rest of the
    class.
    """

    # Integer settings; their use is not visible in this excerpt.
    _batch_size: int
    _records_per_tick: int
Ejemplo n.º 10
0
    async def poll_task(self,
                        utc_now: datetime.datetime,
                        task: Task,
                        lease: Optional[Lease] = None) -> PollResponse:
        """Poll ``task`` and decide whether it is ready, pending or locked.

        The starting record is taken from ``lease`` when given, otherwise from
        the lease cached on the task (``_last_lease``), otherwise it is read
        from the table. The decision is then made locally and written back
        with ``_update_item``; when that write is rejected, or when a cached
        record says the task is locked, the record is re-read with a
        consistent read and the decision is repeated, up to
        ``MAX_OPTIMISTIC_RETRY_COUNT`` times.

        Args:
            utc_now: Current time, compared against ``locked_until`` and
                ``execute_after``.
            task: Task to poll.
            lease: Lease from a previous call, if any; must be a
                ``_TaskRecord``.

        Returns:
            A ``PollResponse`` with the result, the ``execute_after`` value
            the record had before this call changed it, and a ``Lease``
            wrapping the record.

        Raises:
            PyncetteException: If no record exists for a task that has a
                parent task, or if every attempt ended in a reload.
        """
        last_lease = getattr(task, "_last_lease", None)

        # Similar logic as in Redis repository. If we have previously processed this
        # task in any manner, we try to reuse the latest state of the task we have at hand
        # from cache (or lease) to avoid two roundtrips to DynamoDB in the optimistic case.
        # If we are wrong, we will get a version mismatch, whereby we will load the current
        # state from DB.
        # However, in case the task would be PENDING or LOCKED, this will result in no requests
        # being made to the DB at all. For pending this is OK, but for locked, we want to revalidate
        # in order to be able to execute the task as soon as it is unlocked.
        record: Optional[_TaskRecord]
        potentially_stale = False
        if lease is not None:
            assert isinstance(lease, _TaskRecord)
            record = cast(_TaskRecord, copy.copy(lease))
        elif last_lease is not None:
            logger.debug("Using cached values for last lease")
            assert isinstance(last_lease, _TaskRecord)
            record = cast(_TaskRecord, copy.copy(last_lease))
            potentially_stale = True
        else:
            record = await self._retreive_item(task)

        for _ in range(MAX_OPTIMISTIC_RETRY_COUNT):
            must_revalidate = False
            update = False
            if record is None:
                # Only tasks without a parent are created on first poll.
                if task.parent_task is not None:
                    raise PyncetteException("Task not found")

                record = _TaskRecord(
                    execute_after=task.get_next_execution(utc_now, None),
                    locked_until=None,
                    locked_by=None,
                    version=0,
                )
                update = True

            assert record.execute_after is not None
            # Captured before the branches below may move execute_after.
            scheduled_at = record.execute_after

            if (record.locked_until is not None
                    and record.locked_until > utc_now and
                (lease is None or lease.locked_by != record.locked_by)):
                result = ResultType.LOCKED
                # A cached record may be out of date; check the table before
                # reporting the task as locked.
                if potentially_stale:
                    must_revalidate = True
            elif (record.execute_after <= utc_now
                  and task.execution_mode == ExecutionMode.AT_MOST_ONCE):
                # Advance the schedule right away and leave the record unlocked.
                result = ResultType.READY
                record.execute_after = task.get_next_execution(
                    utc_now, record.execute_after)
                record.locked_until = None
                record.locked_by = None
                update = True
            elif (record.execute_after <= utc_now
                  and task.execution_mode == ExecutionMode.AT_LEAST_ONCE):
                # Take a lease; execute_after is left unchanged here.
                result = ResultType.READY
                record.locked_until = utc_now + task.lease_duration
                record.locked_by = str(uuid.uuid4())
                update = True
            else:
                result = ResultType.PENDING

            if must_revalidate or (update and
                                   not await self._update_item(task, record)):
                # The local record cannot be trusted (possibly stale cache,
                # or the write was rejected), so fetch the current one.
                logger.debug(
                    "Cached record is stale or update was rejected, "
                    "reloading the record")
                record = await self._retreive_item(task, consistent_read=True)
                potentially_stale = False
                continue

            return PollResponse(result=result,
                                scheduled_at=scheduled_at,
                                lease=Lease(record))

        raise PyncetteException(
            "Unable to acquire the lock on the task due to contention")