async def register_task(self, utc_now: datetime.datetime, task: Task) -> None:
    """Upsert a dynamic task instance row.

    Inserts a row keyed by the task's canonical name. If the name already
    exists, the stored spec and next execution time are overwritten and
    any lock columns are reset to NULL.
    """
    parent = task.parent_task
    assert parent is not None

    async with self._transaction() as cursor:
        next_run = _to_timestamp(task.get_next_execution(utc_now, None))
        serialized_spec = json.dumps(task.as_spec())
        statement = f"""
            INSERT INTO {self._table_name} (name, parent_name, task_spec, execute_after)
            VALUES (%s, %s, %s, %s)
            ON DUPLICATE KEY
            UPDATE
                task_spec = %s,
                execute_after = %s,
                locked_by = NULL,
                locked_until = NULL
            """
        # The spec and timestamp appear twice: once for the INSERT values
        # and once for the ON DUPLICATE KEY UPDATE assignments.
        arguments = (
            task.canonical_name,
            parent.canonical_name,
            serialized_spec,
            next_run,
            serialized_spec,
            next_run,
        )
        await cursor.execute(statement, arguments)
async def register_task(self, utc_now: datetime.datetime, task: Task) -> None:
    """Register a task through the "REGISTER" record-management command.

    Sends the first execution time (ISO-8601) and the JSON-encoded task
    spec to ``_manage_record``.
    """
    first_run = task.get_next_execution(utc_now, None)
    assert first_run is not None

    first_run_iso = first_run.isoformat()
    serialized_spec = json.dumps(task.as_spec())
    await self._manage_record(task, "REGISTER", first_run_iso, serialized_spec)
async def commit_task(self, utc_now: datetime.datetime, task: Task,
                      lease: Lease) -> None:
    """Commit a task through the "COMMIT" record-management command.

    The next execution time is derived from the lease's ``execute_after``
    and sent as an ISO-8601 string, or as an empty string when there is no
    next execution. The response is cached on the task as ``_last_lease``.
    """
    assert isinstance(lease, _ManageScriptResponse)

    upcoming = task.get_next_execution(utc_now, lease.execute_after)
    if upcoming is None:
        upcoming_iso = ""
    else:
        upcoming_iso = upcoming.isoformat()

    outcome = await self._manage_record(
        task,
        "COMMIT",
        lease.version,
        lease.locked_by,
        upcoming_iso,
    )
    task._last_lease = outcome  # type: ignore

    if outcome.result == ResultType.LEASE_MISMATCH:
        logger.info("Not commiting, as we have lost the lease")
async def commit_task(self, utc_now: datetime.datetime, task: Task,
                      lease: Lease) -> None:
    """Release the lock on a task and schedule its next execution.

    The row is read with ``FOR UPDATE`` inside a transaction. Nothing is
    written if the row is missing or if its ``locked_by`` no longer equals
    the supplied lease.
    """
    async with self._transaction() as cursor:
        query = f"SELECT * FROM {self._table_name} WHERE name = %s FOR UPDATE"
        await cursor.execute(query, (task.canonical_name, ))
        row = await cursor.fetchone()
        logger.debug(f"commit_task returned {row}")

        if not row:
            logger.warning(f"Task {task} not found, skipping.")
            return

        if row["locked_by"] != lease:
            logger.warning(f"Lease lost on task {task}, skipping.")
            return

        previous_run = _from_timestamp(row["execute_after"])
        next_run = task.get_next_execution(utc_now, previous_run)
        # Both lock columns are cleared as part of the commit.
        await self._update_record(cursor, task, None, None, next_run)
async def poll_dynamic_task(
    self,
    utc_now: datetime.datetime,
    task: Task,
    continuation_token: Optional[ContinuationToken] = None,
) -> QueryResponse:
    """Query the ``ready_at`` index for due instances of a dynamic task.

    Returns up to ``self._batch_size`` task instances, each paired with a
    lease wrapping the stored record, plus the ``LastEvaluatedKey`` of the
    query (if any) as the continuation token.
    """
    partition_condition = Key("partition_id").eq(
        self._get_partition_id(task))
    # Other methods in this file write ready_at as "<iso timestamp>_<name>".
    # "`" is the ASCII character right after "_", so the upper bound below
    # also matches entries whose timestamp equals utc_now.
    ready_condition = Key("ready_at").lt(f"{utc_now.isoformat()}`")

    query_arguments = {
        "IndexName": "ready_at",
        "Select": "ALL_ATTRIBUTES",
        "Limit": self._batch_size,
        "KeyConditionExpression": partition_condition & ready_condition,
    }
    if continuation_token is not None:
        query_arguments["ExclusiveStartKey"] = continuation_token

    response = await self._table.query(**query_arguments)

    leased_tasks = []
    for item in response["Items"]:
        instance = task.instantiate_from_spec(json.loads(item["task_spec"]))
        leased_tasks.append(
            (instance, Lease(_TaskRecord.from_dynamo_item(item))))

    return QueryResponse(
        tasks=leased_tasks,
        continuation_token=response.get("LastEvaluatedKey", None),
    )
async def commit_task(self, utc_now: datetime.datetime, task: Task,
                      lease: Lease) -> None:
    """Release the lock on a task and schedule its next execution.

    Nothing is written if the row is missing or if its ``locked_by`` does
    not equal ``str(lease)``.

    Args:
        utc_now: Current time, used to compute the next execution.
        task: The task being committed.
        lease: The lease obtained when the task was polled.
    """
    async with self._transaction(explicit_begin=True):
        # Use a positional "?" placeholder: in SQLite "$1" is a *named*
        # parameter, and binding named parameters from a sequence is
        # deprecated in Python 3.12 and an error from 3.14.
        records = await self._connection.execute_fetchall(
            f"SELECT * FROM {self._table_name} WHERE name = ?",
            (task.canonical_name, ),
        )

        if not records:
            logger.warning(f"Task {task} not found, skipping.")
            return

        record = next(iter(records))

        if record["locked_by"] != str(lease):
            logger.warning(f"Lease lost on task {task}, skipping.")
            return

        # A falsy stored timestamp is treated as "no previous execution".
        execute_after = (datetime.datetime.fromtimestamp(
            record["execute_after"], dateutil.tz.UTC)
                         if record["execute_after"] else None)

        # Clear both lock columns and store the next execution time.
        await self._update_record(
            task,
            None,
            None,
            task.get_next_execution(utc_now, execute_after),
        )
async def commit_task(self, utc_now: datetime.datetime, task: Task,
                      lease: Lease) -> None:
    """Release the lock on a task using optimistic, versioned updates.

    Starts from a copy of the lease record and retries up to
    ``MAX_OPTIMISTIC_RETRY_COUNT`` times, reloading the item with a
    consistent read whenever the conditional update is rejected.

    Raises:
        PyncetteException: if every attempt was rejected.
    """
    assert isinstance(lease, _TaskRecord)

    current: Optional[_TaskRecord] = cast(_TaskRecord, copy.copy(lease))
    attempts_left = MAX_OPTIMISTIC_RETRY_COUNT

    while attempts_left > 0:
        attempts_left -= 1

        if not current:
            logger.warning(f"Task {task} no longer exists, skipping.")
            return

        if current.locked_by != lease.locked_by:
            logger.warning(f"Lease lost on task {task}, skipping.")
            return

        # The next execution is always based on the lease's execute_after,
        # even after the record has been reloaded.
        current.execute_after = task.get_next_execution(
            utc_now, lease.execute_after)
        current.locked_by = None
        current.locked_until = None

        stored = await self._update_item(task, current)
        if stored:
            return

        # The update was rejected (version mismatch): reload and retry.
        current = await self._retreive_item(task, consistent_read=True)

    raise PyncetteException(
        "Unable to acquire the lock on the task due to contention")
async def register_task(self, utc_now: datetime.datetime, task: Task) -> None:
    """Create or reset the row of a dynamic task instance.

    Inside an explicit transaction, checks whether a row with the task's
    canonical name exists. An existing row gets its spec and execution
    time replaced and its lock cleared; otherwise a new row is inserted.
    """
    async with self._transaction(explicit_begin=True):
        parent = task.parent_task
        assert parent is not None

        existing = await self._connection.execute_fetchall(
            f"SELECT 1 FROM {self._table_name} WHERE name = ?",
            (task.canonical_name, ),
        )

        # Values shared by the UPDATE and the INSERT statements.
        values = {
            "name": task.canonical_name,
            "task_spec": json.dumps(task.as_spec()),
            "execute_after":
            _to_timestamp(task.get_next_execution(utc_now, None)),
        }

        if existing:
            statement = f"""
                UPDATE {self._table_name}
                SET
                    task_spec = :task_spec,
                    execute_after = :execute_after,
                    locked_until = NULL,
                    locked_by = NULL
                WHERE name = :name
                """
        else:
            statement = f"""
                INSERT INTO {self._table_name} (name, parent_name, task_spec, execute_after)
                VALUES (:name, :parent_name, :task_spec, :execute_after)
                """
            values["parent_name"] = parent.canonical_name

        await self._connection.execute_fetchall(statement, values)
async def unlock_task(self, utc_now: datetime.datetime, task: Task,
                      lease: Lease) -> None:
    """Release a lease through the "UNLOCK" record-management command.

    The response is cached on the task as ``_last_lease``. A lease
    mismatch is only logged.
    """
    assert isinstance(lease, _ManageScriptResponse)

    outcome = await self._manage_record(
        task,
        "UNLOCK",
        lease.version,
        lease.locked_by,
    )
    task._last_lease = outcome  # type: ignore

    lease_lost = outcome.result == ResultType.LEASE_MISMATCH
    if lease_lost:
        logger.info("Not unlocking, as we have lost the lease")
async def poll_dynamic_task(
    self,
    utc_now: datetime.datetime,
    task: Task,
    continuation_token: Optional[ContinuationToken] = None,
) -> QueryResponse:
    """Select and lock a batch of due instances of a dynamic task.

    Within one transaction, picks up to ``self._batch_size`` rows whose
    lock and execution time have both passed (skipping rows locked by
    other transactions), then stamps them with a fresh lease id and
    expiry. All returned instances share the same lease.
    """
    async with self._transaction() as cursor:
        lease_id = str(uuid.uuid4())
        lease_expiry = utc_now + task.lease_duration

        select_statement = f"""
            SELECT name, task_spec FROM {self._table_name}
            WHERE parent_name = %s AND GREATEST(COALESCE(locked_until, 0), COALESCE(execute_after, 0)) <= %s
            ORDER BY GREATEST(COALESCE(locked_until, 0), COALESCE(execute_after, 0)) ASC
            LIMIT %s
            FOR UPDATE SKIP LOCKED
            """
        await cursor.execute(
            select_statement,
            (task.canonical_name, _to_timestamp(utc_now), self._batch_size),
        )
        rows = await cursor.fetchall()

        lock_arguments = []
        for row in rows:
            lock_arguments.append(
                (_to_timestamp(lease_expiry), lease_id, row["name"]))

        await cursor.executemany(
            f"""
            UPDATE {self._table_name}
            SET
                locked_until = %s,
                locked_by = %s
            WHERE name = %s
            """,
            lock_arguments,
        )
        logger.debug(f"poll_dynamic_task returned {rows}")

        leased_tasks = []
        for row in rows:
            instance = task.instantiate_from_spec(
                json.loads(row["task_spec"]))
            leased_tasks.append((instance, Lease(lease_id)))

        # A full batch suggests there may be more rows to fetch. This may
        # cost one extra round-trip when exactly batch_size rows were
        # available, which is deemed an acceptable tradeoff.
        if len(rows) == self._batch_size:
            next_token = _CONTINUATION_TOKEN
        else:
            next_token = None

        return QueryResponse(tasks=leased_tasks,
                             continuation_token=next_token)
async def register_task(self, utc_now: datetime.datetime, task: Task) -> None:
    """Upsert a dynamic task instance row.

    On a name conflict the stored spec and next execution time are
    replaced and the lock columns are reset to NULL.
    """
    parent = task.parent_task
    assert parent is not None

    async with self._transaction() as connection:
        statement = f"""
            INSERT INTO {self._table_name} (name, parent_name, task_spec, execute_after)
            VALUES ($1, $2, $3, $4)
            ON CONFLICT (name) DO UPDATE
            SET
                task_spec = $3,
                execute_after = $4,
                locked_by = NULL,
                locked_until = NULL
            """
        arguments = (
            task.canonical_name,
            parent.canonical_name,
            json.dumps(task.as_spec()),
            task.get_next_execution(utc_now, None),
        )
        status = await connection.execute(statement, *arguments)
        logger.debug(f"register_task returned {status}")
async def extend_lease(self, utc_now: datetime.datetime, task: Task,
                       lease: Lease) -> Optional[Lease]:
    """Extend a lease through the "EXTEND" record-management command.

    Returns a new lease wrapping the response when the result is
    ``READY``, otherwise ``None``. The response is cached on the task as
    ``_last_lease`` in both cases.
    """
    assert isinstance(lease, _ManageScriptResponse)

    extended_until = utc_now + task.lease_duration
    outcome = await self._manage_record(
        task,
        "EXTEND",
        lease.version,
        lease.locked_by,
        extended_until.isoformat(),
    )
    task._last_lease = outcome  # type: ignore

    if outcome.result != ResultType.READY:
        return None
    return Lease(outcome)
async def register_task(self, utc_now: datetime.datetime, task: Task) -> None:
    """Write a fresh, unlocked item for a task with ``put_item``.

    The item starts at version 0 with no lock. ``ready_at`` is the first
    execution time suffixed with the task's canonical name.
    """
    first_run = task.get_next_execution(utc_now, None)
    assert first_run is not None

    first_run_iso = first_run.isoformat()
    name = task.canonical_name
    item = {
        "partition_id": self._get_partition_id(task),
        "task_id": name,
        "task_spec": json.dumps(task.as_spec()),
        "version": 0,
        "locked_by": None,
        "locked_until": None,
        "execute_after": first_run_iso,
        "ready_at": f"{first_run_iso}_{name}",
    }
    await self._table.put_item(Item=item)
async def poll_dynamic_task(
    self,
    utc_now: datetime.datetime,
    task: Task,
    continuation_token: Optional[ContinuationToken] = None,
) -> QueryResponse:
    """Lock and return a batch of due instances of a dynamic task.

    A single ``UPDATE ... FROM (SELECT ... FOR UPDATE SKIP LOCKED)``
    statement both picks up to ``self._batch_size`` due rows and stamps
    them with a fresh lease id and expiry. All returned instances share
    the same lease.
    """
    async with self._transaction() as connection:
        lease_id = uuid.uuid4()
        lease_expiry = utc_now + task.lease_duration

        statement = f"""
            UPDATE {self._table_name} a
            SET
                locked_until = $4,
                locked_by = $5
            FROM (
                SELECT name FROM {self._table_name}
                WHERE parent_name = $1 AND GREATEST(locked_until, execute_after) <= $2
                ORDER BY GREATEST(locked_until, execute_after) ASC
                LIMIT $3
                FOR UPDATE SKIP LOCKED
            ) b
            WHERE a.name = b.name
            RETURNING *
            """
        rows = await connection.fetch(
            statement,
            task.canonical_name,
            utc_now,
            self._batch_size,
            lease_expiry,
            lease_id,
        )
        logger.debug(f"poll_dynamic_task returned {rows}")

        leased_tasks = []
        for row in rows:
            instance = task.instantiate_from_spec(
                json.loads(row["task_spec"]))
            leased_tasks.append((instance, Lease(lease_id)))

        # A full batch suggests there may be more rows to fetch. This may
        # cost one extra round-trip when exactly batch_size rows were
        # available, which is deemed an acceptable tradeoff.
        if len(rows) == self._batch_size:
            next_token = _CONTINUATION_TOKEN
        else:
            next_token = None

        return QueryResponse(tasks=leased_tasks,
                             continuation_token=next_token)
async def poll_dynamic_task(
    self,
    utc_now: datetime.datetime,
    task: Task,
    continuation_token: Optional[ContinuationToken] = None,
) -> QueryResponse:
    """Select and lock a batch of due instances of a dynamic task.

    Within one explicit transaction, picks up to ``self._batch_size`` rows
    whose lock and execution time have both passed, then stamps them with
    a fresh lease id and expiry.

    Args:
        utc_now: Current time; rows due at or before it are selected.
        task: The parent (dynamic) task whose instances are polled.
        continuation_token: Not used by this implementation.

    Returns:
        The instantiated tasks, each paired with the shared lease, and a
        continuation token when a full batch was returned.
    """
    async with self._transaction(explicit_begin=True):
        locked_by = uuid.uuid4()
        locked_until = utc_now + task.lease_duration

        # Named placeholders are bound from dicts. SQLite treats "$1" as a
        # *named* parameter numbered by first appearance, so binding such
        # parameters from a tuple is order-sensitive (and deprecated in
        # Python 3.12, an error from 3.14).
        ready_tasks = await self._connection.execute_fetchall(
            f"""SELECT * FROM {self._table_name}
            WHERE parent_name = :parent_name AND MAX(COALESCE(locked_until, 0), COALESCE(execute_after, 0)) <= :now
            ORDER BY MAX(COALESCE(locked_until, 0), COALESCE(execute_after, 0)) ASC
            LIMIT :batch_size
            """,
            {
                "parent_name": task.canonical_name,
                "now": _to_timestamp(utc_now),
                "batch_size": self._batch_size,
            },
        )

        concrete_tasks = [
            task.instantiate_from_spec(json.loads(record["task_spec"]))
            for record in ready_tasks
        ]

        # Previously this statement used "$2, $3 ... $1" with tuples
        # ordered (name, locked_until, locked_by). The values were bound
        # in order of appearance, i.e. to the wrong columns, so the WHERE
        # clause compared the name against the lease id.
        await self._connection.executemany(
            f"""
            UPDATE {self._table_name}
            SET
                locked_until = :locked_until,
                locked_by = :locked_by
            WHERE name = :name
            """,
            [{
                "name": concrete_task.canonical_name,
                "locked_until": _to_timestamp(locked_until),
                "locked_by": str(locked_by),
            } for concrete_task in concrete_tasks],
        )

        return QueryResponse(
            tasks=[(concrete_task, Lease(locked_by))
                   for concrete_task in concrete_tasks],
            # May result in an extra round-trip if there were exactly
            # batch_size tasks available.
            continuation_token=_CONTINUATION_TOKEN
            if len(concrete_tasks) == self._batch_size else None,
        )
async def commit_task(self, utc_now: datetime.datetime, task: Task,
                      lease: Lease) -> None:
    """Release the lock on a task and schedule its next execution.

    The row is read with ``FOR UPDATE`` inside a transaction. Nothing is
    written if the row is missing or if its ``locked_by`` no longer equals
    the supplied lease.
    """
    async with self._transaction() as connection:
        query = f"SELECT * FROM {self._table_name} WHERE name = $1 FOR UPDATE"
        row = await connection.fetchrow(query, task.canonical_name)
        logger.debug(f"commit_task returned {row}")

        if not row:
            logger.warning(f"Task {task} not found, skipping.")
            return

        if row["locked_by"] != lease:
            logger.warning(f"Lease lost on task {task}, skipping.")
            return

        next_run = task.get_next_execution(utc_now, row["execute_after"])
        # Both lock columns are cleared as part of the commit.
        await self._update_record(connection, task, None, None, next_run)
async def poll_dynamic_task(
    self,
    utc_now: datetime.datetime,
    task: Task,
    continuation_token: Optional[ContinuationToken] = None,
) -> QueryResponse:
    """Return a synthetic batch of dynamic task instances.

    Hands out ``self._records_per_tick`` instances in total, at most
    ``self._batch_size`` per call. The continuation token is the number
    of instances already handed out in the current sequence of calls.

    Args:
        utc_now: Unused.
        task: Parent task used to instantiate the synthetic instances.
        continuation_token: Token returned by the previous call, or
            ``None`` to start over.

    Returns:
        The batch of (task, lease) pairs, and a continuation token while
        more instances remain.
    """
    if isinstance(continuation_token, int):
        already_returned = continuation_token
    else:
        already_returned = 0

    remaining = max(self._records_per_tick - already_returned, 0)
    # A batch must never exceed batch_size. The previous max() returned
    # everything at once, or more than was remaining.
    result_count = min(remaining, self._batch_size)
    already_returned += result_count
    remaining -= result_count

    # The token is written in the form it is read back above (a count of
    # instances already returned). Stop when nothing is left, or when no
    # progress can be made, to avoid endless pagination.
    has_more = remaining > 0 and result_count > 0

    return QueryResponse(
        tasks=[(
            task.instantiate_from_spec(_TASK_SPEC),
            _LEASE,
        ) for _ in range(result_count)],
        continuation_token=already_returned if has_more else None,
    )
async def poll_task(self,
                    utc_now: datetime.datetime,
                    task: Task,
                    lease: Optional[Lease] = None) -> PollResponse:
    """Poll a task through the "POLL" record-management command.

    The next execution time is computed in Python, so nominally two
    round-trips are needed: one to read the current base time and one to
    write. To get by with one in the common case, the version and
    ``execute_after`` from the supplied lease, or from the response cached
    on the task, are used optimistically. If the version turns out to be
    stale, the response carries the current values and the call is
    retried (up to 5 attempts), which keeps the result correct.

    Raises:
        PyncetteException: if the task record is missing, or if all
            attempts ended in a lease mismatch.
    """
    cached = getattr(task, "_last_lease", None)

    if isinstance(lease, _ManageScriptResponse):
        version = lease.version
        execute_after = lease.execute_after
        locked_by = lease.locked_by
    elif cached is not None:
        logger.debug("Using cached values for execute_after")
        version = cached.version
        execute_after = cached.execute_after
        locked_by = str(uuid.uuid4())
    else:
        # With nothing to go on, assume the task is brand new.
        version = 0
        execute_after = None
        locked_by = str(uuid.uuid4())

    lease_expiry = utc_now + task.lease_duration

    for _attempt in range(5):
        upcoming = task.get_next_execution(utc_now, execute_after)
        if upcoming is None:
            upcoming_iso = ""
        else:
            upcoming_iso = upcoming.isoformat()

        if task.parent_task is None:
            task_kind = "REGULAR"
        else:
            task_kind = "DYNAMIC"

        response = await self._manage_record(
            task,
            "POLL",
            task.execution_mode.name,
            task_kind,
            utc_now.isoformat(),
            version,
            upcoming_iso,
            lease_expiry.isoformat(),
            locked_by,
        )
        task._last_lease = response  # type: ignore

        if response.result == ResultType.LEASE_MISMATCH:
            # Adopt the values from the response and try again.
            logger.debug("Lease mismatch, retrying.")
            execute_after = response.execute_after
            version = response.version
            continue

        if response.result == ResultType.MISSING:
            raise PyncetteException("Task not found")

        return PollResponse(
            result=response.result,
            scheduled_at=execute_after,
            lease=Lease(response),
        )

    raise PyncetteException(
        "Unable to acquire the lock on the task due to contention")
async def poll_task(self,
                    utc_now: datetime.datetime,
                    task: Task,
                    lease: Optional[Lease] = None) -> PollResponse:
    """Poll a task and decide whether it is ready, pending or locked.

    The task's row is read with ``FOR UPDATE`` inside a transaction and,
    depending on the outcome, rewritten through ``_update_record``.

    Returns:
        A ``PollResponse`` with the result type, the execution time the
        row held before this call changed it, and the ``locked_by`` value
        after this call (``None`` when no lock is held).

    Raises:
        PyncetteException: if the row is missing and the task has a
            parent (i.e. it is a dynamic task instance).
    """
    async with self._transaction() as cursor:
        await cursor.execute(
            f"SELECT * FROM {self._table_name} WHERE name = %s FOR UPDATE",
            (task.canonical_name, ),
        )
        record = await cursor.fetchone()
        logger.debug(f"poll_task returned {record}")

        # Set whenever the row needs to be written back.
        update = False
        if record is None:
            # Regular (non-dynamic) tasks are implicitly created on first
            # poll, but dynamic task instances must be explicitly created
            # to prevent a spurious poll from re-creating them after they
            # have been deleted.
            if task.parent_task is not None:
                raise PyncetteException("Task not found")

            execute_after = task.get_next_execution(utc_now, None)
            locked_until = None
            locked_by = None
            update = True
        else:
            execute_after = _from_timestamp(record["execute_after"])
            locked_until = _from_timestamp(record["locked_until"])
            locked_by = record["locked_by"]

        assert execute_after is not None
        # Captured before the branches below may advance execute_after.
        scheduled_at = execute_after

        if (locked_until is not None and locked_until > utc_now
                and (lease != locked_by)):
            # An unexpired lock held under a different lease.
            result = ResultType.LOCKED
        elif (execute_after <= utc_now
              and task.execution_mode == ExecutionMode.AT_MOST_ONCE):
            # Due: advance the schedule right away and clear any lock.
            execute_after = task.get_next_execution(utc_now, execute_after)
            result = ResultType.READY
            locked_until = None
            locked_by = None
            update = True
        elif (execute_after <= utc_now
              and task.execution_mode == ExecutionMode.AT_LEAST_ONCE):
            # Due: take a new lock; execute_after is left unchanged here.
            locked_until = utc_now + task.lease_duration
            locked_by = str(uuid.uuid4())
            result = ResultType.READY
            update = True
        else:
            result = ResultType.PENDING

        if update:
            await self._update_record(
                cursor,
                task,
                locked_until,
                locked_by,
                execute_after,
            )

        return PollResponse(result=result,
                            scheduled_at=scheduled_at,
                            lease=locked_by)
def _create_dynamic_task(task: Task,
                         response_data: List[bytes]) -> Tuple[Task, Lease]:
    """Build a (task instance, lease) pair from a raw script response.

    The response is parsed with ``_ManageScriptResponse.from_response``.
    Its ``task_spec`` must be present and is used to instantiate the task;
    the parsed response itself becomes the lease payload.
    """
    parsed = _ManageScriptResponse.from_response(response_data)
    spec = parsed.task_spec
    assert spec is not None

    instance = task.instantiate_from_spec(spec)
    return (instance, Lease(parsed))
async def poll_task(self,
                    utc_now: datetime.datetime,
                    task: Task,
                    lease: Optional[Lease] = None) -> PollResponse:
    """Poll a task using optimistic, versioned updates.

    Starts from the supplied lease, the record cached on the task, or a
    freshly retrieved item, and retries up to
    ``MAX_OPTIMISTIC_RETRY_COUNT`` times when the write is rejected or a
    cached record needs revalidation.

    Raises:
        PyncetteException: if the item is missing and the task has a
            parent, or if all attempts were exhausted.
    """
    last_lease = getattr(task, "_last_lease", None)

    # Similar logic as in the Redis repository. If we have previously
    # processed this task in any manner, we try to reuse the latest state
    # of the task we have at hand from cache (or lease) to avoid two
    # round-trips to DynamoDB in the optimistic case. If we are wrong, we
    # will get a version mismatch, whereby we will load the current state
    # from the DB.
    # However, in case the task would be PENDING or LOCKED, this will
    # result in no requests being made to the DB at all. For pending this
    # is OK, but for locked, we want to revalidate in order to be able to
    # execute the task as soon as it is unlocked.
    record: Optional[_TaskRecord]
    # True only when the record came from the cache on the task.
    potentially_stale = False

    if lease is not None:
        assert isinstance(lease, _TaskRecord)
        record = cast(_TaskRecord, copy.copy(lease))
    elif last_lease is not None:
        logger.debug("Using cached values for last lease")
        assert isinstance(last_lease, _TaskRecord)
        record = cast(_TaskRecord, copy.copy(last_lease))
        potentially_stale = True
    else:
        record = await self._retreive_item(task)

    for _ in range(MAX_OPTIMISTIC_RETRY_COUNT):
        must_revalidate = False
        update = False

        if record is None:
            # Only tasks without a parent are created implicitly.
            if task.parent_task is not None:
                raise PyncetteException("Task not found")

            record = _TaskRecord(
                execute_after=task.get_next_execution(utc_now, None),
                locked_until=None,
                locked_by=None,
                version=0,
            )
            update = True

        assert record.execute_after is not None
        # Captured before the branches below may advance execute_after.
        scheduled_at = record.execute_after

        if (record.locked_until is not None
                and record.locked_until > utc_now and
            (lease is None or lease.locked_by != record.locked_by)):
            result = ResultType.LOCKED
            # A LOCKED verdict from the cached record is re-checked
            # against the stored item before it is returned.
            if potentially_stale:
                must_revalidate = True
        elif (record.execute_after <= utc_now
              and task.execution_mode == ExecutionMode.AT_MOST_ONCE):
            # Due: advance the schedule right away and clear any lock.
            result = ResultType.READY
            record.execute_after = task.get_next_execution(
                utc_now, record.execute_after)
            record.locked_until = None
            record.locked_by = None
            update = True
        elif (record.execute_after <= utc_now
              and task.execution_mode == ExecutionMode.AT_LEAST_ONCE):
            # Due: take a new lock; execute_after is left unchanged here.
            result = ResultType.READY
            record.locked_until = utc_now + task.lease_duration
            record.locked_by = str(uuid.uuid4())
            update = True
        else:
            result = ResultType.PENDING

        # Reload with a consistent read and retry when revalidation is
        # required or when _update_item reports failure.
        # NOTE(review): the debug message below duplicates the one used
        # for the cache path above; it looks copy-pasted. Confirm.
        if must_revalidate or (update and
                               not await self._update_item(task, record)):
            logger.debug("Using cached values for last lease")
            record = await self._retreive_item(task, consistent_read=True)
            potentially_stale = False
            continue

        return PollResponse(result=result,
                            scheduled_at=scheduled_at,
                            lease=Lease(record))

    raise PyncetteException(
        "Unable to acquire the lock on the task due to contention")
async def poll_task(self,
                    utc_now: datetime.datetime,
                    task: Task,
                    lease: Optional[Lease] = None) -> PollResponse:
    """Poll a task and decide whether it is ready, pending or locked.

    Runs inside an explicit transaction. A missing row for a task without
    a parent is created on the fly.

    Args:
        utc_now: Current time.
        task: The task to poll.
        lease: A lease held by the caller, if any.

    Returns:
        A ``PollResponse`` with the result type, the execution time the
        row held before this call changed it, and the ``locked_by`` value
        after this call (``None`` when no lock is held).

    Raises:
        PyncetteException: if the row is missing and the task has a
            parent (i.e. it is a dynamic task instance).
    """
    async with self._transaction(explicit_begin=True):
        records = await self._connection.execute_fetchall(
            f"SELECT * FROM {self._table_name} WHERE name = ?",
            (task.canonical_name, ),
        )

        if not records:
            # Regular (non-dynamic) tasks are implicitly created on first
            # poll, but dynamic task instances must be explicitly created
            # to prevent a spurious poll from re-creating them after they
            # have been deleted.
            if task.parent_task is not None:
                raise PyncetteException("Task not found")

            locked_until = None
            locked_by = None
            execute_after = task.get_next_execution(utc_now, None)

            # Named placeholders are bound from a dict: binding them from
            # a sequence is deprecated in Python 3.12 and an error from
            # 3.14. The second placeholder is now named after the column
            # it fills.
            await self._connection.execute_fetchall(
                f"""
                INSERT INTO {self._table_name} (name, execute_after)
                VALUES (:name, :execute_after)
                """,
                {
                    "name": task.canonical_name,
                    "execute_after": _to_timestamp(execute_after),
                },
            )
        else:
            record = next(iter(records))
            execute_after = cast(datetime.datetime,
                                 _from_timestamp(record["execute_after"]))
            locked_until = _from_timestamp(record["locked_until"])
            locked_by = record["locked_by"]

        assert execute_after is not None
        # Captured before the branches below may advance execute_after.
        scheduled_at = execute_after

        # NOTE(review): commit_task in this backend compares the stored
        # value against str(lease), while this compares the lease object
        # directly. Confirm both agree on the lease representation.
        if (locked_until is not None and locked_until > utc_now
                and (lease != locked_by)):
            result = ResultType.LOCKED
        elif (execute_after <= utc_now
              and task.execution_mode == ExecutionMode.AT_MOST_ONCE):
            # Due: advance the schedule right away and clear any lock.
            execute_after = task.get_next_execution(utc_now, execute_after)
            result = ResultType.READY
            locked_until = None
            locked_by = None
            await self._update_record(
                task,
                locked_until,
                locked_by,
                execute_after,
            )
        elif (execute_after <= utc_now
              and task.execution_mode == ExecutionMode.AT_LEAST_ONCE):
            # Due: take a new lock; execute_after is left unchanged here.
            locked_until = utc_now + task.lease_duration
            locked_by = uuid.uuid4()
            result = ResultType.READY
            await self._update_record(
                task,
                locked_until,
                locked_by,
                execute_after,
            )
        else:
            result = ResultType.PENDING

        return PollResponse(result=result,
                            scheduled_at=scheduled_at,
                            lease=locked_by)
import datetime import os import pymysql import pytest from conftest import random_table_name from pyncette import mysql from pyncette.task import Task DUMMY_TASK = Task(name="foo", func=object(), schedule="* * * * *") @pytest.mark.asyncio @pytest.mark.integration async def test_invalid_table_name(): with pytest.raises(ValueError): await mysql.mysql_repository( mysql_host=os.environ.get("MYSQL_HOST", "localhost"), mysql_database=os.environ.get("MYSQL_DATABASE", "pyncette"), mysql_user=os.environ.get("MYSQL_USER", "pyncette"), mysql_password=os.environ.get("MYSQL_PASSWORD", "password"), mysql_table_name="spaces in table name", ).__aenter__() @pytest.mark.asyncio @pytest.mark.integration async def test_skip_table_create(): with pytest.raises(pymysql.err.ProgrammingError): async with mysql.mysql_repository(
async def _update_item(self, task: Task, record: _TaskRecord) -> bool:
    """Conditionally write a record, guarded by its version.

    A record without ``execute_after`` is deleted; otherwise its schedule,
    lock and ``ready_at`` attributes are updated and the version is
    incremented. The cached ``_last_lease`` on the task is refreshed on
    success.

    Returns:
        ``True`` if the write went through, ``False`` if the version
        condition was rejected. Other client errors propagate.
    """
    expected_version = record.version

    try:
        item_key = {
            "partition_id": self._get_partition_id(task),
            "task_id": task.canonical_name,
        }
        # Version 0 also matches items that have no version attribute.
        if expected_version == 0:
            version_guard = (Attr("version").not_exists()
                             | Attr("version").eq(0))
        else:
            version_guard = Attr("version").eq(expected_version)

        if record.execute_after is None:
            await self._table.delete_item(
                Key=item_key,
                ConditionExpression=version_guard,
            )
            task._last_lease = None  # type: ignore
        else:
            # The item becomes ready at the later of its execution time
            # and its lock expiry.
            if record.locked_until is not None:
                ready_at = max(record.execute_after, record.locked_until)
            else:
                ready_at = record.execute_after

            if record.locked_until is not None:
                locked_until_iso = record.locked_until.isoformat()
            else:
                locked_until_iso = ""

            next_version = expected_version + 1
            attribute_values = {
                ":execute_after": record.execute_after.isoformat(),
                ":locked_until": locked_until_iso,
                ":locked_by": record.locked_by,
                # The name suffix guarantees uniqueness in the secondary
                # index.
                ":ready_at": f"{ready_at.isoformat()}_{task.canonical_name}",
                ":version": next_version,
            }
            await self._table.update_item(
                Key=item_key,
                UpdateExpression="""
                    set execute_after=:execute_after,
                    locked_until=:locked_until,
                    locked_by=:locked_by,
                    ready_at=:ready_at,
                    version=:version
                """,
                ExpressionAttributeValues=attribute_values,
                ConditionExpression=version_guard,
            )
            record.version = next_version
            task._last_lease = record  # type: ignore
    except ClientError as e:
        error_code = e.response["Error"]["Code"]
        if error_code != "ConditionalCheckFailedException":
            raise
        return False

    return True