def find_repository_with_garbage(limit_to_gc_policy_s):
    """
    Picks a random repository whose namespace uses the given tag-expiration
    policy (in seconds) and which holds at least one tag that expired before
    that policy window. Returns None when no such repository exists.
    """
    cutoff = get_epoch_timestamp() - limit_to_gc_policy_s

    # Small random pool of repositories with at least one expired tag; capped
    # so the random ORDER BY below only touches a bounded subquery.
    expired_tag_repos = (
        RepositoryTag.select(RepositoryTag.repository)
        .join(Repository)
        .join(Namespace, on=(Repository.namespace_user == Namespace.id))
        .where(
            ~(RepositoryTag.lifetime_end_ts >> None),
            (RepositoryTag.lifetime_end_ts <= cutoff),
            (Namespace.removed_tag_expiration_s == limit_to_gc_policy_s),
        )
        .limit(500)
        .distinct()
        .alias("candidates")
    )

    try:
        chosen = (
            RepositoryTag.select(expired_tag_repos.c.repository_id)
            .from_(expired_tag_repos)
            .order_by(db_random_func())
            .get()
        )
        if chosen is None:
            return

        return Repository.get(Repository.id == chosen.repository_id)
    except (RepositoryTag.DoesNotExist, Repository.DoesNotExist):
        return None
def _select_available_item(self, ordering_required, now):
    """
    Selects an available queue item from the queue table and returns it, if any.

    If none, return None.
    """
    name_match_query = self._name_match_query()
    try:
        if ordering_required:
            # The previous solution to this used a select for update in a
            # transaction to prevent multiple instances from processing the
            # same queue item. This suffered performance problems. This solution
            # instead has instances attempt to update the potential queue item to be
            # unavailable. However, since their update clause is restricted to items
            # that are available=False, only one instance's update will succeed, and
            # it will have a changed row count of 1. Instances that have 0 changed
            # rows know that another instance is already handling that item.
            running = self._running_jobs(now, name_match_query)
            avail = self._available_jobs_not_running(
                now, name_match_query, running)
            # Ordering required: always take the lowest-id (oldest) available item.
            return avail.order_by(QueueItem.id).get()
        else:
            # If we don't require ordering, we grab a random item from any of the first 50 available.
            subquery = self._available_jobs(
                now, name_match_query).limit(50).alias("j1")
            return (QueueItem.select().join(
                subquery, on=QueueItem.id == subquery.c.id).order_by(
                    db_random_func()).get())
    except QueueItem.DoesNotExist:
        # No available queue item was found.
        return None
def find_repository_with_garbage(limit_to_gc_policy_s):
    """
    Returns a repository that has garbage (defined as an expired Tag that is past
    the repo's namespace's expiration window) or None if none.
    """
    # Tag lifetimes are tracked in milliseconds; the policy is given in seconds.
    cutoff_ms = get_epoch_timestamp_ms() - (limit_to_gc_policy_s * 1000)

    try:
        # Bounded pool of repositories in enabled namespaces (and not already
        # marked for deletion) that contain at least one expired tag.
        pool = (
            Tag.select(Tag.repository)
            .join(Repository)
            .join(Namespace, on=(Repository.namespace_user == Namespace.id))
            .where(
                ~(Tag.lifetime_end_ms >> None),
                (Tag.lifetime_end_ms <= cutoff_ms),
                (Namespace.removed_tag_expiration_s == limit_to_gc_policy_s),
                (Namespace.enabled == True),
                (Repository.state != RepositoryState.MARKED_FOR_DELETION),
            )
            .limit(GC_CANDIDATE_COUNT)
            .distinct()
            .alias("candidates")
        )

        # Choose one pool member at random.
        picked = (
            Tag.select(pool.c.repository_id)
            .from_(pool)
            .order_by(db_random_func())
            .get()
        )
        if picked is None:
            return

        return Repository.get(Repository.id == picked.repository_id)
    except (Tag.DoesNotExist, Repository.DoesNotExist):
        return None
def get_stale_blob_upload(stale_timespan):
    """
    Returns a random blob upload which was created before the stale timespan.
    """
    cutoff = datetime.now() - stale_timespan
    try:
        # Cap the pool so the random ORDER BY stays over a bounded subquery.
        pool = (
            BlobUpload.select()
            .where(BlobUpload.created <= cutoff)
            .limit(500)
            .distinct()
            .alias("candidates")
        )

        chosen = (
            BlobUpload.select(pool.c.id)
            .from_(pool)
            .order_by(db_random_func())
            .get()
        )
        if not chosen:
            return None

        # Re-fetch the chosen row with its storage location joined in.
        return (
            BlobUpload.select(BlobUpload, ImageStorageLocation)
            .join(ImageStorageLocation)
            .where(BlobUpload.id == chosen.id)
            .get()
        )
    except BlobUpload.DoesNotExist:
        return None
def get_archivable_build():
    """
    Returns a random RepositoryBuild whose logs have not yet been archived and
    which is either in an archivable phase or started long enough ago to be
    presumed dead. Returns None if no such build exists.
    """
    presumed_dead_date = datetime.utcnow() - PRESUMED_DEAD_BUILD_AGE

    # Bounded candidate pool so the random selection below stays cheap.
    pool = (
        RepositoryBuild.select(RepositoryBuild.id)
        .where(
            (RepositoryBuild.phase << ARCHIVABLE_BUILD_PHASES)
            | (RepositoryBuild.started < presumed_dead_date),
            RepositoryBuild.logs_archived == False,
        )
        .limit(50)
        .alias('candidates')
    )

    try:
        picked = (
            RepositoryBuild.select(pool.c.id)
            .from_(pool)
            .order_by(db_random_func())
            .get()
        )
        return RepositoryBuild.get(id=picked)
    except RepositoryBuild.DoesNotExist:
        return None
def find_uncounted_repository():
    """
    Returns a repository that has not yet had an entry added into the
    RepositoryActionCount table for yesterday.
    """
    try:
        yesterday = date.today() - timedelta(days=1)

        # Repositories that already have a count recorded for yesterday.
        already_counted = RepositoryActionCount.select(
            RepositoryActionCount.repository
        ).where(RepositoryActionCount.date == yesterday)

        # Pick a random repository outside that set.
        return (
            Repository.select()
            .where(~(Repository.id << (already_counted)))
            .order_by(db_random_func())
            .get()
        )
    except Repository.DoesNotExist:
        return None
def get_stale_team(stale_timespan):
    """
    Returns a team that is setup to sync to an external group, and who has not
    been synced in now - stale_timespan (including never synced). Returns None
    if none found.
    """
    stale_at = datetime.now() - stale_timespan
    try:
        # Bounded pool of syncs that are stale or have never run.
        pool = (
            TeamSync.select(TeamSync.id)
            .where(
                (TeamSync.last_updated <= stale_at)
                | (TeamSync.last_updated >> None)
            )
            .limit(500)
            .alias("candidates")
        )

        chosen = (
            TeamSync.select(pool.c.id)
            .from_(pool)
            .order_by(db_random_func())
            .get()
        )
        if chosen is None:
            return

        # Re-fetch with the owning team joined in.
        return (
            TeamSync.select(TeamSync, Team)
            .join(Team)
            .where(TeamSync.id == chosen.id)
            .get()
        )
    except TeamSync.DoesNotExist:
        return None