def test_filter_repositories(username, include_public, filter_to_namespace, repo_kind,
                             initialized_db):
    """
    Checks that filter_to_repos_for_user, run under assert_query_count(1), finds the same
    repositories as _get_visible_repositories_for_user for the same arguments.

    initialized_db is not referenced in the body; presumably it is a fixture requested for
    its setup side effects (confirm against the test configuration).
    """
    # Only restrict to a namespace when asked to. When the username contains a '+', the
    # namespace is the first element returned by parse_robot_username.
    namespace = None
    if filter_to_namespace:
        namespace = username
        if '+' in username:
            namespace, _ = parse_robot_username(username)

    user = get_namespace_user(username)

    base_query = Repository.select().distinct()
    base_query = base_query.join(Namespace, on=(Repository.namespace_user == Namespace.id))
    base_query = base_query.switch(Repository).join(RepositoryPermission, JOIN.LEFT_OUTER)

    # Prime the cache.
    Repository.kind.get_id('image')

    with assert_query_count(1):
        filtered_rows = filter_to_repos_for_user(
            base_query,
            user.id,
            namespace=namespace,
            include_public=include_public,
            repo_kind=repo_kind,
        )
        found = list(filtered_rows)

    # The reference lookup runs outside the query-count block.
    expected = list(
        _get_visible_repositories_for_user(
            user,
            repo_kind=repo_kind,
            namespace=namespace,
            include_public=include_public,
        )
    )

    assert len(found) == len(expected)
    assert {r.id for r in found} == {r.id for r in expected}
def find_create_or_link_image(
    docker_image_id, repo_obj, username, translations, preferred_location
):
    """
    Returns an image with the given docker image ID for the given repository, looking it up
    or creating it as needed.

    The steps, in order:
      1. If get_repo_image already finds the image in the repository, return it.
      2. Otherwise look for an image with the same docker image ID among the repositories
         selected by filter_to_repos_for_user for the given username, and hand it to
         _find_or_link_image; a truthy result is returned.
      3. Otherwise, inside a transaction, re-check the repository and, if the image is still
         missing, create a new V1 storage at preferred_location and a new Image row for it.
    """

    def _lookup_in_repository():
        return get_repo_image(repo_obj.namespace_user.username, repo_obj.name, docker_image_id)

    # Nothing to do if the repository already has the image.
    present = _lookup_in_repository()
    if present:
        return present

    # Look for an existing image (and storage) that the new image could link to.
    candidates = Image.select(Image, ImageStorage).distinct()
    candidates = candidates.join(ImageStorage).switch(Image)
    candidates = candidates.join(Repository).join(RepositoryPermission, JOIN.LEFT_OUTER)
    candidates = candidates.switch(Repository).join(
        Namespace, on=(Repository.namespace_user == Namespace.id)
    )
    candidates = candidates.where(Image.docker_image_id == docker_image_id)
    candidates = _basequery.filter_to_repos_for_user(
        candidates, _namespace_id_for_username(username)
    )

    # If a candidate exists, try to translate its ancestry and copy its storage. The linking
    # call is deliberately kept inside the try so that an Image.DoesNotExist raised by it is
    # handled the same way as a missing candidate.
    try:
        logger.debug("Looking up existing image for ID: %s", docker_image_id)
        candidate = candidates.get()
        logger.debug("Existing image %s found for ID: %s", candidate.id, docker_image_id)
        linked = _find_or_link_image(
            candidate, repo_obj, username, translations, preferred_location
        )
        if linked:
            return linked
    except Image.DoesNotExist:
        logger.debug("No existing image found for ID: %s", docker_image_id)

    # Fall back to creating brand new storage.
    with db_transaction():
        # Check once more, now under the transaction, before creating anything.
        present = _lookup_in_repository()
        if present:
            return present

        logger.debug("Creating new storage for docker id: %s", docker_image_id)
        created_storage = storage.create_v1_storage(preferred_location)
        return Image.create(
            docker_image_id=docker_image_id,
            repository=repo_obj,
            storage=created_storage,
            ancestors="/",
        )
def get_matching_user_namespaces(namespace_prefix, username, limit=10):
    """
    Returns a query for the distinct namespaces that match namespace_prefix (via
    prefix_search on Namespace.username), passed through filter_to_repos_for_user for the
    given user and limited to `limit` rows.

    If get_namespace_user finds nothing for username, None is passed as the user ID.
    """
    viewer = get_namespace_user(username)
    viewer_id = None if viewer is None else viewer.id

    prefix_clause = prefix_search(Namespace.username, namespace_prefix)

    # The permissions table is LEFT OUTER joined, so namespaces whose repositories have no
    # permission rows are still candidates at this point.
    candidates = Namespace.select().distinct()
    candidates = candidates.join(Repository, on=(Repository.namespace_user == Namespace.id))
    candidates = candidates.join(RepositoryPermission, JOIN.LEFT_OUTER)
    candidates = candidates.where(prefix_clause)

    visible = _basequery.filter_to_repos_for_user(candidates, viewer_id)
    return visible.limit(limit)
def get_visible_repositories(
    username, namespace=None, kind_filter="image", include_public=False, start_id=None, limit=None
):
    """
    Builds the query for the repositories visible to the given user (if any).

    The repository ID is selected under the alias "rid". Repositories whose state is
    MARKED_FOR_DELETION are excluded. When no username is given and public repositories are
    not requested, or when the username does not resolve to a namespace, a query matching no
    rows is returned. When `limit` is given, the query is limited and ordered by "rid".
    """

    def _matches_nothing():
        # A real query (rather than an empty list) is required here, because the result
        # will be modified by other queries later on.
        return Repository.select(Repository.id.alias("rid")).where(Repository.id == -1)

    if not (include_public or username):
        return _matches_nothing()

    columns = (
        Repository.name,
        Repository.id.alias("rid"),
        Repository.description,
        Namespace.username,
        Repository.visibility,
        Repository.kind,
        Repository.state,
    )
    query = Repository.select(*columns).switch(Repository)
    query = query.join(Namespace, on=(Repository.namespace_user == Namespace.id))
    query = query.where(Repository.state != RepositoryState.MARKED_FOR_DELETION)

    user_id = None
    if username:
        # The permissions table is only joined in when filtering by a user's permissions.
        query = query.switch(Repository).distinct()
        query = query.join(RepositoryPermission, JOIN.LEFT_OUTER)

        found_namespace = _get_namespace_user(username)
        if not found_namespace:
            return _matches_nothing()

        user_id = found_namespace.id

    query = _basequery.filter_to_repos_for_user(
        query, user_id, namespace, kind_filter, include_public, start_id=start_id
    )

    if limit is None:
        return query

    return query.limit(limit).order_by(SQL("rid"))
def _filter_repositories_visible_to_user(unfiltered_query, filter_user_id, limit, repo_kind):
    """
    Generator yielding the repositories from unfiltered_query that remain after
    filter_to_repos_for_user is applied for the given user and repository kind.

    The unfiltered query is consumed page by page. Within each page, the surviving
    repositories are yielded in the order in which their IDs appeared in that page.

    Args:
      unfiltered_query: query supporting paginate(page, size) whose rows expose `.id`.
      filter_user_id: user ID handed to filter_to_repos_for_user.
      limit: only used to size each page (2 * limit rows); it does not cap the number of
        repositories yielded.
      repo_kind: repository kind handed to filter_to_repos_for_user.
    """
    max_pages = 10  # Just to be safe: never walk more than this many pages.
    chunk_count = limit * 2
    encountered = set()

    for unfiltered_page in range(1, max_pages + 1):
        # Find the next chunk's worth of repository IDs, paginated by the chunk size.
        found_ids = [r.id for r in unfiltered_query.paginate(unfiltered_page, chunk_count)]

        # Pagination is not necessarily stable in SQL databases, so drop any IDs that were
        # already handled on an earlier page. Stop once a page brings nothing new.
        new_unfiltered_ids = set(found_ids) - encountered
        if not new_unfiltered_ids:
            break

        encountered.update(new_unfiltered_ids)

        # Filter the repositories found to only those visible to the current user.
        query = (
            Repository.select(Repository, Namespace)
            .distinct()
            .join(Namespace, on=(Namespace.id == Repository.namespace_user))
            .switch(Repository)
            .join(RepositoryPermission)
            .where(Repository.id << list(new_unfiltered_ids))
        )
        filtered = _basequery.filter_to_repos_for_user(query, filter_user_id, repo_kind=repo_kind)

        # Restore the page's original ordering. The position of every ID is computed once up
        # front (first occurrence wins, matching list.index) instead of scanning found_ids
        # for every repository being sorted.
        position_of = {}
        for position, repo_id in enumerate(found_ids):
            position_of.setdefault(repo_id, position)

        all_filtered_repos = sorted(filtered, key=lambda repo: position_of[repo.id])

        # Yield the repositories in sorted order.
        for filtered_repo in all_filtered_repos:
            yield filtered_repo

        # A short page means the unfiltered query has been exhausted.
        if len(found_ids) < chunk_count:
            break