def publish_artifacts(self, content, prefix=""):
    """
    Create PublishedArtifact records for every artifact in the content set.

    Args:
        content (pulpcore.plugin.models.Content): content set.
        prefix (str): a relative path prefix for the published artifact
    """
    artifacts_to_publish = []

    # Packages get a canonical layout: PACKAGES_DIRECTORY/<first letter>/<filename>,
    # rather than being published at their stored relative path.
    package_ca_qs = ContentArtifact.objects.filter(
        content__in=content
    ).filter(content__pulp_type=Package.get_pulp_type())

    for ca in package_ca_qs.values("pk", "relative_path").iterator():
        filename = ca["relative_path"]
        publish_path = os.path.join(
            prefix, PACKAGES_DIRECTORY, filename.lower()[0], filename
        )
        artifacts_to_publish.append(
            PublishedArtifact(
                relative_path=publish_path,
                publication=self.publication,
                content_artifact_id=ca["pk"],
            )
        )

    # Everything else, excluding types that are never published directly and
    # treeinfo files (which receive special handling).
    treeinfo_q = Q(relative_path__in=["treeinfo", ".treeinfo"])
    skip_types_q = Q(
        content__pulp_type__in=[
            RepoMetadataFile.get_pulp_type(),
            Modulemd.get_pulp_type(),
            ModulemdDefaults.get_pulp_type(),
            # already dealt with
            Package.get_pulp_type(),
        ]
    )
    other_ca_qs = (
        ContentArtifact.objects.filter(content__in=content)
        .exclude(skip_types_q)
        .exclude(treeinfo_q)
    )
    for ca in other_ca_qs.values("pk", "relative_path").iterator():
        artifacts_to_publish.append(
            PublishedArtifact(
                relative_path=ca["relative_path"],
                publication=self.publication,
                content_artifact_id=ca["pk"],
            )
        )

    PublishedArtifact.objects.bulk_create(artifacts_to_publish, batch_size=2000)
def _apply_retention_policy(self, new_version):
    """Apply the repository's "retain_package_versions" settings to the new version.

    Remove all non-modular packages that are older than the retention policy. A value
    of 0 for the package retention policy represents disabled. A value of 3 would mean
    that the 3 most recent versions of each package would be kept while older versions
    are discarded.

    Args:
        new_version (models.RepositoryVersion): Repository version to filter
    """
    assert not new_version.complete, \
        "Cannot apply retention policy to completed repository versions"

    if self.retain_package_versions > 0:
        # We annotate each package with an "age" and filter in Python rather than
        # in SQL. Filtering on the annotation in the database (something like
        # ".filter(F('age') > self.retain_package_versions)") would be preferable,
        # but it is not currently possible with Django. It would be possible with
        # raw SQL, but the repository version content membership subquery is
        # django-managed and would be difficult to share.
        candidate_qs = Package.objects.with_age().filter(
            pk__in=new_version.content.filter(pulp_type=Package.get_pulp_type()),
            is_modular=False,  # modular RPMs are exempt from the retention policy
        ).only('pk')

        stale_pks = [
            package.pk
            for package in candidate_qs
            if package.age > self.retain_package_versions
        ]
        new_version.remove_content(Content.objects.filter(pk__in=stale_pks))
def publish_artifacts(self, content):
    """
    Publish artifacts.

    Creates a PublishedArtifact for each ContentArtifact in the content set,
    skipping types that are never published directly (repo metadata files and
    modulemd/modulemd-defaults). Packages are published under
    PACKAGES_DIRECTORY/<first letter>/<filename>.

    Args:
        content (pulpcore.plugin.models.Content): content set.
    """
    published_artifacts = []
    # BUGFIX: select_related("content") avoids an extra database query per
    # artifact when reading content_artifact.content.pulp_type in the loop
    # below (previously an N+1 query pattern).
    contentartifact_qs = ContentArtifact.objects.filter(
        content__in=content.exclude(pulp_type__in=[
            RepoMetadataFile.get_pulp_type(),
            Modulemd.get_pulp_type(),
            ModulemdDefaults.get_pulp_type()
        ]).distinct()).select_related("content")
    for content_artifact in contentartifact_qs.iterator():
        relative_path = content_artifact.relative_path
        if content_artifact.content.pulp_type == Package.get_pulp_type():
            # Packages get the canonical "Packages/<first letter>/" layout.
            relative_path = os.path.join(PACKAGES_DIRECTORY,
                                         relative_path.lower()[0],
                                         content_artifact.relative_path)
        published_artifacts.append(
            PublishedArtifact(relative_path=relative_path,
                              publication=self.publication,
                              content_artifact=content_artifact))
    PublishedArtifact.objects.bulk_create(published_artifacts, batch_size=2000)
def find_children_of_content(content, src_repo_version):
    """Finds the content referenced directly by other content and returns it all together.

    Finds RPMs referenced by Advisory/Errata content.

    Args:
        content (iterable): Content for which to resolve children
        src_repo_version (pulpcore.models.RepositoryVersion): Source repo version

    Returns:
        Queryset of Content objects that are children of the initial set of content

    Raises:
        Package.DoesNotExist / Modulemd.DoesNotExist: if an advisory references a
            unit that is not present in the source repository version.
        MultipleObjectsReturned: if an advisory reference matches more than one unit.
    """
    # Content that were selected to be copied
    advisory_ids = content.filter(
        pulp_type=UpdateRecord.get_pulp_type()).only('pk')
    packagecategory_ids = content.filter(
        pulp_type=PackageCategory.get_pulp_type()).only('pk')
    packageenvironment_ids = content.filter(
        pulp_type=PackageEnvironment.get_pulp_type()).only('pk')
    packagegroup_ids = content.filter(
        pulp_type=PackageGroup.get_pulp_type()).only('pk')

    # Content in the source repository version
    package_ids = src_repo_version.content.filter(
        pulp_type=Package.get_pulp_type()).only('pk')
    module_ids = src_repo_version.content.filter(
        pulp_type=Modulemd.get_pulp_type()).only('pk')

    advisories = UpdateRecord.objects.filter(pk__in=advisory_ids)
    packages = Package.objects.filter(pk__in=package_ids)
    packagecategories = PackageCategory.objects.filter(
        pk__in=packagecategory_ids)
    packageenvironments = PackageEnvironment.objects.filter(
        pk__in=packageenvironment_ids)
    packagegroups = PackageGroup.objects.filter(pk__in=packagegroup_ids)
    modules = Modulemd.objects.filter(pk__in=module_ids)

    children = set()

    for advisory in advisories:
        # Find rpms referenced by Advisories/Errata.
        # NOTE: the original wrapped each .get() in try/except blocks whose
        # handlers only re-raised; that was dead code and has been removed.
        # DoesNotExist / MultipleObjectsReturned still propagate to the caller.
        for nevra in advisory.get_pkglist():
            (name, epoch, version, release, arch) = nevra
            package = packages.get(name=name, epoch=epoch, version=version,
                                   release=release, arch=arch)
            children.add(package.pk)

        for nsvca in advisory.get_module_list():
            (name, stream, version, context, arch) = nsvca
            module = modules.get(name=name, stream=stream, version=version,
                                 context=context, arch=arch)
            children.add(module.pk)

    # PackageCategories & PackageEnvironments resolution must go before PackageGroups
    # TODO: refactor to be more efficient (lower number of queries)
    for packagecategory in packagecategories.iterator():
        for category_package_group in packagecategory.group_ids:
            category_package_groups = PackageGroup.objects.filter(
                name=category_package_group['name'],
                pk__in=src_repo_version.content)
            children.update(
                [pkggroup.pk for pkggroup in category_package_groups])
            packagegroups = packagegroups.union(category_package_groups)

    for packageenvironment in packageenvironments.iterator():
        for env_package_group in packageenvironment.group_ids:
            env_package_groups = PackageGroup.objects.filter(
                name=env_package_group['name'],
                pk__in=src_repo_version.content)
            children.update([envgroup.pk for envgroup in env_package_groups])
            packagegroups = packagegroups.union(env_package_groups)
        for optional_env_package_group in packageenvironment.option_ids:
            opt_env_package_groups = PackageGroup.objects.filter(
                name=optional_env_package_group['name'],
                pk__in=src_repo_version.content)
            children.update(
                [optpkggroup.pk for optpkggroup in opt_env_package_groups])
            packagegroups = packagegroups.union(opt_env_package_groups)

    # Find rpms referenced by PackageGroups. "age == 1" selects the newest
    # version of each named package present in the source repo version.
    # BUGFIX: the original reused the name "pkg" for the group-name loop
    # variable, the comprehension variable, AND the inner package loop --
    # renamed to remove the shadowing hazard (behavior unchanged).
    for packagegroup in packagegroups.iterator():
        group_package_names = [pkg['name'] for pkg in packagegroup.packages]
        for package_name in group_package_names:
            latest_packages = [
                package for package in Package.objects.with_age().filter(
                    name=package_name, pk__in=src_repo_version.content)
                if package.age == 1
            ]
            for package in latest_packages:
                children.add(package.pk)

    return Content.objects.filter(pk__in=children)
def publish(
    repository_version_pk,
    gpgcheck_options=None,
    metadata_signing_service=None,
    checksum_types=None,
    sqlite_metadata=False,
):
    """
    Create a Publication based on a RepositoryVersion.

    Args:
        repository_version_pk (str): Create a publication from this repository version.
        gpgcheck_options (dict): GPG signature check options.
        metadata_signing_service (pulpcore.app.models.AsciiArmoredDetachedSigningService):
            A reference to an associated signing service.
        checksum_types (dict): Checksum types for metadata and packages.
        sqlite_metadata (bool): Whether to generate metadata files in sqlite format.

    Returns:
        The created RpmPublication.
    """
    repository_version = RepositoryVersion.objects.get(pk=repository_version_pk)
    repository = repository_version.repository.cast()
    checksum_types = checksum_types or {}

    if metadata_signing_service:
        metadata_signing_service = AsciiArmoredDetachedSigningService.objects.get(
            pk=metadata_signing_service
        )

    checksum_types["original"] = repository.original_checksum_types

    log.info(
        _("Publishing: repository={repo}, version={version}").format(
            repo=repository.name,
            version=repository_version.number,
        )
    )

    # Scratch space for metadata generation, rooted in the task's working directory.
    # BUGFIX: the original called tempfile.TemporaryDirectory("."), which passes "."
    # as the *suffix* argument -- the directory ended up in the system default tmp
    # location with a trailing "." instead of inside the working directory. The
    # keyword form dir="." expresses the intent correctly.
    with tempfile.TemporaryDirectory(dir="."):
        with RpmPublication.create(repository_version) as publication:
            # Default the metadata checksum type from the first package present,
            # if any, unless explicitly configured.
            kwargs = {}
            first_package = repository_version.content.filter(
                pulp_type=Package.get_pulp_type()
            ).first()
            if first_package:
                kwargs["default"] = first_package.cast().checksum_type
            publication.metadata_checksum_type = get_checksum_type(
                "primary", checksum_types, **kwargs
            )
            publication.package_checksum_type = (
                checksum_types.get("package") or publication.metadata_checksum_type
            )

            if gpgcheck_options is not None:
                publication.gpgcheck = gpgcheck_options.get("gpgcheck")
                publication.repo_gpgcheck = gpgcheck_options.get("repo_gpgcheck")

            if sqlite_metadata:
                publication.sqlite_metadata = True

            publication_data = PublicationData(publication)
            publication_data.populate()

            total_repos = 1 + len(publication_data.sub_repos)
            pb_data = dict(
                message="Generating repository metadata",
                code="publish.generating_metadata",
                total=total_repos,
            )
            with ProgressReport(**pb_data) as publish_pb:
                content = publication.repository_version.content

                # Main repo
                generate_repo_metadata(
                    content,
                    publication,
                    checksum_types,
                    publication_data.repomdrecords,
                    metadata_signing_service=metadata_signing_service,
                )
                publish_pb.increment()

                # Sub-repos (e.g. kickstart trees) each get their own metadata pass.
                for sub_repo in publication_data.sub_repos:
                    name = sub_repo[0]
                    checksum_types["original"] = getattr(publication_data, f"{name}_checksums")
                    content = getattr(publication_data, f"{name}_content")
                    extra_repomdrecords = getattr(publication_data, f"{name}_repomdrecords")
                    generate_repo_metadata(
                        content,
                        publication,
                        checksum_types,
                        extra_repomdrecords,
                        name,
                        metadata_signing_service=metadata_signing_service,
                    )
                    publish_pb.increment()

            log.info(_("Publication: {publication} created").format(publication=publication.pk))

            return publication
def publish_artifacts(self, content, prefix=""):
    """
    Publish artifacts.

    Args:
        content (pulpcore.plugin.models.Content): content set.
        prefix (str): a relative path prefix for the published artifact
    """
    new_published_artifacts = []

    # Special case for Packages
    package_ca_qs = ContentArtifact.objects.filter(content__in=content).filter(
        content__pulp_type=Package.get_pulp_type()
    )

    # Some Suboptimal Repos have the 'same' artifact living in multiple places.
    # Specifically, the same NEVRA, in more than once place, **with different checksums**
    # (since if all that was different was location_href there would be only one
    # ContentArtifact in the first place).
    #
    # pulp_rpm wants to publish a 'canonical' repository-layout, under which an RPM
    # "name-version-release-arch" appears at "Packages/n/name-version-release-arch.rpm".
    # Because the assumption is that Packages don't "own" their path, only the filename
    # is kept as relative_path.
    #
    # In this case, we have to pick one - which is essentially what the rest of the RPM
    # Ecosystem does when faced with the impossible. This code takes the first-found. We
    # could implement something more complicated, if there are better options
    # (choose by last-created maybe?)
    #
    # Note that this only impacts user-created publications, which produce the "standard"
    # RPM layout of repo/Packages/f/foo.rpm. A publication created by mirror-sync retains
    # whatever layout their "upstream" repo-metadata dictates.
    seen_paths = set()
    duplicated_paths = []
    for ca in package_ca_qs.values("pk", "relative_path").iterator():
        filename = ca["relative_path"]
        relative_path = os.path.join(
            prefix, PACKAGES_DIRECTORY, filename.lower()[0], filename
        )
        if relative_path in seen_paths:
            # First-found wins; remember the loser so we can warn about it below.
            duplicated_paths.append(f'{relative_path}:{ca["pk"]}')
            continue
        seen_paths.add(relative_path)
        new_published_artifacts.append(
            PublishedArtifact(
                relative_path=relative_path,
                publication=self.publication,
                content_artifact_id=ca["pk"],
            )
        )

    if duplicated_paths:
        log.warning(
            _("Duplicate paths found at publish : {problems} ").format(
                problems="; ".join(duplicated_paths)
            )
        )

    # Handle everything else
    treeinfo_q = Q(relative_path__in=["treeinfo", ".treeinfo"])
    skip_types_q = Q(
        content__pulp_type__in=[
            RepoMetadataFile.get_pulp_type(),
            Modulemd.get_pulp_type(),
            ModulemdDefaults.get_pulp_type(),
            # already dealt with
            Package.get_pulp_type(),
        ]
    )

    other_ca_qs = (
        ContentArtifact.objects.filter(content__in=content)
        .exclude(skip_types_q)
        .exclude(treeinfo_q)
    )

    for ca in other_ca_qs.values("pk", "relative_path").iterator():
        new_published_artifacts.append(
            PublishedArtifact(
                relative_path=ca["relative_path"],
                publication=self.publication,
                content_artifact_id=ca["pk"],
            )
        )

    PublishedArtifact.objects.bulk_create(new_published_artifacts, batch_size=2000)
def find_children_of_content(content, src_repo_version):
    """Finds the content referenced directly by other content and returns it all together.

    Finds RPMs referenced by Advisory/Errata content.

    Args:
        content (Queryset): Content for which to resolve children
        src_repo_version (pulpcore.models.RepositoryVersion): Source repo version

    Returns:
        Queryset of Content objects that are children of the initial set of content
    """
    # Content selected to be copied, split out by type.
    advisory_ids = content.filter(
        pulp_type=UpdateRecord.get_pulp_type()).only("pk")
    packagecategory_ids = content.filter(
        pulp_type=PackageCategory.get_pulp_type()).only("pk")
    packageenvironment_ids = content.filter(
        pulp_type=PackageEnvironment.get_pulp_type()).only("pk")
    packagegroup_ids = content.filter(
        pulp_type=PackageGroup.get_pulp_type()).only("pk")

    # Everything available in the source repository version.
    package_ids = src_repo_version.content.filter(
        pulp_type=Package.get_pulp_type()).only("pk")
    module_ids = src_repo_version.content.filter(
        pulp_type=Modulemd.get_pulp_type()).only("pk")

    advisories = UpdateRecord.objects.filter(pk__in=advisory_ids)
    packages = Package.objects.filter(pk__in=package_ids)
    packagecategories = PackageCategory.objects.filter(
        pk__in=packagecategory_ids)
    packageenvironments = PackageEnvironment.objects.filter(
        pk__in=packageenvironment_ids)
    packagegroups = PackageGroup.objects.filter(pk__in=packagegroup_ids)
    modules = Modulemd.objects.filter(pk__in=module_ids)

    children = set()

    for advisory in advisories.iterator():
        # Resolve every NEVRA referenced by the advisory with one OR'd query.
        # Q(pk__in=[]) is an always-false seed so an empty pkglist matches nothing.
        nevra_q = Q(pk__in=[])
        for (name, epoch, version, release, arch) in advisory.get_pkglist():
            nevra_q |= Q(name=name, epoch=epoch, version=version,
                         release=release, arch=arch)
        children.update(
            packages.filter(nevra_q).values_list("pk", flat=True))

        # Same batched approach for modules referenced by the advisory.
        nsvca_q = Q(pk__in=[])
        for (name, stream, version, context, arch) in advisory.get_module_list():
            nsvca_q |= Q(name=name, stream=stream, version=version,
                         context=context, arch=arch)
        children.update(
            modules.filter(nsvca_q).values_list("pk", flat=True))

    # PackageCategories & PackageEnvironments resolution must go before PackageGroups
    wanted_group_names = set()
    for packagecategory in packagecategories.iterator():
        for group_id in packagecategory.group_ids:
            wanted_group_names.add(group_id["name"])
    for packageenvironment in packageenvironments.iterator():
        for group_id in packageenvironment.group_ids:
            wanted_group_names.add(group_id["name"])
        for group_id in packageenvironment.option_ids:
            wanted_group_names.add(group_id["name"])

    child_package_groups = PackageGroup.objects.filter(
        name__in=wanted_group_names, pk__in=src_repo_version.content)
    children.update(group.pk for group in child_package_groups)
    packagegroups = packagegroups.union(child_package_groups)

    # Find rpms referenced by PackageGroups
    group_package_names = set()
    for packagegroup in packagegroups.iterator():
        group_package_names |= set(
            pkg["name"] for pkg in packagegroup.packages)

    # TODO: do modular/nonmodular need to be taken into account?
    already_present_names = (Package.objects.filter(
        name__in=group_package_names,
        pk__in=content,
    ).values_list("name", flat=True).distinct())
    names_still_needed = group_package_names - set(already_present_names)

    candidate_packages = Package.objects.with_age().filter(
        name__in=names_still_needed, pk__in=src_repo_version.content)

    # Pick the latest version of each package available which isn't already present
    # in the content set.
    children.update(
        pkg.pk for pkg in candidate_packages.iterator() if pkg.age == 1)

    return Content.objects.filter(pk__in=children)
def publish_artifacts(self, content, prefix=""):
    """
    Publish artifacts.

    Args:
        content (pulpcore.plugin.models.Content): content set.
        prefix (str): a relative path prefix for the published artifact
    """
    published_artifacts = []

    # Special case for Packages
    #
    # BUGFIX: the original passed "content__rpm_package__time_build" to
    # select_related(), but select_related() takes *relation* paths and
    # time_build is a plain field (the .values() call below discards
    # select_related() anyway). Trimmed to the actual relation.
    contentartifact_qs = (ContentArtifact.objects.filter(
        content__in=content).filter(
            content__pulp_type=Package.get_pulp_type()).select_related(
                "content__rpm_package"))

    rel_path_mapping = defaultdict(list)
    # Some Suboptimal Repos have the 'same' artifact living in multiple places.
    # Specifically, the same NEVRA, in more than once place, **with different checksums**
    # (since if all that was different was location_href there would be only one
    # ContentArtifact in the first place).
    #
    # pulp_rpm wants to publish a 'canonical' repository-layout, under which an RPM
    # "name-version-release-arch" appears at "Packages/n/name-version-release-arch.rpm".
    # Because the assumption is that Packages don't "own" their path, only the filename
    # is kept as relative_path.
    #
    # In this case, we have to pick one - which is essentially what the rest of the RPM
    # Ecosystem does when faced with the impossible. This code takes the one with the
    # most recent build time which is the same heuristic used by Yum/DNF/Zypper.
    #
    # Note that this only impacts user-created publications, which produce the "standard"
    # RPM layout of repo/Packages/f/foo.rpm. A publication created by mirror-sync retains
    # whatever layout their "upstream" repo-metadata dictates.
    fields = ["pk", "relative_path", "content__rpm_package__time_build"]
    for content_artifact in contentartifact_qs.values(*fields).iterator():
        relative_path = content_artifact["relative_path"]
        time_build = content_artifact["content__rpm_package__time_build"]

        relative_path = os.path.join(prefix, PACKAGES_DIRECTORY,
                                     relative_path.lower()[0], relative_path)
        rel_path_mapping[relative_path].append(
            (content_artifact["pk"], time_build))

    for rel_path, content_artifacts in rel_path_mapping.items():
        # sort the content artifacts by when the package was built
        if len(content_artifacts) > 1:
            content_artifacts.sort(key=lambda p: p[1], reverse=True)
            log.warning(
                "Duplicate packages found competing for {path}, selected the one with "
                "the most recent build time, excluding {others} others.".
                format(path=rel_path, others=len(content_artifacts[1:])))

        # Only add the first one (the one with the highest build time)
        published_artifacts.append(
            PublishedArtifact(
                relative_path=rel_path,
                publication=self.publication,
                content_artifact_id=content_artifacts[0][0],
            ))

    # Handle everything else
    is_treeinfo = Q(relative_path__in=["treeinfo", ".treeinfo"])
    unpublishable_types = Q(content__pulp_type__in=[
        RepoMetadataFile.get_pulp_type(),
        Modulemd.get_pulp_type(),
        ModulemdDefaults.get_pulp_type(),
        # already dealt with
        Package.get_pulp_type(),
    ])

    contentartifact_qs = (ContentArtifact.objects.filter(
        content__in=content).exclude(unpublishable_types).exclude(
            is_treeinfo))

    for content_artifact in contentartifact_qs.values(
            "pk", "relative_path").iterator():
        published_artifacts.append(
            PublishedArtifact(
                relative_path=content_artifact["relative_path"],
                publication=self.publication,
                content_artifact_id=content_artifact["pk"],
            ))

    PublishedArtifact.objects.bulk_create(published_artifacts, batch_size=2000)
def find_children_of_content(content, repository_version):
    """Finds the content referenced directly by other content and returns it all together.

    Finds RPMs referenced by Advisory/Errata content.

    Args:
        content (iterable): Content for which to resolve children
        repository_version (pulpcore.models.RepositoryVersion): Source repo version

    Returns:
        Queryset of Content objects that are children of the initial set of content

    Raises:
        Package.DoesNotExist / Modulemd.DoesNotExist: if an advisory references a
            unit that is not present in the repository version.
        MultipleObjectsReturned: if an advisory reference matches more than one unit.
    """
    # Advisories that were selected to be copied
    advisory_ids = content.filter(
        pulp_type=UpdateRecord.get_pulp_type()).only('pk')
    # All packages in the source repository version
    package_ids = repository_version.content.filter(
        pulp_type=Package.get_pulp_type()).only('pk')
    # All modules in the source repository version
    module_ids = repository_version.content.filter(
        pulp_type=Modulemd.get_pulp_type()).only('pk')

    advisories = UpdateRecord.objects.filter(pk__in=advisory_ids)
    packages = Package.objects.filter(pk__in=package_ids)
    modules = Modulemd.objects.filter(pk__in=module_ids)

    children = set()

    for advisory in advisories:
        # Find rpms referenced by Advisories/Errata.
        # NOTE: the original wrapped each .get() in try/except blocks whose
        # handlers only re-raised; that was dead code and has been removed.
        # DoesNotExist / MultipleObjectsReturned still propagate to the caller.
        for nevra in advisory.get_pkglist():
            (name, epoch, version, release, arch) = nevra
            package = packages.get(name=name, epoch=epoch, version=version,
                                   release=release, arch=arch)
            children.add(package.pk)

        for nsvca in advisory.get_module_list():
            (name, stream, version, context, arch) = nsvca
            module = modules.get(name=name, stream=stream, version=version,
                                 context=context, arch=arch)
            children.add(module.pk)

    # TODO: Find rpms referenced by PackageGroups,
    # PackageGroups referenced by PackageCategories, etc.

    return Content.objects.filter(pk__in=children)