Example 1
    def publish_artifacts(self, content, prefix=""):
        """
        Publish artifacts.

        Args:
            content (pulpcore.plugin.models.Content): content set.
            prefix (str): a relative path prefix for the published artifact

        """
        published_artifacts = []

        # Special case for Packages
        contentartifact_qs = ContentArtifact.objects.filter(
            content__in=content).filter(
                content__pulp_type=Package.get_pulp_type())

        for content_artifact in contentartifact_qs.values(
                "pk", "relative_path").iterator():
            relative_path = content_artifact["relative_path"]
            relative_path = os.path.join(prefix, PACKAGES_DIRECTORY,
                                         relative_path.lower()[0],
                                         relative_path)
            published_artifacts.append(
                PublishedArtifact(
                    relative_path=relative_path,
                    publication=self.publication,
                    content_artifact_id=content_artifact["pk"],
                ))

        # Handle everything else
        is_treeinfo = Q(relative_path__in=["treeinfo", ".treeinfo"])
        unpublishable_types = Q(content__pulp_type__in=[
            RepoMetadataFile.get_pulp_type(),
            Modulemd.get_pulp_type(),
            ModulemdDefaults.get_pulp_type(),
            # already dealt with
            Package.get_pulp_type(),
        ])

        contentartifact_qs = (ContentArtifact.objects.filter(
            content__in=content).exclude(unpublishable_types).exclude(
                is_treeinfo))

        for content_artifact in contentartifact_qs.values(
                "pk", "relative_path").iterator():
            published_artifacts.append(
                PublishedArtifact(
                    relative_path=content_artifact["relative_path"],
                    publication=self.publication,
                    content_artifact_id=content_artifact["pk"],
                ))

        PublishedArtifact.objects.bulk_create(published_artifacts,
                                              batch_size=2000)
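
For reference, the path arithmetic above produces the canonical "Packages/<first letter>/<filename>" layout that later examples describe as repo/Packages/f/foo.rpm. A minimal sketch, assuming PACKAGES_DIRECTORY is the string "Packages" and using a hypothetical filename:

import os

PACKAGES_DIRECTORY = "Packages"  # assumed value of the pulp_rpm constant
prefix = ""  # no sub-repo prefix
relative_path = "foo-1.0-1.noarch.rpm"  # hypothetical package filename

# Mirrors the os.path.join() call in publish_artifacts() above.
print(os.path.join(prefix, PACKAGES_DIRECTORY, relative_path.lower()[0], relative_path))
# -> Packages/f/foo-1.0-1.noarch.rpm
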
Example 2
    def _apply_retention_policy(self, new_version):
        """Apply the repository's "retain_package_versions" settings to the new version.

        Remove all non-modular packages that are older than the retention policy. A value of 0
        means the retention policy is disabled. A value of 3 means that the 3 most recent
        versions of each package are kept while older versions are discarded.

        Args:
            new_version (models.RepositoryVersion): Repository version to filter
        """
        assert not new_version.complete, \
            "Cannot apply retention policy to completed repository versions"

        if self.retain_package_versions > 0:
            # It would be more ideal if, instead of annotating with an age and filtering manually,
            # we could use Django to filter the particular Package content we want to delete.
            # Something like ".filter(F('age') > self.retain_package_versions)" would be better
            # however this is not currently possible with Django. It would be possible with raw
            # SQL but the repository version content membership subquery is currently
            # django-managed and would be difficult to share.
            #
            # Instead we have to do the filtering manually.
            nonmodular_packages = Package.objects.with_age().filter(
                pk__in=new_version.content.filter(pulp_type=Package.get_pulp_type()),
                is_modular=False,  # modular RPMs are exempt from the retention policy
            ).only('pk')

            old_packages = []
            for package in nonmodular_packages:
                if package.age > self.retain_package_versions:
                    old_packages.append(package.pk)

            new_version.remove_content(Content.objects.filter(pk__in=old_packages))
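
The comment in _apply_retention_policy() explains why the filtering happens in Python: the "age" annotation comes from a window function, which Django could not filter against at the time (newer Django releases have since added support for filtering on window expressions). A minimal sketch of how such a with_age() annotation can be built, assuming Package has name, arch, and evr fields; this illustrates the technique and is not necessarily pulp_rpm's exact implementation:

from django.db.models import F, Window
from django.db.models.functions import RowNumber

def with_age(package_qs):
    """Rank packages per (name, arch), newest first, so age == 1 is the latest version."""
    return package_qs.annotate(
        age=Window(
            expression=RowNumber(),
            partition_by=[F("name"), F("arch")],
            order_by=F("evr").desc(),  # "evr" (epoch:version-release) field is assumed here
        )
    )
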
Example 3
    def publish_artifacts(self, content):
        """
        Publish artifacts.

        Args:
            content (pulpcore.plugin.models.Content): content set.

        """
        published_artifacts = []
        for content_artifact in ContentArtifact.objects.filter(
                content__in=content.exclude(pulp_type__in=[
                    RepoMetadataFile.get_pulp_type(),
                    Modulemd.get_pulp_type(),
                    ModulemdDefaults.get_pulp_type()
                ]).distinct()).iterator():
            relative_path = content_artifact.relative_path
            if content_artifact.content.pulp_type == Package.get_pulp_type():
                relative_path = os.path.join(PACKAGES_DIRECTORY,
                                             relative_path.lower()[0],
                                             content_artifact.relative_path)
            published_artifacts.append(
                PublishedArtifact(relative_path=relative_path,
                                  publication=self.publication,
                                  content_artifact=content_artifact))

        PublishedArtifact.objects.bulk_create(published_artifacts,
                                              batch_size=2000)
Example 4
def find_children_of_content(content, src_repo_version):
    """Finds the content referenced directly by other content and returns it all together.

    Finds RPMs referenced by Advisory/Errata content.

    Args:
        content (iterable): Content for which to resolve children
        src_repo_version (pulpcore.models.RepositoryVersion): Source repo version

    Returns: Queryset of Content objects that are children of the initial set of content
    """
    # Content that were selected to be copied
    advisory_ids = content.filter(
        pulp_type=UpdateRecord.get_pulp_type()).only('pk')
    packagecategory_ids = content.filter(
        pulp_type=PackageCategory.get_pulp_type()).only('pk')
    packageenvironment_ids = content.filter(
        pulp_type=PackageEnvironment.get_pulp_type()).only('pk')
    packagegroup_ids = content.filter(
        pulp_type=PackageGroup.get_pulp_type()).only('pk')

    # Content in the source repository version
    package_ids = src_repo_version.content.filter(
        pulp_type=Package.get_pulp_type()).only('pk')
    module_ids = src_repo_version.content.filter(
        pulp_type=Modulemd.get_pulp_type()).only('pk')

    advisories = UpdateRecord.objects.filter(pk__in=advisory_ids)
    packages = Package.objects.filter(pk__in=package_ids)
    packagecategories = PackageCategory.objects.filter(
        pk__in=packagecategory_ids)
    packageenvironments = PackageEnvironment.objects.filter(
        pk__in=packageenvironment_ids)
    packagegroups = PackageGroup.objects.filter(pk__in=packagegroup_ids)
    modules = Modulemd.objects.filter(pk__in=module_ids)

    children = set()

    for advisory in advisories:
        # Find rpms referenced by Advisories/Errata
        package_nevras = advisory.get_pkglist()
        for nevra in package_nevras:
            (name, epoch, version, release, arch) = nevra
            try:
                package = packages.get(name=name,
                                       epoch=epoch,
                                       version=version,
                                       release=release,
                                       arch=arch)
                children.add(package.pk)
            except Package.DoesNotExist:
                raise
            except MultipleObjectsReturned:
                raise

        module_nsvcas = advisory.get_module_list()
        for nsvca in module_nsvcas:
            (name, stream, version, context, arch) = nsvca
            try:
                module = modules.get(name=name,
                                     stream=stream,
                                     version=version,
                                     context=context,
                                     arch=arch)
                children.add(module.pk)
            except Modulemd.DoesNotExist:
                raise
            except MultipleObjectsReturned:
                raise

    # PackageCategories & PackageEnvironments resolution must go before PackageGroups
    # TODO: refactor to be more efficient (lower number of queries)
    for packagecategory in packagecategories.iterator():
        for category_package_group in packagecategory.group_ids:
            category_package_groups = PackageGroup.objects.filter(
                name=category_package_group['name'],
                pk__in=src_repo_version.content)
            children.update(
                [pkggroup.pk for pkggroup in category_package_groups])
            packagegroups = packagegroups.union(category_package_groups)

    for packageenvironment in packageenvironments.iterator():
        for env_package_group in packageenvironment.group_ids:
            env_package_groups = PackageGroup.objects.filter(
                name=env_package_group['name'],
                pk__in=src_repo_version.content)
            children.update([envgroup.pk for envgroup in env_package_groups])
            packagegroups = packagegroups.union(env_package_groups)
        for optional_env_package_group in packageenvironment.option_ids:
            opt_env_package_groups = PackageGroup.objects.filter(
                name=optional_env_package_group['name'],
                pk__in=src_repo_version.content)
            children.update(
                [optpkggroup.pk for optpkggroup in opt_env_package_groups])
            packagegroups = packagegroups.union(opt_env_package_groups)

    # Find rpms referenced by PackageGroups
    for packagegroup in packagegroups.iterator():
        group_package_names = [pkg['name'] for pkg in packagegroup.packages]
        for pkg_name in group_package_names:
            packages_by_name = [
                pkg for pkg in Package.objects.with_age().filter(
                    name=pkg_name, pk__in=src_repo_version.content) if pkg.age == 1
            ]
            for pkg in packages_by_name:
                children.add(pkg.pk)

    return Content.objects.filter(pk__in=children)
Example 5
def publish(
    repository_version_pk,
    gpgcheck_options=None,
    metadata_signing_service=None,
    checksum_types=None,
    sqlite_metadata=False,
):
    """
    Create a Publication based on a RepositoryVersion.

    Args:
        repository_version_pk (str): Create a publication from this repository version.
        gpgcheck_options (dict): GPG signature check options.
        metadata_signing_service (pulpcore.app.models.AsciiArmoredDetachedSigningService):
            A reference to an associated signing service.
        checksum_types (dict): Checksum types for metadata and packages.
        sqlite_metadata (bool): Whether to generate metadata files in sqlite format.

    """
    repository_version = RepositoryVersion.objects.get(pk=repository_version_pk)
    repository = repository_version.repository.cast()
    checksum_types = checksum_types or {}

    if metadata_signing_service:
        metadata_signing_service = AsciiArmoredDetachedSigningService.objects.get(
            pk=metadata_signing_service
        )

    checksum_types["original"] = repository.original_checksum_types

    log.info(
        _("Publishing: repository={repo}, version={version}").format(
            repo=repository.name,
            version=repository_version.number,
        )
    )
    with tempfile.TemporaryDirectory("."):
        with RpmPublication.create(repository_version) as publication:
            kwargs = {}
            first_package = repository_version.content.filter(
                pulp_type=Package.get_pulp_type()
            ).first()
            if first_package:
                kwargs["default"] = first_package.cast().checksum_type
            publication.metadata_checksum_type = get_checksum_type(
                "primary", checksum_types, **kwargs
            )
            publication.package_checksum_type = (
                checksum_types.get("package") or publication.metadata_checksum_type
            )

            if gpgcheck_options is not None:
                publication.gpgcheck = gpgcheck_options.get("gpgcheck")
                publication.repo_gpgcheck = gpgcheck_options.get("repo_gpgcheck")

            if sqlite_metadata:
                publication.sqlite_metadata = True

            publication_data = PublicationData(publication)
            publication_data.populate()

            total_repos = 1 + len(publication_data.sub_repos)
            pb_data = dict(
                message="Generating repository metadata",
                code="publish.generating_metadata",
                total=total_repos,
            )
            with ProgressReport(**pb_data) as publish_pb:

                content = publication.repository_version.content

                # Main repo
                generate_repo_metadata(
                    content,
                    publication,
                    checksum_types,
                    publication_data.repomdrecords,
                    metadata_signing_service=metadata_signing_service,
                )
                publish_pb.increment()

                for sub_repo in publication_data.sub_repos:
                    name = sub_repo[0]
                    checksum_types["original"] = getattr(publication_data, f"{name}_checksums")
                    content = getattr(publication_data, f"{name}_content")
                    extra_repomdrecords = getattr(publication_data, f"{name}_repomdrecords")
                    generate_repo_metadata(
                        content,
                        publication,
                        checksum_types,
                        extra_repomdrecords,
                        name,
                        metadata_signing_service=metadata_signing_service,
                    )
                    publish_pb.increment()

            log.info(_("Publication: {publication} created").format(publication=publication.pk))

            return publication
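
A hedged usage sketch for the task above. The pk is a placeholder, the gpgcheck keys are the ones the code reads, and checksum_types is limited to the "package" key that publish() demonstrably consumes; in a real deployment this function is dispatched through Pulp's tasking machinery rather than called directly:

publication = publish(
    repository_version_pk="<repository-version-uuid>",  # placeholder, not a real pk
    gpgcheck_options={"gpgcheck": 1, "repo_gpgcheck": 0},
    checksum_types={"package": "sha256"},
    sqlite_metadata=False,
)
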
Example 6
    def publish_artifacts(self, content, prefix=""):
        """
        Publish artifacts.

        Args:
            content (pulpcore.plugin.models.Content): content set.
            prefix (str): a relative path prefix for the published artifact

        """
        published_artifacts = []

        # Special case for Packages
        contentartifact_qs = ContentArtifact.objects.filter(content__in=content).filter(
            content__pulp_type=Package.get_pulp_type()
        )

        paths = set()
        duplicated_paths = []
        for content_artifact in contentartifact_qs.values("pk", "relative_path").iterator():
            relative_path = content_artifact["relative_path"]
            relative_path = os.path.join(
                prefix, PACKAGES_DIRECTORY, relative_path.lower()[0], relative_path
            )
            #
            # Some Suboptimal Repos have the 'same' artifact living in multiple places.
            # Specifically, the same NEVRA, in more than one place, **with different checksums**
            # (since if all that was different was location_href there would be only one
            # ContentArtifact in the first place).
            #
            # pulp_rpm wants to publish a 'canonical' repository-layout, under which an RPM
            # "name-version-release-arch" appears at "Packages/n/name-version-release-arch.rpm".
            # Because the assumption is that Packages don't "own" their path, only the filename
            # is kept as relative_path.
            #
            # In this case, we have to pick one - which is essentially what the rest of the RPM
            # Ecosystem does when faced with the impossible. This code takes the first-found. We
            # could implement something more complicated, if there are better options
            # (choose by last-created maybe?)
            #
            # Note that this only impacts user-created publications, which produce the "standard"
            # RPM layout of repo/Packages/f/foo.rpm. A publication created by mirror-sync retains
            # whatever layout their "upstream" repo-metadata dictates.
            #
            if relative_path in paths:
                duplicated_paths.append(f'{relative_path}:{content_artifact["pk"]}')
                continue
            else:
                paths.add(relative_path)
            published_artifacts.append(
                PublishedArtifact(
                    relative_path=relative_path,
                    publication=self.publication,
                    content_artifact_id=content_artifact["pk"],
                )
            )
        if duplicated_paths:
            log.warning(
                _("Duplicate paths found at publish : {problems} ").format(
                    problems="; ".join(duplicated_paths)
                )
            )

        # Handle everything else
        is_treeinfo = Q(relative_path__in=["treeinfo", ".treeinfo"])
        unpublishable_types = Q(
            content__pulp_type__in=[
                RepoMetadataFile.get_pulp_type(),
                Modulemd.get_pulp_type(),
                ModulemdDefaults.get_pulp_type(),
                # already dealt with
                Package.get_pulp_type(),
            ]
        )

        contentartifact_qs = (
            ContentArtifact.objects.filter(content__in=content)
            .exclude(unpublishable_types)
            .exclude(is_treeinfo)
        )

        for content_artifact in contentartifact_qs.values("pk", "relative_path").iterator():
            published_artifacts.append(
                PublishedArtifact(
                    relative_path=content_artifact["relative_path"],
                    publication=self.publication,
                    content_artifact_id=content_artifact["pk"],
                )
            )

        PublishedArtifact.objects.bulk_create(published_artifacts, batch_size=2000)
Example 7
def find_children_of_content(content, src_repo_version):
    """Finds the content referenced directly by other content and returns it all together.

    Finds RPMs referenced by Advisory/Errata content.

    Args:
        content (Queryset): Content for which to resolve children
        src_repo_version (pulpcore.models.RepositoryVersion): Source repo version

    Returns: Queryset of Content objects that are children of the initial set of content
    """
    # Content that were selected to be copied
    advisory_ids = content.filter(
        pulp_type=UpdateRecord.get_pulp_type()).only("pk")
    packagecategory_ids = content.filter(
        pulp_type=PackageCategory.get_pulp_type()).only("pk")
    packageenvironment_ids = content.filter(
        pulp_type=PackageEnvironment.get_pulp_type()).only("pk")
    packagegroup_ids = content.filter(
        pulp_type=PackageGroup.get_pulp_type()).only("pk")

    # Content in the source repository version
    package_ids = src_repo_version.content.filter(
        pulp_type=Package.get_pulp_type()).only("pk")
    module_ids = src_repo_version.content.filter(
        pulp_type=Modulemd.get_pulp_type()).only("pk")

    advisories = UpdateRecord.objects.filter(pk__in=advisory_ids)
    packages = Package.objects.filter(pk__in=package_ids)
    packagecategories = PackageCategory.objects.filter(
        pk__in=packagecategory_ids)
    packageenvironments = PackageEnvironment.objects.filter(
        pk__in=packageenvironment_ids)
    packagegroups = PackageGroup.objects.filter(pk__in=packagegroup_ids)
    modules = Modulemd.objects.filter(pk__in=module_ids)

    children = set()

    for advisory in advisories.iterator():
        # Find rpms referenced by Advisories/Errata
        package_nevras = advisory.get_pkglist()
        advisory_package_q = Q(pk__in=[])
        for nevra in package_nevras:
            (name, epoch, version, release, arch) = nevra
            advisory_package_q |= Q(name=name,
                                    epoch=epoch,
                                    version=version,
                                    release=release,
                                    arch=arch)
        children.update(
            packages.filter(advisory_package_q).values_list("pk", flat=True))

        module_nsvcas = advisory.get_module_list()
        advisory_module_q = Q(pk__in=[])
        for nsvca in module_nsvcas:
            (name, stream, version, context, arch) = nsvca
            advisory_module_q |= Q(name=name,
                                   stream=stream,
                                   version=version,
                                   context=context,
                                   arch=arch)
        children.update(
            modules.filter(advisory_module_q).values_list("pk", flat=True))

    # PackageCategories & PackageEnvironments resolution must go before PackageGroups
    packagegroup_names = set()
    for packagecategory in packagecategories.iterator():
        for group_id in packagecategory.group_ids:
            packagegroup_names.add(group_id["name"])

    for packageenvironment in packageenvironments.iterator():
        for group_id in packageenvironment.group_ids:
            packagegroup_names.add(group_id["name"])
        for group_id in packageenvironment.option_ids:
            packagegroup_names.add(group_id["name"])

    child_package_groups = PackageGroup.objects.filter(
        name__in=packagegroup_names, pk__in=src_repo_version.content)
    children.update([pkggroup.pk for pkggroup in child_package_groups])
    packagegroups = packagegroups.union(child_package_groups)

    # Find rpms referenced by PackageGroups
    packagegroup_package_names = set()
    for packagegroup in packagegroups.iterator():
        packagegroup_package_names |= set(pkg["name"]
                                          for pkg in packagegroup.packages)

    # TODO: do modular/nonmodular need to be taken into account?
    existing_package_names = (Package.objects.filter(
        name__in=packagegroup_package_names,
        pk__in=content,
    ).values_list("name", flat=True).distinct())

    missing_package_names = packagegroup_package_names - set(
        existing_package_names)

    needed_packages = Package.objects.with_age().filter(
        name__in=missing_package_names, pk__in=src_repo_version.content)

    # Pick the latest version of each package available which isn't already present
    # in the content set.
    for pkg in needed_packages.iterator():
        if pkg.age == 1:
            children.add(pkg.pk)

    return Content.objects.filter(pk__in=children)
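
The advisory loops above OR Q objects onto Q(pk__in=[]), an always-false seed, so each advisory's entire NEVRA list is matched in a single query instead of one .get() per package as in the earlier examples. A standalone sketch of the pattern, with a hypothetical helper name:

from django.db.models import Q

def nevra_q(nevras):
    """Build one OR-ed filter matching any (name, epoch, version, release, arch) tuple."""
    q = Q(pk__in=[])  # matches nothing until at least one clause is OR-ed in
    for name, epoch, version, release, arch in nevras:
        q |= Q(name=name, epoch=epoch, version=version, release=release, arch=arch)
    return q

# e.g. packages.filter(nevra_q(advisory.get_pkglist()))
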
Example 8
    def publish_artifacts(self, content, prefix=""):
        """
        Publish artifacts.

        Args:
            content (pulpcore.plugin.models.Content): content set.
            prefix (str): a relative path prefix for the published artifact

        """
        published_artifacts = []

        # Special case for Packages
        contentartifact_qs = (ContentArtifact.objects.filter(
            content__in=content).filter(
                content__pulp_type=Package.get_pulp_type()).select_related(
                    "content__rpm_package__time_build"))

        rel_path_mapping = defaultdict(list)
        # Some Suboptimal Repos have the 'same' artifact living in multiple places.
        # Specifically, the same NEVRA, in more than one place, **with different checksums**
        # (since if all that was different was location_href there would be only one
        # ContentArtifact in the first place).
        #
        # pulp_rpm wants to publish a 'canonical' repository-layout, under which an RPM
        # "name-version-release-arch" appears at "Packages/n/name-version-release-arch.rpm".
        # Because the assumption is that Packages don't "own" their path, only the filename
        # is kept as relative_path.
        #
        # In this case, we have to pick one - which is essentially what the rest of the RPM
        # Ecosystem does when faced with the impossible. This code takes the one with the
        # most recent build time which is the same heuristic used by Yum/DNF/Zypper.
        #
        # Note that this only impacts user-created publications, which produce the "standard"
        # RPM layout of repo/Packages/f/foo.rpm. A publication created by mirror-sync retains
        # whatever layout their "upstream" repo-metadata dictates.
        fields = ["pk", "relative_path", "content__rpm_package__time_build"]
        for content_artifact in contentartifact_qs.values(*fields).iterator():
            relative_path = content_artifact["relative_path"]
            time_build = content_artifact["content__rpm_package__time_build"]

            relative_path = os.path.join(prefix, PACKAGES_DIRECTORY,
                                         relative_path.lower()[0],
                                         relative_path)
            rel_path_mapping[relative_path].append(
                (content_artifact["pk"], time_build))

        for rel_path, content_artifacts in rel_path_mapping.items():
            # sort the content artifacts by when the package was built
            if len(content_artifacts) > 1:
                content_artifacts.sort(key=lambda p: p[1], reverse=True)
                log.warning(
                    "Duplicate packages found competing for {path}, selected the one with "
                    "the most recent build time, excluding {others} others.".
                    format(path=rel_path, others=len(content_artifacts[1:])))

            # Only add the first one (the one with the highest build time)
            published_artifacts.append(
                PublishedArtifact(
                    relative_path=rel_path,
                    publication=self.publication,
                    content_artifact_id=content_artifacts[0][0],
                ))

        # Handle everything else
        is_treeinfo = Q(relative_path__in=["treeinfo", ".treeinfo"])
        unpublishable_types = Q(content__pulp_type__in=[
            RepoMetadataFile.get_pulp_type(),
            Modulemd.get_pulp_type(),
            ModulemdDefaults.get_pulp_type(),
            # already dealt with
            Package.get_pulp_type(),
        ])

        contentartifact_qs = (ContentArtifact.objects.filter(
            content__in=content).exclude(unpublishable_types).exclude(
                is_treeinfo))

        for content_artifact in contentartifact_qs.values(
                "pk", "relative_path").iterator():
            published_artifacts.append(
                PublishedArtifact(
                    relative_path=content_artifact["relative_path"],
                    publication=self.publication,
                    content_artifact_id=content_artifact["pk"],
                ))

        PublishedArtifact.objects.bulk_create(published_artifacts,
                                              batch_size=2000)
Example 9
def find_children_of_content(content, repository_version):
    """Finds the content referenced directly by other content and returns it all together.

    Finds RPMs referenced by Advisory/Errata content.

    Args:
        content (iterable): Content for which to resolve children
        repository_version (pulpcore.models.RepositoryVersion): Source repo version

    Returns: Queryset of Content objects that are children of the initial set of content
    """
    # Advisories that were selected to be copied
    advisory_ids = content.filter(
        pulp_type=UpdateRecord.get_pulp_type()).only('pk')
    # All packages in the source repository version
    package_ids = repository_version.content.filter(
        pulp_type=Package.get_pulp_type()).only('pk')
    # All modules in the source repository version
    module_ids = repository_version.content.filter(
        pulp_type=Modulemd.get_pulp_type()).only('pk')

    advisories = UpdateRecord.objects.filter(pk__in=advisory_ids)
    packages = Package.objects.filter(pk__in=package_ids)
    modules = Modulemd.objects.filter(pk__in=module_ids)

    children = set()

    for advisory in advisories:
        # Find rpms referenced by Advisories/Errata
        package_nevras = advisory.get_pkglist()
        for nevra in package_nevras:
            (name, epoch, version, release, arch) = nevra
            try:
                package = packages.get(name=name,
                                       epoch=epoch,
                                       version=version,
                                       release=release,
                                       arch=arch)
                children.add(package.pk)
            except Package.DoesNotExist:
                raise
            except MultipleObjectsReturned:
                raise

        module_nsvcas = advisory.get_module_list()
        for nsvca in module_nsvcas:
            (name, stream, version, context, arch) = nsvca
            try:
                module = modules.get(name=name,
                                     stream=stream,
                                     version=version,
                                     context=context,
                                     arch=arch)
                children.add(module.pk)
            except Modulemd.DoesNotExist:
                raise
            except MultipleObjectsReturned:
                raise

    # TODO: Find rpms referenced by PackageGroups,
    # PackageGroups referenced by PackageCategories, etc.

    return Content.objects.filter(pk__in=children)
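
A hedged usage sketch, assuming content_to_copy is a Content queryset and src_version a RepositoryVersion; it resolves the children and then works with the union of both sets by pk:

children = find_children_of_content(content_to_copy, src_version)
all_pks = set(content_to_copy.values_list("pk", flat=True)) | set(
    children.values_list("pk", flat=True)
)
everything = Content.objects.filter(pk__in=all_pks)
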