Example #1
    def test_segv2(self):
        c1 = libcomps.Comps()
        c1.fromxml_f(support.COMPS_PATH)

        c2 = libcomps.Comps()
        c2.fromxml_f(support.COMPS_PATH)

        c = c1 + c2
        x = c.groups[0].packages[0].name
Example #2
 def load_input_files(self):
     """
     Loads all input xml files.
     Returns True if at least one file was successfully loaded
     """
     for file_name in self.opts.load:
         file_comps = libcomps.Comps()
         try:
             if file_name.endswith('.gz'):
                 # libcomps does not support gzipped files - decompress to temporary
                 # location
                 with gzip.open(file_name) as gz_file:
                     temp_file = tempfile.NamedTemporaryFile(delete=False)
                     try:
                         shutil.copyfileobj(gz_file, temp_file)
                         # close temp_file to ensure the content is flushed to disk
                         temp_file.close()
                         file_comps.fromxml_f(temp_file.name)
                     finally:
                         os.unlink(temp_file.name)
             else:
                 file_comps.fromxml_f(file_name)
         except (IOError, OSError, libcomps.ParserError) as err:
             # gzip module raises OSError on reading from malformed gz file
             # get_last_errors() output often contains duplicate lines; remove them
             seen = set()
             for error in file_comps.get_last_errors():
                 if error in seen:
                     continue
                 logger.error(error.strip())
                 seen.add(error)
             raise dnf.exceptions.Error(
                 _("Can't load file \"{}\": {}").format(file_name, err))
         else:
             self.comps += file_comps
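
The comment in Example #2 points out that libcomps cannot read gzip-compressed files directly, so the data is first decompressed to a temporary file. A minimal stand-alone sketch of that workaround (the function name and the .xml.gz path are only illustrative):

import gzip
import os
import shutil
import tempfile

import libcomps


def load_comps_xml_gz(path):
    """Decompress a gzipped comps file to a temporary file and parse it."""
    comps = libcomps.Comps()
    with gzip.open(path) as gz_file:
        temp_file = tempfile.NamedTemporaryFile(delete=False)
        try:
            # copy the decompressed bytes and flush them to disk before parsing
            shutil.copyfileobj(gz_file, temp_file)
            temp_file.close()
            comps.fromxml_f(temp_file.name)
        finally:
            os.unlink(temp_file.name)
    return comps
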
Example #3
    def test_environment_parse(self):
        xml = """\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE comps PUBLIC "-//Red Hat, Inc.//DTD Comps info//EN" "comps.dtd">
<comps>
  <group>
   <id>somerset</id>
   <default>true</default>
   <uservisible>true</uservisible>
   <display_order>1024</display_order>
   <name>Solid Ground</name>
   <description>--</description>
    <packagelist>
      <packagereq type="mandatory">pepper</packagereq>
      <packagereq type="mandatory">trampoline</packagereq>
    </packagelist>
  </group>
  <environment>
    <id>minimal</id>
    <name>Min install</name>
    <description>Basic functionality.</description>
    <display_order>5</display_order>
    <grouplist>
      <groupid>somerset</groupid>
    </grouplist>
  </environment>
</comps>
"""
        comps = libcomps.Comps()
        ret = comps.fromxml_str(xml)
        self.assertGreaterEqual(ret, 0)
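
As a rough sketch of what the parsed document above exposes (attribute access patterns borrowed from the other examples on this page, e.g. groups[0].packages[0].name), assuming the same xml string as in the test:

comps = libcomps.Comps()
if comps.fromxml_str(xml) >= 0:
    group = comps.groups[0]
    env = comps.environments[0]
    print(group.id, group.packages[0].name)  # somerset pepper
    print(env.id, env.name)                  # minimal Min install
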
Example #4
 def _add_from_xml_filename(self, fn):
     comps = libcomps.Comps()
     ret = comps.fromxml_f(fn)
     if ret == -1:
         errors = comps.get_last_parse_errors()
         raise CompsError(' '.join(errors))
     self._i = self._i + comps
Example #5
 def _add_from_xml_filename(self, fn):
     comps = libcomps.Comps()
     try:
         comps.fromxml_f(fn)
     except libcomps.ParserError:
         errors = comps.get_last_errors()
         raise CompsError(' '.join(errors))
     self._i += comps
Example #6
def parse_comps_components(comps_file):
    """Parse comps-related components found in the specified file."""
    # created = {"categories": [], "environments": [], "groups": [], "langpack": None}
    created_objects = []
    all_objects = []
    comps = libcomps.Comps()
    # Read the file and pass the string along because comps.fromxml_f() will only take a
    # path-string that doesn't work on things like S3 storage
    with comps_file.file.open("rb") as f:
        data = f.read()
        comps.fromxml_str(data.decode("utf-8"))

    if comps.langpacks:
        langpack_dict = PackageLangpacks.libcomps_to_dict(comps.langpacks)
        langpack, created = PackageLangpacks.objects.get_or_create(
            matches=strdict_to_dict(comps.langpacks),
            digest=dict_digest(langpack_dict))
        if created:
            created_objects.append(langpack)
        all_objects.append(langpack)

    if comps.categories:
        for category in comps.categories:
            category_dict = PackageCategory.libcomps_to_dict(category)
            category_dict["digest"] = dict_digest(category_dict)
            packagecategory, created = PackageCategory.objects.get_or_create(
                **category_dict)
            if created:
                created_objects.append(packagecategory)
            all_objects.append(packagecategory)

    if comps.environments:
        for environment in comps.environments:
            environment_dict = PackageEnvironment.libcomps_to_dict(environment)
            environment_dict["digest"] = dict_digest(environment_dict)
            packageenvironment, created = PackageEnvironment.objects.get_or_create(
                **environment_dict)
            if created:
                created_objects.append(packageenvironment)
            all_objects.append(packageenvironment)

    if comps.groups:
        for group in comps.groups:
            group_dict = PackageGroup.libcomps_to_dict(group)
            group_dict["digest"] = dict_digest(group_dict)
            packagegroup, created = PackageGroup.objects.get_or_create(
                **group_dict)
            if created:
                created_objects.append(packagegroup)
            all_objects.append(packagegroup)

    return created_objects, all_objects
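
The comment in Example #6 explains why comps.fromxml_str() is used there: comps.fromxml_f() only accepts a filesystem path, which does not work for file-like objects such as files stored on S3. A minimal sketch of that read-then-parse pattern for any open binary file object (the helper name is hypothetical):

import libcomps


def comps_from_fileobj(fileobj):
    """Parse comps XML from an already-open binary file-like object."""
    comps = libcomps.Comps()
    data = fileobj.read()
    # fromxml_str() takes the XML document as a string, so no path is needed
    comps.fromxml_str(data.decode("utf-8"))
    return comps
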
Example #7
    def parse(self):
        """Parse packages' components."""
        comps = libcomps.Comps()
        comps.fromxml_f(self.comps_result.path)

        with ProgressReport(message="Parsed Comps",
                            code="parsing.comps") as comps_pb:
            comps_total = len(comps.groups) + len(comps.categories) + len(
                comps.environments)
            comps_pb.total = comps_total
            comps_pb.done = comps_total

        if comps.langpacks:
            langpack_dict = PackageLangpacks.libcomps_to_dict(comps.langpacks)
            packagelangpack = PackageLangpacks(
                matches=strdict_to_dict(comps.langpacks),
                digest=dict_digest(langpack_dict))
            self.package_language_pack_dc = DeclarativeContent(
                content=packagelangpack)
            self.package_language_pack_dc.extra_data = defaultdict(list)

        self._init_dc_categories(comps)
        self._init_dc_environments(comps)
        self._init_dc_groups(comps)
Example #8
 def test_segv(self):
     c1 = libcomps.Comps()
     c2 = libcomps.Comps()
     c2.fromxml_f(support.COMPS_PATH)
     c = c1 + c2  # sigsegved here
Example #9
 def __init__(self):
     self._i = libcomps.Comps()
     self._langs = _Langs()
Example #10
 def __init__(self, cli):
     super(GroupsManagerCommand, self).__init__(cli)
     self.comps = libcomps.Comps()
Example #11
def create_repomd_xml(
    content,
    publication,
    checksum_types,
    extra_repomdrecords,
    sub_folder=None,
    metadata_signing_service=None,
):
    """
    Creates a repomd.xml file.

    Args:
        content(app.models.Content): content set
        publication(pulpcore.plugin.models.Publication): the publication
        checksum_types(dict): checksum types to use for repository metadata and packages
        extra_repomdrecords(list): list with data relative to repo metadata files
        sub_folder(str): name of the folder for sub repos
        metadata_signing_service (pulpcore.app.models.AsciiArmoredDetachedSigningService):
            A reference to an associated signing service.

    """
    cwd = os.getcwd()
    repodata_path = REPODATA_PATH
    has_modules = False
    has_comps = False
    package_checksum_type = checksum_types.get("package")

    if sub_folder:
        cwd = os.path.join(cwd, sub_folder)
        repodata_path = os.path.join(sub_folder, repodata_path)

    # Prepare metadata files
    repomd_path = os.path.join(cwd, "repomd.xml")
    pri_xml_path = os.path.join(cwd, "primary.xml.gz")
    fil_xml_path = os.path.join(cwd, "filelists.xml.gz")
    oth_xml_path = os.path.join(cwd, "other.xml.gz")
    upd_xml_path = os.path.join(cwd, "updateinfo.xml.gz")
    mod_yml_path = os.path.join(cwd, "modules.yaml")
    comps_xml_path = os.path.join(cwd, "comps.xml")

    pri_xml = cr.PrimaryXmlFile(pri_xml_path)
    fil_xml = cr.FilelistsXmlFile(fil_xml_path)
    oth_xml = cr.OtherXmlFile(oth_xml_path)
    upd_xml = cr.UpdateInfoXmlFile(upd_xml_path)

    if publication.sqlite_metadata:
        pri_db_path = os.path.join(cwd, "primary.sqlite")
        fil_db_path = os.path.join(cwd, "filelists.sqlite")
        oth_db_path = os.path.join(cwd, "other.sqlite")
        pri_db = cr.PrimarySqlite(pri_db_path)
        fil_db = cr.FilelistsSqlite(fil_db_path)
        oth_db = cr.OtherSqlite(oth_db_path)

    packages = Package.objects.filter(pk__in=content)
    total_packages = packages.count()

    pri_xml.set_num_of_pkgs(total_packages)
    fil_xml.set_num_of_pkgs(total_packages)
    oth_xml.set_num_of_pkgs(total_packages)

    # We want to support publishing with a different checksum type than the one built-in to the
    # package itself, so we need to get the correct checksums somehow if there is an override.
    # We must also take into consideration that if the package has not been downloaded the only
    # checksum that is available is the one built-in.
    #
    # Since this lookup goes from Package->Content->ContentArtifact->Artifact, performance is a
    # challenge. We use ContentArtifact as our starting point because it enables us to work with
    # simple foreign keys and avoid messing with the many-to-many relationship, which doesn't
    # work with select_related() and performs poorly with prefetch_related(). This is fine
    # because we know that Packages should only ever have one artifact per content.
    contentartifact_qs = (
        ContentArtifact.objects.filter(content__in=packages.only("pk")).
        select_related(
            # content__rpm_package is a bit of a hack, exploiting the way django sets up model
            # inheritance, but it works and is unlikely to break. All content artifacts being
            # accessed here have an associated Package since they originally came from the
            # Package queryset.
            "artifact",
            "content__rpm_package",
        ).only("artifact", "content__rpm_package__checksum_type",
               "content__rpm_package__pkgId"))

    pkg_to_hash = {}
    for ca in contentartifact_qs.iterator():
        pkgid = None
        if package_checksum_type:
            package_checksum_type = package_checksum_type.lower()
            pkgid = getattr(ca.artifact, package_checksum_type, None)
        if pkgid:
            pkg_to_hash[ca.content_id] = (package_checksum_type, pkgid)
        else:
            pkg_to_hash[ca.content_id] = (
                ca.content.rpm_package.checksum_type,
                ca.content.rpm_package.pkgId,
            )

    # Process all packages
    for package in packages.iterator():
        pkg = package.to_createrepo_c()

        # rewrite the checksum and checksum type with the desired ones
        (checksum, pkgId) = pkg_to_hash[package.pk]
        pkg.checksum_type = checksum
        pkg.pkgId = pkgId

        pkg_filename = os.path.basename(package.location_href)
        # this can cause an issue when two RPM packages share the same file name,
        # e.g. a/name1.rpm and b/name1.rpm
        pkg.location_href = os.path.join(PACKAGES_DIRECTORY,
                                         pkg_filename[0].lower(), pkg_filename)
        pri_xml.add_pkg(pkg)
        fil_xml.add_pkg(pkg)
        oth_xml.add_pkg(pkg)
        if publication.sqlite_metadata:
            pri_db.add_pkg(pkg)
            fil_db.add_pkg(pkg)
            oth_db.add_pkg(pkg)

    # Process update records
    for update_record in UpdateRecord.objects.filter(
            pk__in=content).iterator():
        upd_xml.add_chunk(
            cr.xml_dump_updaterecord(update_record.to_createrepo_c()))

    # Process modulemd and modulemd_defaults
    with open(mod_yml_path, "ab") as mod_yml:
        for modulemd in Modulemd.objects.filter(pk__in=content).iterator():
            mod_yml.write(modulemd._artifacts.get().file.read())
            has_modules = True
        for default in ModulemdDefaults.objects.filter(
                pk__in=content).iterator():
            mod_yml.write(default._artifacts.get().file.read())
            has_modules = True

    # Process comps
    comps = libcomps.Comps()
    for pkg_grp in PackageGroup.objects.filter(pk__in=content).iterator():
        group = pkg_grp.pkg_grp_to_libcomps()
        comps.groups.append(group)
        has_comps = True
    for pkg_cat in PackageCategory.objects.filter(pk__in=content).iterator():
        cat = pkg_cat.pkg_cat_to_libcomps()
        comps.categories.append(cat)
        has_comps = True
    for pkg_env in PackageEnvironment.objects.filter(
            pk__in=content).iterator():
        env = pkg_env.pkg_env_to_libcomps()
        comps.environments.append(env)
        has_comps = True
    for pkg_lng in PackageLangpacks.objects.filter(pk__in=content).iterator():
        comps.langpacks = dict_to_strdict(pkg_lng.matches)
        has_comps = True

    comps.toxml_f(
        comps_xml_path,
        xml_options={
            "default_explicit": True,
            "empty_groups": True,
            "uservisible_explicit": True
        },
    )

    pri_xml.close()
    fil_xml.close()
    oth_xml.close()
    upd_xml.close()

    repomd = cr.Repomd()

    if publication.sqlite_metadata:
        repomdrecords = [
            ("primary", pri_xml_path, pri_db),
            ("filelists", fil_xml_path, fil_db),
            ("other", oth_xml_path, oth_db),
            ("primary_db", pri_db_path, None),
            ("filelists_db", fil_db_path, None),
            ("other_db", oth_db_path, None),
            ("updateinfo", upd_xml_path, None),
        ]
    else:
        repomdrecords = [
            ("primary", pri_xml_path, None),
            ("filelists", fil_xml_path, None),
            ("other", oth_xml_path, None),
            ("updateinfo", upd_xml_path, None),
        ]

    if has_modules:
        repomdrecords.append(("modules", mod_yml_path, None))

    if has_comps:
        repomdrecords.append(("group", comps_xml_path, None))

    repomdrecords.extend(extra_repomdrecords)

    sqlite_files = ("primary_db", "filelists_db", "other_db")
    for name, path, db_to_update in repomdrecords:
        record = cr.RepomdRecord(name, path)
        checksum_type = get_checksum_type(name, checksum_types)
        if name in sqlite_files:
            record_bz = record.compress_and_fill(checksum_type, cr.BZ2)
            record_bz.type = name
            record_bz.rename_file()
            path = record_bz.location_href.split("/")[-1]
            repomd.set_record(record_bz)
        else:
            record.fill(checksum_type)
            if db_to_update:
                db_to_update.dbinfo_update(record.checksum)
                db_to_update.close()
            record.rename_file()
            path = record.location_href.split("/")[-1]
            repomd.set_record(record)

        if sub_folder:
            path = os.path.join(sub_folder, path)

        PublishedMetadata.create_from_file(
            relative_path=os.path.join(repodata_path, os.path.basename(path)),
            publication=publication,
            file=File(open(path, "rb")),
        )

    with open(repomd_path, "w") as repomd_f:
        repomd_f.write(repomd.xml_dump())

    if metadata_signing_service:
        signing_service = AsciiArmoredDetachedSigningService.objects.get(
            pk=metadata_signing_service.pk)
        sign_results = signing_service.sign(repomd_path)

        # publish a signed file
        PublishedMetadata.create_from_file(
            relative_path=os.path.join(repodata_path,
                                       os.path.basename(sign_results["file"])),
            publication=publication,
            file=File(open(sign_results["file"], "rb")),
        )

        # publish a detached signature
        PublishedMetadata.create_from_file(
            relative_path=os.path.join(
                repodata_path, os.path.basename(sign_results["signature"])),
            publication=publication,
            file=File(open(sign_results["signature"], "rb")),
        )

        # publish a public key required for further verification
        PublishedMetadata.create_from_file(
            relative_path=os.path.join(repodata_path,
                                       os.path.basename(sign_results["key"])),
            publication=publication,
            file=File(open(sign_results["key"], "rb")),
        )
    else:
        PublishedMetadata.create_from_file(
            relative_path=os.path.join(repodata_path,
                                       os.path.basename(repomd_path)),
            publication=publication,
            file=File(open(repomd_path, "rb")),
        )
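
The package loop in Example #11 rewrites pkg.location_href so that packages are grouped under PACKAGES_DIRECTORY by the first letter of their file name, which is why the comment warns about two packages sharing a file name. A small sketch of that path construction in isolation (the helper name and the PACKAGES_DIRECTORY value are assumptions):

import os

PACKAGES_DIRECTORY = "Packages"  # assumed value, for illustration only


def package_location_href(location_href):
    filename = os.path.basename(location_href)
    # e.g. "a/name1.rpm" and "b/name1.rpm" both map to "Packages/n/name1.rpm"
    return os.path.join(PACKAGES_DIRECTORY, filename[0].lower(), filename)
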
Example #12
            h.setopt(librepo.LRO_CHECKSUM, True)
            h.setopt(librepo.LRO_PROGRESSCB, dl_callback)
            h.setopt(librepo.LRO_YUMDLIST, ["group", "primary"])
            h.setopt(librepo.LRO_INTERRUPTIBLE, True)
            h.perform(r)
            repo_info = r.getinfo(librepo.LRR_YUM_REPO)

            # Get primary
            primary_sack = hawkey.Sack()
            hk_repo = hawkey.Repo(repo)
            hk_repo.repomd_fn = repo_info['repomd']
            hk_repo.primary_fn = repo_info['primary']
            primary_sack.load_repo(hk_repo, load_filelists=False)

            # Get comps
            comps = libcomps.Comps()
            if 'group' in repo_info:
                ret = comps.fromxml_f(repo_info['group'])
                if ret == -1:
                    print('Error parsing')
                    break

            repos[repo] = (comps, primary_sack)

    expanded = 0
    in_packages = contents['packages']
    out_packages = set()
    for package in in_packages:
        if package.startswith('@'):
            # Evaluate
            package = package[1:]
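
Example #12 stops right after stripping the leading "@" from a group name, so the actual expansion is not shown. A hedged guess at how such a lookup could continue, using only attributes that appear elsewhere on this page (comps.groups, group.id, group.packages, package.name):

def expand_group(comps, group_id):
    """Return the package names of the comps group with the given id, if any."""
    for group in comps.groups:
        if group.id == group_id:
            return {pkg.name for pkg in group.packages}
    return set()
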
Example #13
    async def run(self):
        """
        Build `DeclarativeContent` from the repodata.
        """
        remote_url = self.new_url or self.remote.url
        remote_url = remote_url if remote_url[-1] == "/" else f"{remote_url}/"
        optimize_sync = self.optimize

        progress_data = dict(message='Downloading Metadata Files',
                             code='downloading.metadata')
        with ProgressReport(**progress_data) as metadata_pb:
            downloader = self.remote.get_downloader(
                url=urljoin(remote_url, 'repodata/repomd.xml'))
            # TODO: decide how to distinguish between a mirror list and a normal repo
            result = await downloader.run()
            metadata_pb.increment()

            repomd_path = result.path
            repomd = cr.Repomd(repomd_path)

            # Caution: we are not storing when the remote was last updated, so this
            # logic must keep its current order: check the version number first, because
            # changes other than a sync could have taken place, leaving the date or repo
            # version different from the last sync
            if (optimize_sync and self.repository.last_sync_remote
                    and self.remote.pk == self.repository.last_sync_remote.pk
                    and (self.repository.last_sync_repo_version
                         == self.repository.latest_version().number)
                    and (self.remote.pulp_last_updated <=
                         self.repository.latest_version().pulp_created)
                    and is_previous_version(
                        repomd.revision,
                        self.repository.last_sync_revision_number)):
                optimize_data = dict(message='Optimizing Sync',
                                     code='optimizing.sync')
                with ProgressReport(**optimize_data) as optimize_pb:
                    optimize_pb.done = 1
                    optimize_pb.save()
                    return

            self.repository.last_sync_revision_number = repomd.revision

            if self.treeinfo:
                d_artifacts = [
                    DeclarativeArtifact(
                        artifact=Artifact(),
                        url=urljoin(remote_url, self.treeinfo["filename"]),
                        relative_path=".treeinfo",
                        remote=self.remote,
                        deferred_download=False,
                    )
                ]
                for path, checksum in self.treeinfo["download"][
                        "images"].items():
                    artifact = Artifact(**checksum)
                    da = DeclarativeArtifact(
                        artifact=artifact,
                        url=urljoin(remote_url, path),
                        relative_path=path,
                        remote=self.remote,
                        deferred_download=self.deferred_download)
                    d_artifacts.append(da)

                distribution_tree = DistributionTree(
                    **self.treeinfo["distribution_tree"])
                dc = DeclarativeContent(content=distribution_tree,
                                        d_artifacts=d_artifacts)
                dc.extra_data = self.treeinfo
                await self.put(dc)

            package_repodata_urls = {}
            downloaders = []
            modulemd_list = list()
            dc_groups = []
            dc_categories = []
            dc_environments = []
            nevra_to_module = defaultdict(dict)
            pkgname_to_groups = defaultdict(list)
            group_to_categories = defaultdict(list)
            group_to_environments = defaultdict(list)
            optionalgroup_to_environments = defaultdict(list)
            modulemd_results = None
            comps_downloader = None
            main_types = set()
            checksums = {}

            for record in repomd.records:
                checksums[record.type] = record.checksum_type.upper()
                if record.type in PACKAGE_REPODATA:
                    main_types.update([record.type])
                    package_repodata_urls[record.type] = urljoin(
                        remote_url, record.location_href)

                elif record.type in UPDATE_REPODATA:
                    updateinfo_url = urljoin(remote_url, record.location_href)
                    downloader = self.remote.get_downloader(url=updateinfo_url)
                    downloaders.append([downloader.run()])

                elif record.type in COMPS_REPODATA:
                    comps_url = urljoin(remote_url, record.location_href)
                    comps_downloader = self.remote.get_downloader(
                        url=comps_url)

                elif record.type in SKIP_REPODATA:
                    continue

                elif '_zck' in record.type:
                    continue

                elif record.type in MODULAR_REPODATA:
                    modules_url = urljoin(remote_url, record.location_href)
                    modulemd_downloader = self.remote.get_downloader(
                        url=modules_url)
                    modulemd_results = await modulemd_downloader.run()

                elif record.type not in PACKAGE_DB_REPODATA:
                    file_data = {
                        record.checksum_type: record.checksum,
                        "size": record.size
                    }
                    da = DeclarativeArtifact(
                        artifact=Artifact(**file_data),
                        url=urljoin(remote_url, record.location_href),
                        relative_path=record.location_href,
                        remote=self.remote,
                        deferred_download=False)
                    repo_metadata_file = RepoMetadataFile(
                        data_type=record.type,
                        checksum_type=record.checksum_type,
                        checksum=record.checksum,
                    )
                    dc = DeclarativeContent(content=repo_metadata_file,
                                            d_artifacts=[da])
                    await self.put(dc)

            missing_type = set(PACKAGE_REPODATA) - main_types
            if missing_type:
                raise FileNotFoundError(
                    _("XML file(s): {filename} not found").format(
                        filename=", ".join(missing_type)))

            self.repository.original_checksum_types = checksums

            # we have to sync module.yaml first if it exists, to make relations to packages
            if modulemd_results:
                modulemd_index = mmdlib.ModuleIndex.new()
                open_func = gzip.open if modulemd_results.url.endswith(
                    '.gz') else open
                with open_func(modulemd_results.path, 'r') as moduleyaml:
                    content = moduleyaml.read()
                    module_content = content if isinstance(
                        content, str) else content.decode()
                    modulemd_index.update_from_string(module_content, True)

                modulemd_names = modulemd_index.get_module_names() or []
                modulemd_all = parse_modulemd(modulemd_names, modulemd_index)

                # Parsing modules happens all at one time, and from here on no useful work happens.
                # So just report that it finished this stage.
                modulemd_pb_data = {
                    'message': 'Parsed Modulemd',
                    'code': 'parsing.modulemds'
                }
                with ProgressReport(**modulemd_pb_data) as modulemd_pb:
                    modulemd_total = len(modulemd_all)
                    modulemd_pb.total = modulemd_total
                    modulemd_pb.done = modulemd_total

                for modulemd in modulemd_all:
                    artifact = modulemd.pop('artifact')
                    relative_path = '{}{}{}{}{}snippet'.format(
                        modulemd[PULP_MODULE_ATTR.NAME],
                        modulemd[PULP_MODULE_ATTR.STREAM],
                        modulemd[PULP_MODULE_ATTR.VERSION],
                        modulemd[PULP_MODULE_ATTR.CONTEXT],
                        modulemd[PULP_MODULE_ATTR.ARCH])
                    da = DeclarativeArtifact(artifact=artifact,
                                             relative_path=relative_path,
                                             url=modules_url)
                    modulemd_content = Modulemd(**modulemd)
                    dc = DeclarativeContent(content=modulemd_content,
                                            d_artifacts=[da])
                    dc.extra_data = defaultdict(list)

                    # dc.content.artifacts are Modulemd artifacts
                    for artifact in dc.content.artifacts:
                        nevra_to_module.setdefault(artifact, set()).add(dc)
                    modulemd_list.append(dc)

                # delete list now that we're done with it for memory savings
                del modulemd_all

                modulemd_default_names = parse_defaults(modulemd_index)

                # Parsing module-defaults happens all at one time, and from here on no useful
                # work happens. So just report that it finished this stage.
                modulemd_defaults_pb_data = {
                    'message': 'Parsed Modulemd-defaults',
                    'code': 'parsing.modulemd_defaults'
                }
                with ProgressReport(
                        **modulemd_defaults_pb_data) as modulemd_defaults_pb:
                    modulemd_defaults_total = len(modulemd_default_names)
                    modulemd_defaults_pb.total = modulemd_defaults_total
                    modulemd_defaults_pb.done = modulemd_defaults_total

                for default in modulemd_default_names:
                    artifact = default.pop('artifact')
                    relative_path = '{}{}snippet'.format(
                        default[PULP_MODULEDEFAULTS_ATTR.MODULE],
                        default[PULP_MODULEDEFAULTS_ATTR.STREAM])
                    da = DeclarativeArtifact(artifact=artifact,
                                             relative_path=relative_path,
                                             url=modules_url)
                    default_content = ModulemdDefaults(**default)
                    dc = DeclarativeContent(content=default_content,
                                            d_artifacts=[da])
                    await self.put(dc)

                # delete list now that we're done with it for memory savings
                del modulemd_default_names

            if comps_downloader:
                comps_result = await comps_downloader.run()

                comps = libcomps.Comps()
                comps.fromxml_f(comps_result.path)

                with ProgressReport(message='Parsed Comps',
                                    code='parsing.comps') as comps_pb:
                    comps_total = (len(comps.groups) + len(comps.categories) +
                                   len(comps.environments))
                    comps_pb.total = comps_total
                    comps_pb.done = comps_total

                if comps.langpacks:
                    langpack_dict = PackageLangpacks.libcomps_to_dict(
                        comps.langpacks)
                    packagelangpack = PackageLangpacks(
                        matches=strdict_to_dict(comps.langpacks),
                        digest=dict_digest(langpack_dict))
                    dc = DeclarativeContent(content=packagelangpack)
                    dc.extra_data = defaultdict(list)
                    await self.put(dc)

                if comps.categories:
                    for category in comps.categories:
                        category_dict = PackageCategory.libcomps_to_dict(
                            category)
                        category_dict['digest'] = dict_digest(category_dict)
                        packagecategory = PackageCategory(**category_dict)
                        dc = DeclarativeContent(content=packagecategory)
                        dc.extra_data = defaultdict(list)

                        if packagecategory.group_ids:
                            for group_id in packagecategory.group_ids:
                                group_to_categories[group_id['name']].append(
                                    dc)
                        dc_categories.append(dc)

                if comps.environments:
                    for environment in comps.environments:
                        environment_dict = PackageEnvironment.libcomps_to_dict(
                            environment)
                        environment_dict['digest'] = dict_digest(
                            environment_dict)
                        packageenvironment = PackageEnvironment(
                            **environment_dict)
                        dc = DeclarativeContent(content=packageenvironment)
                        dc.extra_data = defaultdict(list)

                        if packageenvironment.option_ids:
                            for option_id in packageenvironment.option_ids:
                                optionalgroup_to_environments[
                                    option_id['name']].append(dc)

                        if packageenvironment.group_ids:
                            for group_id in packageenvironment.group_ids:
                                group_to_environments[group_id['name']].append(
                                    dc)

                        dc_environments.append(dc)

                if comps.groups:
                    for group in comps.groups:
                        group_dict = PackageGroup.libcomps_to_dict(group)
                        group_dict['digest'] = dict_digest(group_dict)
                        packagegroup = PackageGroup(**group_dict)
                        dc = DeclarativeContent(content=packagegroup)
                        dc.extra_data = defaultdict(list)

                        if packagegroup.packages:
                            for package in packagegroup.packages:
                                pkgname_to_groups[package['name']].append(dc)

                        if dc.content.id in group_to_categories.keys():
                            for dc_category in group_to_categories[
                                    dc.content.id]:
                                dc.extra_data['category_relations'].append(
                                    dc_category)
                                dc_category.extra_data['packagegroups'].append(
                                    dc)

                        if dc.content.id in group_to_environments.keys():
                            for dc_environment in group_to_environments[
                                    dc.content.id]:
                                dc.extra_data['environment_relations'].append(
                                    dc_environment)
                                dc_environment.extra_data[
                                    'packagegroups'].append(dc)

                        if dc.content.id in optionalgroup_to_environments:
                            for dc_environment in optionalgroup_to_environments[
                                    dc.content.id]:
                                dc.extra_data['env_relations_optional'].append(
                                    dc_environment)
                                dc_environment.extra_data[
                                    'optionalgroups'].append(dc)

                        dc_groups.append(dc)

                for dc_category in dc_categories:
                    await self.put(dc_category)

                for dc_environment in dc_environments:
                    await self.put(dc_environment)

            # delete lists now that we're done with them for memory savings
            del dc_environments
            del dc_categories

            # to preserve order, downloaders are created after all repodata urls are identified
            package_repodata_downloaders = []
            for repodata_type in PACKAGE_REPODATA:
                downloader = self.remote.get_downloader(
                    url=package_repodata_urls[repodata_type])
                package_repodata_downloaders.append(downloader.run())

            downloaders.append(package_repodata_downloaders)

            # asyncio.gather is used to preserve the order of results for package repodata
            pending = [
                asyncio.gather(*downloaders_group)
                for downloaders_group in downloaders
            ]

            while pending:
                done, pending = await asyncio.wait(
                    pending, return_when=asyncio.FIRST_COMPLETED)
                for downloader in done:
                    try:
                        results = downloader.result()
                    except ClientResponseError as exc:
                        raise HTTPNotFound(
                            reason=_("File not found: {filename}").format(
                                filename=exc.request_info.url))
                    if results[0].url == package_repodata_urls['primary']:
                        primary_xml_path = results[0].path
                        filelists_xml_path = results[1].path
                        other_xml_path = results[2].path
                        metadata_pb.done += 3
                        metadata_pb.save()

                        packages = await RpmFirstStage.parse_repodata(
                            primary_xml_path, filelists_xml_path,
                            other_xml_path)
                        # skip SRPM if defined
                        if 'srpm' in self.skip_types:
                            packages = {
                                pkgId: pkg
                                for pkgId, pkg in packages.items()
                                if pkg.arch != 'src'
                            }

                        progress_data = {
                            'message': 'Parsed Packages',
                            'code': 'parsing.packages',
                            'total': len(packages),
                        }
                        with ProgressReport(**progress_data) as packages_pb:
                            for pkg in packages.values():
                                package = Package(
                                    **Package.createrepo_to_dict(pkg))
                                artifact = Artifact(size=package.size_package)
                                checksum_type = getattr(
                                    CHECKSUM_TYPES,
                                    package.checksum_type.upper())
                                setattr(artifact, checksum_type, package.pkgId)
                                url = urljoin(remote_url,
                                              package.location_href)
                                filename = os.path.basename(
                                    package.location_href)
                                da = DeclarativeArtifact(
                                    artifact=artifact,
                                    url=url,
                                    relative_path=filename,
                                    remote=self.remote,
                                    deferred_download=self.deferred_download)
                                dc = DeclarativeContent(content=package,
                                                        d_artifacts=[da])
                                dc.extra_data = defaultdict(list)

                                # find if a package relates to a modulemd
                                if dc.content.nevra in nevra_to_module.keys():
                                    dc.content.is_modular = True
                                    for dc_modulemd in nevra_to_module[
                                            dc.content.nevra]:
                                        dc.extra_data[
                                            'modulemd_relation'].append(
                                                dc_modulemd)
                                        dc_modulemd.extra_data[
                                            'package_relation'].append(dc)

                                if dc.content.name in pkgname_to_groups.keys():
                                    for dc_group in pkgname_to_groups[
                                            dc.content.name]:
                                        dc.extra_data[
                                            'group_relations'].append(dc_group)
                                        dc_group.extra_data[
                                            'related_packages'].append(dc)

                                packages_pb.increment()
                                await self.put(dc)

                    elif results[0].url == updateinfo_url:
                        updateinfo_xml_path = results[0].path
                        metadata_pb.increment()

                        updates = await RpmFirstStage.parse_updateinfo(
                            updateinfo_xml_path)

                        progress_data = {
                            'message': 'Parsed Advisories',
                            'code': 'parsing.advisories',
                            'total': len(updates),
                        }
                        with ProgressReport(**progress_data) as advisories_pb:
                            for update in updates:
                                update_record = UpdateRecord(
                                    **UpdateRecord.createrepo_to_dict(update))
                                update_record.digest = hash_update_record(
                                    update)
                                future_relations = {
                                    'collections': defaultdict(list),
                                    'references': []
                                }

                                for collection in update.collections:
                                    coll_dict = UpdateCollection.createrepo_to_dict(
                                        collection)
                                    coll = UpdateCollection(**coll_dict)

                                    for package in collection.packages:
                                        pkg_dict = UpdateCollectionPackage.createrepo_to_dict(
                                            package)
                                        pkg = UpdateCollectionPackage(
                                            **pkg_dict)
                                        future_relations['collections'][
                                            coll].append(pkg)

                                for reference in update.references:
                                    reference_dict = UpdateReference.createrepo_to_dict(
                                        reference)
                                    ref = UpdateReference(**reference_dict)
                                    future_relations['references'].append(ref)

                                advisories_pb.increment()
                                dc = DeclarativeContent(content=update_record)
                                dc.extra_data = future_relations
                                await self.put(dc)

            # now send modules down the pipeline since all relations have been set up
            for modulemd in modulemd_list:
                await self.put(modulemd)

            for dc_group in dc_groups:
                await self.put(dc_group)
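
The asyncio.gather comment in Example #13 is what keeps the primary/filelists/other results aligned: gather preserves the submission order of results within each downloader group, while asyncio.wait lets whole groups finish in any order. A stripped-down sketch of that pattern (the coroutines are placeholders, not pulp code):

import asyncio


async def download(name):
    # stand-in for downloader.run()
    await asyncio.sleep(0)
    return name


async def run_groups():
    groups = [
        [download("primary"), download("filelists"), download("other")],
        [download("updateinfo")],
    ]
    pending = [asyncio.gather(*group) for group in groups]
    while pending:
        done, pending = await asyncio.wait(pending, return_when=asyncio.FIRST_COMPLETED)
        for task in done:
            # results within a group keep their original order
            print(task.result())


asyncio.run(run_groups())
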
Example #14
def create_repomd_xml(content, publication, extra_repomdrecords, sub_folder=None):
    """
    Creates a repomd.xml file.

    Args:
        content(app.models.Content): content set
        publication(pulpcore.plugin.models.Publication): the publication
        extra_repomdrecords(list): list with data relative to repo metadata files
        sub_folder(str): name of the folder for sub repos

    """
    cwd = os.getcwd()
    repodata_path = REPODATA_PATH
    has_modules = False
    has_comps = False

    if sub_folder:
        cwd = os.path.join(cwd, sub_folder)
        repodata_path = os.path.join(sub_folder, repodata_path)

    # Prepare metadata files
    repomd_path = os.path.join(cwd, "repomd.xml")
    pri_xml_path = os.path.join(cwd, "primary.xml.gz")
    fil_xml_path = os.path.join(cwd, "filelists.xml.gz")
    oth_xml_path = os.path.join(cwd, "other.xml.gz")
    pri_db_path = os.path.join(cwd, "primary.sqlite")
    fil_db_path = os.path.join(cwd, "filelists.sqlite")
    oth_db_path = os.path.join(cwd, "other.sqlite")
    upd_xml_path = os.path.join(cwd, "updateinfo.xml.gz")
    mod_yml_path = os.path.join(cwd, "modules.yaml")
    comps_xml_path = os.path.join(cwd, "comps.xml")

    pri_xml = cr.PrimaryXmlFile(pri_xml_path)
    fil_xml = cr.FilelistsXmlFile(fil_xml_path)
    oth_xml = cr.OtherXmlFile(oth_xml_path)
    pri_db = cr.PrimarySqlite(pri_db_path)
    fil_db = cr.FilelistsSqlite(fil_db_path)
    oth_db = cr.OtherSqlite(oth_db_path)
    upd_xml = cr.UpdateInfoXmlFile(upd_xml_path)

    packages = Package.objects.filter(pk__in=content)
    total_packages = packages.count()

    pri_xml.set_num_of_pkgs(total_packages)
    fil_xml.set_num_of_pkgs(total_packages)
    oth_xml.set_num_of_pkgs(total_packages)

    # Process all packages
    for package in packages.iterator():
        pkg = package.to_createrepo_c()
        pkg.location_href = package.contentartifact_set.only('relative_path').first().relative_path
        pri_xml.add_pkg(pkg)
        fil_xml.add_pkg(pkg)
        oth_xml.add_pkg(pkg)
        pri_db.add_pkg(pkg)
        fil_db.add_pkg(pkg)
        oth_db.add_pkg(pkg)

    # Process update records
    for update_record in UpdateRecord.objects.filter(pk__in=content).iterator():
        upd_xml.add_chunk(cr.xml_dump_updaterecord(update_record.to_createrepo_c()))

    # Process modulemd and modulemd_defaults
    with open(mod_yml_path, 'ab') as mod_yml:
        for modulemd in Modulemd.objects.filter(pk__in=content).iterator():
            mod_yml.write(modulemd._artifacts.get().file.read())
            has_modules = True
        for default in ModulemdDefaults.objects.filter(pk__in=content).iterator():
            mod_yml.write(default._artifacts.get().file.read())
            has_modules = True

    # Process comps
    comps = libcomps.Comps()
    for pkg_grp in PackageGroup.objects.filter(pk__in=content).iterator():
        group = pkg_grp.pkg_grp_to_libcomps()
        comps.groups.append(group)
        has_comps = True
    for pkg_cat in PackageCategory.objects.filter(pk__in=content).iterator():
        cat = pkg_cat.pkg_cat_to_libcomps()
        comps.categories.append(cat)
        has_comps = True
    for pkg_env in PackageEnvironment.objects.filter(pk__in=content).iterator():
        env = pkg_env.pkg_env_to_libcomps()
        comps.environments.append(env)
        has_comps = True
    for pkg_lng in PackageLangpacks.objects.filter(pk__in=content).iterator():
        comps.langpacks = dict_to_strdict(pkg_lng.matches)
        has_comps = True

    comps.toxml_f(comps_xml_path, xml_options={"default_explicit": True,
                                               "empty_groups": True,
                                               "uservisible_explicit": True})

    pri_xml.close()
    fil_xml.close()
    oth_xml.close()
    upd_xml.close()

    repomd = cr.Repomd()

    repomdrecords = [("primary", pri_xml_path, pri_db),
                     ("filelists", fil_xml_path, fil_db),
                     ("other", oth_xml_path, oth_db),
                     ("primary_db", pri_db_path, None),
                     ("filelists_db", fil_db_path, None),
                     ("other_db", oth_db_path, None),
                     ("updateinfo", upd_xml_path, None)]

    if has_modules:
        repomdrecords.append(("modules", mod_yml_path, None))

    if has_comps:
        repomdrecords.append(("group", comps_xml_path, None))

    repomdrecords.extend(extra_repomdrecords)

    sqlite_files = ("primary_db", "filelists_db", "other_db")
    for name, path, db_to_update in repomdrecords:
        record = cr.RepomdRecord(name, path)
        if name in sqlite_files:
            record_bz = record.compress_and_fill(cr.SHA256, cr.BZ2)
            record_bz.type = name
            record_bz.rename_file()
            path = record_bz.location_href.split('/')[-1]
            repomd.set_record(record_bz)
        else:
            record.fill(cr.SHA256)
            if db_to_update:
                db_to_update.dbinfo_update(record.checksum)
                db_to_update.close()
            record.rename_file()
            path = record.location_href.split('/')[-1]
            repomd.set_record(record)

        if sub_folder:
            path = os.path.join(sub_folder, path)

        PublishedMetadata.create_from_file(
            relative_path=os.path.join(repodata_path, os.path.basename(path)),
            publication=publication,
            file=File(open(path, 'rb'))
        )

    with open(repomd_path, "w") as repomd_f:
        repomd_f.write(repomd.xml_dump())

    PublishedMetadata.create_from_file(
        relative_path=os.path.join(repodata_path, os.path.basename(repomd_path)),
        publication=publication,
        file=File(open(repomd_path, 'rb'))
    )
Example #15
    async def run(self):
        """
        Build `DeclarativeContent` from the repodata.
        """
        packages_pb = ProgressReport(message='Parsed Packages',
                                     code='parsing.packages')
        errata_pb = ProgressReport(message='Parsed Erratum',
                                   code='parsing.errata')
        modulemd_pb = ProgressReport(message='Parse Modulemd',
                                     code='parsing.modulemds')
        modulemd_defaults_pb = ProgressReport(
            message='Parse Modulemd-defaults', code='parsing.modulemddefaults')
        comps_pb = ProgressReport(message='Parsed Comps', code='parsing.comps')

        packages_pb.save()
        errata_pb.save()
        comps_pb.save()

        remote_url = self.new_url or self.remote.url
        remote_url = remote_url if remote_url[-1] == "/" else f"{remote_url}/"

        progress_data = dict(message='Downloading Metadata Files',
                             code='downloading.metadata')
        with ProgressReport(**progress_data) as metadata_pb:
            downloader = self.remote.get_downloader(
                url=urljoin(remote_url, 'repodata/repomd.xml'))
            # TODO: decide how to distinguish between a mirror list and a normal repo
            result = await downloader.run()
            metadata_pb.increment()

            if self.kickstart:
                d_artifacts = []
                for path, checksum in self.kickstart["download"][
                        "images"].items():
                    artifact = Artifact(**checksum)

                    da = DeclarativeArtifact(
                        artifact=artifact,
                        url=urljoin(remote_url, path),
                        relative_path=path,
                        remote=self.remote,
                        deferred_download=self.deferred_download)

                    d_artifacts.append(da)

                distribution_tree = DistributionTree(
                    **self.kickstart["distribution_tree"])
                dc = DeclarativeContent(content=distribution_tree,
                                        d_artifacts=d_artifacts)
                dc.extra_data = self.kickstart
                await self.put(dc)

            repomd_path = result.path
            repomd = cr.Repomd(repomd_path)
            package_repodata_urls = {}
            downloaders = []
            modulemd_list = list()
            dc_groups = []
            dc_categories = []
            dc_environments = []
            nevra_to_module = defaultdict(dict)
            pkgname_to_groups = defaultdict(list)
            group_to_categories = defaultdict(list)
            group_to_environments = defaultdict(list)
            optionalgroup_to_environments = defaultdict(list)
            modulemd_results = None
            comps_downloader = None

            for record in repomd.records:
                if record.type in PACKAGE_REPODATA:
                    package_repodata_urls[record.type] = urljoin(
                        remote_url, record.location_href)
                elif record.type in UPDATE_REPODATA:
                    updateinfo_url = urljoin(remote_url, record.location_href)
                    downloader = self.remote.get_downloader(url=updateinfo_url)
                    downloaders.append([downloader.run()])

                elif record.type in COMPS_REPODATA:
                    comps_url = urljoin(remote_url, record.location_href)
                    comps_downloader = self.remote.get_downloader(
                        url=comps_url)

                elif record.type in SKIP_REPODATA:
                    continue

                elif record.type in MODULAR_REPODATA:
                    modules_url = urljoin(remote_url, record.location_href)
                    modulemd_downloader = self.remote.get_downloader(
                        url=modules_url)
                    modulemd_results = await modulemd_downloader.run()

                elif record.type not in PACKAGE_DB_REPODATA:
                    file_data = {
                        record.checksum_type: record.checksum,
                        "size": record.size
                    }
                    da = DeclarativeArtifact(
                        artifact=Artifact(**file_data),
                        url=urljoin(remote_url, record.location_href),
                        relative_path=record.location_href,
                        remote=self.remote,
                        deferred_download=False)
                    repo_metadata_file = RepoMetadataFile(
                        data_type=record.type,
                        checksum_type=record.checksum_type,
                        checksum=record.checksum,
                    )
                    dc = DeclarativeContent(content=repo_metadata_file,
                                            d_artifacts=[da])
                    await self.put(dc)

            # we have to sync module.yaml first if it exists, to make relations to packages
            if modulemd_results:
                modulemd_index = mmdlib.ModuleIndex.new()
                open_func = gzip.open if modulemd_results.url.endswith(
                    '.gz') else open
                with open_func(modulemd_results.path, 'r') as moduleyaml:
                    modulemd_index.update_from_string(
                        moduleyaml.read().decode(), True)

                modulemd_names = modulemd_index.get_module_names() or []
                modulemd_all = parse_modulemd(modulemd_names, modulemd_index)

                modulemd_pb.total = len(modulemd_all)
                modulemd_pb.state = 'running'
                modulemd_pb.save()

                for modulemd in modulemd_all:
                    artifact = modulemd.pop('artifact')
                    relative_path = '{}{}{}{}{}snippet'.format(
                        modulemd[PULP_MODULE_ATTR.NAME],
                        modulemd[PULP_MODULE_ATTR.STREAM],
                        modulemd[PULP_MODULE_ATTR.VERSION],
                        modulemd[PULP_MODULE_ATTR.CONTEXT],
                        modulemd[PULP_MODULE_ATTR.ARCH])
                    da = DeclarativeArtifact(artifact=artifact,
                                             relative_path=relative_path,
                                             url=modules_url)
                    modulemd_content = Modulemd(**modulemd)
                    dc = DeclarativeContent(content=modulemd_content,
                                            d_artifacts=[da])
                    dc.extra_data = defaultdict(list)

                    # dc.content.artifacts are Modulemd artifacts
                    for artifact in json.loads(dc.content.artifacts):
                        nevra_to_module.setdefault(artifact, set()).add(dc)
                    modulemd_list.append(dc)

                modulemd_default_names = parse_defaults(modulemd_index)

                modulemd_defaults_pb.total = len(modulemd_default_names)
                modulemd_defaults_pb.state = 'running'
                modulemd_defaults_pb.save()

                for default in modulemd_default_names:
                    artifact = default.pop('artifact')
                    relative_path = '{}{}snippet'.format(
                        default[PULP_MODULEDEFAULTS_ATTR.MODULE],
                        default[PULP_MODULEDEFAULTS_ATTR.STREAM])
                    da = DeclarativeArtifact(artifact=artifact,
                                             relative_path=relative_path,
                                             url=modules_url)
                    default_content = ModulemdDefaults(**default)
                    modulemd_defaults_pb.increment()
                    dc = DeclarativeContent(content=default_content,
                                            d_artifacts=[da])
                    await self.put(dc)

            if comps_downloader:
                comps_result = await comps_downloader.run()

                comps = libcomps.Comps()
                comps.fromxml_f(comps_result.path)

                comps_pb.total = (len(comps.groups) + len(comps.categories) +
                                  len(comps.environments))
                comps_pb.state = 'running'
                comps_pb.save()

                if comps.langpacks:
                    langpack_dict = PackageLangpacks.libcomps_to_dict(
                        comps.langpacks)
                    packagelangpack = PackageLangpacks(
                        matches=strdict_to_dict(comps.langpacks),
                        digest=dict_digest(langpack_dict))
                    dc = DeclarativeContent(content=packagelangpack)
                    dc.extra_data = defaultdict(list)
                    await self.put(dc)

                if comps.categories:
                    for category in comps.categories:
                        category_dict = PackageCategory.libcomps_to_dict(
                            category)
                        category_dict['digest'] = dict_digest(category_dict)
                        packagecategory = PackageCategory(**category_dict)
                        dc = DeclarativeContent(content=packagecategory)
                        dc.extra_data = defaultdict(list)

                        if packagecategory.group_ids:
                            for group_id in packagecategory.group_ids:
                                group_to_categories[group_id['name']].append(
                                    dc)
                        dc_categories.append(dc)

                if comps.environments:
                    for environment in comps.environments:
                        environment_dict = PackageEnvironment.libcomps_to_dict(
                            environment)
                        environment_dict['digest'] = dict_digest(
                            environment_dict)
                        packageenvironment = PackageEnvironment(
                            **environment_dict)
                        dc = DeclarativeContent(content=packageenvironment)
                        dc.extra_data = defaultdict(list)

                        if packageenvironment.option_ids:
                            for option_id in packageenvironment.option_ids:
                                optionalgroup_to_environments[
                                    option_id['name']].append(dc)

                        if packageenvironment.group_ids:
                            for group_id in packageenvironment.group_ids:
                                group_to_environments[group_id['name']].append(
                                    dc)

                        dc_environments.append(dc)

                if comps.groups:
                    for group in comps.groups:
                        group_dict = PackageGroup.libcomps_to_dict(group)
                        group_dict['digest'] = dict_digest(group_dict)
                        packagegroup = PackageGroup(**group_dict)
                        dc = DeclarativeContent(content=packagegroup)
                        dc.extra_data = defaultdict(list)

                        if packagegroup.packages:
                            for package in packagegroup.packages:
                                pkgname_to_groups[package['name']].append(dc)

                        if dc.content.id in group_to_categories.keys():
                            for dc_category in group_to_categories[
                                    dc.content.id]:
                                dc.extra_data['category_relations'].append(
                                    dc_category)
                                dc_category.extra_data['packagegroups'].append(
                                    dc)

                        if dc.content.id in group_to_environments.keys():
                            for dc_environment in group_to_environments[
                                    dc.content.id]:
                                dc.extra_data['environment_relations'].append(
                                    dc_environment)
                                dc_environment.extra_data[
                                    'packagegroups'].append(dc)

                        if dc.content.id in optionalgroup_to_environments.keys(
                        ):
                            for dc_environment in optionalgroup_to_environments[
                                    dc.content.id]:
                                dc.extra_data['env_relations_optional'].append(
                                    dc_environment)
                                dc_environment.extra_data[
                                    'optionalgroups'].append(dc)

                        dc_groups.append(dc)

                for dc_category in dc_categories:
                    comps_pb.increment()
                    await self.put(dc_category)

                for dc_environment in dc_environments:
                    comps_pb.increment()
                    await self.put(dc_environment)

            # to preserve order, downloaders are created after all repodata urls are identified
            package_repodata_downloaders = []
            for repodata_type in PACKAGE_REPODATA:
                downloader = self.remote.get_downloader(
                    url=package_repodata_urls[repodata_type])
                package_repodata_downloaders.append(downloader.run())

            downloaders.append(package_repodata_downloaders)

            # asyncio.gather is used to preserve the order of results for package repodata
            pending = [
                asyncio.gather(*downloaders_group)
                for downloaders_group in downloaders
            ]

            while pending:
                done, pending = await asyncio.wait(
                    pending, return_when=asyncio.FIRST_COMPLETED)
                for downloader in done:
                    results = downloader.result()
                    if results[0].url == package_repodata_urls['primary']:
                        primary_xml_path = results[0].path
                        filelists_xml_path = results[1].path
                        other_xml_path = results[2].path
                        metadata_pb.done += 3
                        metadata_pb.save()

                        packages = await RpmFirstStage.parse_repodata(
                            primary_xml_path, filelists_xml_path,
                            other_xml_path)
                        packages_pb.total = len(packages)
                        packages_pb.state = 'running'
                        packages_pb.save()

                        for pkg in packages.values():
                            package = Package(
                                **Package.createrepo_to_dict(pkg))
                            artifact = Artifact(size=package.size_package)
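                            # store the package digest on the artifact under its checksum-type attribute (e.g. sha256)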
                            checksum_type = getattr(
                                CHECKSUM_TYPES, package.checksum_type.upper())
                            setattr(artifact, checksum_type, package.pkgId)
                            url = urljoin(remote_url, package.location_href)
                            filename = os.path.basename(package.location_href)
                            da = DeclarativeArtifact(
                                artifact=artifact,
                                url=url,
                                relative_path=filename,
                                remote=self.remote,
                                deferred_download=self.deferred_download)
                            dc = DeclarativeContent(content=package,
                                                    d_artifacts=[da])
                            dc.extra_data = defaultdict(list)

                            # find if a package relates to a modulemd
                            if dc.content.nevra in nevra_to_module.keys():
                                dc.content.is_modular = True
                                for dc_modulemd in nevra_to_module[
                                        dc.content.nevra]:
                                    dc.extra_data['modulemd_relation'].append(
                                        dc_modulemd)
                                    dc_modulemd.extra_data[
                                        'package_relation'].append(dc)

                            if dc.content.name in pkgname_to_groups.keys():
                                for dc_group in pkgname_to_groups[
                                        dc.content.name]:
                                    dc.extra_data['group_relations'].append(
                                        dc_group)
                                    dc_group.extra_data[
                                        'related_packages'].append(dc)

                            packages_pb.increment()
                            await self.put(dc)

                    elif results[0].url == updateinfo_url:
                        updateinfo_xml_path = results[0].path
                        metadata_pb.increment()

                        updates = await RpmFirstStage.parse_updateinfo(
                            updateinfo_xml_path)

                        errata_pb.total = len(updates)
                        errata_pb.state = 'running'
                        errata_pb.save()

                        for update in updates:
                            update_record = UpdateRecord(
                                **UpdateRecord.createrepo_to_dict(update))
                            update_record.digest = RpmFirstStage.hash_update_record(
                                update)
                            future_relations = {
                                'collections': defaultdict(list),
                                'references': []
                            }

                            for collection in update.collections:
                                coll_dict = UpdateCollection.createrepo_to_dict(
                                    collection)
                                coll = UpdateCollection(**coll_dict)

                                for package in collection.packages:
                                    pkg_dict = UpdateCollectionPackage.createrepo_to_dict(
                                        package)
                                    pkg = UpdateCollectionPackage(**pkg_dict)
                                    future_relations['collections'][
                                        coll].append(pkg)

                            for reference in update.references:
                                reference_dict = UpdateReference.createrepo_to_dict(
                                    reference)
                                ref = UpdateReference(**reference_dict)
                                future_relations['references'].append(ref)

                            errata_pb.increment()
                            dc = DeclarativeContent(content=update_record)
                            dc.extra_data = future_relations
                            await self.put(dc)

            # now send modules down the pipeline since all relations have been set up
            for modulemd in modulemd_list:
                modulemd_pb.increment()
                await self.put(modulemd)

            for dc_group in dc_groups:
                comps_pb.increment()
                await self.put(dc_group)

        packages_pb.state = 'completed'
        errata_pb.state = 'completed'
        modulemd_pb.state = 'completed'
        modulemd_defaults_pb.state = 'completed'
        comps_pb.state = 'completed'
        packages_pb.save()
        errata_pb.save()
        modulemd_pb.save()
        modulemd_defaults_pb.save()
        comps_pb.save()
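
The relation wiring above (nevra_to_module, pkgname_to_groups, group_to_categories, ...) always follows the same pattern: build a lookup dict while parsing one metadata type, then cross-link declarative content objects through their extra_data lists once the related content arrives. A minimal, library-free sketch of that pattern, using a stand-in class and made-up data rather than the real Pulp API:

from collections import defaultdict


class FakeDeclarativeContent:
    """Stand-in for DeclarativeContent: wraps a content dict and carries extra_data."""

    def __init__(self, content):
        self.content = content
        self.extra_data = defaultdict(list)


# index modulemds by the package NEVRAs they declare as artifacts
nevra_to_module = {}
module_dc = FakeDeclarativeContent({"name": "nodejs", "artifacts": ["nodejs-0:12.0-1.x86_64"]})
for nevra in module_dc.content["artifacts"]:
    nevra_to_module.setdefault(nevra, set()).add(module_dc)

# when a matching package shows up, link both directions via extra_data
pkg_dc = FakeDeclarativeContent({"nevra": "nodejs-0:12.0-1.x86_64"})
for dc_modulemd in nevra_to_module.get(pkg_dc.content["nevra"], ()):
    pkg_dc.extra_data["modulemd_relation"].append(dc_modulemd)
    dc_modulemd.extra_data["package_relation"].append(pkg_dc)

assert pkg_dc.extra_data["modulemd_relation"] == [module_dc]
assert module_dc.extra_data["package_relation"] == [pkg_dc]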
Example #16
0
def create_repomd_xml(content,
                      publication,
                      checksum_types,
                      extra_repomdrecords,
                      sub_folder=None,
                      metadata_signing_service=None):
    """
    Creates a repomd.xml file.

    Args:
        content(app.models.Content): content set
        publication(pulpcore.plugin.models.Publication): the publication
        checksum_types(dict): checksum types to use for the packages and repo metadata
        extra_repomdrecords(list): list with data relative to repo metadata files
        sub_folder(str): name of the folder for sub repos
        metadata_signing_service (pulpcore.app.models.AsciiArmoredDetachedSigningService):
            A reference to an associated signing service.

    """
    cwd = os.getcwd()
    repodata_path = REPODATA_PATH
    has_modules = False
    has_comps = False
    package_checksum_type = checksum_types.get("package")

    if sub_folder:
        cwd = os.path.join(cwd, sub_folder)
        repodata_path = os.path.join(sub_folder, repodata_path)

    # Prepare metadata files
    repomd_path = os.path.join(cwd, "repomd.xml")
    pri_xml_path = os.path.join(cwd, "primary.xml.gz")
    fil_xml_path = os.path.join(cwd, "filelists.xml.gz")
    oth_xml_path = os.path.join(cwd, "other.xml.gz")
    pri_db_path = os.path.join(cwd, "primary.sqlite")
    fil_db_path = os.path.join(cwd, "filelists.sqlite")
    oth_db_path = os.path.join(cwd, "other.sqlite")
    upd_xml_path = os.path.join(cwd, "updateinfo.xml.gz")
    mod_yml_path = os.path.join(cwd, "modules.yaml")
    comps_xml_path = os.path.join(cwd, "comps.xml")

    pri_xml = cr.PrimaryXmlFile(pri_xml_path)
    fil_xml = cr.FilelistsXmlFile(fil_xml_path)
    oth_xml = cr.OtherXmlFile(oth_xml_path)
    pri_db = cr.PrimarySqlite(pri_db_path)
    fil_db = cr.FilelistsSqlite(fil_db_path)
    oth_db = cr.OtherSqlite(oth_db_path)
    upd_xml = cr.UpdateInfoXmlFile(upd_xml_path)

    packages = Package.objects.filter(pk__in=content)
    total_packages = packages.count()

    pri_xml.set_num_of_pkgs(total_packages)
    fil_xml.set_num_of_pkgs(total_packages)
    oth_xml.set_num_of_pkgs(total_packages)

    # Process all packages
    for package in packages.iterator():
        pkg = package.to_createrepo_c(package_checksum_type)
        pkg_filename = os.path.basename(package.location_href)
        # this can cause an issue when two RPM packages share the same filename,
        # e.g. a/name1.rpm and b/name1.rpm
        pkg.location_href = os.path.join(PACKAGES_DIRECTORY,
                                         pkg_filename[0].lower(), pkg_filename)
        pri_xml.add_pkg(pkg)
        fil_xml.add_pkg(pkg)
        oth_xml.add_pkg(pkg)
        pri_db.add_pkg(pkg)
        fil_db.add_pkg(pkg)
        oth_db.add_pkg(pkg)

    # Process update records
    for update_record in UpdateRecord.objects.filter(
            pk__in=content).iterator():
        upd_xml.add_chunk(
            cr.xml_dump_updaterecord(update_record.to_createrepo_c()))

    # Process modulemd and modulemd_defaults
    with open(mod_yml_path, 'ab') as mod_yml:
        for modulemd in Modulemd.objects.filter(pk__in=content).iterator():
            mod_yml.write(modulemd._artifacts.get().file.read())
            has_modules = True
        for default in ModulemdDefaults.objects.filter(
                pk__in=content).iterator():
            mod_yml.write(default._artifacts.get().file.read())
            has_modules = True

    # Process comps
    comps = libcomps.Comps()
    for pkg_grp in PackageGroup.objects.filter(pk__in=content).iterator():
        group = pkg_grp.pkg_grp_to_libcomps()
        comps.groups.append(group)
        has_comps = True
    for pkg_cat in PackageCategory.objects.filter(pk__in=content).iterator():
        cat = pkg_cat.pkg_cat_to_libcomps()
        comps.categories.append(cat)
        has_comps = True
    for pkg_env in PackageEnvironment.objects.filter(
            pk__in=content).iterator():
        env = pkg_env.pkg_env_to_libcomps()
        comps.environments.append(env)
        has_comps = True
    for pkg_lng in PackageLangpacks.objects.filter(pk__in=content).iterator():
        comps.langpacks = dict_to_strdict(pkg_lng.matches)
        has_comps = True

    comps.toxml_f(comps_xml_path,
                  xml_options={
                      "default_explicit": True,
                      "empty_groups": True,
                      "uservisible_explicit": True
                  })

    pri_xml.close()
    fil_xml.close()
    oth_xml.close()
    upd_xml.close()

    repomd = cr.Repomd()

    repomdrecords = [("primary", pri_xml_path, pri_db),
                     ("filelists", fil_xml_path, fil_db),
                     ("other", oth_xml_path, oth_db),
                     ("primary_db", pri_db_path, None),
                     ("filelists_db", fil_db_path, None),
                     ("other_db", oth_db_path, None),
                     ("updateinfo", upd_xml_path, None)]

    if has_modules:
        repomdrecords.append(("modules", mod_yml_path, None))

    if has_comps:
        repomdrecords.append(("group", comps_xml_path, None))

    repomdrecords.extend(extra_repomdrecords)

    sqlite_files = ("primary_db", "filelists_db", "other_db")
    for name, path, db_to_update in repomdrecords:
        record = cr.RepomdRecord(name, path)
        checksum_type = get_checksum_type(name, checksum_types)
        if name in sqlite_files:
            record_bz = record.compress_and_fill(checksum_type, cr.BZ2)
            record_bz.type = name
            record_bz.rename_file()
            path = record_bz.location_href.split('/')[-1]
            repomd.set_record(record_bz)
        else:
            record.fill(checksum_type)
            if db_to_update:
                db_to_update.dbinfo_update(record.checksum)
                db_to_update.close()
            record.rename_file()
            path = record.location_href.split('/')[-1]
            repomd.set_record(record)

        if sub_folder:
            path = os.path.join(sub_folder, path)

        PublishedMetadata.create_from_file(relative_path=os.path.join(
            repodata_path, os.path.basename(path)),
                                           publication=publication,
                                           file=File(open(path, 'rb')))

    with open(repomd_path, "w") as repomd_f:
        repomd_f.write(repomd.xml_dump())

    if metadata_signing_service:
        signing_service = AsciiArmoredDetachedSigningService.objects.get(
            pk=metadata_signing_service.pk)
        sign_results = signing_service.sign(repomd_path)

        # publish a signed file
        PublishedMetadata.create_from_file(
            relative_path=os.path.join(repodata_path,
                                       os.path.basename(sign_results['file'])),
            publication=publication,
            file=File(open(sign_results['file'], 'rb')))

        # publish a detached signature
        PublishedMetadata.create_from_file(relative_path=os.path.join(
            repodata_path, os.path.basename(sign_results['signature'])),
                                           publication=publication,
                                           file=File(
                                               open(sign_results['signature'],
                                                    'rb')))

        # publish a public key required for further verification
        PublishedMetadata.create_from_file(
            relative_path=os.path.join(repodata_path,
                                       os.path.basename(sign_results['key'])),
            publication=publication,
            file=File(open(sign_results['key'], 'rb')))
    else:
        PublishedMetadata.create_from_file(relative_path=os.path.join(
            repodata_path, os.path.basename(repomd_path)),
                                           publication=publication,
                                           file=File(open(repomd_path, 'rb')))
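
The comps.xml assembled above is built from Pulp model objects, but the same libcomps calls also work on hand-built objects. A minimal standalone sketch (the group id, package name, and output path are made up for illustration):

import libcomps

comps = libcomps.Comps()

group = libcomps.Group()
group.id = "core"  # made-up group id
group.name = "Core"
group.desc = "Smallest possible installation"
group.packages.append(
    libcomps.Package("bash", libcomps.PACKAGE_TYPE_MANDATORY))
comps.groups.append(group)

# toxml_f() writes the document to a file, toxml_str() returns it as a string
comps.toxml_f("comps.xml")
print(comps.toxml_str())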
Example #17
0
def sanity_check_repodata(myurl, repo_type):
    """
    Sanity check the repodata for a given repository.

    Args:
        myurl (str): A path to a repodata directory.
        repo_type (str): This should be set to 'yum' for Yum repositories, 'module' for module
            repositories, or 'source' for source repositories.
    Raises:
        RepodataException: If the repodata is not valid or does not exist.
        ValueError: If repo_type is not an acceptable value.
    """
    if repo_type not in ('module', 'source', 'yum'):
        raise ValueError('repo_type must be one of module, source, or yum.')

    with tempfile.TemporaryDirectory(prefix='bodhi_repotest_') as tmpdir:
        os.mkdir(os.path.join(tmpdir, 'lrodir'))

        h = librepo.Handle()
        h.setopt(librepo.LRO_REPOTYPE, librepo.LR_YUMREPO)
        h.setopt(librepo.LRO_DESTDIR, os.path.join(tmpdir, 'lrodir'))

        if myurl[-1] != '/':
            myurl += '/'
        if myurl.endswith('repodata/'):
            myurl = myurl.replace('repodata/', '')

        h.setopt(librepo.LRO_URLS, [myurl])
        h.setopt(librepo.LRO_LOCAL, True)
        h.setopt(librepo.LRO_CHECKSUM, True)
        h.setopt(librepo.LRO_IGNOREMISSING, False)
        r = librepo.Result()
        try:
            h.perform(r)
        except librepo.LibrepoException as e:
            rc, msg, general_msg = e.args
            raise RepodataException(msg)

        repo_info = r.getinfo(librepo.LRR_YUM_REPO)
        reqparts = ['filelists', 'primary', 'repomd', 'updateinfo']
        # Source and module repos don't have DRPMs.
        if repo_type == 'yum':
            reqparts.append('prestodelta')
            reqparts.append('group')
        elif repo_type == 'module':
            reqparts.append('modules')
        missing = []
        for part in reqparts:
            if part not in repo_info:
                missing.append(part)
        if missing:
            raise RepodataException(f'Required parts not in repomd.xml: {", ".join(missing)}')

        # Only yum repos have comps
        if repo_type == 'yum':
            # Test comps
            comps = libcomps.Comps()
            try:
                ret = comps.fromxml_f(repo_info['group'])
            except Exception:
                raise RepodataException('Comps file unable to be parsed')
            # libcomps signals a fatal parse error with a -1 return value
            if ret == -1:
                raise RepodataException('Comps file unable to be parsed')
            if len(comps.groups) < 1:
                raise RepodataException('Comps file empty')

        # Test updateinfo
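        # zgrep exits 0 when it finds a match, i.e. when an empty <id/> tag is present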
        ret = subprocess.call(['zgrep', '<id/>', repo_info['updateinfo']])
        if not ret:
            raise RepodataException('updateinfo.xml.gz contains empty ID tags')

        # Now call out to DNF to check if the repo is usable
        # "tests" is a list of tuples with (dnf args, expected output) to run.
        # For every test, DNF is run with the arguments, and if the expected output is not found,
        #  an error is raised.
        tests = []

        if repo_type in ('yum', 'source'):
            tests.append((['list', 'available'], 'testrepo'))
        else:  # repo_type == 'module', verified above
            tests.append((['module', 'list'], '.*'))

        for test in tests:
            dnfargs, expout = test

            # Make sure every DNF test runs in a new temp dir
            testdir = tempfile.mkdtemp(dir=tmpdir)
            output = sanity_check_repodata_dnf(testdir, myurl, *dnfargs)
            if (expout == ".*" and len(output.strip()) != 0) or (expout in output):
                continue
            else:
                raise RepodataException(
                    "DNF did not return expected output when running test!"
                    + f" Test: {dnfargs}, expected: {expout}, output: {output}")
Example #18
0
def generate_repo_metadata(
    content,
    publication,
    checksum_types,
    extra_repomdrecords,
    sub_folder=None,
    metadata_signing_service=None,
):
    """
    Creates a repomd.xml file.

    Args:
        content(app.models.Content): content set
        publication(pulpcore.plugin.models.Publication): the publication
        checksum_types(dict): checksum types to use for the packages and repo metadata
        extra_repomdrecords(list): list with data relative to repo metadata files
        sub_folder(str): name of the folder for sub repos
        metadata_signing_service (pulpcore.app.models.AsciiArmoredDetachedSigningService):
            A reference to an associated signing service.

    """
    cwd = os.getcwd()
    repodata_path = REPODATA_PATH
    has_modules = False
    has_comps = False
    package_checksum_type = checksum_types.get("package")

    if sub_folder:
        cwd = os.path.join(cwd, sub_folder)
        repodata_path = os.path.join(sub_folder, repodata_path)

    if package_checksum_type and package_checksum_type not in settings.ALLOWED_CONTENT_CHECKSUMS:
        raise ValueError(
            "Repository contains disallowed package checksum type '{}', "
            "thus can't be published. {}".format(package_checksum_type,
                                                 ALLOWED_CHECKSUM_ERROR_MSG))

    # Prepare metadata files
    repomd_path = os.path.join(cwd, "repomd.xml")
    pri_xml_path = os.path.join(cwd, "primary.xml.gz")
    fil_xml_path = os.path.join(cwd, "filelists.xml.gz")
    oth_xml_path = os.path.join(cwd, "other.xml.gz")
    upd_xml_path = os.path.join(cwd, "updateinfo.xml.gz")
    mod_yml_path = os.path.join(cwd, "modules.yaml")
    comps_xml_path = os.path.join(cwd, "comps.xml")

    pri_xml = cr.PrimaryXmlFile(pri_xml_path)
    fil_xml = cr.FilelistsXmlFile(fil_xml_path)
    oth_xml = cr.OtherXmlFile(oth_xml_path)
    upd_xml = cr.UpdateInfoXmlFile(upd_xml_path)

    if publication.sqlite_metadata:
        pri_db_path = os.path.join(cwd, "primary.sqlite")
        fil_db_path = os.path.join(cwd, "filelists.sqlite")
        oth_db_path = os.path.join(cwd, "other.sqlite")
        pri_db = cr.PrimarySqlite(pri_db_path)
        fil_db = cr.FilelistsSqlite(fil_db_path)
        oth_db = cr.OtherSqlite(oth_db_path)

    packages = Package.objects.filter(pk__in=content)

    # We want to support publishing with a different checksum type than the one built-in to the
    # package itself, so we need to get the correct checksums somehow if there is an override.
    # We must also take into consideration that if the package has not been downloaded the only
    # checksum that is available is the one built-in.
    #
    # Since this lookup goes from Package->Content->ContentArtifact->Artifact, performance is a
    # challenge. We use ContentArtifact as our starting point because it enables us to work with
    # simple foreign keys and avoid messing with the many-to-many relationship, which doesn't
    # work with select_related() and performs poorly with prefetch_related(). This is fine
    # because we know that Packages should only ever have one artifact per content.
    contentartifact_qs = (
        ContentArtifact.objects.filter(content__in=packages.only("pk")).
        select_related(
            # content__rpm_package is a bit of a hack, exploiting the way django sets up model
            # inheritance, but it works and is unlikely to break. All content artifacts being
            # accessed here have an associated Package since they originally came from the
            # Package queryset.
            "artifact",
            "content__rpm_package",
        ).only("artifact", "content__rpm_package__checksum_type",
               "content__rpm_package__pkgId"))

    pkg_to_hash = {}
    for ca in contentartifact_qs.iterator():
        if package_checksum_type:
            package_checksum_type = package_checksum_type.lower()
            pkgid = getattr(ca.artifact, package_checksum_type, None)

        if not package_checksum_type or not pkgid:
            if ca.content.rpm_package.checksum_type not in settings.ALLOWED_CONTENT_CHECKSUMS:
                raise ValueError(
                    "Package {} as content unit {} contains forbidden checksum type '{}', "
                    "thus can't be published. {}".format(
                        ca.content.rpm_package.nevra,
                        ca.content.pk,
                        ca.content.rpm_package.checksum_type,
                        ALLOWED_CHECKSUM_ERROR_MSG,
                    ))
            package_checksum_type = ca.content.rpm_package.checksum_type
            pkgid = ca.content.rpm_package.pkgId

        pkg_to_hash[ca.content_id] = (package_checksum_type, pkgid)

    # TODO: this is meant to be a !! *temporary* !! fix for
    # https://github.com/pulp/pulp_rpm/issues/2407
    pkg_pks_to_ignore = set()
    latest_build_time_by_nevra = defaultdict(list)
    for pkg in packages.only("pk", "name", "epoch", "version", "release",
                             "arch", "time_build").iterator():
        latest_build_time_by_nevra[pkg.nevra].append((pkg.time_build, pkg.pk))
    for nevra, pkg_data in latest_build_time_by_nevra.items():
        # sort the packages by when they were built
        if len(pkg_data) > 1:
            pkg_data.sort(key=lambda p: p[0], reverse=True)
            pkg_pks_to_ignore |= set(entry[1] for entry in pkg_data[1:])
            log.warning(
                "Duplicate packages found competing for NEVRA {nevra}, selected the one with "
                "the most recent build time, excluding {others} others.".
                format(nevra=nevra, others=len(pkg_data[1:])))

    total_packages = packages.count() - len(pkg_pks_to_ignore)

    pri_xml.set_num_of_pkgs(total_packages)
    fil_xml.set_num_of_pkgs(total_packages)
    oth_xml.set_num_of_pkgs(total_packages)

    # Process all packages
    for package in packages.order_by("name", "evr").iterator():
        if package.pk in pkg_pks_to_ignore:  # Temporary!
            continue
        pkg = package.to_createrepo_c()

        # rewrite the checksum and checksum type with the desired ones
        (checksum, pkgId) = pkg_to_hash[package.pk]
        pkg.checksum_type = checksum
        pkg.pkgId = pkgId

        pkg_filename = os.path.basename(package.location_href)
        # this can cause an issue when two RPM packages share the same filename,
        # e.g. a/name1.rpm and b/name1.rpm
        pkg.location_href = os.path.join(PACKAGES_DIRECTORY,
                                         pkg_filename[0].lower(), pkg_filename)
        pri_xml.add_pkg(pkg)
        fil_xml.add_pkg(pkg)
        oth_xml.add_pkg(pkg)
        if publication.sqlite_metadata:
            pri_db.add_pkg(pkg)
            fil_db.add_pkg(pkg)
            oth_db.add_pkg(pkg)

    # Process update records
    for update_record in UpdateRecord.objects.filter(
            pk__in=content).iterator():
        upd_xml.add_chunk(
            cr.xml_dump_updaterecord(update_record.to_createrepo_c()))

    # Process modulemd, modulemd_defaults and obsoletes
    with open(mod_yml_path, "ab") as mod_yml:
        for modulemd in Modulemd.objects.filter(pk__in=content).iterator():
            mod_yml.write(modulemd.snippet.encode())
            has_modules = True
        for default in ModulemdDefaults.objects.filter(
                pk__in=content).iterator():
            mod_yml.write(default.snippet.encode())
            has_modules = True
        for obsolete in ModulemdObsolete.objects.filter(
                pk__in=content).iterator():
            mod_yml.write(obsolete.snippet.encode())
            has_modules = True

    # Process comps
    comps = libcomps.Comps()
    for pkg_grp in PackageGroup.objects.filter(pk__in=content).iterator():
        group = pkg_grp.pkg_grp_to_libcomps()
        comps.groups.append(group)
        has_comps = True
    for pkg_cat in PackageCategory.objects.filter(pk__in=content).iterator():
        cat = pkg_cat.pkg_cat_to_libcomps()
        comps.categories.append(cat)
        has_comps = True
    for pkg_env in PackageEnvironment.objects.filter(
            pk__in=content).iterator():
        env = pkg_env.pkg_env_to_libcomps()
        comps.environments.append(env)
        has_comps = True
    for pkg_lng in PackageLangpacks.objects.filter(pk__in=content).iterator():
        comps.langpacks = dict_to_strdict(pkg_lng.matches)
        has_comps = True

    comps.toxml_f(
        comps_xml_path,
        xml_options={
            "default_explicit": True,
            "empty_groups": True,
            "empty_packages": True,
            "uservisible_explicit": True,
        },
    )

    pri_xml.close()
    fil_xml.close()
    oth_xml.close()
    upd_xml.close()

    repomd = cr.Repomd()
    # If the repository is empty, use a revision of 0
    # See: https://pulp.plan.io/issues/9402
    if not content.exists():
        repomd.revision = "0"

    if publication.sqlite_metadata:
        repomdrecords = [
            ("primary", pri_xml_path, pri_db),
            ("filelists", fil_xml_path, fil_db),
            ("other", oth_xml_path, oth_db),
            ("primary_db", pri_db_path, None),
            ("filelists_db", fil_db_path, None),
            ("other_db", oth_db_path, None),
            ("updateinfo", upd_xml_path, None),
        ]
    else:
        repomdrecords = [
            ("primary", pri_xml_path, None),
            ("filelists", fil_xml_path, None),
            ("other", oth_xml_path, None),
            ("updateinfo", upd_xml_path, None),
        ]

    if has_modules:
        repomdrecords.append(("modules", mod_yml_path, None))

    if has_comps:
        repomdrecords.append(("group", comps_xml_path, None))

    repomdrecords.extend(extra_repomdrecords)

    sqlite_files = ("primary_db", "filelists_db", "other_db")

    for name, path, db_to_update in repomdrecords:
        record = cr.RepomdRecord(name, path)
        checksum_type = cr_checksum_type_from_string(
            get_checksum_type(name,
                              checksum_types,
                              default=publication.metadata_checksum_type))
        if name in sqlite_files:
            record_bz = record.compress_and_fill(checksum_type, cr.BZ2)
            record_bz.type = name
            record_bz.rename_file()
            path = record_bz.location_href.split("/")[-1]
            repomd.set_record(record_bz)
        else:
            record.fill(checksum_type)
            if db_to_update:
                db_to_update.dbinfo_update(record.checksum)
                db_to_update.close()
            record.rename_file()
            path = record.location_href.split("/")[-1]
            repomd.set_record(record)

        if sub_folder:
            path = os.path.join(sub_folder, path)

        with open(path, "rb") as repodata_fd:
            PublishedMetadata.create_from_file(
                relative_path=os.path.join(repodata_path,
                                           os.path.basename(path)),
                publication=publication,
                file=File(repodata_fd),
            )

    with open(repomd_path, "w") as repomd_f:
        repomd_f.write(repomd.xml_dump())

    if metadata_signing_service:
        signing_service = AsciiArmoredDetachedSigningService.objects.get(
            pk=metadata_signing_service)
        sign_results = signing_service.sign(repomd_path)

        # publish a signed file
        with open(sign_results["file"], "rb") as signed_file_fd:
            PublishedMetadata.create_from_file(
                relative_path=os.path.join(
                    repodata_path, os.path.basename(sign_results["file"])),
                publication=publication,
                file=File(signed_file_fd),
            )

        # publish a detached signature
        with open(sign_results["signature"], "rb") as signature_fd:
            PublishedMetadata.create_from_file(
                relative_path=os.path.join(
                    repodata_path,
                    os.path.basename(sign_results["signature"])),
                publication=publication,
                file=File(signature_fd),
            )

        # publish a public key required for further verification
        pubkey_name = "repomd.xml.key"
        with open(pubkey_name, "wb+") as f:
            f.write(signing_service.public_key.encode("utf-8"))
            f.flush()
            PublishedMetadata.create_from_file(
                relative_path=os.path.join(repodata_path, pubkey_name),
                publication=publication,
                file=File(f),
            )
    else:
        with open(repomd_path, "rb") as repomd_fd:
            PublishedMetadata.create_from_file(
                relative_path=os.path.join(repodata_path,
                                           os.path.basename(repomd_path)),
                publication=publication,
                file=File(repomd_fd),
            )
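
The temporary duplicate-NEVRA workaround in generate_repo_metadata() above boils down to keeping only the most recently built package per NEVRA. The selection step in isolation, with made-up data, looks roughly like this:

from collections import defaultdict

# (nevra, time_build, pk) triples; only the newest build per NEVRA should be published
packages = [
    ("bash-0:5.1-1.x86_64", 1600000000, "pk-a"),
    ("bash-0:5.1-1.x86_64", 1700000000, "pk-b"),  # newer rebuild of the same NEVRA
    ("vim-0:8.2-3.x86_64", 1650000000, "pk-c"),
]

latest_build_time_by_nevra = defaultdict(list)
for nevra, time_build, pk in packages:
    latest_build_time_by_nevra[nevra].append((time_build, pk))

pkg_pks_to_ignore = set()
for nevra, pkg_data in latest_build_time_by_nevra.items():
    if len(pkg_data) > 1:
        # newest build first; everything after it is excluded from the publication
        pkg_data.sort(key=lambda p: p[0], reverse=True)
        pkg_pks_to_ignore |= {pk for _, pk in pkg_data[1:]}

assert pkg_pks_to_ignore == {"pk-a"}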