Example #1
 def from_local_repomd(cls, repomd_path):
     """Create OriginRepo object from the local repomd.xml.
     @param path      path to the repomd.xml"""
     repomd = cr.Repomd(repomd_path)
     repo = cls()
     repo._fill_from_repomd_object(repomd)
     return repo
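
A minimal usage sketch for the classmethod above; the class name OriginRepo comes from the docstring, while the repository path is a hypothetical example:

# Hypothetical usage; the repomd.xml path is an assumption.
repo = OriginRepo.from_local_repomd("/srv/repos/myrepo/repodata/repomd.xml")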
Example #2
    async def run(self):
        """Build `DeclarativeContent` from the repodata."""
        self.data.remote_url = self.new_url or self.remote.url

        progress_data = dict(message="Downloading Metadata Files",
                             code="downloading.metadata")
        with ProgressReport(**progress_data) as metadata_pb:
            self.data.metadata_pb = metadata_pb

            downloader = self.remote.get_downloader(
                url=urljoin(self.data.remote_url, "repodata/repomd.xml"))
            result = await downloader.run()
            metadata_pb.increment()

            repomd_path = result.path
            self.data.repomd = cr.Repomd(repomd_path)

            self.repository.last_sync_revision_number = self.data.repomd.revision
            self.repository.last_sync_repomd_checksum = get_sha256(repomd_path)

            await self.parse_distribution_tree()
            await self.parse_repository_metadata()
            await self.parse_modules_metadata()
            await self.parse_packages_components()
            await self.parse_content()

            # now send modules down the pipeline since all relations have been set up
            for modulemd in self.data.modulemd_list:
                await self.put(modulemd)

            for dc_group in self.data.dc_groups:
                await self.put(dc_group)
Example #3
def calculate_contenthash(path):
    if not os.path.isdir(path) or \
       not os.path.isdir(os.path.join(path, "repodata/")):
        raise AttributeError("Not a repo: {0}".format(path))

    repomd_path = os.path.join(path, "repodata/repomd.xml")
    repomd = cr.Repomd(repomd_path)

    primary_path = None
    for rec in repomd.records:
        if rec.type == "primary":
            primary_path = rec.location_href
            break

    if not primary_path:
        raise CalculationException("primary metadata are missing")

    pkgids = []

    def pkgcb(pkg):
        pkgids.append("{0}{1}{2}".format(pkg.pkgId, pkg.location_href,
                                         pkg.location_base or ''))

    cr.xml_parse_primary(os.path.join(path, primary_path), pkgcb=pkgcb)

    contenthash = hashlib.new("sha256")
    for pkgid in sorted(pkgids):
        contenthash.update(pkgid.encode('utf-8'))
    return contenthash.hexdigest()
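
A short usage sketch for calculate_contenthash(); the repository path is hypothetical and CalculationException is assumed to be importable from the same module:

# Hypothetical call; the path is an assumption.
try:
    contenthash = calculate_contenthash("/srv/repos/myrepo")
    print("Content hash (sha256):", contenthash)
except CalculationException as err:
    print("Cannot calculate content hash:", err)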
Example #4
def insert_in_repo(comp_type, repodata, filetype, extension, source):
    """
    Inject a file into the repodata with the help of createrepo_c.

    Args:
        comp_type (int): createrepo_c compression type indication.
        repodata (basestring): The path to the repo where the metadata will be inserted.
        filetype (basestring): What type of metadata will be inserted by createrepo_c.
            This does allow any string to be inserted (custom types). There are some
            types which are used with dnf repos as primary, updateinfo, comps, filelist etc.
        extension (basestring): The file extension (xml, sqlite).
        source (basestring): A file path. File holds the dump of metadata until
            copied to the repodata folder.
    """
    log.info('Inserting %s.%s into %s', filetype, extension, repodata)
    target_fname = os.path.join(repodata, '%s.%s' % (filetype, extension))
    shutil.copyfile(source, target_fname)
    repomd_xml = os.path.join(repodata, 'repomd.xml')
    repomd = cr.Repomd(repomd_xml)
    # create a new record for our repomd.xml
    rec = cr.RepomdRecord(filetype, target_fname)
    # compress our metadata file with the comp_type
    rec_comp = rec.compress_and_fill(cr.SHA256, comp_type)
    # add hash to the compressed metadata file name
    rec_comp.rename_file()
    # set type of metadata
    rec_comp.type = filetype
    # insert metadata about our metadata in repomd.xml
    repomd.set_record(rec_comp)
    with open(repomd_xml, 'w') as repomd_file:
        repomd_file.write(repomd.xml_dump())
    os.unlink(target_fname)
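
A hedged usage sketch for insert_in_repo(); the paths are hypothetical and cr.GZ_COMPRESSION is assumed to be the gzip compression constant exposed by createrepo_c:

# Hypothetical invocation; the paths and the compression constant choice are assumptions.
insert_in_repo(cr.GZ_COMPRESSION, "/srv/repos/myrepo/repodata",
               "updateinfo", "xml", "/tmp/updateinfo-dump.xml")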
Example #5
    def test_repomd_with_path_in_constructor_repo01(self):

        repomd = cr.Repomd(REPO_01_REPOMD)
        self.assertEqual(repomd.revision, "1334667230")
        self.assertEqual(repomd.repo_tags, [])
        self.assertEqual(repomd.distro_tags, [])
        self.assertEqual(repomd.content_tags, [])
        self.assertEqual(len(repomd.records), 3)
Example #6
    def test_xml_parser_repomd_repo01_nowarningcb(self):

        repomd = cr.Repomd()
        cr.xml_parse_repomd(REPO_01_REPOMD, repomd)
        self.assertEqual(repomd.revision, "1334667230")
        self.assertEqual(repomd.repo_tags, [])
        self.assertEqual(repomd.distro_tags, [])
        self.assertEqual(repomd.content_tags, [])
        self.assertEqual(len(repomd.records), 3)
Example #7
    def save(self):
        self.primary.xml.close()
        self.filelists.xml.close()
        self.other.xml.close()

        repomd = createrepo_c.Repomd()
        repomd.set_record(self.primary.get_record())
        repomd.set_record(self.filelists.get_record())
        repomd.set_record(self.other.get_record())

        with (self.path / 'repomd.xml').open(mode='w') as f:
            f.write(repomd.xml_dump())
Example #8
    def test_repomd_indexing_and_iteration_repo01(self):
        repomd = cr.Repomd(REPO_01_REPOMD)

        types = []
        for rec in repomd:
            types.append(rec.type)
        self.assertEqual(types, ['filelists', 'other', 'primary'])

        rec = repomd["primary"]
        self.assertEqual(rec.type, "primary")

        self.assertRaises(KeyError, repomd.__getitem__, "foobar")

        self.assertTrue("primary" in repomd)
Example #9
def modifyrepo(filename, repodata):
    repodata = os.path.join(repodata, 'repodata')
    uinfo_xml = os.path.join(repodata, os.path.basename(filename))
    shutil.copyfile(filename, uinfo_xml)

    uinfo_rec = cr.RepomdRecord('updateinfo', uinfo_xml)
    uinfo_rec.fill(cr.SHA256)
    uinfo_rec.rename_file()

    repomd_xml = os.path.join(repodata, 'repomd.xml')
    repomd = cr.Repomd(repomd_xml)
    repomd.set_record(uinfo_rec)
    with open(repomd_xml, 'w') as repomd_file:
        repomd_file.write(repomd.xml_dump())
Example #10
    def _fill_from_path(self,
                        path,
                        contenthash=True,
                        contenthash_type="sha256"):
        """Fill attributes from a repository specified by path.

        :param path: Path to repository (a dir that contains repodata/ subdirectory)
        :type path: str
        :param contenthash: Do content hash calculation (primary metadata must be available in the repo)
        :type contenthash: bool
        :param contenthash_type: type of the calculated content hash
        :type contenthash_type: str
        """

        if not os.path.isdir(path) or \
           not os.path.isdir(os.path.join(path, "repodata/")) or \
           not os.path.isfile(os.path.join(path, "repodata/repomd.xml")):
            raise DeltaRepoError("Not a repository: {0}".format(path))

        repomd_path = os.path.join(path, "repodata/repomd.xml")
        repomd = cr.Repomd(repomd_path)

        self.repomd_contenthash = repomd.contenthash
        self.repomd_contenthash_type = repomd.contenthash_type

        self._fill_from_repomd_object(repomd)

        # Find a primary path
        primary_path = None
        for rec in repomd.records:
            md_path = os.path.join(path, rec.location_href)
            if os.path.isfile(md_path):
                self.present_metadata.append(rec.type)
            if rec.type == "primary":
                primary_path = md_path

        if contenthash:
            if not primary_path:
                raise DeltaRepoError("{0} - primary metadata are missing"
                                     "".format(path))
            self.contenthash = calculate_content_hash(primary_path,
                                                      contenthash_type)
            self.contenthash_type = contenthash_type

        self.path = path
        self.repodata = os.path.join(path, "repodata")
        self.basename = os.path.basename(path)
        self.repomd_size = os.path.getsize(repomd_path)
Example #11
 def modifyrepo(self, filename):
     """Inject a file into the repodata for each architecture"""
     for arch in os.listdir(self.repo_path):
         repodata = os.path.join(self.repo_path, arch, 'repodata')
         log.info('Inserting %s into %s', filename, repodata)
         uinfo_xml = os.path.join(repodata, 'updateinfo.xml')
         shutil.copyfile(filename, uinfo_xml)
         repomd_xml = os.path.join(repodata, 'repomd.xml')
         repomd = cr.Repomd(repomd_xml)
         uinfo_rec = cr.RepomdRecord('updateinfo', uinfo_xml)
         uinfo_rec_comp = uinfo_rec.compress_and_fill(self.hash_type, self.comp_type)
         uinfo_rec_comp.rename_file()
         uinfo_rec_comp.type = 'updateinfo'
         repomd.set_record(uinfo_rec_comp)
         with open(repomd_xml, 'w') as repomd_file:
             repomd_file.write(repomd.xml_dump())
         os.unlink(uinfo_xml)
Example #12
def handle_repomd(args, merger, repomd_filename):
    try:
        repomd = cr.Repomd(repomd_filename)
    except (RuntimeError, ValueError) as err:
        if not args.ignore_no_input:
            raise err
        logging.debug("{}: error loading repomd.xml: {}".format(
            repomd_filename, str(err)))
        return False

    # repomd was loaded and decoded successfully
    modules_path = False
    for record in repomd.records:
        if record.type == "modules":
            modules_path = record.location_href

    if not modules_path:
        logging.debug("{fn}: no modules section found in repomd.xml".format(
            fn=repomd_filename))
        if not args.ignore_no_input:
            raise ValueError('{fn} does not contain a modules section'.format(
                fn=repomd_filename))
        return False

    # strip repodata-prefix-dir from location_href
    filename = os.path.join(os.path.dirname(repomd_filename),
                            os.path.basename(modules_path))
    if os.path.isfile(filename):
        return merge_file(merger, filename)

    filename = os.path.join(os.path.dirname(repomd_filename), "../",
                            modules_path)
    if os.path.isfile(filename):
        return merge_file(merger, filename)

    logging.debug(
        "{fn}: modules section found in repomd.xml, but href file {href} does not exist"
        .format(fn=repomd_filename, href=filename))
    if not args.ignore_no_input:
        raise ValueError(
            "{fn}: modules section found in repomd.xml, but href file {href} does "
            "not exist".format(fn=repomd_filename, href=filename))
    return False
Example #13
def is_optimized_sync(repository, remote, url):
    """
    Check whether it is possible to optimize the synchronization or not.

    Caution: we are not storing when the remote was last updated, so the order of this
    logic must remain in this order where we first check the version number as other
    changes than sync could have taken place such that the date or repo version will be
    different from last sync.

    Args:
        repository(RpmRepository): An RpmRepository to check optimization for.
        remote(RpmRemote): An RPMRemote to check optimization for.
        url(str): A remote repository URL.

    Returns:
        bool: True, if sync is optimized; False, otherwise.

    """
    with WorkingDirectory():
        result = get_repomd_file(remote, url)
        if not result:
            return False

        repomd_path = result.path
        repomd = cr.Repomd(repomd_path)
        repomd_checksum = get_sha256(repomd_path)

    is_optimized = (
        repository.last_sync_remote
        and remote.pk == repository.last_sync_remote.pk
        and repository.last_sync_repo_version == repository.latest_version().number
        and remote.pulp_last_updated <= repository.latest_version().pulp_created
        and is_previous_version(repomd.revision, repository.last_sync_revision_number)
        and repository.last_sync_repomd_checksum == repomd_checksum
    )
    if is_optimized:
        optimize_data = dict(message="Optimizing Sync", code="optimizing.sync")
        with ProgressReport(**optimize_data) as optimize_pb:
            optimize_pb.done = 1
            optimize_pb.save()

    return is_optimized
Example #14
    def get_repomd_record_xml_path(repo_path, record_type):
        """
        Returns the file path of the specified repomd record.

        Parameters
        ----------
        repo_path : str
            Repository path.
        record_type : str
            Type of the repomd record to look up (e.g. "primary").

        Returns
        -------
        str or None
            Record file path or None if the record is not found in the
            repository metadata.
        """
        repomd_path = os.path.join(repo_path, 'repodata/repomd.xml')
        repomd = createrepo_c.Repomd(repomd_path)
        for rec in repomd.records:
            if rec.type == record_type:
                return os.path.join(repo_path, rec.location_href)
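
A minimal usage sketch, assuming the function is used as a plain helper; the repository path is hypothetical:

# Hypothetical call; the repository path is an assumption.
primary_xml = get_repomd_record_xml_path("/srv/repos/myrepo", "primary")
if primary_xml is None:
    print("primary metadata not found")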
Example #15
def third_method():
    """Parsing main metadata types (primary, filelists, other) at the same time.
    This approach significantly reduces memory footprint because we don't need
    to keep all the packages in memory; the user can handle them one by one.

    The API reflects xml_parse_primary/filelists/other except that it handles
    all of them at the same time.

    """
    def warningcb(warning_type, message):
        print("PARSER WARNING: %s" % message)
        return True

    repomd = cr.Repomd()
    cr.xml_parse_repomd(os.path.join(REPO_PATH, "repodata/repomd.xml"), repomd,
                        warningcb)

    primary_xml_path = None
    filelists_xml_path = None
    other_xml_path = None
    for record in repomd.records:
        if record.type == "primary":
            primary_xml_path = os.path.join(REPO_PATH, record.location_href)
        elif record.type == "filelists":
            filelists_xml_path = os.path.join(REPO_PATH, record.location_href)
        elif record.type == "other":
            other_xml_path = os.path.join(REPO_PATH, record.location_href)

    #
    # Main XML metadata parsing (primary, filelists, other)
    #

    def pkgcb(pkg):
        # Called when whole package entry from all 3 metadata xml files is parsed
        print_package_info(pkg)

    cr.xml_parse_main_metadata_together(primary_xml_path, filelists_xml_path,
                                        other_xml_path, None, pkgcb, warningcb,
                                        False)
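
print_package_info() is not defined in this snippet; a minimal sketch of such a callback helper, restricted to well-known cr.Package attributes, could look like this:

# Hypothetical helper; prints a few standard cr.Package attributes.
def print_package_info(pkg):
    print("{0}-{1}-{2}.{3}".format(pkg.name, pkg.version, pkg.release, pkg.arch))
    print("  location:", pkg.location_href)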
Example #16
def insert_in_repo(comp_type, repodata, filetype, extension, source, zchunk):
    """
    Inject a file into the repodata with the help of createrepo_c.

    Args:
        comp_type (int): createrepo_c compression type indication.
        repodata (str): The path to the repo where the metadata will be inserted.
        filetype (str): What type of metadata will be inserted by createrepo_c.
            This does allow any string to be inserted (custom types). There are some
            types which are used with dnf repos as primary, updateinfo, comps, filelist etc.
        extension (str): The file extension (xml, sqlite).
        source (str): A file path. File holds the dump of metadata until
            copied to the repodata folder.
        zchunk (bool): Whether zchunk data is supported for clients of this repo.
    """
    log.info('Inserting %s.%s into %s', filetype, extension, repodata)
    target_fname = os.path.join(repodata, '%s.%s' % (filetype, extension))
    shutil.copyfile(source, target_fname)
    repomd_xml = os.path.join(repodata, 'repomd.xml')
    repomd = cr.Repomd(repomd_xml)
    add_list = [(filetype, comp_type)]
    if zchunk and hasattr(
            cr, 'ZCK_COMPRESSION') and comp_type != cr.ZCK_COMPRESSION:
        add_list.append((filetype + "_zck", cr.ZCK_COMPRESSION))
    for (ft, ct) in add_list:
        # create a new record for our repomd.xml
        rec = cr.RepomdRecord(ft, target_fname)
        # compress our metadata file with the comp_type
        rec_comp = rec.compress_and_fill(cr.SHA256, ct)
        # add hash to the compressed metadata file
        rec_comp.rename_file()
        # set type of metadata
        rec_comp.type = ft
        # insert metadata about our metadata in repomd.xml
        repomd.set_record(rec_comp)
    with open(repomd_xml, 'w') as repomd_file:
        repomd_file.write(repomd.xml_dump())
    os.unlink(target_fname)
Example #17
def streaming_iterator():
    """Parsing main metadata types (primary, filelists, other) at the same time.
    This approach significantly reduces memory footprint because we don't need
    to keep all the packages in memory; the user can handle them one by one.

    This is the most flexible method, and the recommended one if you need all of the
    RPM metadata. If you only need to parse one file it might not be the most efficient.
    """
    def warningcb(warning_type, message):
        print("PARSER WARNING: %s" % message)
        return True

    repomd = cr.Repomd()
    cr.xml_parse_repomd(os.path.join(REPO_PATH, "repodata/repomd.xml"), repomd,
                        warningcb)

    primary_xml_path = None
    filelists_xml_path = None
    other_xml_path = None
    for record in repomd.records:
        if record.type == "primary":
            primary_xml_path = os.path.join(REPO_PATH, record.location_href)
        elif record.type == "filelists":
            filelists_xml_path = os.path.join(REPO_PATH, record.location_href)
        elif record.type == "other":
            other_xml_path = os.path.join(REPO_PATH, record.location_href)

    #
    # Main XML metadata parsing (primary, filelists, other)
    #
    package_iterator = cr.PackageIterator(primary_path=primary_xml_path,
                                          filelists_path=filelists_xml_path,
                                          other_path=other_xml_path,
                                          warningcb=warningcb)

    for pkg in package_iterator:
        # Called when whole package entry from all 3 metadata xml files is parsed
        print_package_info(pkg)
Example #18
def parse_repodata(path):
    """
    Return a list of packages included in this repository
    """
    try:
        repomd = cr.Repomd(os.path.join(path, "repodata/repomd.xml"))
    except OSError as e:
        logging.error(e)
        exit(2)

    primary_xml_path = None
    for record in repomd.records:
        if record.type == "primary":
            primary_xml_path = record.location_href
            break

    if primary_xml_path is None:
        logging.error("primary metadata not found in %s", path)
        exit(2)

    def warningcb(warning_type, message):
        """Optional callback for warnings about
        weird stuff and formatting in XML.
        :param warning_type: Integer value. One from
                             the XML_WARNING_* constants.
        :param message: String message.
        """
        logging.warning("PARSER WARNING: %s" % message)
        return True

    packages = []

    def pkgcb(pkg):
        # Called when whole package entry in xml is parsed
        packages.append(pkg)

    cr.xml_parse_primary(os.path.join(path, primary_xml_path),
                         pkgcb=pkgcb,
                         do_files=False,
                         warningcb=warningcb)

    return packages
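
A short usage sketch for parse_repodata(); the repository path is hypothetical:

# Hypothetical call; the path is an assumption.
packages = parse_repodata("/srv/repos/myrepo")
print(len(packages), "packages found")
for pkg in packages:
    print(pkg.name, pkg.version, pkg.release)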
Example #19
def parse_repomd(path):
    repomd = cr.Repomd(path)
    print "Revision:", repomd.revision
    if repomd.contenthash:
        print "Contenthash:", repomd.contenthash
        print "Contenthash type:", repomd.contenthash_type
    print "Repo tags:", repomd.repo_tags
    print "Content tags:", repomd.content_tags
    print "Distro tags:", repomd.distro_tags
    print
    for rec in repomd.records:
        print "Type:", rec.type
        print "Location href:", rec.location_href
        print "Location base:", rec.location_base
        print "Checksum:", rec.checksum
        print "Checksum type:", rec.checksum_type
        print "Checksum open:", rec.checksum_open
        print "Checksum open type:", rec.checksum_open_type
        print "Timestamp:", rec.timestamp
        print "Size:", rec.size
        print "Size open:", rec.size_open
        if rec.db_ver:
            print "Db version:", rec.db_ver
        print
Example #20
def parse_repomd(path):
    repomd = cr.Repomd(path)
    print("Revision:", repomd.revision)
    if repomd.contenthash:
        print("Contenthash:", repomd.contenthash)
        print("Contenthash type:", repomd.contenthash_type)
    print("Repo tags:", repomd.repo_tags)
    print("Content tags:", repomd.content_tags)
    print("Distro tags:", repomd.distro_tags)
    print()
    for rec in repomd.records:
        print("Type:", rec.type)
        print("Location href:", rec.location_href)
        print("Location base:", rec.location_base)
        print("Checksum:", rec.checksum)
        print("Checksum type:", rec.checksum_type)
        print("Checksum open:", rec.checksum_open)
        print("Checksum open type:", rec.checksum_open_type)
        print("Timestamp:", rec.timestamp)
        print("Size:", rec.size)
        print("Size open:", rec.size_open)
        if rec.db_ver:
            print("Db version:", rec.db_ver)
        print()
Example #21
def deltareposrecord_from_repopath(path, prefix_to_strip=None, logger=None):
    """Create DeltaRepoRecord object from a delta repository

    :param path: Path to a directory where a deltarepo lives
    :type path: str
    :param prefix_to_strip: Path prefix to strip from a path in the record
    :type prefix_to_strip: str or None
    :param logger: A logger
    :type logger: logging.Logger or None
    """

    # Prepare paths
    path = os.path.abspath(path)
    stripped_path = path
    if prefix_to_strip:
        abs_prefix_to_strip = os.path.abspath(prefix_to_strip)
        if path.startswith(abs_prefix_to_strip):
            stripped_path = os.path.relpath(path, abs_prefix_to_strip)

    # Parse repomd.xml of the delta repo
    repomd_path = os.path.join(path, "repodata/repomd.xml")
    repomd = cr.Repomd(repomd_path)

    deltametadata_path = None
    for repomd_rec in repomd.records:
        if repomd_rec.type == "deltametadata" and repomd_rec.location_href:
            deltametadata_path = os.path.join(path, repomd_rec.location_href)

    if not deltametadata_path:
        raise DeltaRepoError("Not a delta repository: {0}".format(path))

    # Parse deltametadata.xml of the delta repo
    dm = deltarepo.DeltaMetadata()
    dm.load(deltametadata_path)

    # Prepare DeltaRepoRecord aka <deltarepo>
    rec = deltarepo.DeltaRepoRecord()
    rec.location_base = None
    rec.location_href = stripped_path
    rec.revision_src = dm.revision_src
    rec.revision_dst = dm.revision_dst
    rec.contenthash_src = dm.contenthash_src
    rec.contenthash_dst = dm.contenthash_dst
    rec.contenthash_type = dm.contenthash_type
    rec.timestamp_src = dm.timestamp_src
    rec.timestamp_dst = dm.timestamp_dst

    # Prepare <data> elements with info about files in the repo
    for repomd_rec in repomd.records:
        if not repomd_rec.type:
            continue
        if isnonnegativeint(repomd_rec.size):
            rec.set_data(repomd_rec.type, repomd_rec.size)
        elif isnonnegativeint(repomd_rec.size_open):
            rec.set_data(repomd_rec.type, repomd_rec.open_size)

    # Collect info about repomd.xml file of the delta repo
    rec.repomd_timestamp = int(os.path.getmtime(repomd_path))
    rec.repomd_size = os.path.getsize(repomd_path)
    checksumval = compute_file_checksum(repomd_path)
    rec.repomd_checksums = [("sha256", checksumval)]

    return rec
Example #22
def publish(repository_version_pk):
    """
    Create a Publication based on a RepositoryVersion.

    Args:
        repository_version_pk (str): Create a publication from this repository version.
    """
    repository_version = RepositoryVersion.objects.get(
        pk=repository_version_pk)

    log.info(
        _('Publishing: repository={repo}, version={version}').format(
            repo=repository_version.repository.name,
            version=repository_version.number,
        ))

    with WorkingDirectory():
        with RpmPublication.create(repository_version) as publication:
            packages = populate(publication)

            # Prepare metadata files
            repomd_path = os.path.join(os.getcwd(), "repomd.xml")
            pri_xml_path = os.path.join(os.getcwd(), "primary.xml.gz")
            fil_xml_path = os.path.join(os.getcwd(), "filelists.xml.gz")
            oth_xml_path = os.path.join(os.getcwd(), "other.xml.gz")
            pri_db_path = os.path.join(os.getcwd(), "primary.sqlite")
            fil_db_path = os.path.join(os.getcwd(), "filelists.sqlite")
            oth_db_path = os.path.join(os.getcwd(), "other.sqlite")
            upd_xml_path = os.path.join(os.getcwd(), "updateinfo.xml.gz")

            pri_xml = cr.PrimaryXmlFile(pri_xml_path)
            fil_xml = cr.FilelistsXmlFile(fil_xml_path)
            oth_xml = cr.OtherXmlFile(oth_xml_path)
            pri_db = cr.PrimarySqlite(pri_db_path)
            fil_db = cr.FilelistsSqlite(fil_db_path)
            oth_db = cr.OtherSqlite(oth_db_path)
            upd_xml = cr.UpdateInfoXmlFile(upd_xml_path)

            pri_xml.set_num_of_pkgs(len(packages))
            fil_xml.set_num_of_pkgs(len(packages))
            oth_xml.set_num_of_pkgs(len(packages))

            # Process all packages
            for package in packages:
                pkg = package.to_createrepo_c()
                pkg.location_href = package.contentartifact_set.first(
                ).relative_path
                pri_xml.add_pkg(pkg)
                fil_xml.add_pkg(pkg)
                oth_xml.add_pkg(pkg)
                pri_db.add_pkg(pkg)
                fil_db.add_pkg(pkg)
                oth_db.add_pkg(pkg)

            # Process update records
            for update_record in UpdateRecord.objects.filter(
                    pk__in=publication.repository_version.content):
                upd_xml.add_chunk(update_record_xml(update_record))

            pri_xml.close()
            fil_xml.close()
            oth_xml.close()
            upd_xml.close()

            repomd = cr.Repomd()

            repomdrecords = (("primary", pri_xml_path,
                              pri_db), ("filelists", fil_xml_path, fil_db),
                             ("other", oth_xml_path,
                              oth_db), ("primary_db", pri_db_path, None),
                             ("filelists_db", fil_db_path,
                              None), ("other_db", oth_db_path, None),
                             ("updateinfo", upd_xml_path, None))

            sqlite_files = ("primary_db", "filelists_db", "other_db")
            for name, path, db_to_update in repomdrecords:
                record = cr.RepomdRecord(name, path)
                if name in sqlite_files:
                    record_bz = record.compress_and_fill(cr.SHA256, cr.BZ2)
                    record_bz.type = name
                    record_bz.rename_file()
                    path = record_bz.location_href.split('/')[-1]
                    repomd.set_record(record_bz)
                else:
                    record.fill(cr.SHA256)
                    if db_to_update:
                        db_to_update.dbinfo_update(record.checksum)
                        db_to_update.close()
                    record.rename_file()
                    path = record.location_href.split('/')[-1]
                    repomd.set_record(record)
                metadata = PublishedMetadata(
                    relative_path=os.path.join(REPODATA_PATH,
                                               os.path.basename(path)),
                    publication=publication,
                    file=File(open(os.path.basename(path), 'rb')))
                metadata.save()

            with open(repomd_path, "w") as repomd_f:
                repomd_f.write(repomd.xml_dump())

            metadata = PublishedMetadata(
                relative_path=os.path.join(REPODATA_PATH,
                                           os.path.basename(repomd_path)),
                publication=publication,
                file=File(open(os.path.basename(repomd_path), 'rb')))
            metadata.save()
Example #23
    def test_xml_parser_repomd_repo01(self):

        warnings = []

        def warningcb(warn_type, msg):
            warnings.append((warn_type, msg))

        repomd = cr.Repomd()

        cr.xml_parse_repomd(REPO_01_REPOMD, repomd, warningcb)

        self.assertEqual(warnings, [])

        self.assertEqual(repomd.revision, "1334667230")
        self.assertEqual(repomd.repo_tags, [])
        self.assertEqual(repomd.distro_tags, [])
        self.assertEqual(repomd.content_tags, [])
        self.assertEqual(len(repomd.records), 3)

        self.assertEqual(repomd.records[0].type, "filelists")
        self.assertEqual(repomd.records[0].location_real, None)
        self.assertEqual(
            repomd.records[0].location_href,
            "repodata/c7db035d0e6f1b2e883a7fa3229e2d2be70c05a8b8d2b57dbb5f9c1a67483b6c-filelists.xml.gz"
        )
        self.assertEqual(
            repomd.records[0].checksum,
            "c7db035d0e6f1b2e883a7fa3229e2d2be70c05a8b8d2b57dbb5f9c1a67483b6c")
        self.assertEqual(repomd.records[0].checksum_type, "sha256")
        self.assertEqual(
            repomd.records[0].checksum_open,
            "85bc611be5d81ac8da2fe01e98ef741d243d1518fcc46ada70660020803fbf09")
        self.assertEqual(repomd.records[0].checksum_open_type, "sha256")
        self.assertEqual(repomd.records[0].timestamp, 1334667230)
        self.assertEqual(repomd.records[0].size, 273)
        self.assertEqual(repomd.records[0].size_open, 389)
        self.assertEqual(repomd.records[0].db_ver, 0)

        self.assertEqual(repomd.records[1].type, "other")
        self.assertEqual(repomd.records[1].location_real, None)
        self.assertEqual(
            repomd.records[1].location_href,
            "repodata/b752a73d9efd4006d740f943db5fb7c2dd77a8324bd99da92e86bd55a2c126ef-other.xml.gz"
        )
        self.assertEqual(
            repomd.records[1].checksum,
            "b752a73d9efd4006d740f943db5fb7c2dd77a8324bd99da92e86bd55a2c126ef")
        self.assertEqual(repomd.records[1].checksum_type, "sha256")
        self.assertEqual(
            repomd.records[1].checksum_open,
            "da6096c924349af0c326224a33be0cdb26897fbe3d25477ac217261652449445")
        self.assertEqual(repomd.records[1].checksum_open_type, "sha256")
        self.assertEqual(repomd.records[1].timestamp, 1334667230)
        self.assertEqual(repomd.records[1].size, 332)
        self.assertEqual(repomd.records[1].size_open, 530)
        self.assertEqual(repomd.records[1].db_ver, 0)

        self.assertEqual(repomd.records[2].type, "primary")
        self.assertEqual(repomd.records[2].location_real, None)
        self.assertEqual(
            repomd.records[2].location_href,
            "repodata/6c662d665c24de9a0f62c17d8fa50622307739d7376f0d19097ca96c6d7f5e3e-primary.xml.gz"
        )
        self.assertEqual(
            repomd.records[2].checksum,
            "6c662d665c24de9a0f62c17d8fa50622307739d7376f0d19097ca96c6d7f5e3e")
        self.assertEqual(repomd.records[2].checksum_type, "sha256")
        self.assertEqual(
            repomd.records[2].checksum_open,
            "0fc6cadf97d515e87491d24dc9712d8ddaf2226a21ae7f131ff42d71a877c496")
        self.assertEqual(repomd.records[2].checksum_open_type, "sha256")
        self.assertEqual(repomd.records[2].timestamp, 1334667230)
        self.assertEqual(repomd.records[2].size, 782)
        self.assertEqual(repomd.records[2].size_open, 2085)
        self.assertEqual(repomd.records[2].db_ver, 0)
Example #24
    async def run(self):
        """
        Build `DeclarativeContent` from the repodata.
        """
        remote_url = self.new_url or self.remote.url
        remote_url = remote_url if remote_url[-1] == "/" else f"{remote_url}/"
        optimize_sync = self.optimize

        progress_data = dict(message='Downloading Metadata Files',
                             code='downloading.metadata')
        with ProgressReport(**progress_data) as metadata_pb:
            downloader = self.remote.get_downloader(
                url=urljoin(remote_url, 'repodata/repomd.xml'))
            # TODO: decide how to distinguish between a mirror list and a normal repo
            result = await downloader.run()
            metadata_pb.increment()

            repomd_path = result.path
            repomd = cr.Repomd(repomd_path)

            # Caution: we are not storing when the remote was last updated, so the order of this
            # logic must remain in this order where we first check the version number as other
            # changes than sync could have taken place such that the date or repo version will be
            # different from last sync
            if (optimize_sync and self.repository.last_sync_remote
                    and self.remote.pk == self.repository.last_sync_remote.pk
                    and (self.repository.last_sync_repo_version
                         == self.repository.latest_version().number)
                    and (self.remote.pulp_last_updated <=
                         self.repository.latest_version().pulp_created)
                    and is_previous_version(
                        repomd.revision,
                        self.repository.last_sync_revision_number)):
                optimize_data = dict(message='Optimizing Sync',
                                     code='optimizing.sync')
                with ProgressReport(**optimize_data) as optimize_pb:
                    optimize_pb.done = 1
                    optimize_pb.save()
                    return

            self.repository.last_sync_revision_number = repomd.revision

            if self.treeinfo:
                d_artifacts = [
                    DeclarativeArtifact(
                        artifact=Artifact(),
                        url=urljoin(remote_url, self.treeinfo["filename"]),
                        relative_path=".treeinfo",
                        remote=self.remote,
                        deferred_download=False,
                    )
                ]
                for path, checksum in self.treeinfo["download"][
                        "images"].items():
                    artifact = Artifact(**checksum)
                    da = DeclarativeArtifact(
                        artifact=artifact,
                        url=urljoin(remote_url, path),
                        relative_path=path,
                        remote=self.remote,
                        deferred_download=self.deferred_download)
                    d_artifacts.append(da)

                distribution_tree = DistributionTree(
                    **self.treeinfo["distribution_tree"])
                dc = DeclarativeContent(content=distribution_tree,
                                        d_artifacts=d_artifacts)
                dc.extra_data = self.treeinfo
                await self.put(dc)

            package_repodata_urls = {}
            downloaders = []
            modulemd_list = list()
            dc_groups = []
            dc_categories = []
            dc_environments = []
            nevra_to_module = defaultdict(dict)
            pkgname_to_groups = defaultdict(list)
            group_to_categories = defaultdict(list)
            group_to_environments = defaultdict(list)
            optionalgroup_to_environments = defaultdict(list)
            modulemd_results = None
            comps_downloader = None
            main_types = set()
            checksums = {}

            for record in repomd.records:
                checksums[record.type] = record.checksum_type.upper()
                if record.type in PACKAGE_REPODATA:
                    main_types.update([record.type])
                    package_repodata_urls[record.type] = urljoin(
                        remote_url, record.location_href)

                elif record.type in UPDATE_REPODATA:
                    updateinfo_url = urljoin(remote_url, record.location_href)
                    downloader = self.remote.get_downloader(url=updateinfo_url)
                    downloaders.append([downloader.run()])

                elif record.type in COMPS_REPODATA:
                    comps_url = urljoin(remote_url, record.location_href)
                    comps_downloader = self.remote.get_downloader(
                        url=comps_url)

                elif record.type in SKIP_REPODATA:
                    continue

                elif '_zck' in record.type:
                    continue

                elif record.type in MODULAR_REPODATA:
                    modules_url = urljoin(remote_url, record.location_href)
                    modulemd_downloader = self.remote.get_downloader(
                        url=modules_url)
                    modulemd_results = await modulemd_downloader.run()

                elif record.type not in PACKAGE_DB_REPODATA:
                    file_data = {
                        record.checksum_type: record.checksum,
                        "size": record.size
                    }
                    da = DeclarativeArtifact(
                        artifact=Artifact(**file_data),
                        url=urljoin(remote_url, record.location_href),
                        relative_path=record.location_href,
                        remote=self.remote,
                        deferred_download=False)
                    repo_metadata_file = RepoMetadataFile(
                        data_type=record.type,
                        checksum_type=record.checksum_type,
                        checksum=record.checksum,
                    )
                    dc = DeclarativeContent(content=repo_metadata_file,
                                            d_artifacts=[da])
                    await self.put(dc)

            missing_type = set(PACKAGE_REPODATA) - main_types
            if missing_type:
                raise FileNotFoundError(
                    _("XML file(s): {filename} not found").format(
                        filename=", ".join(missing_type)))

            self.repository.original_checksum_types = checksums

            # we have to sync module.yaml first if it exists, to make relations to packages
            if modulemd_results:
                modulemd_index = mmdlib.ModuleIndex.new()
                open_func = gzip.open if modulemd_results.url.endswith(
                    '.gz') else open
                with open_func(modulemd_results.path, 'r') as moduleyaml:
                    content = moduleyaml.read()
                    module_content = content if isinstance(
                        content, str) else content.decode()
                    modulemd_index.update_from_string(module_content, True)

                modulemd_names = modulemd_index.get_module_names() or []
                modulemd_all = parse_modulemd(modulemd_names, modulemd_index)

                # Parsing modules happens all at one time, and from here on no useful work happens.
                # So just report that it finished this stage.
                modulemd_pb_data = {
                    'message': 'Parsed Modulemd',
                    'code': 'parsing.modulemds'
                }
                with ProgressReport(**modulemd_pb_data) as modulemd_pb:
                    modulemd_total = len(modulemd_all)
                    modulemd_pb.total = modulemd_total
                    modulemd_pb.done = modulemd_total

                for modulemd in modulemd_all:
                    artifact = modulemd.pop('artifact')
                    relative_path = '{}{}{}{}{}snippet'.format(
                        modulemd[PULP_MODULE_ATTR.NAME],
                        modulemd[PULP_MODULE_ATTR.STREAM],
                        modulemd[PULP_MODULE_ATTR.VERSION],
                        modulemd[PULP_MODULE_ATTR.CONTEXT],
                        modulemd[PULP_MODULE_ATTR.ARCH])
                    da = DeclarativeArtifact(artifact=artifact,
                                             relative_path=relative_path,
                                             url=modules_url)
                    modulemd_content = Modulemd(**modulemd)
                    dc = DeclarativeContent(content=modulemd_content,
                                            d_artifacts=[da])
                    dc.extra_data = defaultdict(list)

                    # dc.content.artifacts are Modulemd artifacts
                    for artifact in dc.content.artifacts:
                        nevra_to_module.setdefault(artifact, set()).add(dc)
                    modulemd_list.append(dc)

                # delete list now that we're done with it for memory savings
                del modulemd_all

                modulemd_default_names = parse_defaults(modulemd_index)

                # Parsing module-defaults happens all at one time, and from here on no useful
                # work happens. So just report that it finished this stage.
                modulemd_defaults_pb_data = {
                    'message': 'Parsed Modulemd-defaults',
                    'code': 'parsing.modulemd_defaults'
                }
                with ProgressReport(
                        **modulemd_defaults_pb_data) as modulemd_defaults_pb:
                    modulemd_defaults_total = len(modulemd_default_names)
                    modulemd_defaults_pb.total = modulemd_defaults_total
                    modulemd_defaults_pb.done = modulemd_defaults_total

                for default in modulemd_default_names:
                    artifact = default.pop('artifact')
                    relative_path = '{}{}snippet'.format(
                        default[PULP_MODULEDEFAULTS_ATTR.MODULE],
                        default[PULP_MODULEDEFAULTS_ATTR.STREAM])
                    da = DeclarativeArtifact(artifact=artifact,
                                             relative_path=relative_path,
                                             url=modules_url)
                    default_content = ModulemdDefaults(**default)
                    dc = DeclarativeContent(content=default_content,
                                            d_artifacts=[da])
                    await self.put(dc)

                # delete list now that we're done with it for memory savings
                del modulemd_default_names

            if comps_downloader:
                comps_result = await comps_downloader.run()

                comps = libcomps.Comps()
                comps.fromxml_f(comps_result.path)

                with ProgressReport(message='Parsed Comps',
                                    code='parsing.comps') as comps_pb:
                    comps_total = (len(comps.groups) + len(comps.categories) +
                                   len(comps.environments))
                    comps_pb.total = comps_total
                    comps_pb.done = comps_total

                if comps.langpacks:
                    langpack_dict = PackageLangpacks.libcomps_to_dict(
                        comps.langpacks)
                    packagelangpack = PackageLangpacks(
                        matches=strdict_to_dict(comps.langpacks),
                        digest=dict_digest(langpack_dict))
                    dc = DeclarativeContent(content=packagelangpack)
                    dc.extra_data = defaultdict(list)
                    await self.put(dc)

                if comps.categories:
                    for category in comps.categories:
                        category_dict = PackageCategory.libcomps_to_dict(
                            category)
                        category_dict['digest'] = dict_digest(category_dict)
                        packagecategory = PackageCategory(**category_dict)
                        dc = DeclarativeContent(content=packagecategory)
                        dc.extra_data = defaultdict(list)

                        if packagecategory.group_ids:
                            for group_id in packagecategory.group_ids:
                                group_to_categories[group_id['name']].append(
                                    dc)
                        dc_categories.append(dc)

                if comps.environments:
                    for environment in comps.environments:
                        environment_dict = PackageEnvironment.libcomps_to_dict(
                            environment)
                        environment_dict['digest'] = dict_digest(
                            environment_dict)
                        packageenvironment = PackageEnvironment(
                            **environment_dict)
                        dc = DeclarativeContent(content=packageenvironment)
                        dc.extra_data = defaultdict(list)

                        if packageenvironment.option_ids:
                            for option_id in packageenvironment.option_ids:
                                optionalgroup_to_environments[
                                    option_id['name']].append(dc)

                        if packageenvironment.group_ids:
                            for group_id in packageenvironment.group_ids:
                                group_to_environments[group_id['name']].append(
                                    dc)

                        dc_environments.append(dc)

                if comps.groups:
                    for group in comps.groups:
                        group_dict = PackageGroup.libcomps_to_dict(group)
                        group_dict['digest'] = dict_digest(group_dict)
                        packagegroup = PackageGroup(**group_dict)
                        dc = DeclarativeContent(content=packagegroup)
                        dc.extra_data = defaultdict(list)

                        if packagegroup.packages:
                            for package in packagegroup.packages:
                                pkgname_to_groups[package['name']].append(dc)

                        if dc.content.id in group_to_categories.keys():
                            for dc_category in group_to_categories[
                                    dc.content.id]:
                                dc.extra_data['category_relations'].append(
                                    dc_category)
                                dc_category.extra_data['packagegroups'].append(
                                    dc)

                        if dc.content.id in group_to_environments.keys():
                            for dc_environment in group_to_environments[
                                    dc.content.id]:
                                dc.extra_data['environment_relations'].append(
                                    dc_environment)
                                dc_environment.extra_data[
                                    'packagegroups'].append(dc)

                        if dc.content.id in optionalgroup_to_environments.keys(
                        ):
                            for dc_environment in optionalgroup_to_environments[
                                    dc.content.id]:
                                dc.extra_data['env_relations_optional'].append(
                                    dc_environment)
                                dc_environment.extra_data[
                                    'optionalgroups'].append(dc)

                        dc_groups.append(dc)

                for dc_category in dc_categories:
                    await self.put(dc_category)

                for dc_environment in dc_environments:
                    await self.put(dc_environment)

            # delete lists now that we're done with them for memory savings
            del dc_environments
            del dc_categories

            # to preserve order, downloaders are created after all repodata urls are identified
            package_repodata_downloaders = []
            for repodata_type in PACKAGE_REPODATA:
                downloader = self.remote.get_downloader(
                    url=package_repodata_urls[repodata_type])
                package_repodata_downloaders.append(downloader.run())

            downloaders.append(package_repodata_downloaders)

            # asyncio.gather is used to preserve the order of results for package repodata
            pending = [
                asyncio.gather(*downloaders_group)
                for downloaders_group in downloaders
            ]

            while pending:
                done, pending = await asyncio.wait(
                    pending, return_when=asyncio.FIRST_COMPLETED)
                for downloader in done:
                    try:
                        results = downloader.result()
                    except ClientResponseError as exc:
                        raise HTTPNotFound(
                            reason=_("File not found: {filename}").format(
                                filename=exc.request_info.url))
                    if results[0].url == package_repodata_urls['primary']:
                        primary_xml_path = results[0].path
                        filelists_xml_path = results[1].path
                        other_xml_path = results[2].path
                        metadata_pb.done += 3
                        metadata_pb.save()

                        packages = await RpmFirstStage.parse_repodata(
                            primary_xml_path, filelists_xml_path,
                            other_xml_path)
                        # skip SRPM if defined
                        if 'srpm' in self.skip_types:
                            packages = {
                                pkgId: pkg
                                for pkgId, pkg in packages.items()
                                if pkg.arch != 'src'
                            }

                        progress_data = {
                            'message': 'Parsed Packages',
                            'code': 'parsing.packages',
                            'total': len(packages),
                        }
                        with ProgressReport(**progress_data) as packages_pb:
                            for pkg in packages.values():
                                package = Package(
                                    **Package.createrepo_to_dict(pkg))
                                artifact = Artifact(size=package.size_package)
                                checksum_type = getattr(
                                    CHECKSUM_TYPES,
                                    package.checksum_type.upper())
                                setattr(artifact, checksum_type, package.pkgId)
                                url = urljoin(remote_url,
                                              package.location_href)
                                filename = os.path.basename(
                                    package.location_href)
                                da = DeclarativeArtifact(
                                    artifact=artifact,
                                    url=url,
                                    relative_path=filename,
                                    remote=self.remote,
                                    deferred_download=self.deferred_download)
                                dc = DeclarativeContent(content=package,
                                                        d_artifacts=[da])
                                dc.extra_data = defaultdict(list)

                                # find if a package relates to a modulemd
                                if dc.content.nevra in nevra_to_module.keys():
                                    dc.content.is_modular = True
                                    for dc_modulemd in nevra_to_module[
                                            dc.content.nevra]:
                                        dc.extra_data[
                                            'modulemd_relation'].append(
                                                dc_modulemd)
                                        dc_modulemd.extra_data[
                                            'package_relation'].append(dc)

                                if dc.content.name in pkgname_to_groups.keys():
                                    for dc_group in pkgname_to_groups[
                                            dc.content.name]:
                                        dc.extra_data[
                                            'group_relations'].append(dc_group)
                                        dc_group.extra_data[
                                            'related_packages'].append(dc)

                                packages_pb.increment()
                                await self.put(dc)

                    elif results[0].url == updateinfo_url:
                        updateinfo_xml_path = results[0].path
                        metadata_pb.increment()

                        updates = await RpmFirstStage.parse_updateinfo(
                            updateinfo_xml_path)

                        progress_data = {
                            'message': 'Parsed Advisories',
                            'code': 'parsing.advisories',
                            'total': len(updates),
                        }
                        with ProgressReport(**progress_data) as advisories_pb:
                            for update in updates:
                                update_record = UpdateRecord(
                                    **UpdateRecord.createrepo_to_dict(update))
                                update_record.digest = hash_update_record(
                                    update)
                                future_relations = {
                                    'collections': defaultdict(list),
                                    'references': []
                                }

                                for collection in update.collections:
                                    coll_dict = UpdateCollection.createrepo_to_dict(
                                        collection)
                                    coll = UpdateCollection(**coll_dict)

                                    for package in collection.packages:
                                        pkg_dict = UpdateCollectionPackage.createrepo_to_dict(
                                            package)
                                        pkg = UpdateCollectionPackage(
                                            **pkg_dict)
                                        future_relations['collections'][
                                            coll].append(pkg)

                                for reference in update.references:
                                    reference_dict = UpdateReference.createrepo_to_dict(
                                        reference)
                                    ref = UpdateReference(**reference_dict)
                                    future_relations['references'].append(ref)

                                advisories_pb.increment()
                                dc = DeclarativeContent(content=update_record)
                                dc.extra_data = future_relations
                                await self.put(dc)

            # now send modules down the pipeline since all relations have been set up
            for modulemd in modulemd_list:
                await self.put(modulemd)

            for dc_group in dc_groups:
                await self.put(dc_group)
Example #25
def create_repomd_xml(
    content,
    publication,
    checksum_types,
    extra_repomdrecords,
    sub_folder=None,
    metadata_signing_service=None,
):
    """
    Creates a repomd.xml file.

    Args:
        content(app.models.Content): content set
        publication(pulpcore.plugin.models.Publication): the publication
        checksum_types(dict): checksum types to use for the repo metadata and packages
        extra_repomdrecords(list): list of additional repo metadata records to include
        sub_folder(str): name of the folder for sub repos
        metadata_signing_service (pulpcore.app.models.AsciiArmoredDetachedSigningService):
            A reference to an associated signing service.

    """
    cwd = os.getcwd()
    repodata_path = REPODATA_PATH
    has_modules = False
    has_comps = False
    package_checksum_type = checksum_types.get("package")

    if sub_folder:
        cwd = os.path.join(cwd, sub_folder)
        repodata_path = os.path.join(sub_folder, repodata_path)

    # Prepare metadata files
    repomd_path = os.path.join(cwd, "repomd.xml")
    pri_xml_path = os.path.join(cwd, "primary.xml.gz")
    fil_xml_path = os.path.join(cwd, "filelists.xml.gz")
    oth_xml_path = os.path.join(cwd, "other.xml.gz")
    upd_xml_path = os.path.join(cwd, "updateinfo.xml.gz")
    mod_yml_path = os.path.join(cwd, "modules.yaml")
    comps_xml_path = os.path.join(cwd, "comps.xml")

    pri_xml = cr.PrimaryXmlFile(pri_xml_path)
    fil_xml = cr.FilelistsXmlFile(fil_xml_path)
    oth_xml = cr.OtherXmlFile(oth_xml_path)
    upd_xml = cr.UpdateInfoXmlFile(upd_xml_path)

    if publication.sqlite_metadata:
        pri_db_path = os.path.join(cwd, "primary.sqlite")
        fil_db_path = os.path.join(cwd, "filelists.sqlite")
        oth_db_path = os.path.join(cwd, "other.sqlite")
        pri_db = cr.PrimarySqlite(pri_db_path)
        fil_db = cr.FilelistsSqlite(fil_db_path)
        oth_db = cr.OtherSqlite(oth_db_path)

    packages = Package.objects.filter(pk__in=content)
    total_packages = packages.count()

    pri_xml.set_num_of_pkgs(total_packages)
    fil_xml.set_num_of_pkgs(total_packages)
    oth_xml.set_num_of_pkgs(total_packages)

    # We want to support publishing with a different checksum type than the one built into the
    # package itself, so we need to get the correct checksums somehow if there is an override.
    # We must also take into consideration that if the package has not been downloaded, the only
    # checksum available is the built-in one.
    #
    # Since this lookup goes from Package->Content->ContentArtifact->Artifact, performance is a
    # challenge. We use ContentArtifact as our starting point because it enables us to work with
    # simple foreign keys and avoid messing with the many-to-many relationship, which doesn't
    # work with select_related() and performs poorly with prefetch_related(). This is fine
    # because we know that Packages should only ever have one artifact per content.
    contentartifact_qs = (
        ContentArtifact.objects.filter(content__in=packages.only("pk")).
        select_related(
            # content__rpm_package is a bit of a hack, exploiting the way django sets up model
            # inheritance, but it works and is unlikely to break. All content artifacts being
            # accessed here have an associated Package since they originally came from the
            # Package queryset.
            "artifact",
            "content__rpm_package",
        ).only("artifact", "content__rpm_package__checksum_type",
               "content__rpm_package__pkgId"))

    pkg_to_hash = {}
    for ca in contentartifact_qs.iterator():
        pkgid = None
        if package_checksum_type:
            package_checksum_type = package_checksum_type.lower()
            pkgid = getattr(ca.artifact, package_checksum_type, None)
        if pkgid:
            pkg_to_hash[ca.content_id] = (package_checksum_type, pkgid)
        else:
            pkg_to_hash[ca.content_id] = (
                ca.content.rpm_package.checksum_type,
                ca.content.rpm_package.pkgId,
            )

    # Process all packages
    for package in packages.iterator():
        pkg = package.to_createrepo_c()

        # rewrite the checksum and checksum type with the desired ones
        (checksum, pkgId) = pkg_to_hash[package.pk]
        pkg.checksum_type = checksum
        pkg.pkgId = pkgId

        pkg_filename = os.path.basename(package.location_href)
        # Note: this can cause a collision when two different packages share
        # the same file name, e.g. a/name1.rpm and b/name1.rpm
        pkg.location_href = os.path.join(PACKAGES_DIRECTORY,
                                         pkg_filename[0].lower(), pkg_filename)
        pri_xml.add_pkg(pkg)
        fil_xml.add_pkg(pkg)
        oth_xml.add_pkg(pkg)
        if publication.sqlite_metadata:
            pri_db.add_pkg(pkg)
            fil_db.add_pkg(pkg)
            oth_db.add_pkg(pkg)

    # Process update records
    for update_record in UpdateRecord.objects.filter(
            pk__in=content).iterator():
        upd_xml.add_chunk(
            cr.xml_dump_updaterecord(update_record.to_createrepo_c()))

    # Process modulemd and modulemd_defaults
    with open(mod_yml_path, "ab") as mod_yml:
        for modulemd in Modulemd.objects.filter(pk__in=content).iterator():
            mod_yml.write(modulemd._artifacts.get().file.read())
            has_modules = True
        for default in ModulemdDefaults.objects.filter(
                pk__in=content).iterator():
            mod_yml.write(default._artifacts.get().file.read())
            has_modules = True

    # Process comps
    comps = libcomps.Comps()
    for pkg_grp in PackageGroup.objects.filter(pk__in=content).iterator():
        group = pkg_grp.pkg_grp_to_libcomps()
        comps.groups.append(group)
        has_comps = True
    for pkg_cat in PackageCategory.objects.filter(pk__in=content).iterator():
        cat = pkg_cat.pkg_cat_to_libcomps()
        comps.categories.append(cat)
        has_comps = True
    for pkg_env in PackageEnvironment.objects.filter(
            pk__in=content).iterator():
        env = pkg_env.pkg_env_to_libcomps()
        comps.environments.append(env)
        has_comps = True
    for pkg_lng in PackageLangpacks.objects.filter(pk__in=content).iterator():
        comps.langpacks = dict_to_strdict(pkg_lng.matches)
        has_comps = True

    comps.toxml_f(
        comps_xml_path,
        xml_options={
            "default_explicit": True,
            "empty_groups": True,
            "uservisible_explicit": True
        },
    )

    pri_xml.close()
    fil_xml.close()
    oth_xml.close()
    upd_xml.close()

    repomd = cr.Repomd()

    if publication.sqlite_metadata:
        repomdrecords = [
            ("primary", pri_xml_path, pri_db),
            ("filelists", fil_xml_path, fil_db),
            ("other", oth_xml_path, oth_db),
            ("primary_db", pri_db_path, None),
            ("filelists_db", fil_db_path, None),
            ("other_db", oth_db_path, None),
            ("updateinfo", upd_xml_path, None),
        ]
    else:
        repomdrecords = [
            ("primary", pri_xml_path, None),
            ("filelists", fil_xml_path, None),
            ("other", oth_xml_path, None),
            ("updateinfo", upd_xml_path, None),
        ]

    if has_modules:
        repomdrecords.append(("modules", mod_yml_path, None))

    if has_comps:
        repomdrecords.append(("group", comps_xml_path, None))

    repomdrecords.extend(extra_repomdrecords)

    sqlite_files = ("primary_db", "filelists_db", "other_db")
    for name, path, db_to_update in repomdrecords:
        record = cr.RepomdRecord(name, path)
        checksum_type = get_checksum_type(name, checksum_types)
        if name in sqlite_files:
            record_bz = record.compress_and_fill(checksum_type, cr.BZ2)
            record_bz.type = name
            record_bz.rename_file()
            path = record_bz.location_href.split("/")[-1]
            repomd.set_record(record_bz)
        else:
            record.fill(checksum_type)
            if db_to_update:
                db_to_update.dbinfo_update(record.checksum)
                db_to_update.close()
            record.rename_file()
            path = record.location_href.split("/")[-1]
            repomd.set_record(record)

        if sub_folder:
            path = os.path.join(sub_folder, path)

        PublishedMetadata.create_from_file(
            relative_path=os.path.join(repodata_path, os.path.basename(path)),
            publication=publication,
            file=File(open(path, "rb")),
        )

    with open(repomd_path, "w") as repomd_f:
        repomd_f.write(repomd.xml_dump())

    if metadata_signing_service:
        signing_service = AsciiArmoredDetachedSigningService.objects.get(
            pk=metadata_signing_service.pk)
        sign_results = signing_service.sign(repomd_path)

        # publish a signed file
        PublishedMetadata.create_from_file(
            relative_path=os.path.join(repodata_path,
                                       os.path.basename(sign_results["file"])),
            publication=publication,
            file=File(open(sign_results["file"], "rb")),
        )

        # publish a detached signature
        PublishedMetadata.create_from_file(
            relative_path=os.path.join(
                repodata_path, os.path.basename(sign_results["signature"])),
            publication=publication,
            file=File(open(sign_results["signature"], "rb")),
        )

        # publish a public key required for further verification
        PublishedMetadata.create_from_file(
            relative_path=os.path.join(repodata_path,
                                       os.path.basename(sign_results["key"])),
            publication=publication,
            file=File(open(sign_results["key"], "rb")),
        )
    else:
        PublishedMetadata.create_from_file(
            relative_path=os.path.join(repodata_path,
                                       os.path.basename(repomd_path)),
            publication=publication,
            file=File(open(repomd_path, "rb")),
        )
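
The per-record checksum handling above reduces to a small createrepo_c pattern: fill a RepomdRecord with the checksum type chosen at publish time and register it. A minimal sketch, assuming the metadata file already exists; the path and the SHA-512 choice are illustrative, not taken from the code above.

import createrepo_c as cr

repomd = cr.Repomd()
record = cr.RepomdRecord("primary", "repodata/primary.xml.gz")
record.fill(cr.SHA512)      # compute size/checksum fields with the chosen digest
record.rename_file()        # prefix the file name with its checksum
repomd.set_record(record)

with open("repodata/repomd.xml", "w") as f:
    f.write(repomd.xml_dump())
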
Example no. 26
0
def create_repomd_xml(content, publication, extra_repomdrecords, sub_folder=None):
    """
    Creates a repomd.xml file.

    Args:
        content(app.models.Content): content set
        publication(pulpcore.plugin.models.Publication): the publication
        extra_repomdrecords(list): list with data relative to repo metadata files
        sub_folder(str): name of the folder for sub repos

    """
    cwd = os.getcwd()
    repodata_path = REPODATA_PATH
    has_modules = False
    has_comps = False

    if sub_folder:
        cwd = os.path.join(cwd, sub_folder)
        repodata_path = os.path.join(sub_folder, repodata_path)

    # Prepare metadata files
    repomd_path = os.path.join(cwd, "repomd.xml")
    pri_xml_path = os.path.join(cwd, "primary.xml.gz")
    fil_xml_path = os.path.join(cwd, "filelists.xml.gz")
    oth_xml_path = os.path.join(cwd, "other.xml.gz")
    pri_db_path = os.path.join(cwd, "primary.sqlite")
    fil_db_path = os.path.join(cwd, "filelists.sqlite")
    oth_db_path = os.path.join(cwd, "other.sqlite")
    upd_xml_path = os.path.join(cwd, "updateinfo.xml.gz")
    mod_yml_path = os.path.join(cwd, "modules.yaml")
    comps_xml_path = os.path.join(cwd, "comps.xml")

    pri_xml = cr.PrimaryXmlFile(pri_xml_path)
    fil_xml = cr.FilelistsXmlFile(fil_xml_path)
    oth_xml = cr.OtherXmlFile(oth_xml_path)
    pri_db = cr.PrimarySqlite(pri_db_path)
    fil_db = cr.FilelistsSqlite(fil_db_path)
    oth_db = cr.OtherSqlite(oth_db_path)
    upd_xml = cr.UpdateInfoXmlFile(upd_xml_path)

    packages = Package.objects.filter(pk__in=content)
    total_packages = packages.count()

    pri_xml.set_num_of_pkgs(total_packages)
    fil_xml.set_num_of_pkgs(total_packages)
    oth_xml.set_num_of_pkgs(total_packages)

    # Process all packages
    for package in packages.iterator():
        pkg = package.to_createrepo_c()
        pkg.location_href = package.contentartifact_set.only('relative_path').first().relative_path
        pri_xml.add_pkg(pkg)
        fil_xml.add_pkg(pkg)
        oth_xml.add_pkg(pkg)
        pri_db.add_pkg(pkg)
        fil_db.add_pkg(pkg)
        oth_db.add_pkg(pkg)

    # Process update records
    for update_record in UpdateRecord.objects.filter(pk__in=content).iterator():
        upd_xml.add_chunk(cr.xml_dump_updaterecord(update_record.to_createrepo_c()))

    # Process modulemd and modulemd_defaults
    with open(mod_yml_path, 'ab') as mod_yml:
        for modulemd in Modulemd.objects.filter(pk__in=content).iterator():
            mod_yml.write(modulemd._artifacts.get().file.read())
            has_modules = True
        for default in ModulemdDefaults.objects.filter(pk__in=content).iterator():
            mod_yml.write(default._artifacts.get().file.read())
            has_modules = True

    # Process comps
    comps = libcomps.Comps()
    for pkg_grp in PackageGroup.objects.filter(pk__in=content).iterator():
        group = pkg_grp.pkg_grp_to_libcomps()
        comps.groups.append(group)
        has_comps = True
    for pkg_cat in PackageCategory.objects.filter(pk__in=content).iterator():
        cat = pkg_cat.pkg_cat_to_libcomps()
        comps.categories.append(cat)
        has_comps = True
    for pkg_env in PackageEnvironment.objects.filter(pk__in=content).iterator():
        env = pkg_env.pkg_env_to_libcomps()
        comps.environments.append(env)
        has_comps = True
    for pkg_lng in PackageLangpacks.objects.filter(pk__in=content).iterator():
        comps.langpacks = dict_to_strdict(pkg_lng.matches)
        has_comps = True

    comps.toxml_f(comps_xml_path, xml_options={"default_explicit": True,
                                               "empty_groups": True,
                                               "uservisible_explicit": True})

    pri_xml.close()
    fil_xml.close()
    oth_xml.close()
    upd_xml.close()

    repomd = cr.Repomd()

    repomdrecords = [("primary", pri_xml_path, pri_db),
                     ("filelists", fil_xml_path, fil_db),
                     ("other", oth_xml_path, oth_db),
                     ("primary_db", pri_db_path, None),
                     ("filelists_db", fil_db_path, None),
                     ("other_db", oth_db_path, None),
                     ("updateinfo", upd_xml_path, None)]

    if has_modules:
        repomdrecords.append(("modules", mod_yml_path, None))

    if has_comps:
        repomdrecords.append(("group", comps_xml_path, None))

    repomdrecords.extend(extra_repomdrecords)

    sqlite_files = ("primary_db", "filelists_db", "other_db")
    for name, path, db_to_update in repomdrecords:
        record = cr.RepomdRecord(name, path)
        if name in sqlite_files:
            record_bz = record.compress_and_fill(cr.SHA256, cr.BZ2)
            record_bz.type = name
            record_bz.rename_file()
            path = record_bz.location_href.split('/')[-1]
            repomd.set_record(record_bz)
        else:
            record.fill(cr.SHA256)
            if db_to_update:
                db_to_update.dbinfo_update(record.checksum)
                db_to_update.close()
            record.rename_file()
            path = record.location_href.split('/')[-1]
            repomd.set_record(record)

        if sub_folder:
            path = os.path.join(sub_folder, path)

        PublishedMetadata.create_from_file(
            relative_path=os.path.join(repodata_path, os.path.basename(path)),
            publication=publication,
            file=File(open(path, 'rb'))
        )

    with open(repomd_path, "w") as repomd_f:
        repomd_f.write(repomd.xml_dump())

    PublishedMetadata.create_from_file(
        relative_path=os.path.join(repodata_path, os.path.basename(repomd_path)),
        publication=publication,
        file=File(open(repomd_path, 'rb'))
    )
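
The dbinfo_update() call used above ties each sqlite database to the XML file it was generated from: the database stores the checksum of the freshly filled record before being closed. A minimal sketch of that pairing, assuming primary.xml.gz already exists; the paths are placeholders.

import createrepo_c as cr

pri_db = cr.PrimarySqlite("primary.sqlite")

record = cr.RepomdRecord("primary", "primary.xml.gz")
record.fill(cr.SHA256)

# Store the checksum of primary.xml.gz inside the sqlite database so
# clients can verify the two belong together.
pri_db.dbinfo_update(record.checksum)
pri_db.close()
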
Example no. 27
0
def cr_create_md(repodata_path, pkglist=None, log=sys.stdout):
    if pkglist is None:
        # cr_get_pkg_list() and repo_base come from elsewhere in the original module
        pkglist = cr_get_pkg_list(repo_base, log)

    pri_xml_path = os.path.join(repodata_path, 'primary.xml.gz')
    fil_xml_path = os.path.join(repodata_path, 'filelists.xml.gz')
    oth_xml_path = os.path.join(repodata_path, 'other.xml.gz')
    pri_db_path = os.path.join(repodata_path, 'primary.sqlite')
    fil_db_path = os.path.join(repodata_path, 'filelists.sqlite')
    oth_db_path = os.path.join(repodata_path, 'other.sqlite')

    def __create_xml(queues, xml_path, xml_func, name):
        cs = cr.ContentStat(cr.SHA256)
        xml = xml_func(xml_path, contentstat=cs)

        xml.set_num_of_pkgs(len(pkglist))

        for pkg in pkglist:
            xml.add_pkg(pkg)

        xml.close()

        queues['master'].put(
            ((name, xml_path), (cs.checksum, cs.size, cs.checksum_type)), True)

    def __create_db(queues, db_path, db_func, name):
        db = db_func(db_path)

        for pkg in pkglist:
            db.add_pkg(pkg)

        db.dbinfo_update(queues[name].get(True))

        db.close()

        cs = cr.ContentStat(cr.SHA256)
        cr.compress_file_with_stat(
            db_path, db_path + cr.compression_suffix(cr.BZ2_COMPRESSION),
            cr.BZ2_COMPRESSION, cs)
        os.remove(db_path)
        queues['master'].put(
            ((name + '_db',
              db_path + cr.compression_suffix(cr.BZ2_COMPRESSION)),
             (cs.checksum, cs.size, cs.checksum_type)), True)

    queue_manager = multiprocessing.Manager()
    queues = dict({
        'master': queue_manager.Queue(),
        'primary': queue_manager.Queue(),
        'filelists': queue_manager.Queue(),
        'other': queue_manager.Queue(),
    })

    log.write('[%s] Generating metadata in %s\n' % (stamp(), repodata_path))

    th = [0] * 6
    th[0] = multiprocessing.Process(target=__create_xml,
                                    args=(queues, pri_xml_path,
                                          cr.PrimaryXmlFile, 'primary'))
    th[0].start()
    th[1] = multiprocessing.Process(target=__create_xml,
                                    args=(queues, fil_xml_path,
                                          cr.FilelistsXmlFile, 'filelists'))
    th[1].start()
    th[2] = multiprocessing.Process(target=__create_xml,
                                    args=(queues, oth_xml_path,
                                          cr.OtherXmlFile, 'other'))
    th[2].start()
    th[3] = multiprocessing.Process(target=__create_db,
                                    args=(queues, pri_db_path,
                                          cr.PrimarySqlite, 'primary'))
    th[3].start()
    th[4] = multiprocessing.Process(target=__create_db,
                                    args=(queues, fil_db_path,
                                          cr.FilelistsSqlite, 'filelists'))
    th[4].start()
    th[5] = multiprocessing.Process(target=__create_db,
                                    args=(queues, oth_db_path, cr.OtherSqlite,
                                          'other'))
    th[5].start()

    repomd = cr.Repomd()

    data_files = set()
    for i in range(0, 6):
        rf = queues['master'].get(True)
        r = cr.RepomdRecord(*rf[0])
        r.checksum_open_type = cr.checksum_name_str(rf[1][2])
        r.checksum_open = rf[1][0]
        r.size_open = rf[1][1]
        r.fill(cr.SHA256)
        if not rf[0][0].endswith('_db'):
            queues[rf[0][0]].put(r.checksum, True)
        r.rename_file()
        r.location_href = os.path.join('repodata',
                                       os.path.basename(r.location_href))
        data_files.add(r.location_real)
        repomd.set_record(r)

    for t in th:
        t.join()

    repomd.sort_records()
    return (repomd.xml_dump(), data_files)
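
The __create_db workers above rely on cr.ContentStat to capture the uncompressed ("open") checksum and size while the sqlite file is bz2-compressed. A minimal sketch of that single call, using only functions that appear in the snippet; the source path is a placeholder.

import createrepo_c as cr

src = "repodata/primary.sqlite"
dst = src + cr.compression_suffix(cr.BZ2_COMPRESSION)

# ContentStat records the checksum and size of the *uncompressed* input
# while compress_file_with_stat writes the bz2-compressed output file.
stat = cr.ContentStat(cr.SHA256)
cr.compress_file_with_stat(src, dst, cr.BZ2_COMPRESSION, stat)
print(stat.checksum, stat.size, cr.checksum_name_str(stat.checksum_type))
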
Example no. 28
0
def create_rempomd_xml(packages,
                       publication,
                       extra_repomdrecords,
                       sub_folder=None):
    """
    Creates a repomd.xml file.

    Args:
        packages(app.models.Package): set of packages
        publication(pulpcore.plugin.models.Publication): the publication
        extra_repomdrecords(list): list with data relative to repo metadata files
        sub_folder(str): name of the folder for sub repos

    """
    cwd = os.getcwd()
    repodata_path = REPODATA_PATH
    has_modules = False

    if sub_folder:
        cwd = os.path.join(cwd, sub_folder)
        repodata_path = os.path.join(sub_folder, repodata_path)

    # Prepare metadata files
    repomd_path = os.path.join(cwd, "repomd.xml")
    pri_xml_path = os.path.join(cwd, "primary.xml.gz")
    fil_xml_path = os.path.join(cwd, "filelists.xml.gz")
    oth_xml_path = os.path.join(cwd, "other.xml.gz")
    pri_db_path = os.path.join(cwd, "primary.sqlite")
    fil_db_path = os.path.join(cwd, "filelists.sqlite")
    oth_db_path = os.path.join(cwd, "other.sqlite")
    upd_xml_path = os.path.join(cwd, "updateinfo.xml.gz")
    mod_yml_path = os.path.join(cwd, "modules.yaml")

    pri_xml = cr.PrimaryXmlFile(pri_xml_path)
    fil_xml = cr.FilelistsXmlFile(fil_xml_path)
    oth_xml = cr.OtherXmlFile(oth_xml_path)
    pri_db = cr.PrimarySqlite(pri_db_path)
    fil_db = cr.FilelistsSqlite(fil_db_path)
    oth_db = cr.OtherSqlite(oth_db_path)
    upd_xml = cr.UpdateInfoXmlFile(upd_xml_path)

    pri_xml.set_num_of_pkgs(len(packages))
    fil_xml.set_num_of_pkgs(len(packages))
    oth_xml.set_num_of_pkgs(len(packages))

    # Process all packages
    for package in packages:
        pkg = package.to_createrepo_c()
        pkg.location_href = package.contentartifact_set.first().relative_path
        pri_xml.add_pkg(pkg)
        fil_xml.add_pkg(pkg)
        oth_xml.add_pkg(pkg)
        pri_db.add_pkg(pkg)
        fil_db.add_pkg(pkg)
        oth_db.add_pkg(pkg)

    # Process update records
    for update_record in UpdateRecord.objects.filter(
            pk__in=publication.repository_version.content):
        upd_xml.add_chunk(update_record_xml(update_record))

    # Process modulemd and modulemd_defaults
    with open(mod_yml_path, 'ab') as mod_yml:
        for modulemd in Modulemd.objects.filter(
                pk__in=publication.repository_version.content):
            mod_yml.write(modulemd._artifacts.get().file.read())
            has_modules = True
        for default in ModulemdDefaults.objects.filter(
                pk__in=publication.repository_version.content):
            mod_yml.write(default._artifacts.get().file.read())
            has_modules = True

    pri_xml.close()
    fil_xml.close()
    oth_xml.close()
    upd_xml.close()

    repomd = cr.Repomd()

    repomdrecords = [("primary", pri_xml_path, pri_db),
                     ("filelists", fil_xml_path, fil_db),
                     ("other", oth_xml_path, oth_db),
                     ("primary_db", pri_db_path, None),
                     ("filelists_db", fil_db_path, None),
                     ("other_db", oth_db_path, None),
                     ("updateinfo", upd_xml_path, None)]

    if has_modules:
        repomdrecords.append(("modules", mod_yml_path, None))

    repomdrecords.extend(extra_repomdrecords)

    sqlite_files = ("primary_db", "filelists_db", "other_db")
    for name, path, db_to_update in repomdrecords:
        record = cr.RepomdRecord(name, path)
        if name in sqlite_files:
            record_bz = record.compress_and_fill(cr.SHA256, cr.BZ2)
            record_bz.type = name
            record_bz.rename_file()
            path = record_bz.location_href.split('/')[-1]
            repomd.set_record(record_bz)
        elif name == "modules":
            record_md = record.compress_and_fill(cr.SHA256, cr.GZ)
            record_md.type = name
            record_md.rename_file()
            path = record_md.location_href.split('/')[-1]
            repomd.set_record(record_md)
        else:
            record.fill(cr.SHA256)
            if db_to_update:
                db_to_update.dbinfo_update(record.checksum)
                db_to_update.close()
            record.rename_file()
            path = record.location_href.split('/')[-1]
            repomd.set_record(record)

        if sub_folder:
            path = os.path.join(sub_folder, path)

        PublishedMetadata.create_from_file(relative_path=os.path.join(
            repodata_path, os.path.basename(path)),
                                           publication=publication,
                                           file=File(open(path, 'rb')))

    with open(repomd_path, "w") as repomd_f:
        repomd_f.write(repomd.xml_dump())

    PublishedMetadata.create_from_file(relative_path=os.path.join(
        repodata_path, os.path.basename(repomd_path)),
                                       publication=publication,
                                       file=File(open(repomd_path, 'rb')))
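
Unlike the previous variants, this one also gzips modules.yaml via compress_and_fill() before registering it in repomd.xml. A minimal sketch of just that branch; the file name is a placeholder and the file is assumed to exist.

import createrepo_c as cr

repomd = cr.Repomd()
record = cr.RepomdRecord("modules", "modules.yaml")
# gzip the modulemd document and fill its checksum/size fields in one step
record_gz = record.compress_and_fill(cr.SHA256, cr.GZ)
record_gz.type = "modules"
record_gz.rename_file()
repomd.set_record(record_gz)
print(repomd.xml_dump())
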
Example no. 29
0
    async def run(self):
        """
        Build `DeclarativeContent` from the repodata.
        """
        packages_pb = ProgressBar(message='Parsed Packages')
        erratum_pb = ProgressBar(message='Parsed Erratum')

        packages_pb.save()
        erratum_pb.save()

        with ProgressBar(message='Downloading Metadata Files') as metadata_pb:
            downloader = self.remote.get_downloader(
                url=urljoin(self.remote.url, 'repodata/repomd.xml'))
            # TODO: decide how to distinguish between a mirror list and a normal repo
            result = await downloader.run()
            metadata_pb.increment()

            repomd_path = result.path
            repomd = cr.Repomd(repomd_path)
            package_repodata_urls = {}
            downloaders = []

            for record in repomd.records:
                if record.type in PACKAGE_REPODATA:
                    package_repodata_urls[record.type] = urljoin(
                        self.remote.url, record.location_href)
                elif record.type in UPDATE_REPODATA:
                    updateinfo_url = urljoin(self.remote.url,
                                             record.location_href)
                    downloader = self.remote.get_downloader(url=updateinfo_url)
                    downloaders.append([downloader.run()])
                else:
                    log.info(
                        _('Unknown repodata type: {t}. Skipped.').format(
                            t=record.type))
                    # TODO: skip databases, save unknown types to publish them as-is

            # to preserve order, downloaders are created after all repodata urls are identified
            package_repodata_downloaders = []
            for repodata_type in PACKAGE_REPODATA:
                downloader = self.remote.get_downloader(
                    url=package_repodata_urls[repodata_type])
                package_repodata_downloaders.append(downloader.run())

            downloaders.append(package_repodata_downloaders)

            # asyncio.gather is used to preserve the order of results for package repodata
            pending = [
                asyncio.gather(*downloaders_group)
                for downloaders_group in downloaders
            ]

            while pending:
                done, pending = await asyncio.wait(
                    pending, return_when=asyncio.FIRST_COMPLETED)
                for downloader in done:
                    results = downloader.result()
                    if results[0].url == package_repodata_urls['primary']:
                        primary_xml_path = results[0].path
                        filelists_xml_path = results[1].path
                        other_xml_path = results[2].path
                        metadata_pb.done += 3
                        metadata_pb.save()

                        packages = await RpmFirstStage.parse_repodata(
                            primary_xml_path, filelists_xml_path,
                            other_xml_path)
                        packages_pb.total = len(packages)
                        packages_pb.state = 'running'
                        packages_pb.save()

                        for pkg in packages.values():
                            package = Package(
                                **Package.createrepo_to_dict(pkg))
                            artifact = Artifact(size=package.size_package)
                            checksum_type = getattr(
                                CHECKSUM_TYPES, package.checksum_type.upper())
                            setattr(artifact, checksum_type, package.pkgId)
                            url = urljoin(self.remote.url,
                                          package.location_href)
                            filename = os.path.basename(package.location_href)
                            da = DeclarativeArtifact(
                                artifact=artifact,
                                url=url,
                                relative_path=filename,
                                remote=self.remote,
                                deferred_download=self.deferred_download)
                            dc = DeclarativeContent(content=package,
                                                    d_artifacts=[da])
                            packages_pb.increment()
                            await self.put(dc)

                    elif results[0].url == updateinfo_url:
                        updateinfo_xml_path = results[0].path
                        metadata_pb.increment()

                        updates = await RpmFirstStage.parse_updateinfo(
                            updateinfo_xml_path)

                        erratum_pb.total = len(updates)
                        erratum_pb.state = 'running'
                        erratum_pb.save()

                        for update in updates:
                            update_record = UpdateRecord(
                                **UpdateRecord.createrepo_to_dict(update))
                            update_record.digest = RpmFirstStage.hash_update_record(
                                update)
                            future_relations = {
                                'collections': defaultdict(list),
                                'references': []
                            }

                            for collection in update.collections:
                                coll_dict = UpdateCollection.createrepo_to_dict(
                                    collection)
                                coll = UpdateCollection(**coll_dict)

                                for package in collection.packages:
                                    pkg_dict = UpdateCollectionPackage.createrepo_to_dict(
                                        package)
                                    pkg = UpdateCollectionPackage(**pkg_dict)
                                    future_relations['collections'][
                                        coll].append(pkg)

                            for reference in update.references:
                                reference_dict = UpdateReference.createrepo_to_dict(
                                    reference)
                                ref = UpdateReference(**reference_dict)
                                future_relations['references'].append(ref)

                            erratum_pb.increment()
                            dc = DeclarativeContent(content=update_record)
                            dc.extra_data = future_relations
                            await self.put(dc)

        packages_pb.state = 'completed'
        erratum_pb.state = 'completed'
        packages_pb.save()
        erratum_pb.save()
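
The ordering trick noted above ("asyncio.gather is used to preserve the order of results") is plain asyncio behaviour: gather returns results in the order of its awaitables, not in completion order, which is what lets the first three results map back to primary, filelists and other. A minimal sketch with stand-in downloads:

import asyncio

async def fake_download(name):
    await asyncio.sleep(0)          # stand-in for downloader.run()
    return name

async def main():
    results = await asyncio.gather(*(fake_download(t)
                                     for t in ("primary", "filelists", "other")))
    primary, filelists, other = results
    print(primary, filelists, other)

asyncio.run(main())
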
Example no. 30
0
def oneshot_callback():
    """Parse one file at a time into a set of packages.

    Use of this method is discouraged.

    newpkgcb
    --------
    Via newpkgcb (the package callback) you can directly
    decide whether the current package element should be parsed
    or not. This decision can be based on
    three values that are available as attributes
    of the <package> element. These values are:
     - pkgId (package checksum)
     - name (package name)
     - arch (package architecture)
    (Note: This is applicable only to filelists.xml and other.xml;
     primary.xml doesn't contain this information in the <package> element.)

    If newpkgcb returns a package object, the parsed data
    will be loaded into this package object. If it returns None,
    the package element is skipped.

    This can help you reduce memory requirements, because
    unwanted packages can be skipped without the need to
    store them in memory.

    If no newpkgcb is specified, a default callback returning
    a new package object is used.

    pkgcb
    -----
    Callback called when parsing of a <package> element is done.
    Its argument is a package object that has previously been
    returned by newpkgcb.
    This function should return True if parsing should continue
    or False if parsing should be interrupted.

    Note: Both callbacks are optional, BUT at least one
          MUST be used (newpkgcb or pkgcb)!

    warningcb
    ---------
    The warning callback is called when a non-fatal oddity of the parsed
    XML is detected.
    If True is returned, parsing continues. If the return value is False,
    parsing is terminated.
    This callback is optional.
    """

    primary_xml_path = None
    filelists_xml_path = None
    other_xml_path = None

    #
    # repomd.xml parsing
    #

    # Parse repomd.xml to get paths (1. Method - Repomd object based)
    #   Pros: Easy to use
    repomd = cr.Repomd(os.path.join(REPO_PATH, "repodata/repomd.xml"))

    # Parse repomd.xml (2. Method - Parser based)
    #   Pros: Warning callback could be specified
    def warningcb(warning_type, message):
        """Optional callback for warnings about
        wierd stuff and formatting in XML.

        :param warning_type: Integer value. One from
                             the XML_WARNING_* constants.
        :param message: String message.
        """
        print("PARSER WARNING: %s" % message)
        return True

    repomd2 = cr.Repomd()
    cr.xml_parse_repomd(os.path.join(REPO_PATH, "repodata/repomd.xml"),
                        repomd2, warningcb)

    # Get stuff we need
    #   (repomd or repomd2 could be used, both have the same values)
    for record in repomd.records:
        if record.type == "primary":
            primary_xml_path = record.location_href
        elif record.type == "filelists":
            filelists_xml_path = record.location_href
        elif record.type == "other":
            other_xml_path = record.location_href

    #
    # Main XML metadata parsing (primary, filelists, other)
    #

    packages = {}

    def pkgcb(pkg):
        # Called when whole package entry in xml is parsed
        packages[pkg.pkgId] = pkg

    def newpkgcb(pkgId, name, arch):
        # Called when a new package entry is encountered
        # and only the opening <package> element has been parsed.
        # This function has to return a package to which the
        # parsed data will be added, or None if this package
        # should be skipped.
        return packages.get(pkgId, None)

    # The do_files option tells the primary parser to skip the <file> elements
    # of each package. If you plan to parse filelists.xml after primary.xml,
    # always set do_files to False.
    cr.xml_parse_primary(os.path.join(REPO_PATH, primary_xml_path),
                         pkgcb=pkgcb,
                         do_files=False,
                         warningcb=warningcb)

    cr.xml_parse_filelists(os.path.join(REPO_PATH, filelists_xml_path),
                           newpkgcb=newpkgcb,
                           warningcb=warningcb)

    cr.xml_parse_other(os.path.join(REPO_PATH, other_xml_path),
                       newpkgcb=newpkgcb,
                       warningcb=warningcb)

    for pkg in packages.values():
        print_package_info(pkg)
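
To illustrate the memory-saving point made in the docstring, here is a minimal sketch that uses newpkgcb to skip every package except one architecture while parsing filelists.xml; the path and the x86_64 filter are illustrative assumptions.

import createrepo_c as cr

wanted = {}

def newpkgcb(pkgId, name, arch):
    # Returning None skips the <package> element entirely, so unwanted
    # packages are never kept in memory.
    if arch != "x86_64":
        return None
    pkg = cr.Package()
    wanted[pkgId] = pkg
    return pkg

cr.xml_parse_filelists("repodata/filelists.xml.gz", newpkgcb=newpkgcb)
print(len(wanted), "x86_64 packages parsed")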