Example 1
    def test_xml_parser_filelists_warnings(self):

        userdata = {
                "pkgs": [],
                "warnings": []
            }

        def newpkgcb(pkgId, name, arch):
            pkg = cr.Package()
            userdata["pkgs"].append(pkg)
            return pkg

        def warningcb(warn_type, msg):
            userdata["warnings"].append((warn_type, msg))

        cr.xml_parse_filelists(FILELISTS_MULTI_WARN_00_PATH,
                               newpkgcb,
                               None,
                               warningcb)

        self.assertEqual([pkg.name for pkg in userdata["pkgs"]],
            ['fake_bash', 'super_kernel'])
        self.assertEqual(userdata["warnings"],
            [(1, 'Missing attribute "arch" of a package element'),
             (2, 'Unknown file type "xxx"'),
             (0, 'Unknown element "bar"')])
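
The warning types asserted above are plain integers. In the createrepo_c
Python bindings these values correspond to the XML_WARNING_* constants, so
the expected list can be written without magic numbers. A minimal sketch,
assuming the constants are exported on the cr module as in the C API
(UNKNOWNTAG = 0, MISSINGATTR = 1, UNKNOWNVAL = 2):

    # Hedged sketch: named warning-type constants instead of bare integers
    expected = [
        (cr.XML_WARNING_MISSINGATTR, 'Missing attribute "arch" of a package element'),
        (cr.XML_WARNING_UNKNOWNVAL, 'Unknown file type "xxx"'),
        (cr.XML_WARNING_UNKNOWNTAG, 'Unknown element "bar"'),
    ]
    self.assertEqual(userdata["warnings"], expected)
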
Example 2
    def test_xml_parser_filelists_repo01(self):

        userdata = {
                "pkgs": [],
                "pkgcb_calls": 0,
                "warnings": []
            }

        def newpkgcb(pkgId, name, arch):
            pkg = cr.Package()
            userdata["pkgs"].append(pkg)
            return pkg

        def pkgcb(pkg):
            userdata["pkgcb_calls"] += 1

        def warningcb(warn_type, msg):
            userdata["warnings"].append((warn_type, msg))

        cr.xml_parse_filelists(REPO_01_FILXML, newpkgcb, pkgcb, warningcb)

        self.assertEqual([pkg.name for pkg in userdata["pkgs"]],
            ['super_kernel'])
        self.assertEqual(userdata["pkgcb_calls"], 1)
        self.assertEqual(userdata["warnings"], [])

        pkg = userdata["pkgs"][0]
        self.assertEqual(pkg.pkgId, "152824bff2aa6d54f429d43e87a3ff3a0286505c6d93ec87692b5e3a9e3b97bf")
        self.assertEqual(pkg.name, "super_kernel")
        self.assertEqual(pkg.arch, "x86_64")
        self.assertEqual(pkg.version, "6.0.1")
        self.assertEqual(pkg.epoch, "0")
        self.assertEqual(pkg.release, "2")
        self.assertEqual(pkg.summary, None)
        self.assertEqual(pkg.description, None)
        self.assertEqual(pkg.url, None)
        self.assertEqual(pkg.time_file, 0)
        self.assertEqual(pkg.time_build, 0)
        self.assertEqual(pkg.rpm_license, None)
        self.assertEqual(pkg.rpm_vendor, None)
        self.assertEqual(pkg.rpm_group, None)
        self.assertEqual(pkg.rpm_buildhost, None)
        self.assertEqual(pkg.rpm_sourcerpm, None)
        self.assertEqual(pkg.rpm_header_start, 0)
        self.assertEqual(pkg.rpm_header_end, 0)
        self.assertEqual(pkg.rpm_packager, None)
        self.assertEqual(pkg.size_package, 0)
        self.assertEqual(pkg.size_installed, 0)
        self.assertEqual(pkg.size_archive, 0)
        self.assertEqual(pkg.location_href, None)
        self.assertEqual(pkg.location_base, None)
        self.assertEqual(pkg.checksum_type, None)
        self.assertEqual(pkg.requires, [])
        self.assertEqual(pkg.provides, [])
        self.assertEqual(pkg.conflicts, [])
        self.assertEqual(pkg.obsoletes, [])
        self.assertEqual(pkg.files,
                [(None, '/usr/bin/', 'super_kernel'),
                 (None, '/usr/share/man/', 'super_kernel.8.gz')])
        self.assertEqual(pkg.changelogs, [])
Example 3
    def test_xml_parser_filelists_repo02(self):

        userdata = {
                "pkgs": [],
                "pkgcb_calls": 0,
                "warnings": []
            }

        def newpkgcb(pkgId, name, arch):
            pkg = cr.Package()
            userdata["pkgs"].append(pkg)
            return pkg

        def pkgcb(pkg):
            userdata["pkgcb_calls"] += 1

        def warningcb(warn_type, msg):
            userdata["warnings"].append((warn_type, msg))

        cr.xml_parse_filelists(REPO_02_FILXML, newpkgcb, pkgcb, warningcb)

        self.assertEqual([pkg.name for pkg in userdata["pkgs"]],
            ['fake_bash', 'super_kernel'])
        self.assertEqual(userdata["pkgcb_calls"], 2)
        self.assertEqual(userdata["warnings"], [])
Example 4
    async def parse_repodata(primary_xml_path, filelists_xml_path,
                             other_xml_path):
        """
        Parse repodata to extract package info.

        Args:
            primary_xml_path(str): a path to a downloaded primary.xml
            filelists_xml_path(str): a path to a downloaded filelists.xml
            other_xml_path(str): a path to a downloaded other.xml

        Returns:
            dict: createrepo_c package objects with the pkgId as a key

        """
        def pkgcb(pkg):
            """
            A callback which is used when a whole package entry in xml is parsed.

            Args:
                pkg(createrepo_c.Package): the parsed metadata for a package

            """
            packages[pkg.pkgId] = pkg

        def newpkgcb(pkgId, name, arch):
            """
            A callback which is used when a new package entry is encountered.

            Only the opening <package> element has been parsed at that moment.
            This function has to return the package that parsed data will be
            added to, or None if the package should be skipped.

            The pkgId, name, and arch of a package can be used to decide
            whether to skip further parsing. They are available only for
            filelists.xml and other.xml.

            Args:
                pkgId(str): pkgId of a package
                name(str): name of a package
                arch(str): arch of a package

            Returns:
                createrepo_c.Package: the package that parsed data should be
                added to. If None is returned, further parsing of the package
                is skipped.

            """
            return packages.get(pkgId, None)

        packages = {}

        # TODO: handle parsing errors/warnings, warningcb callback can be used below
        cr.xml_parse_primary(primary_xml_path, pkgcb=pkgcb, do_files=False)
        cr.xml_parse_filelists(filelists_xml_path, newpkgcb=newpkgcb)
        cr.xml_parse_other(other_xml_path, newpkgcb=newpkgcb)
        return packages
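
A possible way to drive this coroutine, shown as a minimal sketch; the file
names here are hypothetical and stand for already-downloaded metadata files:

    import asyncio

    # Hypothetical paths to downloaded metadata files
    packages = asyncio.run(parse_repodata("primary.xml.gz",
                                          "filelists.xml.gz",
                                          "other.xml.gz"))
    for pkgId, pkg in packages.items():
        print(pkgId, pkg.name)
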
Example 5
    def test_xml_parser_filelists_repo02_only_pkgcb(self):

        pkgs = []

        def pkgcb(pkg):
            pkgs.append(pkg)

        cr.xml_parse_filelists(REPO_02_FILXML, None, pkgcb, None)

        self.assertEqual([pkg.name for pkg in pkgs],
                         ['fake_bash', 'super_kernel'])
Example 6
    def apply(self, metadata):
        # Check input arguments
        if "primary" not in metadata:
            self._error("primary.xml metadata file is missing")
            raise DeltaRepoPluginError("Primary metadata missing")

        gen_repomd_recs = []

        removed_packages = {}

        pri_md = metadata.get("primary")
        fil_md = metadata.get("filelists")
        oth_md = metadata.get("other")

        def try_simple_delta(md, dbclass):
            if not md:
                return

            notes = self._metadata_notes_from_plugin_bundle(md.metadata_type)
            if not notes:
                self._warning("Metadata \"{0}\" doesn't have a record in "
                              "deltametadata.xml - Ignoring")
                return True
            rc, rec = self._apply_basic_delta(md, notes)
            if not rc:
                return False
            if rec:
                gen_repomd_recs.append(rec)

            if not md.new_fn_exists:
                return True

            # Gen DB here
            if self.globalbundle.force_database or notes.get(
                    "database") == "1":
                rec = self._gen_db_from_xml(md)
                gen_repomd_recs.append(rec)

            return True

        # First, try a simple delta

        simple_pri_delta = try_simple_delta(pri_md, cr.PrimarySqlite)
        simple_fil_delta = try_simple_delta(fil_md, cr.FilelistsSqlite)
        simple_oth_delta = try_simple_delta(oth_md, cr.OtherSqlite)

        if simple_pri_delta:
            assert simple_fil_delta
            assert simple_oth_delta
            return gen_repomd_recs

        # Ignore already processed metadata
        if simple_fil_delta:
            fil_md = None
        if simple_oth_delta:
            oth_md = None

        # Make a dict of removed packages: key is location_href,
        # value is location_base
        for record in self.pluginbundle.get_list("removedpackage", []):
            location_href = record.get("location_href")
            if not location_href:
                continue
            location_base = record.get("location_base")
            removed_packages[location_href] = location_base

        # Prepare output xml files and check if dbs should be generated
        # Note: This information is stored directly in the Metadata
        # object, which some might consider a little hacky.
        def prepare_paths_in_metadata(md, xmlclass, dbclass):
            if md is None:
                return

            notes = self._metadata_notes_from_plugin_bundle(md.metadata_type)
            if not notes:
                # TODO: Add flag to ignore this kind of warnings (?)
                self._warning("Metadata \"{0}\" doesn't have a record in "
                              "deltametadata.xml - Ignoring")
                return

            suffix = cr.compression_suffix(md.compression_type) or ""
            md.new_fn = os.path.join(
                md.out_dir, "{0}.xml{1}".format(md.metadata_type, suffix))
            md.new_f_stat = cr.ContentStat(md.checksum_type)
            md.new_f = xmlclass(md.new_fn, md.compression_type, md.new_f_stat)

            if self.globalbundle.force_database or notes.get(
                    "database") == "1":
                md.db_fn = os.path.join(md.out_dir,
                                        "{0}.sqlite".format(md.metadata_type))
                md.db = dbclass(md.db_fn)
            else:
                md.db_fn = None
                md.db = None

        # Primary
        prepare_paths_in_metadata(pri_md, cr.PrimaryXmlFile, cr.PrimarySqlite)

        # Filelists
        prepare_paths_in_metadata(fil_md, cr.FilelistsXmlFile,
                                  cr.FilelistsSqlite)

        # Other
        prepare_paths_in_metadata(oth_md, cr.OtherXmlFile, cr.OtherSqlite)

        # Apply delta
        all_packages = {}  # dict { 'pkgId': pkg }

        old_contenthash_strings = []
        new_contenthash_strings = []

        def old_pkgcb(pkg):
            old_contenthash_strings.append(self._pkg_id_str(pkg))
            if pkg.location_href in removed_packages:
                if removed_packages[pkg.location_href] == pkg.location_base:
                    # This package won't be in new metadata
                    return
            new_contenthash_strings.append(self._pkg_id_str(pkg))
            all_packages[pkg.pkgId] = pkg

        def delta_pkgcb(pkg):
            new_contenthash_strings.append(self._pkg_id_str(pkg))
            all_packages[pkg.pkgId] = pkg

        filelists_from_primary = True
        if fil_md:
            filelists_from_primary = False

        # Parse both old and delta primary.xml files
        cr.xml_parse_primary(pri_md.old_fn,
                             pkgcb=old_pkgcb,
                             do_files=filelists_from_primary)
        cr.xml_parse_primary(pri_md.delta_fn,
                             pkgcb=delta_pkgcb,
                             do_files=filelists_from_primary)

        # Calculate content hashes
        h = hashlib.new(self.globalbundle.contenthash_type_str)
        old_contenthash_strings.sort()
        for i in old_contenthash_strings:
            h.update(i)
        self.globalbundle.calculated_old_contenthash = h.hexdigest()

        h = hashlib.new(self.globalbundle.contenthash_type_str)
        new_contenthash_strings.sort()
        for i in new_contenthash_strings:
            h.update(i)
        self.globalbundle.calculated_new_contenthash = h.hexdigest()

        # Sort packages
        def cmp_pkgs(x, y):
            # Compare only by filename
            ret = cmp(os.path.basename(x.location_href),
                      os.path.basename(y.location_href))
            if ret != 0:
                return ret

            # Compare by full location_href path
            return cmp(x.location_href, y.location_href)

        all_packages_sorted = sorted(all_packages.values(), cmp=cmp_pkgs)

        def newpkgcb(pkgId, name, arch):
            return all_packages.get(pkgId, None)

        # Parse filelists
        if fil_md:
            self._debug("Parsing filelists xmls")
            cr.xml_parse_filelists(fil_md.old_fn, newpkgcb=newpkgcb)
            cr.xml_parse_filelists(fil_md.delta_fn, newpkgcb=newpkgcb)

        if oth_md:
            self._debug("Parsing other xmls")
            cr.xml_parse_other(oth_md.old_fn, newpkgcb=newpkgcb)
            cr.xml_parse_other(oth_md.delta_fn, newpkgcb=newpkgcb)

        num_of_packages = len(all_packages_sorted)

        # Write out primary
        self._debug("Writing primary xml: {0}".format(pri_md.new_fn))
        pri_md.new_f.set_num_of_pkgs(num_of_packages)
        for pkg in all_packages_sorted:
            pri_md.new_f.add_pkg(pkg)
            if pri_md.db:
                pri_md.db.add_pkg(pkg)

        # Write out filelists
        if fil_md:
            self._debug("Writing filelists xml: {0}".format(fil_md.new_fn))
            fil_md.new_f.set_num_of_pkgs(num_of_packages)
            for pkg in all_packages_sorted:
                fil_md.new_f.add_pkg(pkg)
                if fil_md.db:
                    fil_md.db.add_pkg(pkg)

        # Write out other
        if oth_md:
            self._debug("Writing other xml: {0}".format(oth_md.new_fn))
            oth_md.new_f.set_num_of_pkgs(num_of_packages)
            for pkg in all_packages_sorted:
                oth_md.new_f.add_pkg(pkg)
                if oth_md.db:
                    oth_md.db.add_pkg(pkg)

        # Finish metadata
        def finish_metadata(md):
            if md is None:
                return

            # Close XML file
            md.new_f.close()

            # Prepare repomd record of xml file
            rec = cr.RepomdRecord(md.metadata_type, md.new_fn)
            rec.load_contentstat(md.new_f_stat)
            rec.fill(md.checksum_type)
            if self.globalbundle.unique_md_filenames:
                rec.rename_file()

            md.new_rec = rec
            md.new_fn_exists = True

            gen_repomd_recs.append(rec)

            # Prepare database
            if hasattr(md, "db") and md.db:
                self._debug("Generating database: {0}".format(md.db_fn))
                md.db.dbinfo_update(rec.checksum)
                md.db.close()
                db_stat = cr.ContentStat(md.checksum_type)
                db_compressed = md.db_fn + ".bz2"
                cr.compress_file(md.db_fn, None, cr.BZ2, db_stat)
                os.remove(md.db_fn)

                # Prepare repomd record of database file
                db_rec = cr.RepomdRecord("{0}_db".format(md.metadata_type),
                                         db_compressed)
                db_rec.load_contentstat(db_stat)
                db_rec.fill(md.checksum_type)
                if self.globalbundle.unique_md_filenames:
                    db_rec.rename_file()

                gen_repomd_recs.append(db_rec)

        # Add records to the bundle

        finish_metadata(pri_md)
        finish_metadata(fil_md)
        finish_metadata(oth_md)

        return gen_repomd_recs
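
Note that cmp() and the cmp= keyword of sorted() exist only on Python 2. A
minimal Python 3 sketch of the same ordering, replacing cmp_pkgs with a key
function:

    import os

    def pkg_sort_key(pkg):
        # Sort by file name first, then by the full location_href path,
        # mirroring the cmp_pkgs ordering above
        return (os.path.basename(pkg.location_href), pkg.location_href)

    all_packages_sorted = sorted(all_packages.values(), key=pkg_sort_key)
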
Example 7
    def gen(self, metadata):
        # Check input arguments
        if "primary" not in metadata:
            self._error("primary.xml metadata file is missing")
            raise DeltaRepoPluginError("Primary metadata missing")

        gen_repomd_recs = []

        # Metadata info that will be persistently stored
        metadata_notes = {}

        pri_md = metadata.get("primary")
        fil_md = metadata.get("filelists")
        oth_md = metadata.get("other")

        def try_simple_delta(md, force_gen=False):
            """Try to do simple delta. If successful, return True"""
            rc, rec, notes = self._gen_basic_delta(md, force_gen=force_gen)
            if not rc:
                return False
            if rec:
                gen_repomd_recs.append(rec)
            if not notes:
                notes = {}
            if metadata.get(md.metadata_type+"_db").new_fn_exists:
                notes["database"] = "1"
            else:
                notes["database"] = "0"
            self._metadata_notes_to_plugin_bundle(md.metadata_type, notes)
            return True

        # First, try a simple delta for primary.
        # If successful, force simple deltas for filelists and other too.

        simple_pri_delta = try_simple_delta(pri_md)
        simple_fil_delta = try_simple_delta(fil_md, force_gen=simple_pri_delta)
        simple_oth_delta = try_simple_delta(oth_md, force_gen=simple_pri_delta)

        if simple_pri_delta:
            # Simple delta for primary means that simple deltas were done
            # for all other metadata too
            return gen_repomd_recs

        # At this point we know that a simple delta for the primary wasn't done.
        # This means that, at least for primary, both metadata files (the new
        # one and the old one) exist, and we have to do a more sophisticated
        # delta.

        # Ignore files for which the simple delta was successful
        if simple_fil_delta:
            fil_md = None
        if simple_oth_delta:
            oth_md = None

        # Prepare output xml files and check if dbs should be generated
        # Note: This information is stored directly in the Metadata
        # object, which some might consider a little hacky.
        def prepare_paths_in_metadata(md, xmlclass):
            if md is None:
                return None

            # Make a note about whether the database should be generated
            db_available = metadata.get(md.metadata_type+"_db").new_fn_exists
            if db_available or self.globalbundle.force_database:
                metadata_notes.setdefault(md.metadata_type, {})["database"] = "1"
            else:
                metadata_notes.setdefault(md.metadata_type, {})["database"] = "0"

            suffix = cr.compression_suffix(md.compression_type) or ""
            md.delta_fn = os.path.join(md.out_dir,
                                       "{0}.xml{1}".format(md.metadata_type,
                                                           suffix))
            md.delta_f_stat = cr.ContentStat(md.checksum_type)
            md.delta_f = xmlclass(md.delta_fn,
                                  md.compression_type,
                                  md.delta_f_stat)
            return md

        # Primary
        pri_md = prepare_paths_in_metadata(pri_md, cr.PrimaryXmlFile)

        # Filelists
        fil_md = prepare_paths_in_metadata(fil_md, cr.FilelistsXmlFile)

        # Other
        oth_md = prepare_paths_in_metadata(oth_md, cr.OtherXmlFile)

        # Gen delta

        old_packages = set()
        added_packages = {}         # dict { 'pkgId': pkg }
        added_packages_ids = []     # list of package ids

        old_contenthash_strings = []
        new_contenthash_strings = []

        def old_pkgcb(pkg):
            old_packages.add(self._pkg_id_tuple(pkg))
            old_contenthash_strings.append(self._pkg_id_str(pkg))

        def new_pkgcb(pkg):
            new_contenthash_strings.append(self._pkg_id_str(pkg))
            pkg_id_tuple = self._pkg_id_tuple(pkg)
            if pkg_id_tuple not in old_packages:
                # This package is only in new repodata
                added_packages[pkg.pkgId] = pkg
                added_packages_ids.append(pkg.pkgId)
            else:
                # This package is also in the old repodata
                old_packages.remove(pkg_id_tuple)

        filelists_from_primary = True
        if fil_md:
            # Filelists will be parsed from filelists
            filelists_from_primary = False

        cr.xml_parse_primary(pri_md.old_fn, pkgcb=old_pkgcb, do_files=False)
        cr.xml_parse_primary(pri_md.new_fn, pkgcb=new_pkgcb,
                             do_files=filelists_from_primary)

        # Calculate content hashes
        h = hashlib.new(self.globalbundle.contenthash_type_str)
        old_contenthash_strings.sort()
        for i in old_contenthash_strings:
            h.update(i)
        src_contenthash = h.hexdigest()
        self.globalbundle.calculated_old_contenthash = src_contenthash

        h = hashlib.new(self.globalbundle.contenthash_type_str)
        new_contenthash_strings.sort()
        for i in new_contenthash_strings:
            h.update(i)
        dst_contenthash = h.hexdigest()
        self.globalbundle.calculated_new_contenthash = dst_contenthash

        # Set the content hashes to the plugin bundle
        self.pluginbundle.set("contenthash_type", self.globalbundle.contenthash_type_str)
        self.pluginbundle.set("src_contenthash", src_contenthash)
        self.pluginbundle.set("dst_contenthash", dst_contenthash)

        # Prepare list of removed packages
        removed_pkgs = sorted(old_packages)
        for _, location_href, location_base in removed_pkgs:
            dictionary = {"location_href": location_href}
            if location_base:
                dictionary["location_base"] = location_base
            self.pluginbundle.append("removedpackage", dictionary)

        num_of_packages = len(added_packages)

        # Filelists and Other cb
        def newpkgcb(pkgId, name, arch):
            return added_packages.get(pkgId, None)

        # Parse filelist.xml and write out its delta
        if fil_md:
            cr.xml_parse_filelists(fil_md.new_fn, newpkgcb=newpkgcb)
            fil_md.delta_f.set_num_of_pkgs(num_of_packages)
            for pkgid in added_packages_ids:
                fil_md.delta_f.add_pkg(added_packages[pkgid])
            fil_md.delta_f.close()

        # Parse other.xml and write out its delta
        if oth_md:
            cr.xml_parse_other(oth_md.new_fn, newpkgcb=newpkgcb)
            oth_md.delta_f.set_num_of_pkgs(num_of_packages)
            for pkgid in added_packages_ids:
                oth_md.delta_f.add_pkg(added_packages[pkgid])
            oth_md.delta_f.close()

        # Write out primary delta
        # Note: The primary delta has to be written after filelists are parsed,
        # otherwise files would be missing if filelists_from_primary was False
        pri_md.delta_f.set_num_of_pkgs(num_of_packages)
        for pkgid in added_packages_ids:
            pri_md.delta_f.add_pkg(added_packages[pkgid])
        pri_md.delta_f.close()

        # Finish metadata
        def finish_metadata(md):
            if md is None:
                return

            # Close XML file
            md.delta_f.close()

            # Prepare repomd record of xml file
            rec = cr.RepomdRecord(md.metadata_type, md.delta_fn)
            rec.load_contentstat(md.delta_f_stat)
            rec.fill(md.checksum_type)
            if self.globalbundle.unique_md_filenames:
                rec.rename_file()

            md.delta_rec = rec
            md.delta_fn_exists = True

            gen_repomd_recs.append(rec)

            # Prepare database
            if hasattr(md, "db") and md.db:
                md.db.dbinfo_update(rec.checksum)
                md.db.close()
                db_stat = cr.ContentStat(md.checksum_type)
                db_compressed = md.db_fn+".bz2"
                cr.compress_file(md.db_fn, None, cr.BZ2, db_stat)
                os.remove(md.db_fn)

                # Prepare repomd record of database file
                db_rec = cr.RepomdRecord("{0}_db".format(md.metadata_type),
                                         db_compressed)
                db_rec.load_contentstat(db_stat)
                db_rec.fill(md.checksum_type)
                if self.globalbundle.unique_md_filenames:
                    db_rec.rename_file()

                gen_repomd_recs.append(db_rec)

        # Add records to metadata objects
        finish_metadata(pri_md)
        finish_metadata(fil_md)
        finish_metadata(oth_md)

        # Store data persistently
        for metadata_type, notes in metadata_notes.items():
            self._metadata_notes_to_plugin_bundle(metadata_type, notes)

        return gen_repomd_recs
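
The h.update(i) calls above rely on Python 2 str; on Python 3, hashlib
accepts only bytes. A minimal Python 3 sketch of the same content-hash
calculation, assuming the per-package id strings are UTF-8 encodable:

    import hashlib

    def content_hash(id_strings, hash_type="sha256"):
        # Sort the per-package id strings and hash them, as above;
        # encode each string because hashlib requires bytes
        h = hashlib.new(hash_type)
        for s in sorted(id_strings):
            h.update(s.encode("utf-8"))
        return h.hexdigest()
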
Example 8
    def do(self, pri_old_fn, pri_new_fn, pri_f,
           fil_new_fn, fil_f, oth_new_fn, oth_f, removed):

        old_packages = set()
        added_packages = {}         # dict { 'pkgId': pkg }
        added_packages_ids = []     # list of package ids

        old_repoid_strings = []
        new_repoid_strings = []

        def old_pkgcb(pkg):
            old_packages.add(self._pkg_id_tuple(pkg))
            old_repoid_strings.append(self._pkg_id_str(pkg))

        def new_pkgcb(pkg):
            new_repoid_strings.append(self._pkg_id_str(pkg))
            pkg_id_tuple = self._pkg_id_tuple(pkg)
            if pkg_id_tuple not in old_packages:
                # This package is only in new repodata
                added_packages[pkg.pkgId] = pkg
                added_packages_ids.append(pkg.pkgId)
            else:
                # This package is also in the old repodata
                old_packages.remove(pkg_id_tuple)

        do_new_primary_files = 1
        if fil_f and fil_new_fn:
            # All files will be parsed from filelists
            do_new_primary_files = 0

        cr.xml_parse_primary(pri_old_fn, pkgcb=old_pkgcb, do_files=0)
        cr.xml_parse_primary(pri_new_fn, pkgcb=new_pkgcb,
                             do_files=do_new_primary_files)

        # Calculate RepoIds
        old_repo_id = ""
        new_repo_id = ""

        h = hashlib.new(self.id_type)
        old_repoid_strings.sort()
        for i in old_repoid_strings:
            h.update(i)
        old_repo_id = h.hexdigest()

        h = hashlib.new(self.id_type)
        new_repoid_strings.sort()
        for i in new_repoid_strings:
            h.update(i)
        new_repo_id = h.hexdigest()

        removed_pkgs = sorted(old_packages)
        for _, location_href, location_base in removed_pkgs:
            removed.add_pkg_locations(location_href, location_base)

        num_of_packages = len(added_packages)

        # Filelists and Other cb
        def newpkgcb(pkgId, name, arch):
            return added_packages.get(pkgId, None)

        # Write out filelists delta
        if fil_f and fil_new_fn:
            cr.xml_parse_filelists(fil_new_fn, newpkgcb=newpkgcb)
            fil_f.set_num_of_pkgs(num_of_packages)
            for pkgid in added_packages_ids:
                fil_f.add_pkg(added_packages[pkgid])
            fil_f.close()

        # Write out other delta
        if oth_f and oth_new_fn:
            cr.xml_parse_other(oth_new_fn, newpkgcb=newpkgcb)
            oth_f.set_num_of_pkgs(num_of_packages)
            for pkgid in added_packages_ids:
                oth_f.add_pkg(added_packages[pkgid])
            oth_f.close()

        # Write out primary delta
        # Note: The primary delta has to be written after filelists are parsed,
        # otherwise files would be missing if do_new_primary_files was 0
        pri_f.set_num_of_pkgs(num_of_packages)
        for pkgid in added_packages_ids:
            pri_f.add_pkg(added_packages[pkgid])
        pri_f.close()

        return (old_repo_id, new_repo_id)
Example 9
def second_method():
    """Prefered method for repodata parsing.

    Important callbacks for repodata parsing:

    newpkgcb
    --------
    Via newpkgcb (package callback) you can directly
    control whether the current package element should
    be parsed or not. This decision can be based on
    three values that are available as attributes
    of the <package> element. These values are:
     - pkgId (package checksum)
     - name (package name)
     - arch (package architecture)
    (Note: This is applicable only for filelists.xml and other.xml;
     primary.xml doesn't contain this information in the <package> element)

    If newpkgcb returns a package object, the parsed data
    will be loaded into this package object. If it returns None,
    the package element is skipped.

    This can help you reduce memory requirements, because
    unwanted packages can be skipped without the need to
    store them in memory.

    If no newpkgcb is specified, a default callback returning
    a new package object is used.

    pkgcb
    -----
    Callback called when parsing of a <package> element is done.
    Its argument is the package object that was previously
    returned by newpkgcb.
    This function should return True if parsing should continue
    or False if parsing should be interrupted.

    Note: Both callbacks are optional, BUT at least one
          MUST be used (newpkgcb or pkgcb)!

    warningcb
    ---------
    The warning callback is called when a non-fatal oddity in the
    parsed XML is detected.
    If True is returned, parsing continues. If the return value is
    False, parsing is terminated.
    This callback is optional.
    """

    primary_xml_path   = None
    filelists_xml_path = None
    other_xml_path     = None

    #
    # repomd.xml parsing
    #

    # Parse repomd.xml to get paths (Method 1 - Repomd object based)
    #   Pros: Easy to use
    repomd = cr.Repomd(os.path.join(REPO_PATH, "repodata/repomd.xml"))

    # Parse repomd.xml (Method 2 - Parser based)
    #   Pros: A warning callback can be specified
    def warningcb(warning_type, message):
        """Optional callback for warnings about
        wierd stuff and formatting in XML.

        :param warning_type: Integer value. One from
                             the XML_WARNING_* constants.
        :param message: String message.
        """
        print "PARSER WARNING: %s" % message
        return True

    repomd2 = cr.Repomd()
    cr.xml_parse_repomd(os.path.join(REPO_PATH, "repodata/repomd.xml"),
                                     repomd2, warningcb)

    # Get stuff we need
    #   (repomd or repomd2 could be used, both have the same values)
    for record in repomd.records:
        if record.type == "primary":
            primary_xml_path = record.location_href
        elif record.type == "filelists":
            filelists_xml_path = record.location_href
        elif record.type == "other":
            other_xml_path = record.location_href


    #
    # Main XML metadata parsing (primary, filelists, other)
    #

    packages = {}

    def pkgcb(pkg):
        # Called when a whole package entry in the xml is parsed
        packages[pkg.pkgId] = pkg

    def newpkgcb(pkgId, name, arch):
        # Called when a new package entry is encountered and only
        # the opening <package> element has been parsed.
        # This function has to return a package to which parsed
        # data will be added, or None if this package should be
        # skipped.
        return packages.get(pkgId, None)

    # The do_files option tells the primary parser to skip the <file>
    # elements of packages. If you plan to parse filelists.xml after
    # primary.xml, always set do_files to False.
    cr.xml_parse_primary(os.path.join(REPO_PATH, primary_xml_path),
                         pkgcb=pkgcb,
                         do_files=False,
                         warningcb=warningcb)

    cr.xml_parse_filelists(os.path.join(REPO_PATH, filelists_xml_path),
                           newpkgcb=newpkgcb,
                           warningcb=warningcb)

    cr.xml_parse_other(os.path.join(REPO_PATH, other_xml_path),
                       newpkgcb=newpkgcb,
                       warningcb=warningcb)

    for pkg in packages.values():
        print_package_info(pkg)
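
The pkgcb contract described in the docstring above says that returning
False interrupts parsing. A minimal sketch that stops after the first ten
packages; the file name is hypothetical:

    collected = []

    def limited_pkgcb(pkg):
        # Keep the package; once ten are collected, return False to
        # interrupt the parser (per the pkgcb contract above)
        collected.append(pkg)
        return len(collected) < 10

    cr.xml_parse_filelists("filelists.xml.gz", None, limited_pkgcb, None)
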
Example 10
    def apply(self, pri_old_fn, pri_delta_fn, pri_f, pri_db, fil_old_fn,
              fil_delta_fn, fil_f, fil_db, oth_old_fn, oth_delta_fn, oth_f,
              oth_db, removed):

        removed_packages = set() # set of pkgIds (hashes)
        all_packages = {}        # dict { 'pkgId': pkg }

        old_repoid_strings = []
        new_repoid_strings = []

        def old_pkgcb(pkg):
            old_repoid_strings.append(self._pkg_id_str(pkg))
            if pkg.location_href in removed.packages:
                if removed.packages[pkg.location_href] == pkg.location_base:
                    # This package won't be in new metadata
                    return
            new_repoid_strings.append(self._pkg_id_str(pkg))
            all_packages[pkg.pkgId] = pkg

        def delta_pkgcb(pkg):
            new_repoid_strings.append(self._pkg_id_str(pkg))
            all_packages[pkg.pkgId] = pkg

        do_primary_files = 1
        if fil_f and fil_delta_fn and fil_old_fn:
            do_primary_files = 0

        cr.xml_parse_primary(pri_old_fn, pkgcb=old_pkgcb,
                             do_files=do_primary_files)
        cr.xml_parse_primary(pri_delta_fn, pkgcb=delta_pkgcb,
                             do_files=do_primary_files)

        # Calculate RepoIds
        old_repo_id = ""
        new_repo_id = ""

        h = hashlib.new(self.id_type)
        old_repoid_strings.sort()
        for i in old_repoid_strings:
            h.update(i)
        old_repo_id = h.hexdigest()

        h = hashlib.new(self.id_type)
        new_repoid_strings.sort()
        for i in new_repoid_strings:
            h.update(i)
        new_repo_id = h.hexdigest()

        # Sort packages
        def cmp_pkgs(x, y):
            # Compare only by filename
            ret = cmp(os.path.basename(x.location_href),
                      os.path.basename(y.location_href))
            if ret != 0:
                return ret

            # Compare by full location_href path
            return cmp(x.location_href, y.location_href)

        all_packages_sorted = sorted(all_packages.values(), cmp=cmp_pkgs)

        def newpkgcb(pkgId, name, arch):
            return all_packages.get(pkgId, None)

        # Parse filelists
        if fil_f and fil_delta_fn and fil_old_fn:
            cr.xml_parse_filelists(fil_old_fn, newpkgcb=newpkgcb)
            cr.xml_parse_filelists(fil_delta_fn, newpkgcb=newpkgcb)

        # Parse other
        if oth_f and oth_delta_fn and oth_old_fn:
            cr.xml_parse_other(oth_old_fn, newpkgcb=newpkgcb)
            cr.xml_parse_other(oth_delta_fn, newpkgcb=newpkgcb)

        num_of_packages = len(all_packages_sorted)

        # Write out primary
        pri_f.set_num_of_pkgs(num_of_packages)
        for pkg in all_packages_sorted:
            pri_f.add_pkg(pkg)
            if pri_db:
                pri_db.add_pkg(pkg)

        # Write out filelists
        if fil_f:
            fil_f.set_num_of_pkgs(num_of_packages)
            for pkg in all_packages_sorted:
                fil_f.add_pkg(pkg)
                if fil_db:
                    fil_db.add_pkg(pkg)

        # Write out other
        if oth_f:
            oth_f.set_num_of_pkgs(num_of_packages)
            for pkg in all_packages_sorted:
                oth_f.add_pkg(pkg)
                if oth_db:
                    oth_db.add_pkg(pkg)

        return (old_repo_id, new_repo_id)
Example 11
def oneshot_callback():
    """Parse one file at a time into a set of packages.

    Use of this method is discouraged.

    newpkgcb
    --------
    Via newpkgcb (package callback) you can directly
    control whether the current package element should
    be parsed or not. This decision can be based on
    three values that are available as attributes
    of the <package> element. These values are:
     - pkgId (package checksum)
     - name (package name)
     - arch (package architecture)
    (Note: This is applicable only for filelists.xml and other.xml;
     primary.xml doesn't contain this information in the <package> element)

    If newpkgcb returns a package object, the parsed data
    will be loaded into this package object. If it returns None,
    the package element is skipped.

    This can help you reduce memory requirements, because
    unwanted packages can be skipped without the need to
    store them in memory.

    If no newpkgcb is specified, a default callback returning
    a new package object is used.

    pkgcb
    -----
    Callback called when parsing of a <package> element is done.
    Its argument is the package object that was previously
    returned by newpkgcb.
    This function should return True if parsing should continue
    or False if parsing should be interrupted.

    Note: Both callbacks are optional, BUT at least one
          MUST be used (newpkgcb or pkgcb)!

    warningcb
    ---------
    The warning callback is called when a non-fatal oddity in the
    parsed XML is detected.
    If True is returned, parsing continues. If the return value is
    False, parsing is terminated.
    This callback is optional.
    """

    primary_xml_path = None
    filelists_xml_path = None
    other_xml_path = None

    #
    # repomd.xml parsing
    #

    # Parse repomd.xml to get paths (Method 1 - Repomd object based)
    #   Pros: Easy to use
    repomd = cr.Repomd(os.path.join(REPO_PATH, "repodata/repomd.xml"))

    # Parse repomd.xml (Method 2 - Parser based)
    #   Pros: A warning callback can be specified
    def warningcb(warning_type, message):
        """Optional callback for warnings about
        wierd stuff and formatting in XML.

        :param warning_type: Integer value. One from
                             the XML_WARNING_* constants.
        :param message: String message.
        """
        print("PARSER WARNING: %s" % message)
        return True

    repomd2 = cr.Repomd()
    cr.xml_parse_repomd(os.path.join(REPO_PATH, "repodata/repomd.xml"),
                        repomd2, warningcb)

    # Get stuff we need
    #   (repomd or repomd2 could be used, both have the same values)
    for record in repomd.records:
        if record.type == "primary":
            primary_xml_path = record.location_href
        elif record.type == "filelists":
            filelists_xml_path = record.location_href
        elif record.type == "other":
            other_xml_path = record.location_href

    #
    # Main XML metadata parsing (primary, filelists, other)
    #

    packages = {}

    def pkgcb(pkg):
        # Called when a whole package entry in the xml is parsed
        packages[pkg.pkgId] = pkg

    def newpkgcb(pkgId, name, arch):
        # Called when a new package entry is encountered and only
        # the opening <package> element has been parsed.
        # This function has to return a package to which parsed
        # data will be added, or None if this package should be
        # skipped.
        return packages.get(pkgId, None)

    # The do_files option tells the primary parser to skip the <file>
    # elements of packages. If you plan to parse filelists.xml after
    # primary.xml, always set do_files to False.
    cr.xml_parse_primary(os.path.join(REPO_PATH, primary_xml_path),
                         pkgcb=pkgcb,
                         do_files=False,
                         warningcb=warningcb)

    cr.xml_parse_filelists(os.path.join(REPO_PATH, filelists_xml_path),
                           newpkgcb=newpkgcb,
                           warningcb=warningcb)

    cr.xml_parse_other(os.path.join(REPO_PATH, other_xml_path),
                       newpkgcb=newpkgcb,
                       warningcb=warningcb)

    for pkg in packages.values():
        print_package_info(pkg)
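
The warningcb contract works the same way: returning False terminates
parsing. A minimal sketch of a strict variant that aborts on the first
oddity instead of merely printing it:

    def strict_warningcb(warning_type, message):
        # Report the problem, then stop the parser by returning False
        print("FATAL PARSER WARNING: %s" % message)
        return False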
Esempio n. 17
0
    def apply(self, metadata):
        # Check input arguments
        if "primary" not in metadata:
            self._error("primary.xml metadata file is missing")
            raise DeltaRepoPluginError("Primary metadata missing")

        gen_repomd_recs = []

        removed_packages = {}

        pri_md = metadata.get("primary")
        fil_md = metadata.get("filelists")
        oth_md = metadata.get("other")

        def try_simple_delta(md, dbclass):
            if not md:
                return

            notes = self._metadata_notes_from_plugin_bundle(md.metadata_type)
            if not notes:
                self._warning("Metadata \"{0}\" doesn't have a record in "
                              "deltametadata.xml - Ignoring")
                return True
            rc, rec = self._apply_basic_delta(md, notes)
            if not rc:
                return False
            if rec:
                gen_repomd_recs.append(rec)

            if not md.new_fn_exists:
                return True

            # Gen DB here
            if self.globalbundle.force_database or notes.get("database") == "1":
                rec = self._gen_db_from_xml(md)
                gen_repomd_recs.append(rec)

            return True

        # At first try to simple delta

        simple_pri_delta = try_simple_delta(pri_md, cr.PrimarySqlite)
        simple_fil_delta = try_simple_delta(fil_md, cr.FilelistsSqlite)
        simple_oth_delta = try_simple_delta(oth_md, cr.OtherSqlite)

        if simple_pri_delta:
            assert simple_fil_delta
            assert simple_oth_delta
            return gen_repomd_recs

        # Ignore already processed metadata
        if simple_fil_delta:
            fil_md = None
        if simple_oth_delta:
            oth_md = None

        # Make a dict of removed packages key is location_href,
        # value is location_base
        for record in self.pluginbundle.get_list("removedpackage", []):
            location_href = record.get("location_href")
            if not location_href:
                continue
            location_base = record.get("location_base")
            removed_packages[location_href] = location_base

        # Prepare output xml files and check if dbs should be generated
        # Note: This information are stored directly to the Metadata
        # object which someone could see as little hacky.
        def prepare_paths_in_metadata(md, xmlclass, dbclass):
            if md is None:
                return

            notes = self._metadata_notes_from_plugin_bundle(md.metadata_type)
            if not notes:
                # TODO: Add flag to ignore this kind of warnings (?)
                self._warning("Metadata \"{0}\" doesn't have a record in "
                              "deltametadata.xml - Ignoring")
                return

            suffix = cr.compression_suffix(md.compression_type) or ""
            md.new_fn = os.path.join(md.out_dir,
                                     "{0}.xml{1}".format(
                                     md.metadata_type, suffix))
            md.new_f_stat = cr.ContentStat(md.checksum_type)
            md.new_f = xmlclass(md.new_fn,
                                md.compression_type,
                                md.new_f_stat)

            if self.globalbundle.force_database or notes.get("database") == "1":
                md.db_fn = os.path.join(md.out_dir, "{0}.sqlite".format(
                                        md.metadata_type))
                md.db = dbclass(md.db_fn)
            else:
                md.db_fn = None
                md.db = None

        # Primary
        prepare_paths_in_metadata(pri_md,
                                  cr.PrimaryXmlFile,
                                  cr.PrimarySqlite)

        # Filelists
        prepare_paths_in_metadata(fil_md,
                                  cr.FilelistsXmlFile,
                                  cr.FilelistsSqlite)

        # Other
        prepare_paths_in_metadata(oth_md,
                                  cr.OtherXmlFile,
                                  cr.OtherSqlite)

        # Apply delta
        all_packages = {}        # dict { 'pkgId': pkg }

        old_contenthash_strings = []
        new_contenthash_strings = []

        def old_pkgcb(pkg):
            old_contenthash_strings.append(self._pkg_id_str(pkg))
            if pkg.location_href in removed_packages:
                if removed_packages[pkg.location_href] == pkg.location_base:
                    # This package won't be in new metadata
                    return
            new_contenthash_strings.append(self._pkg_id_str(pkg))
            all_packages[pkg.pkgId] = pkg

        def delta_pkgcb(pkg):
            new_contenthash_strings.append(self._pkg_id_str(pkg))
            all_packages[pkg.pkgId] = pkg

        filelists_from_primary = True
        if fil_md:
            filelists_from_primary = False

        # Parse both old and delta primary.xml files
        cr.xml_parse_primary(pri_md.old_fn, pkgcb=old_pkgcb,
                             do_files=filelists_from_primary)
        cr.xml_parse_primary(pri_md.delta_fn, pkgcb=delta_pkgcb,
                             do_files=filelists_from_primary)

        # Calculate content hashes
        h = hashlib.new(self.globalbundle.contenthash_type_str)
        old_contenthash_strings.sort()
        for i in old_contenthash_strings:
            h.update(i)
        self.globalbundle.calculated_old_contenthash = h.hexdigest()

        h = hashlib.new(self.globalbundle.contenthash_type_str)
        new_contenthash_strings.sort()
        for i in new_contenthash_strings:
            h.update(i)
        self.globalbundle.calculated_new_contenthash = h.hexdigest()

        # Sort packages
        def cmp_pkgs(x, y):
            # Compare only by filename
            ret = cmp(os.path.basename(x.location_href),
                      os.path.basename(y.location_href))
            if ret != 0:
                return ret

            # Compare by full location_href path
            return  cmp(x.location_href, y.location_href)

        all_packages_sorted = sorted(all_packages.values(), cmp=cmp_pkgs)

        def newpkgcb(pkgId, name, arch):
            return all_packages.get(pkgId, None)

        # Parse filelists
        if fil_md:
            self._debug("Parsing filelists xmls")
            cr.xml_parse_filelists(fil_md.old_fn, newpkgcb=newpkgcb)
            cr.xml_parse_filelists(fil_md.delta_fn, newpkgcb=newpkgcb)

        if oth_md:
            self._debug("Parsing other xmls")
            cr.xml_parse_other(oth_md.old_fn, newpkgcb=newpkgcb)
            cr.xml_parse_other(oth_md.delta_fn, newpkgcb=newpkgcb)

        num_of_packages = len(all_packages_sorted)

        # Write out primary
        self._debug("Writing primary xml: {0}".format(pri_md.new_fn))
        pri_md.new_f.set_num_of_pkgs(num_of_packages)
        for pkg in all_packages_sorted:
            pri_md.new_f.add_pkg(pkg)
            if pri_md.db:
                pri_md.db.add_pkg(pkg)

        # Write out filelists
        if fil_md:
            self._debug("Writing filelists xml: {0}".format(fil_md.new_fn))
            fil_md.new_f.set_num_of_pkgs(num_of_packages)
            for pkg in all_packages_sorted:
                fil_md.new_f.add_pkg(pkg)
                if fil_md.db:
                    fil_md.db.add_pkg(pkg)

        # Write out other
        if oth_md:
            self._debug("Writing other xml: {0}".format(oth_md.new_fn))
            oth_md.new_f.set_num_of_pkgs(num_of_packages)
            for pkg in all_packages_sorted:
                oth_md.new_f.add_pkg(pkg)
                if oth_md.db:
                    oth_md.db.add_pkg(pkg)

        # Finish metadata
        def finish_metadata(md):
            if md is None:
                return

            # Close XML file
            md.new_f.close()

            # Prepare repomd record of xml file
            rec = cr.RepomdRecord(md.metadata_type, md.new_fn)
            rec.load_contentstat(md.new_f_stat)
            rec.fill(md.checksum_type)
            if self.globalbundle.unique_md_filenames:
                rec.rename_file()

            md.new_rec = rec
            md.new_fn_exists = True

            gen_repomd_recs.append(rec)

            # Prepare database
            if hasattr(md, "db") and md.db:
                self._debug("Generating database: {0}".format(md.db_fn))
                md.db.dbinfo_update(rec.checksum)
                md.db.close()
                db_stat = cr.ContentStat(md.checksum_type)
                db_compressed = md.db_fn+".bz2"
                cr.compress_file(md.db_fn, None, cr.BZ2, db_stat)
                os.remove(md.db_fn)

                # Prepare repomd record of database file
                db_rec = cr.RepomdRecord("{0}_db".format(md.metadata_type),
                                         db_compressed)
                db_rec.load_contentstat(db_stat)
                db_rec.fill(md.checksum_type)
                if self.globalbundle.unique_md_filenames:
                    db_rec.rename_file()

                gen_repomd_recs.append(db_rec)

        # Add records to the bundle

        finish_metadata(pri_md)
        finish_metadata(fil_md)
        finish_metadata(oth_md)

        return gen_repomd_recs
Esempio n. 18
0
    def gen(self, metadata):
        # Check input arguments
        if "primary" not in metadata:
            self._error("primary.xml metadata file is missing")
            raise DeltaRepoPluginError("Primary metadata missing")

        gen_repomd_recs = []

        # Medadata info that will be persistently stored
        metadata_notes = {}

        pri_md = metadata.get("primary")
        fil_md = metadata.get("filelists")
        oth_md = metadata.get("other")

        def try_simple_delta(md, force_gen=False):
            """Try to do simple delta. If successful, return True"""
            rc, rec, notes = self._gen_basic_delta(md, force_gen=force_gen)
            if not rc:
                return False
            if rec:
                gen_repomd_recs.append(rec)
            if not notes:
                notes = {}
            if metadata.get(md.metadata_type + "_db").new_fn_exists:
                notes["database"] = "1"
            else:
                notes["database"] = "0"
            self._metadata_notes_to_plugin_bundle(md.metadata_type, notes)
            return True

        # At first try to do simple delta for primary
        # If successful, force simple delta for filelists and other too

        simple_pri_delta = try_simple_delta(pri_md)
        simple_fil_delta = try_simple_delta(fil_md, force_gen=simple_pri_delta)
        simple_oth_delta = try_simple_delta(oth_md, force_gen=simple_pri_delta)

        if simple_pri_delta:
            # Simple delta for primary means that simple deltas were done
            # for all other metadata too
            return gen_repomd_recs

        # At this point we know that a simple delta for the primary wasn't done.
        # This means that, at least for primary, both metadata files (the new
        # one and the old one) exist, and we have to do a more sophisticated delta

        # Ignore files for which the simple delta was successful
        if simple_fil_delta:
            fil_md = None
        if simple_oth_delta:
            oth_md = None

        # Prepare output xml files and check if dbs should be generated
        # Note: This information is stored directly in the Metadata
        # object, which one could see as a little hacky.
        def prepare_paths_in_metadata(md, xmlclass):
            if md is None:
                return None

            # Make a note about if the database should be generated
            db_available = metadata.get(md.metadata_type + "_db").new_fn_exists
            if db_available or self.globalbundle.force_database:
                metadata_notes.setdefault(md.metadata_type,
                                          {})["database"] = "1"
            else:
                metadata_notes.setdefault(md.metadata_type,
                                          {})["database"] = "0"

            suffix = cr.compression_suffix(md.compression_type) or ""
            md.delta_fn = os.path.join(
                md.out_dir, "{0}.xml{1}".format(md.metadata_type, suffix))
            md.delta_f_stat = cr.ContentStat(md.checksum_type)
            md.delta_f = xmlclass(md.delta_fn, md.compression_type,
                                  md.delta_f_stat)
            return md

        # Primary
        pri_md = prepare_paths_in_metadata(pri_md, cr.PrimaryXmlFile)

        # Filelists
        fil_md = prepare_paths_in_metadata(fil_md, cr.FilelistsXmlFile)

        # Other
        oth_md = prepare_paths_in_metadata(oth_md, cr.OtherXmlFile)

        # Gen delta

        old_packages = set()
        added_packages = {}  # dict { 'pkgId': pkg }
        added_packages_ids = []  # list of package ids

        old_contenthash_strings = []
        new_contenthash_strings = []

        def old_pkgcb(pkg):
            old_packages.add(self._pkg_id_tuple(pkg))
            old_contenthash_strings.append(self._pkg_id_str(pkg))

        def new_pkgcb(pkg):
            new_contenthash_strings.append(self._pkg_id_str(pkg))
            pkg_id_tuple = self._pkg_id_tuple(pkg)
            if pkg_id_tuple not in old_packages:
                # This package is only in new repodata
                added_packages[pkg.pkgId] = pkg
                added_packages_ids.append(pkg.pkgId)
            else:
                # This package is also in the old repodata
                old_packages.remove(pkg_id_tuple)

        filelists_from_primary = True
        if fil_md:
            # Filelists will be parsed from filelists.xml instead of primary.xml
            filelists_from_primary = False

        cr.xml_parse_primary(pri_md.old_fn, pkgcb=old_pkgcb, do_files=False)
        cr.xml_parse_primary(pri_md.new_fn,
                             pkgcb=new_pkgcb,
                             do_files=filelists_from_primary)

        # Calculate content hashes
        h = hashlib.new(self.globalbundle.contenthash_type_str)
        old_contenthash_strings.sort()
        for i in old_contenthash_strings:
            h.update(i)
        src_contenthash = h.hexdigest()
        self.globalbundle.calculated_old_contenthash = src_contenthash

        h = hashlib.new(self.globalbundle.contenthash_type_str)
        new_contenthash_strings.sort()
        for i in new_contenthash_strings:
            h.update(i)
        dst_contenthash = h.hexdigest()
        self.globalbundle.calculated_new_contenthash = dst_contenthash

        # Set the content hashes to the plugin bundle
        self.pluginbundle.set("contenthash_type",
                              self.globalbundle.contenthash_type_str)
        self.pluginbundle.set("src_contenthash", src_contenthash)
        self.pluginbundle.set("dst_contenthash", dst_contenthash)

        # Prepare list of removed packages
        removed_pkgs = sorted(old_packages)
        for _, location_href, location_base in removed_pkgs:
            dictionary = {"location_href": location_href}
            if location_base:
                dictionary["location_base"] = location_base
            self.pluginbundle.append("removedpackage", dictionary)

        num_of_packages = len(added_packages)

        # Filelists and Other callback: return the Package object for an added
        # package so the parsed data is stored into it; returning None makes
        # the parser skip the package entirely.
        def newpkgcb(pkgId, name, arch):
            return added_packages.get(pkgId, None)

        # Parse filelists.xml and write out its delta
        if fil_md:
            cr.xml_parse_filelists(fil_md.new_fn, newpkgcb=newpkgcb)
            fil_md.delta_f.set_num_of_pkgs(num_of_packages)
            for pkgid in added_packages_ids:
                fil_md.delta_f.add_pkg(added_packages[pkgid])
            fil_md.delta_f.close()

        # Parse other.xml and write out its delta
        if oth_md:
            cr.xml_parse_other(oth_md.new_fn, newpkgcb=newpkgcb)
            oth_md.delta_f.set_num_of_pkgs(num_of_packages)
            for pkgid in added_packages_ids:
                oth_md.delta_f.add_pkg(added_packages[pkgid])
            oth_md.delta_f.close()

        # Write out primary delta
        # Note: The primary delta has to be written after filelists are parsed,
        # otherwise files would be missing if filelists_from_primary was False
        pri_md.delta_f.set_num_of_pkgs(num_of_packages)
        for pkgid in added_packages_ids:
            pri_md.delta_f.add_pkg(added_packages[pkgid])
        pri_md.delta_f.close()

        # Finish metadata
        def finish_metadata(md):
            if md is None:
                return

            # Close XML file
            md.delta_f.close()

            # Prepare repomd record of xml file
            rec = cr.RepomdRecord(md.metadata_type, md.delta_fn)
            rec.load_contentstat(md.delta_f_stat)
            rec.fill(md.checksum_type)
            if self.globalbundle.unique_md_filenames:
                rec.rename_file()

            md.delta_rec = rec
            md.delta_fn_exists = True

            gen_repomd_recs.append(rec)

            # Prepare database
            if hasattr(md, "db") and md.db:
                md.db.dbinfo_update(rec.checksum)
                md.db.close()
                db_stat = cr.ContentStat(md.checksum_type)
                db_compressed = md.db_fn + ".bz2"
                cr.compress_file(md.db_fn, None, cr.BZ2, db_stat)
                os.remove(md.db_fn)

                # Prepare repomd record of database file
                db_rec = cr.RepomdRecord("{0}_db".format(md.metadata_type),
                                         db_compressed)
                db_rec.load_contentstat(db_stat)
                db_rec.fill(md.checksum_type)
                if self.globalbundle.unique_md_filenames:
                    db_rec.rename_file()

                gen_repomd_recs.append(db_rec)

        # Add records to metadata objects
        finish_metadata(pri_md)
        finish_metadata(fil_md)
        finish_metadata(oth_md)

        # Store data persistently
        for metadata_type, notes in metadata_notes.items():
            self._metadata_notes_to_plugin_bundle(metadata_type, notes)

        return gen_repomd_recs
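
# ---------------------------------------------------------------------------
# The content-hash calculation above boils down to: collect one identity
# string per package, sort them, and feed them to hashlib. A standalone
# sketch; pkg_id_str is a hypothetical stand-in for the plugin's
# _pkg_id_str helper (its exact fields are an assumption).
# ---------------------------------------------------------------------------
import hashlib

import createrepo_c as cr

def pkg_id_str(pkg):
    # Assumed package identity: pkgId plus location fields.
    return "{0}{1}{2}".format(pkg.pkgId or '',
                              pkg.location_href or '',
                              pkg.location_base or '')

def content_hash(primary_xml_path, hash_type="sha256"):
    """Order-independent hash of a repository's package set (sketch)."""
    strings = []
    cr.xml_parse_primary(primary_xml_path,
                         pkgcb=lambda pkg: strings.append(pkg_id_str(pkg)),
                         do_files=False)
    h = hashlib.new(hash_type)
    for s in sorted(strings):          # sorting makes the hash order-independent
        h.update(s.encode("utf-8"))    # hashlib requires bytes on Python 3
    return h.hexdigest()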
Example n. 19
def parse_repodata(primary_xml_path,
                   filelists_xml_path,
                   other_xml_path,
                   only_primary=False,
                   mirror=False):
    """
    Parse repodata to extract package info.

    Args:
        primary_xml_path (str): a path to a downloaded primary.xml
        filelists_xml_path (str): a path to a downloaded filelists.xml
        other_xml_path (str): a path to a downloaded other.xml

    Kwargs:
        only_primary (bool): If true, only the metadata in primary.xml will be parsed.
        mirror (bool): If true, raise an error when the metadata is ambiguous
            (duplicate pkgIds or NEVRAs) instead of only logging a warning.

    Returns:
        dict: createrepo_c package objects with the pkgId as a key
    """
    packages = collections.OrderedDict()

    nevras = set()
    pkgid_warning_triggered = False
    nevra_warning_triggered = False

    def pkgcb(pkg):
        """
        A callback which is used when a whole package entry in xml is parsed.

        Args:
            pkg (createrepo_c.Package): the parsed metadata for a package

        """
        nonlocal pkgid_warning_triggered
        nonlocal nevra_warning_triggered

        ERR_MSG = _(
            "The repository metadata being synced into Pulp is erroneous in a way that "
            "makes it ambiguous (duplicate {}), and therefore we do not allow it to be synced in "
            "'mirror_complete' mode. Please choose a sync policy which does not mirror "
            "repository metadata.\n\n"
            "Please read https://github.com/pulp/pulp_rpm/issues/2402 for more details."
        )
        WARN_MSG = _(
            "The repository metadata being synced into Pulp is erroneous in a way that "
            "makes it ambiguous (duplicate {}). Yum, DNF and Pulp try to handle these problems, "
            "but unexpected things may happen.\n\n"
            "Please read https://github.com/pulp/pulp_rpm/issues/2402 for more details."
        )

        if not pkgid_warning_triggered and pkg.pkgId in packages:
            pkgid_warning_triggered = True
            if mirror:
                raise Exception(ERR_MSG.format("PKGIDs"))
            else:
                log.warn(WARN_MSG.format("PKGIDs"))
        if not nevra_warning_triggered and pkg.nevra() in nevras:
            nevra_warning_triggered = True
            if mirror:
                raise Exception(ERR_MSG.format("NEVRAs"))
            else:
                log.warn(WARN_MSG.format("NEVRAs"))
        packages[pkg.pkgId] = pkg
        nevras.add(pkg.nevra())

    def newpkgcb(pkgId, name, arch):
        """
        A callback which is used when a new package entry is encountered.

        Only the opening <package> element has been parsed at that moment.
        This function has to return the package object to which the parsed data
        will be added, or None if the package should be skipped.

        The pkgId, name and arch of a package can be used to decide whether to
        skip further parsing. Available only for filelists.xml and other.xml.

        Args:
            pkgId(str): pkgId of a package
            name(str): name of a package
            arch(str): arch of a package

        Returns:
            createrepo_c.Package: the package object to which the parsed data
            should be added.

            If None is returned, further parsing of the package is skipped.

        """
        return packages.get(pkgId, None)

    def warningcb(warning_type, message):
        """
        A callback for warnings about oddities and formatting problems in the XML.

        A minimal, assumed implementation (the snippet uses ``warningcb``
        without defining it): log the warning and tell the parser to continue.
        """
        log.warn(_("PARSER WARNING: {}").format(message))
        return True  # continue parsing

    cr.xml_parse_primary(primary_xml_path,
                         pkgcb=pkgcb,
                         warningcb=warningcb,
                         do_files=False)
    if not only_primary:
        cr.xml_parse_filelists(filelists_xml_path,
                               newpkgcb=newpkgcb,
                               warningcb=warningcb)
        cr.xml_parse_other(other_xml_path,
                           newpkgcb=newpkgcb,
                           warningcb=warningcb)
    return packages
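
# ---------------------------------------------------------------------------
# A hypothetical invocation of parse_repodata(); the three paths are
# placeholders for metadata files already downloaded from a repository's
# repodata/ directory (the createrepo_c parsers accept compressed files
# directly).
# ---------------------------------------------------------------------------
packages = parse_repodata("primary.xml.gz",
                          "filelists.xml.gz",
                          "other.xml.gz",
                          mirror=False)
for pkg in packages.values():
    print(pkg.nevra(), len(pkg.files))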