Example 1
    def init_other_xml(self):
        """
        Initialize the other xml file where metadata snippets are written
        """
        filename = os.path.join(self.temp_working_dir, "other.xml.gz")
        self.other_xml = GzipFile(filename, 'w', compresslevel=9)
        self.other_xml.write("""<?xml version="1.0" encoding="UTF-8"?>
<otherdata xmlns="http://linux.duke.edu/metadata/other" packages="%s"> \n""" % len(self.units))
Example 2
    def init_primary_xml(self):
        """
        Initialize the primary xml file where metadata snippets are written
        """
        filename = os.path.join(self.temp_working_dir, "primary.xml.gz")
        self.primary_xml = GzipFile(filename, 'w', compresslevel=9)
        self.primary_xml.write("""<?xml version="1.0" encoding="UTF-8"?>\n <metadata xmlns="http://linux.duke.edu/metadata/common"
xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="%s"> \n""" % len(self.units))
Example 3
    def prepend_text(self, input_path, output_path, text, gzip=True):
        """
        @param input_path: path to input file we want to prepend 'text' to
        @type input_path: str

        @param output_path: output path that will be 'text' plus contents of file referred to with 'input_path'
        @type output_path: str

        @param text: text blob to prepend to file
        @type text: str

        @param gzip: True will gzip file contents
        @type gzip: bool
        @return:
        """
        if gzip:
            in_f = GzipFile(input_path, 'r')
            out_f = GzipFile(output_path, 'w', compresslevel=9)
        else:
            # input must be opened for reading; 'w' would truncate it
            in_f = open(input_path, 'r')
            out_f = open(output_path, 'w')

        try:
            out_f.write(text)
            while True:
                data = in_f.read(1024*1024)  # plain file objects reject the 'size' keyword
                if not data:
                    break
                out_f.write(data)
        finally:
            in_f.close()
            out_f.close()
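A hedged usage sketch of prepend_text (with the read-mode fix above): the gzipped body is written first, and the header, which depends on the final package count, is prepended afterwards. Here `gen` stands for an already constructed instance of the class defining this method (see Example 5); the paths and snippet are hypothetical.

# Hypothetical usage of prepend_text(); 'gen' is an existing instance.
import os
import tempfile
from gzip import GzipFile

workdir = tempfile.mkdtemp()
temp_path = os.path.join(workdir, "temp_primary.xml.gz")
final_path = os.path.join(workdir, "primary.xml.gz")

# the gzipped body and closing tag are written first
body = GzipFile(temp_path, 'w', compresslevel=9)
body.write('<package type="rpm">...</package>\n')  # per-unit snippet placeholder
body.write("\n </metadata>")
body.close()

# the header that needed the final count is prepended afterwards
header = """<?xml version="1.0" encoding="UTF-8"?>\n <metadata xmlns="http://linux.duke.edu/metadata/common"
xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="1"> \n"""
gen.prepend_text(temp_path, final_path, header, gzip=True)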
Example 4
    def init_xml(self):
        self.primary_xml = GzipFile(self.temp_primary_xml_path, 'w', compresslevel=9)
        self.filelists_xml = GzipFile(self.temp_filelists_xml_path, 'w', compresslevel=9)
        self.other_xml = GzipFile(self.temp_other_xml_path, 'w', compresslevel=9)
Example 5
class YumMetadataGenerator(object):
    """
    Yum metadata generator using per package snippet approach
    """
    def __init__(self, repodir, checksum_type=DEFAULT_CHECKSUM,
                 skip_metadata_types=None, is_cancelled=False, group_xml_path=None, updateinfo_xml_path=None, custom_metadata_dict=None):
        """
        @param repodir: repository dir where the repodata directory is created/exists
        @type  repodir: str

        @param checksum_type: checksum type to use when generating repodata; default is sha256
        @type  checksum_type: str

        @param skip_metadata_types: list of metadata ftypes to skip from the repodata
        @type  skip_metadata_types: []

        @param is_cancelled: when True, long-running steps abort by raising CancelException
        @type  is_cancelled: bool

        @param group_xml_path: path to comps xml to be merged with repodata
        @type group_xml_path: str

        @param updateinfo_xml_path: path to updateinfo xml to be merged with repodata
        @type updateinfo_xml_path: str

        @param custom_metadata_dict: custom metadata from scratchpad
        @type  custom_metadata_dict: {}
        """
        self.repodir = repodir
        self.unit_count = 0

        self.checksum_type = checksum_type
        self.skip = skip_metadata_types or []

        self.backup_repodata_dir = None
        self.is_cancelled = is_cancelled
        self.group_xml_path = group_xml_path
        self.updateinfo_xml_path = updateinfo_xml_path
        self.custom_metadata = custom_metadata_dict or {}
        self.setup_temp_working_dir()
        self.metadata_conf = self.setup_metadata_conf()

        self.primary_xml = None
        self.filelists_xml = None
        self.other_xml = None

        self.temp_primary_xml_path = os.path.join(self.temp_working_dir, "temp_primary.xml.gz")
        self.temp_filelists_xml_path = os.path.join(self.temp_working_dir, "temp_filelists.xml.gz")
        self.temp_other_xml_path = os.path.join(self.temp_working_dir, "temp_other.xml.gz")

        self.primary_xml_path = os.path.join(self.temp_working_dir, "primary.xml.gz")
        self.filelists_xml_path = os.path.join(self.temp_working_dir, "filelists.xml.gz")
        self.other_xml_path = os.path.join(self.temp_working_dir, "other.xml.gz")


    def setup_temp_working_dir(self):
        """
        Set up a temporary location where all the work is done before the
        final merge into the repodata location.
        """
        self.temp_working_dir = os.path.join(self.repodir, ".repodata")
        if not os.path.isdir(self.temp_working_dir):
            os.makedirs(self.temp_working_dir, mode=0755)

    def _backup_existing_repodata(self):
        """
        Takes a backup of any existing repodata files. This is used in the final
        step where other file types in repomd.xml such as presto, updateinfo, comps
        are copied back to the repodata.
        """
        current_repo_dir = os.path.join(self.repodir, "repodata")
        # Note: backup_repo_dir is used to store presto metadata and possibly other custom metadata types
        # they will be copied back into new 'repodata' if needed.
        current_repo_dir = encode_unicode(current_repo_dir)
        if os.path.exists(current_repo_dir):
            _LOG.info("existing metadata found; taking backup.")
            self.backup_repodata_dir = os.path.join(self.repodir, "repodata.old")
            if os.path.exists(self.backup_repodata_dir):
                _LOG.debug("clean up any stale dirs")
                shutil.rmtree(self.backup_repodata_dir)
            shutil.copytree(current_repo_dir, self.backup_repodata_dir)
            os.system("chmod -R u+wX %s" % self.backup_repodata_dir)

    def setup_metadata_conf(self):
        """
        Sets up the yum metadata config to perform the sqlitedb and repomd.xml generation.
        """
        conf = MetaDataConfig()
        conf.directory = self.repodir
        conf.database = 1
        conf.verbose = 1
        conf.skip_stat = 1
        conf.sumtype = self.checksum_type
        return conf

    def init_xml(self):
        self.primary_xml = GzipFile(self.temp_primary_xml_path, 'w', compresslevel=9)
        self.filelists_xml = GzipFile(self.temp_filelists_xml_path, 'w', compresslevel=9)
        self.other_xml = GzipFile(self.temp_other_xml_path, 'w', compresslevel=9)

    def prepend_text(self, input_path, output_path, text, gzip=True):
        """
        @param input_path: path to input file we want to prepend 'text' to
        @type input_path: str

        @param output_path: output path that will be 'text' plus contents of file referred to with 'input_path'
        @type output_path: str

        @param text: text blob to prepend to file
        @type text: str

        @param gzip: True will gzip file contents
        @type gzip: bool
        @return:
        """
        if gzip:
            in_f = GzipFile(input_path, 'r')
            out_f = GzipFile(output_path, 'w', compresslevel=9)
        else:
            # input must be opened for reading; 'w' would truncate it
            in_f = open(input_path, 'r')
            out_f = open(output_path, 'w')

        try:
            out_f.write(text)
            while True:
                data = in_f.read(1024*1024)  # plain file objects reject the 'size' keyword
                if not data:
                    break
                out_f.write(data)
        finally:
            in_f.close()
            out_f.close()

    def _close_primary_xml(self):
        """
        All the data should be written at this point; invoke this to
        close the primary xml gzipped file
        """
        self.primary_xml.write("""\n </metadata>""")
        self.primary_xml.close()

        blob = """<?xml version="1.0" encoding="UTF-8"?>\n <metadata xmlns="http://linux.duke.edu/metadata/common"
xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="%s"> \n""" % self.unit_count
        self.prepend_text(self.temp_primary_xml_path, self.primary_xml_path, blob)


    def _close_filelists_xml(self):
        """
        All the data should be written at this point; invoke this to
        close the filelists xml gzipped file
        """
        self.filelists_xml.write("""\n </filelists>""")
        self.filelists_xml.close()

        blob = """<?xml version="1.0" encoding="UTF-8"?>
<filelists xmlns="http://linux.duke.edu/metadata/filelists" packages="%s"> \n""" % self.unit_count
        self.prepend_text(self.temp_filelists_xml_path, self.filelists_xml_path, blob)

    def _close_other_xml(self):
        """
        All the data should be written at this point; invoke this to
        close the other xml gzipped file
        """
        self.other_xml.write("""\n </otherdata>""")
        self.other_xml.close()

        blob = """<?xml version="1.0" encoding="UTF-8"?>
<otherdata xmlns="http://linux.duke.edu/metadata/other" packages="%s"> \n""" % self.unit_count
        self.prepend_text(self.temp_other_xml_path, self.other_xml_path, blob)

    def close_xml(self):
        """
        Closes all open xml file handles
        @return:
        """
        self._close_primary_xml()
        self._close_filelists_xml()
        self._close_other_xml()

    def merge_unit_metadata(self, units):
        """
        This performs the actual merge of the snippets. The xml files are initialized and
        each unit's metadata is written to the xml files. The units passed in should be rpm
        units. If a unit doesn't have repodata info, a message is logged and the unit is skipped.
        Finally, the gzipped xmls are closed once all the units are written.

        @param units: List of rpm units from which repodata is taken and merged
        @type units: [AssociatedUnit]
        """
        _LOG.info("Performing per unit metadata merge on %s units" % len(units))
        if self.is_cancelled:
            _LOG.warn("cancelling merge unit metadata")
            raise CancelException()
        start = time.time()
        try:
            for unit in units:
                if self.is_cancelled:
                    _LOG.warn("cancelling merge unit metadata")
                    raise CancelException()
                if 'repodata' in unit.metadata:
                    try:
                        self.primary_xml.write(unit.metadata['repodata']['primary'].encode('utf-8'))
                        self.filelists_xml.write(unit.metadata['repodata']['filelists'].encode('utf-8'))
                        self.other_xml.write(unit.metadata['repodata']['other'].encode('utf-8'))
                    except Exception, e:
                        _LOG.error("Error occurred writing metadata to file; Exception: %s" % e)
                        continue
                else:
                    _LOG.debug("No repodata found for the unit; continue")
                    continue
        finally:
            self.unit_count += len(units)
            end = time.time()
        _LOG.info("per unit metadata merge completed in %s seconds" % (end - start))

    def merge_custom_repodata(self):
        """
        merge any repodata preserved on the repo scratchpad
        """
        _LOG.info("check scratchpad for any repodata")
        if not self.custom_metadata:
            # nothing found on scratchpad
            return False
        current_repo_dir = os.path.join(self.repodir, "repodata")

        for ftype, fxml in self.custom_metadata.items():
            if ftype in self.skip:
                continue
            if not fxml:
                continue
            ftype_xml_path = os.path.join(self.repodir, "%s.xml" % ftype)
            f = open(ftype_xml_path, "w")
            try:
                try:
                    data = fxml.encode('utf-8')
                    f.write(data)
                except Exception, e:
                    _LOG.exception("Unable to write file type %s" % ftype)
                    continue
            finally:
                f.close()
            # merge the xml we just wrote with repodata
            if os.path.isfile(ftype_xml_path):
                _LOG.info("Modifying repo for %s metadata" % ftype)
                modify_repo(current_repo_dir, ftype_xml_path, checksum_type=self.checksum_type)
        return True

    def merge_comps_xml(self):
        """
        merge comps xml file to repodata
        """
        if self.group_xml_path is None or not os.path.isfile(self.group_xml_path):
            # no group xml formed, nothing to do
            _LOG.info("comps xml path does not exist; skipping merge")
            return
        repodata_working_dir = os.path.join(self.repodir, "repodata")
        _LOG.info("Modifying repo for %s metadata" % "comps")
        modify_repo(repodata_working_dir, self.group_xml_path, checksum_type=self.checksum_type)

    def merge_updateinfo_xml(self):
        """
        merge updateinfo xml file to repodata
        """
        if self.updateinfo_xml_path is None or not os.path.isfile(self.updateinfo_xml_path):
            # no updateinfo xml formed, nothing to do
            _LOG.info("updateinfo xml path does not exist; skipping merge")
            return
        repodata_working_dir = os.path.join(self.repodir, "repodata")
        _LOG.info("Modifying repo for %s metadata" % "updateinfo")
        modify_repo(repodata_working_dir, self.updateinfo_xml_path,
                    checksum_type=self.checksum_type)

    def merge_other_filetypes_from_backup(self):
        """
        Merges any other filetypes from the backed-up repodata that need to be included
        in the new repodata. This is where the presto, updateinfo and comps xmls are
        looked up in the old repomd.xml and merged back into the new one using modifyrepo.
        The primary, filelists and other xmls are excluded from the process.
        """
        _LOG.info("Performing merge on other file types")
        try:
            if not self.backup_repodata_dir:
                _LOG.info("Nothing further to check; we got our fresh metadata")
                return
            current_repo_dir = os.path.join(self.repodir, "repodata")
            # check if presto metadata exists in the backup
            repodata_file = os.path.join(self.backup_repodata_dir, "repomd.xml")
            ftypes = util.get_repomd_filetypes(repodata_file)
            base_ftypes = ['primary', 'primary_db', 'filelists_db', 'filelists', 'other', 'other_db']
            for ftype in ftypes:
                if self.is_cancelled:
                    _LOG.warn("cancel merge other filetype metadata")
                    raise CancelException()
                if ftype in base_ftypes:
                    # no need to process these again
                    continue
                if ftype in self.skip:
                    _LOG.info("mdtype %s part of skip metadata; skipping" % ftype)
                    continue
                filetype_path = os.path.join(self.backup_repodata_dir, os.path.basename(util.get_repomd_filetype_path(repodata_file, ftype)))
                # modifyrepo uses filename as mdtype, rename to type.<ext>
                renamed_filetype_path = os.path.join(os.path.dirname(filetype_path),
                    ftype + '.' + '.'.join(os.path.basename(filetype_path).split('.')[1:]))
                os.rename(filetype_path, renamed_filetype_path)
                if renamed_filetype_path.endswith('.gz'):
                    # if file is gzipped, decompress before passing to modifyrepo
                    data = gzip.open(renamed_filetype_path).read().decode("utf-8", "replace")
                    renamed_filetype_path = '.'.join(renamed_filetype_path.split('.')[:-1])
                    open(renamed_filetype_path, 'w').write(data.encode("UTF-8"))
                if os.path.isfile(renamed_filetype_path):
                    _LOG.info("Modifying repo for %s metadata" % ftype)
                    modify_repo(current_repo_dir, renamed_filetype_path,
                                checksum_type=self.checksum_type)
        finally:
            if self.backup_repodata_dir:
                shutil.rmtree(self.backup_repodata_dir)

    def final_repodata_move(self):
        # setup the yum config to do the final steps of generating sqlite db files
        try:
            mdgen = MetaDataGenerator(self.metadata_conf)
            mdgen.doRepoMetadata()
            # do the final move to the repodata location from .repodata
            mdgen.doFinalMove()
        except Exception:
            # pkg metadata files may be missing; the final move could not be performed
            _LOG.exception("Error performing final move, could be missing pkg metadata files")

    def run(self, units):
        """
        Invokes the metadata generation: takes a backup of existing repodata,
        looks up units and merges the per unit snippets, generates the sqlite db and
        repomd files using createrepo apis, and finally merges back any other
        file types from the backup.

        @param units: list of rpm units whose repodata snippets are merged
        @type units: [AssociatedUnit]
        """
        # backup existing repodata dir
        self._backup_existing_repodata()
        # extract the per rpm unit metadata and merge to create package xml data
        self.init_xml()
        self.merge_unit_metadata(units)
        self.close_xml()

        self.final_repodata_move()
        # lookup and merge updateinfo, comps and other metadata
        self.merge_comps_xml()
        self.merge_updateinfo_xml()
        # merge any custom metadata stored on the scratchpad; this includes prestodelta
        self.merge_custom_repodata()
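
Putting the class together, a hedged end-to-end sketch of driving YumMetadataGenerator.run(). FakeUnit below only mimics the metadata['repodata'] shape that merge_unit_metadata reads; a real caller would pass AssociatedUnit objects, and the repository path is made up.

# Hypothetical driver; FakeUnit is a stand-in for AssociatedUnit.
class FakeUnit(object):
    def __init__(self, primary, filelists, other):
        self.metadata = {'repodata': {'primary': primary,
                                      'filelists': filelists,
                                      'other': other}}

units = [FakeUnit(u'<package type="rpm">...</package>\n',
                  u'<package pkgid="abc123" name="foo"/>\n',
                  u'<package pkgid="abc123" name="foo"/>\n')]

generator = YumMetadataGenerator('/tmp/myrepo', checksum_type='sha256')
generator.run(units)
# /tmp/myrepo/repodata should now hold the merged xmls, sqlite dbs and repomd.xml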