class YumMetadataGenerator(object):
    """
    Yum metadata generator using a per package snippet approach
    """
    def __init__(self, repodir, checksum_type=DEFAULT_CHECKSUM,
                 skip_metadata_types=None, is_cancelled=False, group_xml_path=None,
                 updateinfo_xml_path=None, custom_metadata_dict=None):
        """
        @param repodir: repository dir where the repodata directory is created/exists
        @type repodir: str

        @param checksum_type: checksum type to use when generating repodata; default is sha256
        @type checksum_type: str

        @param skip_metadata_types: list of metadata ftypes to skip from the repodata
        @type skip_metadata_types: []

        @param group_xml_path: path to comps xml to be merged with repodata
        @type group_xml_path: str

        @param updateinfo_xml_path: path to updateinfo xml to be merged with repodata
        @type updateinfo_xml_path: str

        @param custom_metadata_dict: custom metadata from the repo scratchpad
        @type custom_metadata_dict: {}
        """
        self.repodir = repodir
        self.unit_count = 0

        self.checksum_type = checksum_type
        self.skip = skip_metadata_types or []

        self.backup_repodata_dir = None
        self.is_cancelled = is_cancelled
        self.group_xml_path = group_xml_path
        self.updateinfo_xml_path = updateinfo_xml_path
        self.custom_metadata = custom_metadata_dict or {}
        self.setup_temp_working_dir()
        self.metadata_conf = self.setup_metadata_conf()

        self.primary_xml = None
        self.filelists_xml = None
        self.other_xml = None

        self.temp_primary_xml_path = os.path.join(self.temp_working_dir, "temp_primary.xml.gz")
        self.temp_filelists_xml_path = os.path.join(self.temp_working_dir, "temp_filelists.xml.gz")
        self.temp_other_xml_path = os.path.join(self.temp_working_dir, "temp_other.xml.gz")

        self.primary_xml_path = os.path.join(self.temp_working_dir, "primary.xml.gz")
        self.filelists_xml_path = os.path.join(self.temp_working_dir, "filelists.xml.gz")
        self.other_xml_path = os.path.join(self.temp_working_dir, "other.xml.gz")

    def setup_temp_working_dir(self):
        """
        Set up a temporary location where we can do all the work and
        finally merge to the final location.
        """
        self.temp_working_dir = os.path.join(self.repodir, ".repodata")
        if not os.path.isdir(self.temp_working_dir):
            os.makedirs(self.temp_working_dir, mode=0755)

    def _backup_existing_repodata(self):
        """
        Take a backup of any existing repodata files. This is used in the final
        step where other file types in repomd.xml, such as presto, updateinfo and
        comps, are copied back to the repodata.
        """
        current_repo_dir = os.path.join(self.repodir, "repodata")
        # Note: backup_repodata_dir is used to store presto metadata and possibly
        # other custom metadata types; they will be copied back into the new
        # 'repodata' if needed.
        current_repo_dir = encode_unicode(current_repo_dir)
        if os.path.exists(current_repo_dir):
            _LOG.info("existing metadata found; taking backup.")
            self.backup_repodata_dir = os.path.join(self.repodir, "repodata.old")
            if os.path.exists(self.backup_repodata_dir):
                _LOG.debug("clean up any stale dirs")
                shutil.rmtree(self.backup_repodata_dir)
            shutil.copytree(current_repo_dir, self.backup_repodata_dir)
            os.system("chmod -R u+wX %s" % self.backup_repodata_dir)

    def setup_metadata_conf(self):
        """
        Set up the yum metadata config used to perform the sqlite db and
        repomd.xml generation.
""" conf = MetaDataConfig() conf.directory = self.repodir conf.database = 1 conf.verbose = 1 conf.skip_stat = 1 conf.sumtype = self.checksum_type return conf def init_xml(self): self.primary_xml= GzipFile(self.temp_primary_xml_path, 'w', compresslevel=9) self.filelists_xml= GzipFile(self.temp_filelists_xml_path, 'w', compresslevel=9) self.other_xml= GzipFile(self.temp_other_xml_path, 'w', compresslevel=9) def prepend_text(self, input_path, output_path, text, gzip=True): """ @param input_path: path to input file we want to prepend 'text' to @type input_path: str @param output_path: output path that will be 'text' plus contents of file referred to with 'input_path' @type output_path: str @param text: text blob to prepend to file @type text: str @param gzip: True will gzip file contents @type gzip: bool @return: """ if gzip: in_f = GzipFile(input_path, 'r', compresslevel=9) out_f = GzipFile(output_path, 'w', compresslevel=9) else: in_f = open(input_path, 'w') out_f = open(output_path, 'w') try: out_f.write(text) while True: data = in_f.read(size=1024*1024) if not data: break out_f.write(data) finally: in_f.close() out_f.close() def _close_primary_xml(self): """ All the data should be written at this point; invoke this to close the primary xml gzipped file """ self.primary_xml.write("""\n </metadata>""") self.primary_xml.close() blob = """<?xml version="1.0" encoding="UTF-8"?>\n <metadata xmlns="http://linux.duke.edu/metadata/common" xmlns:rpm="http://linux.duke.edu/metadata/rpm" packages="%s"> \n""" % self.unit_count self.prepend_text(self.temp_primary_xml_path, self.primary_xml_path, blob) def _close_filelists_xml(self): """ All the data should be written at this point; invoke this to close the filelists xml gzipped file """ self.filelists_xml.write("""\n </filelists>""") self.filelists_xml.close() blob = """<?xml version="1.0" encoding="UTF-8"?> <filelists xmlns="http://linux.duke.edu/metadata/filelists" packages="%s"> \n""" % self.unit_count self.prepend_text(self.temp_filelists_xml_path, self.filelists_xml_path, blob) def _close_other_xml(self): """ All the data should be written at this point; invoke this to close the other xml gzipped file """ self.other_xml.write("""\n </otherdata>""") self.other_xml.close() blob = """<?xml version="1.0" encoding="UTF-8"?> <otherdata xmlns="http://linux.duke.edu/metadata/other" packages="%s"> \n""" % self.unit_count self.prepend_text(self.temp_other_xml_path, self.other_xml_path, blob) def close_xml(self): """ Closes all open xml file handles @return: """ self._close_primary_xml() self._close_filelists_xml() self._close_other_xml() def merge_unit_metadata(self, units): """ This performs the actual merge of the snippets. The xml files are initialized and each unit metadata is written to the xml files. These units here should be rpm units. If a unit doesnt have repodata info, log the message and skip that unit. Finally the gzipped xmls are closed when all the units are written. 

        @param units: list of rpm units from which repodata is taken and merged
        @type units: [AssociatedUnit]
        """
        _LOG.info("Performing per unit metadata merge on %s units" % len(units))
        if self.is_cancelled:
            _LOG.warn("cancelling merge unit metadata")
            raise CancelException()
        start = time.time()
        try:
            for unit in units:
                if self.is_cancelled:
                    _LOG.warn("cancelling merge unit metadata")
                    raise CancelException()
                if unit.metadata.has_key('repodata'):
                    try:
                        self.primary_xml.write(unit.metadata['repodata']['primary'].encode('utf-8'))
                        self.filelists_xml.write(unit.metadata['repodata']['filelists'].encode('utf-8'))
                        self.other_xml.write(unit.metadata['repodata']['other'].encode('utf-8'))
                    except Exception, e:
                        _LOG.error("Error occurred writing metadata to file; Exception: %s" % e)
                        continue
                else:
                    _LOG.debug("No repodata found for the unit; skipping")
                    continue
        finally:
            self.unit_count += len(units)
            end = time.time()
            _LOG.info("per unit metadata merge completed in %s seconds" % (end - start))

    def merge_custom_repodata(self):
        """
        merge any repodata preserved on the repo scratchpad
        """
        _LOG.info("check scratchpad for any repodata")
        if not self.custom_metadata:
            # nothing found on scratchpad
            return False
        current_repo_dir = os.path.join(self.repodir, "repodata")

        for ftype, fxml in self.custom_metadata.items():
            if ftype in self.skip:
                continue
            if not fxml:
                continue
            ftype_xml_path = os.path.join(self.repodir, "%s.xml" % ftype)
            f = open(ftype_xml_path, "w")
            try:
                try:
                    data = fxml.encode('utf-8')
                    f.write(data)
                except Exception, e:
                    _LOG.exception("Unable to write file type %s" % ftype)
                    continue
            finally:
                f.close()
            # merge the xml we just wrote with the repodata
            if os.path.isfile(ftype_xml_path):
                _LOG.info("Modifying repo for %s metadata" % ftype)
                modify_repo(current_repo_dir, ftype_xml_path, checksum_type=self.checksum_type)
        return True

    def merge_comps_xml(self):
        """
        merge comps xml file to repodata
        """
        if self.group_xml_path is None or not os.path.isfile(self.group_xml_path):
            # no group xml formed; nothing to do
            _LOG.info("comps xml path does not exist; skipping merge")
            return
        repodata_working_dir = os.path.join(self.repodir, "repodata")
        _LOG.info("Modifying repo for comps metadata")
        modify_repo(repodata_working_dir, self.group_xml_path, checksum_type=self.checksum_type)

    def merge_updateinfo_xml(self):
        """
        merge updateinfo xml file to repodata
        """
        if self.updateinfo_xml_path is None or not os.path.isfile(self.updateinfo_xml_path):
            # no updateinfo xml formed; nothing to do
            _LOG.info("updateinfo xml path does not exist; skipping merge")
            return
        repodata_working_dir = os.path.join(self.repodir, "repodata")
        _LOG.info("Modifying repo for updateinfo metadata")
        modify_repo(repodata_working_dir, self.updateinfo_xml_path, checksum_type=self.checksum_type)

    def merge_other_filetypes_from_backup(self):
        """
        Merge any other file types in the backed up repodata that need to be
        included back into the repodata. This is where the presto, updateinfo
        and comps xmls are looked up in the old repomd.xml and merged back into
        the new one using modifyrepo. The primary, filelists and other xmls are
        excluded from the process.
""" _LOG.info("Performing merge on other file types") try: if not self.backup_repodata_dir: _LOG.info("Nothing further to check; we got our fresh metadata") return current_repo_dir = os.path.join(self.repodir, "repodata") #check if presto metadata exist in the backup repodata_file = os.path.join(self.backup_repodata_dir, "repomd.xml") ftypes = util.get_repomd_filetypes(repodata_file) base_ftypes = ['primary', 'primary_db', 'filelists_db', 'filelists', 'other', 'other_db'] for ftype in ftypes: if self.is_cancelled: _LOG.warn("cancel merge other filetype metadata") raise CancelException() if ftype in base_ftypes: # no need to process these again continue if ftype in self.skip and not self.skip[ftype]: _LOG.info("mdtype %s part of skip metadata; skipping" % ftype) continue filetype_path = os.path.join(self.backup_repodata_dir, os.path.basename(util.get_repomd_filetype_path(repodata_file, ftype))) # modifyrepo uses filename as mdtype, rename to type.<ext> renamed_filetype_path = os.path.join(os.path.dirname(filetype_path),\ ftype + '.' + '.'.join(os.path.basename(filetype_path).split('.')[1:])) os.rename(filetype_path, renamed_filetype_path) if renamed_filetype_path.endswith('.gz'): # if file is gzipped, decompress before passing to modifyrepo data = gzip.open(renamed_filetype_path).read().decode("utf-8", "replace") renamed_filetype_path = '.'.join(renamed_filetype_path.split('.')[:-1]) open(renamed_filetype_path, 'w').write(data.encode("UTF-8")) if os.path.isfile(renamed_filetype_path): _LOG.info("Modifying repo for %s metadata" % ftype) modify_repo(current_repo_dir, renamed_filetype_path, checksum_type=self.checksum_type) finally: if self.backup_repodata_dir: shutil.rmtree(self.backup_repodata_dir) def final_repodata_move(self): # setup the yum config to do the final steps of generating sqlite db files try: mdgen = MetaDataGenerator(self.metadata_conf) mdgen.doRepoMetadata() # do the final move to the repodata location from .repodata mdgen.doFinalMove() except: # might have missing metadata count not perform final move _LOG.error("Error performing final move, could be missing pkg metadata files") def run(self, units): """ Invokes the metadata generation by taking a backup of existing repodata; looking up units and merging the per unit snippets; generate sqlite db, repomd files using createrepo apis and finally merge back any other """ # backup existing repodata dir self._backup_existing_repodata() # extract the per rpm unit metadata and merge to create package xml data self.init_xml() self.merge_unit_metadata(units) self.close_xml() self.final_repodata_move() # lookup and merge updateinfo, comps and other metadata self.merge_comps_xml() self.merge_updateinfo_xml() # merge any custom metadata stored on the scratchpad, this includes prestodelta self.merge_custom_repodata()