def test_segv2(self):
    """Read a nested attribute from the sum of two identically loaded Comps.

    Both operands are parsed from ``support.COMPS_PATH``.  The test has no
    assertions; it passes as long as the attribute access completes
    (presumably a crash regression test, judging by its name).
    """
    operands = []
    for _ in range(2):
        loaded = libcomps.Comps()
        loaded.fromxml_f(support.COMPS_PATH)
        operands.append(loaded)

    merged = operands[0] + operands[1]
    # Reaching into the merged object is the operation under test.
    first_package_name = merged.groups[0].packages[0].name
def load_input_files(self):
    """
    Load every input xml file listed in ``self.opts.load`` into ``self.comps``.

    Files whose name ends with ``.gz`` are decompressed to a temporary file
    first, because libcomps cannot read gzipped input directly.  Each
    successfully parsed file is merged into ``self.comps`` with ``+=``.

    Returns nothing.

    Raises:
        dnf.exceptions.Error: if a file cannot be read or parsed.  The parser
            messages reported by libcomps are logged (de-duplicated) before
            the exception is raised.
    """
    for file_name in self.opts.load:
        file_comps = libcomps.Comps()
        try:
            if file_name.endswith('.gz'):
                # libcomps does not support gzipped files - decompress to
                # temporary location
                with gzip.open(file_name) as gz_file:
                    temp_file = tempfile.NamedTemporaryFile(delete=False)
                    try:
                        try:
                            shutil.copyfileobj(gz_file, temp_file)
                        finally:
                            # Close temp_file to ensure the content is flushed
                            # to disk, and so the handle is released even when
                            # decompression fails part-way through.
                            temp_file.close()
                        file_comps.fromxml_f(temp_file.name)
                    finally:
                        os.unlink(temp_file.name)
            else:
                file_comps.fromxml_f(file_name)
        except (IOError, OSError, libcomps.ParserError) as err:
            # gzip module raises OSError on reading from malformed gz file
            # get_last_errors() output often contains duplicate lines, remove them
            seen = set()
            for error in file_comps.get_last_errors():
                if error in seen:
                    continue
                logger.error(error.strip())
                seen.add(error)
            raise dnf.exceptions.Error(
                _("Can't load file \"{}\": {}").format(file_name, err))
        else:
            self.comps += file_comps
def test_environment_parse(self):
    """Check that a comps document with one group and one environment parses.

    The document is fed to ``libcomps.Comps.fromxml_str`` and the returned
    status is asserted to be non-negative.
    """
    document = """\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE comps PUBLIC "-//Red Hat, Inc.//DTD Comps info//EN" "comps.dtd">
<comps>
  <group>
   <id>somerset</id>
   <default>true</default>
   <uservisible>true</uservisible>
   <display_order>1024</display_order>
   <name>Solid Ground</name>
   <description>--</description>
    <packagelist>
      <packagereq type="mandatory">pepper</packagereq>
      <packagereq type="mandatory">trampoline</packagereq>
    </packagelist>
  </group>
  <environment>
    <id>minimal</id>
    <name>Min install</name>
    <description>Basic functionality.</description>
    <display_order>5</display_order>
    <grouplist>
      <groupid>somerset</groupid>
    </grouplist>
  </environment>
</comps>
"""
    parser = libcomps.Comps()
    status = parser.fromxml_str(document)
    self.assertGreaterEqual(status, 0)
def _add_from_xml_filename(self, fn):
    """Parse the comps file at ``fn`` and merge it into ``self._i``.

    Raises:
        CompsError: when ``fromxml_f`` returns -1; the message is the
            space-joined list from ``get_last_parse_errors()``.
    """
    parsed = libcomps.Comps()
    if parsed.fromxml_f(fn) == -1:
        raise CompsError(' '.join(parsed.get_last_parse_errors()))
    self._i = self._i + parsed
def _add_from_xml_filename(self, fn):
    """Parse the comps file at ``fn`` and merge it in place into ``self._i``.

    Raises:
        CompsError: when libcomps raises ``ParserError``; the message is the
            space-joined list from ``get_last_errors()``.
    """
    parsed = libcomps.Comps()
    try:
        parsed.fromxml_f(fn)
    except libcomps.ParserError:
        message = ' '.join(parsed.get_last_errors())
        raise CompsError(message)
    self._i += parsed
def parse_comps_components(comps_file):
    """Parse comps-related components found in the specified file.

    The file content is read as bytes, decoded as UTF-8 and handed to
    libcomps.  Langpacks, categories, environments and groups found in it are
    looked up or created through each model's ``objects.get_or_create``.

    Returns:
        tuple: ``(created_objects, all_objects)`` - the objects that were newly
        created, and every object that was looked up or created, both in
        processing order (langpacks, categories, environments, groups).
    """
    created_objects = []
    all_objects = []

    def _record(obj, was_created):
        # Every object goes to all_objects; only new ones to created_objects.
        if was_created:
            created_objects.append(obj)
        all_objects.append(obj)

    comps = libcomps.Comps()
    # Read the file and pass the string along because comps.fromxml_f() will only take a
    # path-string that doesn't work on things like S3 storage
    with comps_file.file.open("rb") as handle:
        raw = handle.read()
        comps.fromxml_str(raw.decode("utf-8"))

    if comps.langpacks:
        langpack_dict = PackageLangpacks.libcomps_to_dict(comps.langpacks)
        _record(*PackageLangpacks.objects.get_or_create(
            matches=strdict_to_dict(comps.langpacks),
            digest=dict_digest(langpack_dict)))

    # The three remaining component kinds share one shape: convert to a dict,
    # stamp a digest computed from that dict, then get_or_create.
    component_kinds = (
        (PackageCategory, "categories"),
        (PackageEnvironment, "environments"),
        (PackageGroup, "groups"),
    )
    for model, attribute in component_kinds:
        entries = getattr(comps, attribute)
        if not entries:
            continue
        for entry in entries:
            fields = model.libcomps_to_dict(entry)
            fields["digest"] = dict_digest(fields)
            _record(*model.objects.get_or_create(**fields))

    return created_objects, all_objects
def parse(self):
    """Parse packages' components.

    Loads the comps file at ``self.comps_result.path``, reports the combined
    number of groups, categories and environments as already done, stores a
    langpack ``DeclarativeContent`` on ``self.package_language_pack_dc`` when
    langpacks are present, and then delegates to the ``_init_dc_*`` helpers.
    """
    parsed = libcomps.Comps()
    parsed.fromxml_f(self.comps_result.path)

    with ProgressReport(message="Parsed Comps", code="parsing.comps") as report:
        # Parsing is already complete here, so total and done are the same.
        item_count = sum(
            len(section)
            for section in (parsed.groups, parsed.categories, parsed.environments)
        )
        report.total = item_count
        report.done = item_count

    if parsed.langpacks:
        as_dict = PackageLangpacks.libcomps_to_dict(parsed.langpacks)
        langpack_content = PackageLangpacks(
            matches=strdict_to_dict(parsed.langpacks),
            digest=dict_digest(as_dict))
        self.package_language_pack_dc = DeclarativeContent(content=langpack_content)
        self.package_language_pack_dc.extra_data = defaultdict(list)

    for initialise in (
        self._init_dc_categories,
        self._init_dc_environments,
        self._init_dc_groups,
    ):
        initialise(parsed)
def test_segv(self):
    """Add a loaded Comps to an empty one.

    The test has no assertions; it passes as long as the addition completes.
    """
    empty = libcomps.Comps()
    populated = libcomps.Comps()
    populated.fromxml_f(support.COMPS_PATH)
    merged = empty + populated  # sigsegved here
def __init__(self):
    """Start with an empty libcomps container and a fresh ``_Langs`` helper."""
    # Underlying libcomps object that parsed files get merged into.
    self._i = libcomps.Comps()
    self._langs = _Langs()
def __init__(self, cli):
    """Initialise the base command with ``cli`` and start with empty comps."""
    super(GroupsManagerCommand, self).__init__(cli)
    # Empty comps container, held on the command as self.comps.
    self.comps = libcomps.Comps()
def create_repomd_xml(
    content,
    publication,
    checksum_types,
    extra_repomdrecords,
    sub_folder=None,
    metadata_signing_service=None,
):
    """
    Creates a repomd.xml file.

    Writes primary/filelists/other/updateinfo XML (and, when
    ``publication.sqlite_metadata`` is set, the matching sqlite databases),
    modules.yaml and comps.xml under the current working directory, registers
    each of them as ``PublishedMetadata`` and finally writes and publishes
    repomd.xml, optionally signed.

    Args:
        content(app.models.Content): content set
        publication(pulpcore.plugin.models.Publication): the publication
        checksum_types(dict): its ``"package"`` entry (if any) overrides the checksum type
            written for packages; the whole mapping is also passed to
            ``get_checksum_type`` for every repomd record
        extra_repomdrecords(list): list with data relative to repo metadata files
        sub_folder(str): name of the folder for sub repos
        metadata_signing_service (pulpcore.app.models.AsciiArmoredDetachedSigningService):
            A reference to an associated signing service.

    """
    cwd = os.getcwd()
    repodata_path = REPODATA_PATH
    has_modules = False
    has_comps = False
    package_checksum_type = checksum_types.get("package")

    if sub_folder:
        cwd = os.path.join(cwd, sub_folder)
        repodata_path = os.path.join(sub_folder, repodata_path)

    # Prepare metadata files
    repomd_path = os.path.join(cwd, "repomd.xml")
    pri_xml_path = os.path.join(cwd, "primary.xml.gz")
    fil_xml_path = os.path.join(cwd, "filelists.xml.gz")
    oth_xml_path = os.path.join(cwd, "other.xml.gz")
    upd_xml_path = os.path.join(cwd, "updateinfo.xml.gz")
    mod_yml_path = os.path.join(cwd, "modules.yaml")
    comps_xml_path = os.path.join(cwd, "comps.xml")

    pri_xml = cr.PrimaryXmlFile(pri_xml_path)
    fil_xml = cr.FilelistsXmlFile(fil_xml_path)
    oth_xml = cr.OtherXmlFile(oth_xml_path)
    upd_xml = cr.UpdateInfoXmlFile(upd_xml_path)

    # The sqlite variables below exist only when sqlite metadata is requested;
    # every later use is guarded by the same flag.
    if publication.sqlite_metadata:
        pri_db_path = os.path.join(cwd, "primary.sqlite")
        fil_db_path = os.path.join(cwd, "filelists.sqlite")
        oth_db_path = os.path.join(cwd, "other.sqlite")
        pri_db = cr.PrimarySqlite(pri_db_path)
        fil_db = cr.FilelistsSqlite(fil_db_path)
        oth_db = cr.OtherSqlite(oth_db_path)

    packages = Package.objects.filter(pk__in=content)
    total_packages = packages.count()

    pri_xml.set_num_of_pkgs(total_packages)
    fil_xml.set_num_of_pkgs(total_packages)
    oth_xml.set_num_of_pkgs(total_packages)

    # We want to support publishing with a different checksum type than the one built-in to the
    # package itself, so we need to get the correct checksums somehow if there is an override.
    # We must also take into consideration that if the package has not been downloaded the only
    # checksum that is available is the one built-in.
    #
    # Since this lookup goes from Package->Content->ContentArtifact->Artifact, performance is a
    # challenge. We use ContentArtifact as our starting point because it enables us to work with
    # simple foreign keys and avoid messing with the many-to-many relationship, which doesn't
    # work with select_related() and performs poorly with prefetch_related(). This is fine
    # because we know that Packages should only ever have one artifact per content.
    contentartifact_qs = (
        ContentArtifact.objects.filter(content__in=packages.only("pk")).
        select_related(
            # content__rpm_package is a bit of a hack, exploiting the way django sets up model
            # inheritance, but it works and is unlikely to break. All content artifacts being
            # accessed here have an associated Package since they originally came from the
            # Package queryset.
            "artifact",
            "content__rpm_package",
        ).only("artifact", "content__rpm_package__checksum_type",
               "content__rpm_package__pkgId"))

    # Maps content pk -> (checksum type, checksum value) to publish for that package.
    pkg_to_hash = {}
    for ca in contentartifact_qs.iterator():
        pkgid = None
        if package_checksum_type:
            # NOTE(review): lower() is re-applied on every iteration; the result is the same
            # each time.
            package_checksum_type = package_checksum_type.lower()
            pkgid = getattr(ca.artifact, package_checksum_type, None)
        if pkgid:
            pkg_to_hash[ca.content_id] = (package_checksum_type, pkgid)
        else:
            # No override requested, or the artifact has no value for the requested type:
            # fall back to the checksum recorded on the package itself.
            pkg_to_hash[ca.content_id] = (
                ca.content.rpm_package.checksum_type,
                ca.content.rpm_package.pkgId,
            )

    # Process all packages
    for package in packages.iterator():
        pkg = package.to_createrepo_c()

        # rewrite the checksum and checksum type with the desired ones
        (checksum, pkgId) = pkg_to_hash[package.pk]
        pkg.checksum_type = checksum
        pkg.pkgId = pkgId

        pkg_filename = os.path.basename(package.location_href)
        # this can cause an issue when two same RPM package names appears
        # a/name1.rpm b/name1.rpm
        pkg.location_href = os.path.join(PACKAGES_DIRECTORY,
                                         pkg_filename[0].lower(), pkg_filename)
        pri_xml.add_pkg(pkg)
        fil_xml.add_pkg(pkg)
        oth_xml.add_pkg(pkg)
        if publication.sqlite_metadata:
            pri_db.add_pkg(pkg)
            fil_db.add_pkg(pkg)
            oth_db.add_pkg(pkg)

    # Process update records
    for update_record in UpdateRecord.objects.filter(
            pk__in=content).iterator():
        upd_xml.add_chunk(
            cr.xml_dump_updaterecord(update_record.to_createrepo_c()))

    # Process modulemd and modulemd_defaults
    # The file is opened in append mode, so modules.yaml is created even when nothing is
    # written; it is only registered in repomd when has_modules is set.
    with open(mod_yml_path, "ab") as mod_yml:
        for modulemd in Modulemd.objects.filter(pk__in=content).iterator():
            mod_yml.write(modulemd._artifacts.get().file.read())
            has_modules = True
        for default in ModulemdDefaults.objects.filter(
                pk__in=content).iterator():
            mod_yml.write(default._artifacts.get().file.read())
            has_modules = True

    # Process comps
    comps = libcomps.Comps()
    for pkg_grp in PackageGroup.objects.filter(pk__in=content).iterator():
        group = pkg_grp.pkg_grp_to_libcomps()
        comps.groups.append(group)
        has_comps = True
    for pkg_cat in PackageCategory.objects.filter(pk__in=content).iterator():
        cat = pkg_cat.pkg_cat_to_libcomps()
        comps.categories.append(cat)
        has_comps = True
    for pkg_env in PackageEnvironment.objects.filter(
            pk__in=content).iterator():
        env = pkg_env.pkg_env_to_libcomps()
        comps.environments.append(env)
        has_comps = True
    for pkg_lng in PackageLangpacks.objects.filter(pk__in=content).iterator():
        # Assignment, not append: if several langpack objects match, the last one wins.
        comps.langpacks = dict_to_strdict(pkg_lng.matches)
        has_comps = True

    # comps.xml is always written; it is only registered in repomd when has_comps is set.
    comps.toxml_f(
        comps_xml_path,
        xml_options={
            "default_explicit": True,
            "empty_groups": True,
            "uservisible_explicit": True
        },
    )

    pri_xml.close()
    fil_xml.close()
    oth_xml.close()
    upd_xml.close()

    repomd = cr.Repomd()

    # Each entry is (record name, file path, sqlite db to update with the record checksum).
    if publication.sqlite_metadata:
        repomdrecords = [
            ("primary", pri_xml_path, pri_db),
            ("filelists", fil_xml_path, fil_db),
            ("other", oth_xml_path, oth_db),
            ("primary_db", pri_db_path, None),
            ("filelists_db", fil_db_path, None),
            ("other_db", oth_db_path, None),
            ("updateinfo", upd_xml_path, None),
        ]
    else:
        repomdrecords = [
            ("primary", pri_xml_path, None),
            ("filelists", fil_xml_path, None),
            ("other", oth_xml_path, None),
            ("updateinfo", upd_xml_path, None),
        ]

    if has_modules:
        repomdrecords.append(("modules", mod_yml_path, None))

    if has_comps:
        repomdrecords.append(("group", comps_xml_path, None))

    repomdrecords.extend(extra_repomdrecords)

    sqlite_files = ("primary_db", "filelists_db", "other_db")
    for name, path, db_to_update in repomdrecords:
        record = cr.RepomdRecord(name, path)
        checksum_type = get_checksum_type(name, checksum_types)
        if name in sqlite_files:
            # sqlite databases are published bz2-compressed; the compressed record replaces
            # the original one in repomd.
            record_bz = record.compress_and_fill(checksum_type, cr.BZ2)
            record_bz.type = name
            record_bz.rename_file()
            path = record_bz.location_href.split("/")[-1]
            repomd.set_record(record_bz)
        else:
            record.fill(checksum_type)
            if db_to_update:
                # The XML records come before their *_db entries in the list, so each db is
                # updated and closed before it is compressed above.
                db_to_update.dbinfo_update(record.checksum)
                db_to_update.close()
            record.rename_file()
            path = record.location_href.split("/")[-1]
            repomd.set_record(record)

        if sub_folder:
            path = os.path.join(sub_folder, path)

        # NOTE(review): the handle opened here (and in the calls below) is not explicitly
        # closed by this function.
        PublishedMetadata.create_from_file(
            relative_path=os.path.join(repodata_path, os.path.basename(path)),
            publication=publication,
            file=File(open(path, "rb")),
        )

    with open(repomd_path, "w") as repomd_f:
        repomd_f.write(repomd.xml_dump())

    if metadata_signing_service:
        signing_service = AsciiArmoredDetachedSigningService.objects.get(
            pk=metadata_signing_service.pk)
        sign_results = signing_service.sign(repomd_path)

        # publish a signed file
        PublishedMetadata.create_from_file(
            relative_path=os.path.join(repodata_path,
                                       os.path.basename(sign_results["file"])),
            publication=publication,
            file=File(open(sign_results["file"], "rb")),
        )

        # publish a detached signature
        PublishedMetadata.create_from_file(
            relative_path=os.path.join(
                repodata_path, os.path.basename(sign_results["signature"])),
            publication=publication,
            file=File(open(sign_results["signature"], "rb")),
        )

        # publish a public key required for further verification
        PublishedMetadata.create_from_file(
            relative_path=os.path.join(repodata_path,
                                       os.path.basename(sign_results["key"])),
            publication=publication,
            file=File(open(sign_results["key"], "rb")),
        )
    else:
        # No signing service: publish the plain repomd.xml.
        PublishedMetadata.create_from_file(
            relative_path=os.path.join(repodata_path,
                                       os.path.basename(repomd_path)),
            publication=publication,
            file=File(open(repomd_path, "rb")),
        )
h.setopt(librepo.LRO_CHECKSUM, True) h.setopt(librepo.LRO_PROGRESSCB, dl_callback) h.setopt(librepo.LRO_YUMDLIST, ["group", "primary"]) h.setopt(librepo.LRO_INTERRUPTIBLE, True) h.perform(r) repo_info = r.getinfo(librepo.LRR_YUM_REPO) # Get primary primary_sack = hawkey.Sack() hk_repo = hawkey.Repo(repo) hk_repo.repomd_fn = repo_info['repomd'] hk_repo.primary_fn = repo_info['primary'] primary_sack.load_repo(hk_repo, load_filelists=False) # Get comps comps = libcomps.Comps() if 'group' in repo_info: ret = comps.fromxml_f(repo_info['group']) if ret == -1: print('Error parsing') break repos[repo] = (comps, primary_sack) expanded = 0 in_packages = contents['packages'] out_packages = set() for package in in_packages: if package.startswith('@'): # Evaluate package = package[1:]
async def run(self):
    """
    Build `DeclarativeContent` from the repodata.

    Downloads repomd.xml, optionally short-circuits when the sync can be
    optimized away, then walks the repomd records: modulemd and comps are
    parsed first so that lookup tables (nevra -> modulemd, package name ->
    group, group id -> category/environment) exist by the time packages are
    parsed.  Content units are emitted with ``self.put``; modulemds and groups
    are emitted last, after packages have been linked to them.

    Raises:
        FileNotFoundError: if any of the PACKAGE_REPODATA types is missing from repomd.
        HTTPNotFound: if a metadata download fails with ``ClientResponseError``.
    """
    remote_url = self.new_url or self.remote.url
    # Guarantee a trailing slash so that urljoin() treats remote_url as a directory.
    remote_url = remote_url if remote_url[-1] == "/" else f"{remote_url}/"
    optimize_sync = self.optimize

    progress_data = dict(message='Downloading Metadata Files',
                         code='downloading.metadata')
    with ProgressReport(**progress_data) as metadata_pb:
        downloader = self.remote.get_downloader(
            url=urljoin(remote_url, 'repodata/repomd.xml'))
        # TODO: decide how to distinguish between a mirror list and a normal repo
        result = await downloader.run()
        metadata_pb.increment()

        repomd_path = result.path
        repomd = cr.Repomd(repomd_path)

        # Caution: we are not storing when the remote was last updated, so the order of this
        # logic must remain in this order where we first check the version number as other
        # changes than sync could have taken place such that the date or repo version will be
        # different from last sync
        if (optimize_sync and self.repository.last_sync_remote
                and self.remote.pk == self.repository.last_sync_remote.pk and
            (self.repository.last_sync_repo_version
             == self.repository.latest_version().number) and
            (self.remote.pulp_last_updated <=
             self.repository.latest_version().pulp_created)
                and is_previous_version(
                    repomd.revision,
                    self.repository.last_sync_revision_number)):
            optimize_data = dict(message='Optimizing Sync',
                                 code='optimizing.sync')
            with ProgressReport(**optimize_data) as optimize_pb:
                optimize_pb.done = 1
                optimize_pb.save()
                # Nothing is emitted on the optimized path.
                return

        self.repository.last_sync_revision_number = repomd.revision

        if self.treeinfo:
            # The .treeinfo file itself is always downloaded immediately; the images it
            # lists follow the stage's deferred_download setting.
            d_artifacts = [
                DeclarativeArtifact(
                    artifact=Artifact(),
                    url=urljoin(remote_url, self.treeinfo["filename"]),
                    relative_path=".treeinfo",
                    remote=self.remote,
                    deferred_download=False,
                )
            ]
            for path, checksum in self.treeinfo["download"][
                    "images"].items():
                artifact = Artifact(**checksum)
                da = DeclarativeArtifact(
                    artifact=artifact,
                    url=urljoin(remote_url, path),
                    relative_path=path,
                    remote=self.remote,
                    deferred_download=self.deferred_download)
                d_artifacts.append(da)

            distribution_tree = DistributionTree(
                **self.treeinfo["distribution_tree"])
            dc = DeclarativeContent(content=distribution_tree,
                                    d_artifacts=d_artifacts)
            dc.extra_data = self.treeinfo
            await self.put(dc)

        package_repodata_urls = {}
        # List of lists of coroutines; each inner list is gathered as one unit below.
        downloaders = []
        modulemd_list = list()
        dc_groups = []
        dc_categories = []
        dc_environments = []
        # Lookup tables used to wire relations between content units.
        # nevra_to_module values are sets of DeclarativeContent (see setdefault() below).
        nevra_to_module = defaultdict(dict)
        pkgname_to_groups = defaultdict(list)
        group_to_categories = defaultdict(list)
        group_to_environments = defaultdict(list)
        optionalgroup_to_environments = defaultdict(list)
        modulemd_results = None
        comps_downloader = None
        main_types = set()
        checksums = {}

        for record in repomd.records:
            checksums[record.type] = record.checksum_type.upper()
            if record.type in PACKAGE_REPODATA:
                main_types.update([record.type])
                package_repodata_urls[record.type] = urljoin(
                    remote_url, record.location_href)

            elif record.type in UPDATE_REPODATA:
                # updateinfo_url is only bound when such a record exists; it is compared
                # against download results further down.
                updateinfo_url = urljoin(remote_url, record.location_href)
                downloader = self.remote.get_downloader(url=updateinfo_url)
                downloaders.append([downloader.run()])

            elif record.type in COMPS_REPODATA:
                comps_url = urljoin(remote_url, record.location_href)
                comps_downloader = self.remote.get_downloader(
                    url=comps_url)

            elif record.type in SKIP_REPODATA:
                continue

            elif '_zck' in record.type:
                continue

            elif record.type in MODULAR_REPODATA:
                # Downloaded right away (not queued): modules are processed before packages.
                modules_url = urljoin(remote_url, record.location_href)
                modulemd_downloader = self.remote.get_downloader(
                    url=modules_url)
                modulemd_results = await modulemd_downloader.run()

            elif record.type not in PACKAGE_DB_REPODATA:
                # Any other metadata file is passed through as a RepoMetadataFile.
                file_data = {
                    record.checksum_type: record.checksum,
                    "size": record.size
                }
                da = DeclarativeArtifact(
                    artifact=Artifact(**file_data),
                    url=urljoin(remote_url, record.location_href),
                    relative_path=record.location_href,
                    remote=self.remote,
                    deferred_download=False)
                repo_metadata_file = RepoMetadataFile(
                    data_type=record.type,
                    checksum_type=record.checksum_type,
                    checksum=record.checksum,
                )
                dc = DeclarativeContent(content=repo_metadata_file,
                                        d_artifacts=[da])
                await self.put(dc)

        missing_type = set(PACKAGE_REPODATA) - main_types
        if missing_type:
            raise FileNotFoundError(
                _("XML file(s): {filename} not found").format(
                    filename=", ".join(missing_type)))

        self.repository.original_checksum_types = checksums

        # we have to sync module.yaml first if it exists, to make relations to packages
        if modulemd_results:
            modulemd_index = mmdlib.ModuleIndex.new()
            open_func = gzip.open if modulemd_results.url.endswith(
                '.gz') else open
            with open_func(modulemd_results.path, 'r') as moduleyaml:
                content = moduleyaml.read()
                # The read may yield bytes or str depending on open_func; normalise to str.
                module_content = content if isinstance(
                    content, str) else content.decode()
                modulemd_index.update_from_string(module_content, True)

            modulemd_names = modulemd_index.get_module_names() or []
            modulemd_all = parse_modulemd(modulemd_names, modulemd_index)

            # Parsing modules happens all at one time, and from here on no useful work happens.
            # So just report that it finished this stage.
            modulemd_pb_data = {
                'message': 'Parsed Modulemd',
                'code': 'parsing.modulemds'
            }
            with ProgressReport(**modulemd_pb_data) as modulemd_pb:
                modulemd_total = len(modulemd_all)
                modulemd_pb.total = modulemd_total
                modulemd_pb.done = modulemd_total

            for modulemd in modulemd_all:
                artifact = modulemd.pop('artifact')
                relative_path = '{}{}{}{}{}snippet'.format(
                    modulemd[PULP_MODULE_ATTR.NAME],
                    modulemd[PULP_MODULE_ATTR.STREAM],
                    modulemd[PULP_MODULE_ATTR.VERSION],
                    modulemd[PULP_MODULE_ATTR.CONTEXT],
                    modulemd[PULP_MODULE_ATTR.ARCH])
                da = DeclarativeArtifact(artifact=artifact,
                                         relative_path=relative_path,
                                         url=modules_url)
                modulemd_content = Modulemd(**modulemd)
                dc = DeclarativeContent(content=modulemd_content,
                                        d_artifacts=[da])
                dc.extra_data = defaultdict(list)

                # dc.content.artifacts are Modulemd artifacts
                # (the loop variable reuses the name `artifact` from above)
                for artifact in dc.content.artifacts:
                    nevra_to_module.setdefault(artifact, set()).add(dc)
                # Held back: modulemds are emitted at the very end of run().
                modulemd_list.append(dc)

            # delete list now that we're done with it for memory savings
            del modulemd_all

            modulemd_default_names = parse_defaults(modulemd_index)

            # Parsing module-defaults happens all at one time, and from here on no useful
            # work happens. So just report that it finished this stage.
            modulemd_defaults_pb_data = {
                'message': 'Parsed Modulemd-defaults',
                'code': 'parsing.modulemd_defaults'
            }
            with ProgressReport(
                    **modulemd_defaults_pb_data) as modulemd_defaults_pb:
                modulemd_defaults_total = len(modulemd_default_names)
                modulemd_defaults_pb.total = modulemd_defaults_total
                modulemd_defaults_pb.done = modulemd_defaults_total

            for default in modulemd_default_names:
                artifact = default.pop('artifact')
                relative_path = '{}{}snippet'.format(
                    default[PULP_MODULEDEFAULTS_ATTR.MODULE],
                    default[PULP_MODULEDEFAULTS_ATTR.STREAM])
                da = DeclarativeArtifact(artifact=artifact,
                                         relative_path=relative_path,
                                         url=modules_url)
                default_content = ModulemdDefaults(**default)
                dc = DeclarativeContent(content=default_content,
                                        d_artifacts=[da])
                # Defaults are emitted immediately.
                await self.put(dc)

            # delete list now that we're done with it for memory savings
            del modulemd_default_names

        if comps_downloader:
            comps_result = await comps_downloader.run()

            comps = libcomps.Comps()
            comps.fromxml_f(comps_result.path)

            with ProgressReport(message='Parsed Comps',
                                code='parsing.comps') as comps_pb:
                comps_total = (len(comps.groups) + len(comps.categories) +
                               len(comps.environments))
                comps_pb.total = comps_total
                comps_pb.done = comps_total

            if comps.langpacks:
                langpack_dict = PackageLangpacks.libcomps_to_dict(
                    comps.langpacks)
                packagelangpack = PackageLangpacks(
                    matches=strdict_to_dict(comps.langpacks),
                    digest=dict_digest(langpack_dict))
                dc = DeclarativeContent(content=packagelangpack)
                dc.extra_data = defaultdict(list)
                await self.put(dc)

            # Categories and environments are processed before groups so that the
            # group id -> category/environment tables are complete when groups are linked.
            if comps.categories:
                for category in comps.categories:
                    category_dict = PackageCategory.libcomps_to_dict(
                        category)
                    category_dict['digest'] = dict_digest(category_dict)
                    packagecategory = PackageCategory(**category_dict)
                    dc = DeclarativeContent(content=packagecategory)
                    dc.extra_data = defaultdict(list)

                    if packagecategory.group_ids:
                        for group_id in packagecategory.group_ids:
                            group_to_categories[group_id['name']].append(
                                dc)
                    dc_categories.append(dc)

            if comps.environments:
                for environment in comps.environments:
                    environment_dict = PackageEnvironment.libcomps_to_dict(
                        environment)
                    environment_dict['digest'] = dict_digest(
                        environment_dict)
                    packageenvironment = PackageEnvironment(
                        **environment_dict)
                    dc = DeclarativeContent(content=packageenvironment)
                    dc.extra_data = defaultdict(list)

                    if packageenvironment.option_ids:
                        for option_id in packageenvironment.option_ids:
                            optionalgroup_to_environments[
                                option_id['name']].append(dc)

                    if packageenvironment.group_ids:
                        for group_id in packageenvironment.group_ids:
                            group_to_environments[group_id['name']].append(
                                dc)

                    dc_environments.append(dc)

            if comps.groups:
                for group in comps.groups:
                    group_dict = PackageGroup.libcomps_to_dict(group)
                    group_dict['digest'] = dict_digest(group_dict)
                    packagegroup = PackageGroup(**group_dict)
                    dc = DeclarativeContent(content=packagegroup)
                    dc.extra_data = defaultdict(list)

                    if packagegroup.packages:
                        for package in packagegroup.packages:
                            pkgname_to_groups[package['name']].append(dc)

                    # Relations are recorded on both sides (group and its parent).
                    if dc.content.id in group_to_categories.keys():
                        for dc_category in group_to_categories[
                                dc.content.id]:
                            dc.extra_data['category_relations'].append(
                                dc_category)
                            dc_category.extra_data['packagegroups'].append(
                                dc)

                    if dc.content.id in group_to_environments.keys():
                        for dc_environment in group_to_environments[
                                dc.content.id]:
                            dc.extra_data['environment_relations'].append(
                                dc_environment)
                            dc_environment.extra_data[
                                'packagegroups'].append(dc)

                    if dc.content.id in optionalgroup_to_environments.keys(
                    ):
                        for dc_environment in optionalgroup_to_environments[
                                dc.content.id]:
                            dc.extra_data['env_relations_optional'].append(
                                dc_environment)
                            dc_environment.extra_data[
                                'optionalgroups'].append(dc)

                    # Held back: groups are emitted at the very end of run().
                    dc_groups.append(dc)

            for dc_category in dc_categories:
                await self.put(dc_category)

            for dc_environment in dc_environments:
                await self.put(dc_environment)

        # delete lists now that we're done with them for memory savings
        del dc_environments
        del dc_categories

        # to preserve order, downloaders are created after all repodata urls are identified
        package_repodata_downloaders = []
        for repodata_type in PACKAGE_REPODATA:
            downloader = self.remote.get_downloader(
                url=package_repodata_urls[repodata_type])
            package_repodata_downloaders.append(downloader.run())

        downloaders.append(package_repodata_downloaders)

        # asyncio.gather is used to preserve the order of results for package repodata
        pending = [
            asyncio.gather(*downloaders_group)
            for downloaders_group in downloaders
        ]

        while pending:
            done, pending = await asyncio.wait(
                pending, return_when=asyncio.FIRST_COMPLETED)
            for downloader in done:
                try:
                    results = downloader.result()
                except ClientResponseError as exc:
                    raise HTTPNotFound(
                        reason=_("File not found: {filename}").format(
                            filename=exc.request_info.url))
                # A finished group is identified by the URL of its first result.
                if results[0].url == package_repodata_urls['primary']:
                    # results[0..2] are read as primary, filelists, other
                    # (presumably the order of PACKAGE_REPODATA - confirm).
                    primary_xml_path = results[0].path
                    filelists_xml_path = results[1].path
                    other_xml_path = results[2].path
                    metadata_pb.done += 3
                    metadata_pb.save()

                    packages = await RpmFirstStage.parse_repodata(
                        primary_xml_path, filelists_xml_path,
                        other_xml_path)
                    # skip SRPM if defined
                    if 'srpm' in self.skip_types:
                        packages = {
                            pkgId: pkg
                            for pkgId, pkg in packages.items()
                            if pkg.arch != 'src'
                        }

                    progress_data = {
                        'message': 'Parsed Packages',
                        'code': 'parsing.packages',
                        'total': len(packages),
                    }
                    with ProgressReport(**progress_data) as packages_pb:
                        for pkg in packages.values():
                            package = Package(
                                **Package.createrepo_to_dict(pkg))
                            artifact = Artifact(size=package.size_package)
                            # Store the package's checksum on the artifact attribute named
                            # after its checksum type.
                            checksum_type = getattr(
                                CHECKSUM_TYPES,
                                package.checksum_type.upper())
                            setattr(artifact, checksum_type, package.pkgId)
                            url = urljoin(remote_url, package.location_href)
                            filename = os.path.basename(
                                package.location_href)
                            da = DeclarativeArtifact(
                                artifact=artifact,
                                url=url,
                                relative_path=filename,
                                remote=self.remote,
                                deferred_download=self.deferred_download)
                            dc = DeclarativeContent(content=package,
                                                    d_artifacts=[da])
                            dc.extra_data = defaultdict(list)

                            # find if a package relates to a modulemd
                            if dc.content.nevra in nevra_to_module.keys():
                                dc.content.is_modular = True
                                for dc_modulemd in nevra_to_module[
                                        dc.content.nevra]:
                                    dc.extra_data[
                                        'modulemd_relation'].append(
                                            dc_modulemd)
                                    dc_modulemd.extra_data[
                                        'package_relation'].append(dc)

                            if dc.content.name in pkgname_to_groups.keys():
                                for dc_group in pkgname_to_groups[
                                        dc.content.name]:
                                    dc.extra_data[
                                        'group_relations'].append(dc_group)
                                    dc_group.extra_data[
                                        'related_packages'].append(dc)

                            packages_pb.increment()
                            await self.put(dc)

                elif results[0].url == updateinfo_url:
                    updateinfo_xml_path = results[0].path
                    metadata_pb.increment()

                    updates = await RpmFirstStage.parse_updateinfo(
                        updateinfo_xml_path)

                    progress_data = {
                        'message': 'Parsed Advisories',
                        'code': 'parsing.advisories',
                        'total': len(updates),
                    }
                    with ProgressReport(**progress_data) as advisories_pb:
                        for update in updates:
                            update_record = UpdateRecord(
                                **UpdateRecord.createrepo_to_dict(update))
                            update_record.digest = hash_update_record(
                                update)
                            # Collections, their packages and the references are built
                            # here and attached as extra_data of the advisory.
                            future_relations = {
                                'collections': defaultdict(list),
                                'references': []
                            }

                            for collection in update.collections:
                                coll_dict = UpdateCollection.createrepo_to_dict(
                                    collection)
                                coll = UpdateCollection(**coll_dict)

                                for package in collection.packages:
                                    pkg_dict = UpdateCollectionPackage.createrepo_to_dict(
                                        package)
                                    pkg = UpdateCollectionPackage(
                                        **pkg_dict)
                                    future_relations['collections'][
                                        coll].append(pkg)

                            for reference in update.references:
                                reference_dict = UpdateReference.createrepo_to_dict(
                                    reference)
                                ref = UpdateReference(**reference_dict)
                                future_relations['references'].append(ref)

                            advisories_pb.increment()
                            dc = DeclarativeContent(content=update_record)
                            dc.extra_data = future_relations
                            await self.put(dc)

    # now send modules down the pipeline since all relations have been set up
    for modulemd in modulemd_list:
        await self.put(modulemd)

    for dc_group in dc_groups:
        await self.put(dc_group)
def create_repomd_xml(content, publication, extra_repomdrecords, sub_folder=None):
    """
    Creates a repomd.xml file.

    Writes primary/filelists/other XML and sqlite metadata, updateinfo,
    modules.yaml and comps.xml under the current working directory, registers
    each of them as ``PublishedMetadata`` and finally writes and publishes
    repomd.xml.  All records are filled with SHA256 checksums.

    Args:
        content(app.models.Content): content set
        publication(pulpcore.plugin.models.Publication): the publication
        extra_repomdrecords(list): list with data relative to repo metadata files
        sub_folder(str): name of the folder for sub repos

    """
    cwd = os.getcwd()
    repodata_path = REPODATA_PATH
    has_modules = False
    has_comps = False

    if sub_folder:
        cwd = os.path.join(cwd, sub_folder)
        repodata_path = os.path.join(sub_folder, repodata_path)

    # Prepare metadata files
    repomd_path = os.path.join(cwd, "repomd.xml")
    pri_xml_path = os.path.join(cwd, "primary.xml.gz")
    fil_xml_path = os.path.join(cwd, "filelists.xml.gz")
    oth_xml_path = os.path.join(cwd, "other.xml.gz")
    pri_db_path = os.path.join(cwd, "primary.sqlite")
    fil_db_path = os.path.join(cwd, "filelists.sqlite")
    oth_db_path = os.path.join(cwd, "other.sqlite")
    upd_xml_path = os.path.join(cwd, "updateinfo.xml.gz")
    mod_yml_path = os.path.join(cwd, "modules.yaml")
    comps_xml_path = os.path.join(cwd, "comps.xml")

    pri_xml = cr.PrimaryXmlFile(pri_xml_path)
    fil_xml = cr.FilelistsXmlFile(fil_xml_path)
    oth_xml = cr.OtherXmlFile(oth_xml_path)
    pri_db = cr.PrimarySqlite(pri_db_path)
    fil_db = cr.FilelistsSqlite(fil_db_path)
    oth_db = cr.OtherSqlite(oth_db_path)
    upd_xml = cr.UpdateInfoXmlFile(upd_xml_path)

    packages = Package.objects.filter(pk__in=content)
    total_packages = packages.count()

    pri_xml.set_num_of_pkgs(total_packages)
    fil_xml.set_num_of_pkgs(total_packages)
    oth_xml.set_num_of_pkgs(total_packages)

    # Process all packages
    for package in packages.iterator():
        pkg = package.to_createrepo_c()
        # The published location is the relative_path of the package's first content
        # artifact (one extra query per package).
        pkg.location_href = package.contentartifact_set.only('relative_path').first().relative_path
        pri_xml.add_pkg(pkg)
        fil_xml.add_pkg(pkg)
        oth_xml.add_pkg(pkg)
        pri_db.add_pkg(pkg)
        fil_db.add_pkg(pkg)
        oth_db.add_pkg(pkg)

    # Process update records
    for update_record in UpdateRecord.objects.filter(pk__in=content).iterator():
        upd_xml.add_chunk(cr.xml_dump_updaterecord(update_record.to_createrepo_c()))

    # Process modulemd and modulemd_defaults
    # The file is opened in append mode, so modules.yaml is created even when nothing is
    # written; it is only registered in repomd when has_modules is set.
    with open(mod_yml_path, 'ab') as mod_yml:
        for modulemd in Modulemd.objects.filter(pk__in=content).iterator():
            mod_yml.write(modulemd._artifacts.get().file.read())
            has_modules = True
        for default in ModulemdDefaults.objects.filter(pk__in=content).iterator():
            mod_yml.write(default._artifacts.get().file.read())
            has_modules = True

    # Process comps
    comps = libcomps.Comps()
    for pkg_grp in PackageGroup.objects.filter(pk__in=content).iterator():
        group = pkg_grp.pkg_grp_to_libcomps()
        comps.groups.append(group)
        has_comps = True
    for pkg_cat in PackageCategory.objects.filter(pk__in=content).iterator():
        cat = pkg_cat.pkg_cat_to_libcomps()
        comps.categories.append(cat)
        has_comps = True
    for pkg_env in PackageEnvironment.objects.filter(pk__in=content).iterator():
        env = pkg_env.pkg_env_to_libcomps()
        comps.environments.append(env)
        has_comps = True
    for pkg_lng in PackageLangpacks.objects.filter(pk__in=content).iterator():
        # Assignment, not append: if several langpack objects match, the last one wins.
        comps.langpacks = dict_to_strdict(pkg_lng.matches)
        has_comps = True

    # comps.xml is always written; it is only registered in repomd when has_comps is set.
    comps.toxml_f(comps_xml_path, xml_options={"default_explicit": True,
                                               "empty_groups": True,
                                               "uservisible_explicit": True})

    pri_xml.close()
    fil_xml.close()
    oth_xml.close()
    upd_xml.close()

    repomd = cr.Repomd()

    # Each entry is (record name, file path, sqlite db to update with the record checksum).
    repomdrecords = [("primary", pri_xml_path, pri_db),
                     ("filelists", fil_xml_path, fil_db),
                     ("other", oth_xml_path, oth_db),
                     ("primary_db", pri_db_path, None),
                     ("filelists_db", fil_db_path, None),
                     ("other_db", oth_db_path, None),
                     ("updateinfo", upd_xml_path, None)]

    if has_modules:
        repomdrecords.append(("modules", mod_yml_path, None))

    if has_comps:
        repomdrecords.append(("group", comps_xml_path, None))

    repomdrecords.extend(extra_repomdrecords)

    sqlite_files = ("primary_db", "filelists_db", "other_db")
    for name, path, db_to_update in repomdrecords:
        record = cr.RepomdRecord(name, path)
        if name in sqlite_files:
            # sqlite databases are published bz2-compressed; the compressed record replaces
            # the original one in repomd.
            record_bz = record.compress_and_fill(cr.SHA256, cr.BZ2)
            record_bz.type = name
            record_bz.rename_file()
            path = record_bz.location_href.split('/')[-1]
            repomd.set_record(record_bz)
        else:
            record.fill(cr.SHA256)
            if (db_to_update):
                # The XML records come before their *_db entries in the list, so each db is
                # updated and closed before it is compressed above.
                db_to_update.dbinfo_update(record.checksum)
                db_to_update.close()
            record.rename_file()
            path = record.location_href.split('/')[-1]
            repomd.set_record(record)

        if sub_folder:
            path = os.path.join(sub_folder, path)

        # NOTE(review): the handle opened here (and for repomd.xml below) is not explicitly
        # closed by this function.
        PublishedMetadata.create_from_file(
            relative_path=os.path.join(repodata_path, os.path.basename(path)),
            publication=publication,
            file=File(open(path, 'rb'))
        )

    with open(repomd_path, "w") as repomd_f:
        repomd_f.write(repomd.xml_dump())

    PublishedMetadata.create_from_file(
        relative_path=os.path.join(repodata_path, os.path.basename(repomd_path)),
        publication=publication,
        file=File(open(repomd_path, 'rb'))
    )
async def run(self):
    """
    Build `DeclarativeContent` from the repodata.

    The stage downloads ``repodata/repomd.xml`` from the remote, walks its records and puts
    one `DeclarativeContent` per discovered unit into the pipeline with ``self.put()``:

    * a distribution tree, when ``self.kickstart`` is set;
    * any repo metadata file that is not package, updateinfo, comps, modular, skipped or
      sqlite metadata;
    * modulemd defaults, comps langpacks, categories and environments;
    * packages and update records, as their metadata downloads complete;
    * modulemds and package groups last, because their relations to packages are filled in
      while the packages are being processed.
    """
    packages_pb = ProgressReport(message='Parsed Packages', code='parsing.packages')
    errata_pb = ProgressReport(message='Parsed Erratum', code='parsing.errata')
    modulemd_pb = ProgressReport(message='Parse Modulemd', code='parsing.modulemds')
    modulemd_defaults_pb = ProgressReport(
        message='Parse Modulemd-defaults', code='parsing.modulemddefaults')
    comps_pb = ProgressReport(message='Parsed Comps', code='parsing.comps')

    packages_pb.save()
    errata_pb.save()
    comps_pb.save()

    # urljoin() drops the last path segment of a base url that lacks a trailing slash
    remote_url = self.new_url or self.remote.url
    remote_url = remote_url if remote_url[-1] == "/" else f"{remote_url}/"

    progress_data = dict(message='Downloading Metadata Files', code='downloading.metadata')
    with ProgressReport(**progress_data) as metadata_pb:
        downloader = self.remote.get_downloader(
            url=urljoin(remote_url, 'repodata/repomd.xml'))
        # TODO: decide how to distinguish between a mirror list and a normal repo
        result = await downloader.run()
        metadata_pb.increment()

        if self.kickstart:
            d_artifacts = []
            for path, checksum in self.kickstart["download"]["images"].items():
                artifact = Artifact(**checksum)
                da = DeclarativeArtifact(
                    artifact=artifact,
                    url=urljoin(remote_url, path),
                    relative_path=path,
                    remote=self.remote,
                    deferred_download=self.deferred_download)
                d_artifacts.append(da)

            distribution_tree = DistributionTree(**self.kickstart["distribution_tree"])
            dc = DeclarativeContent(content=distribution_tree, d_artifacts=d_artifacts)
            dc.extra_data = self.kickstart
            await self.put(dc)

        repomd_path = result.path
        repomd = cr.Repomd(repomd_path)
        package_repodata_urls = {}
        downloaders = []
        modulemd_list = list()
        dc_groups = []
        dc_categories = []
        dc_environments = []
        nevra_to_module = defaultdict(dict)
        pkgname_to_groups = defaultdict(list)
        group_to_categories = defaultdict(list)
        group_to_environments = defaultdict(list)
        optionalgroup_to_environments = defaultdict(list)
        modulemd_results = None
        comps_downloader = None
        # stays None when repomd.xml has no updateinfo record, so the comparison against
        # it further down can never hit an unbound name
        updateinfo_url = None

        for record in repomd.records:
            if record.type in PACKAGE_REPODATA:
                package_repodata_urls[record.type] = urljoin(
                    remote_url, record.location_href)
            elif record.type in UPDATE_REPODATA:
                updateinfo_url = urljoin(remote_url, record.location_href)
                downloader = self.remote.get_downloader(url=updateinfo_url)
                downloaders.append([downloader.run()])
            elif record.type in COMPS_REPODATA:
                comps_url = urljoin(remote_url, record.location_href)
                comps_downloader = self.remote.get_downloader(url=comps_url)
            elif record.type in SKIP_REPODATA:
                continue
            elif record.type in MODULAR_REPODATA:
                modules_url = urljoin(remote_url, record.location_href)
                modulemd_downloader = self.remote.get_downloader(url=modules_url)
                modulemd_results = await modulemd_downloader.run()
            elif record.type not in PACKAGE_DB_REPODATA:
                # any other metadata file is mirrored as-is
                file_data = {
                    record.checksum_type: record.checksum,
                    "size": record.size
                }
                da = DeclarativeArtifact(
                    artifact=Artifact(**file_data),
                    url=urljoin(remote_url, record.location_href),
                    relative_path=record.location_href,
                    remote=self.remote,
                    deferred_download=False)
                repo_metadata_file = RepoMetadataFile(
                    data_type=record.type,
                    checksum_type=record.checksum_type,
                    checksum=record.checksum,
                )
                dc = DeclarativeContent(content=repo_metadata_file, d_artifacts=[da])
                await self.put(dc)

        # we have to sync module.yaml first if it exists, to make relations to packages
        if modulemd_results:
            modulemd_index = mmdlib.ModuleIndex.new()
            open_func = gzip.open if modulemd_results.url.endswith('.gz') else open
            # Binary mode for both openers: the content is decoded explicitly below, and
            # plain open() in text mode would hand back a str, which has no .decode().
            with open_func(modulemd_results.path, 'rb') as moduleyaml:
                modulemd_index.update_from_string(moduleyaml.read().decode(), True)

            modulemd_names = modulemd_index.get_module_names() or []
            modulemd_all = parse_modulemd(modulemd_names, modulemd_index)

            modulemd_pb.total = len(modulemd_all)
            modulemd_pb.state = 'running'
            modulemd_pb.save()

            for modulemd in modulemd_all:
                artifact = modulemd.pop('artifact')
                relative_path = '{}{}{}{}{}snippet'.format(
                    modulemd[PULP_MODULE_ATTR.NAME],
                    modulemd[PULP_MODULE_ATTR.STREAM],
                    modulemd[PULP_MODULE_ATTR.VERSION],
                    modulemd[PULP_MODULE_ATTR.CONTEXT],
                    modulemd[PULP_MODULE_ATTR.ARCH])
                da = DeclarativeArtifact(
                    artifact=artifact,
                    relative_path=relative_path,
                    url=modules_url)
                modulemd_content = Modulemd(**modulemd)
                dc = DeclarativeContent(content=modulemd_content, d_artifacts=[da])
                dc.extra_data = defaultdict(list)

                # dc.content.artifacts are Modulemd artifacts
                for nevra in json.loads(dc.content.artifacts):
                    nevra_to_module.setdefault(nevra, set()).add(dc)
                # held back until the packages have been linked to it
                modulemd_list.append(dc)

            modulemd_default_names = parse_defaults(modulemd_index)

            modulemd_defaults_pb.total = len(modulemd_default_names)
            modulemd_defaults_pb.state = 'running'
            modulemd_defaults_pb.save()

            for default in modulemd_default_names:
                artifact = default.pop('artifact')
                relative_path = '{}{}snippet'.format(
                    default[PULP_MODULEDEFAULTS_ATTR.MODULE],
                    default[PULP_MODULEDEFAULTS_ATTR.STREAM])
                da = DeclarativeArtifact(
                    artifact=artifact,
                    relative_path=relative_path,
                    url=modules_url)
                default_content = ModulemdDefaults(**default)
                modulemd_defaults_pb.increment()
                dc = DeclarativeContent(content=default_content, d_artifacts=[da])
                await self.put(dc)

        if comps_downloader:
            comps_result = await comps_downloader.run()

            comps = libcomps.Comps()
            comps.fromxml_f(comps_result.path)

            comps_pb.total = (
                len(comps.groups) + len(comps.categories) + len(comps.environments))
            comps_pb.state = 'running'
            comps_pb.save()

            if comps.langpacks:
                langpack_dict = PackageLangpacks.libcomps_to_dict(comps.langpacks)
                packagelangpack = PackageLangpacks(
                    matches=strdict_to_dict(comps.langpacks),
                    digest=dict_digest(langpack_dict))
                dc = DeclarativeContent(content=packagelangpack)
                dc.extra_data = defaultdict(list)
                await self.put(dc)

            if comps.categories:
                for category in comps.categories:
                    category_dict = PackageCategory.libcomps_to_dict(category)
                    category_dict['digest'] = dict_digest(category_dict)
                    packagecategory = PackageCategory(**category_dict)
                    dc = DeclarativeContent(content=packagecategory)
                    dc.extra_data = defaultdict(list)

                    if packagecategory.group_ids:
                        for group_id in packagecategory.group_ids:
                            group_to_categories[group_id['name']].append(dc)
                    dc_categories.append(dc)

            if comps.environments:
                for environment in comps.environments:
                    environment_dict = PackageEnvironment.libcomps_to_dict(environment)
                    environment_dict['digest'] = dict_digest(environment_dict)
                    packageenvironment = PackageEnvironment(**environment_dict)
                    dc = DeclarativeContent(content=packageenvironment)
                    dc.extra_data = defaultdict(list)

                    if packageenvironment.option_ids:
                        for option_id in packageenvironment.option_ids:
                            optionalgroup_to_environments[option_id['name']].append(dc)

                    if packageenvironment.group_ids:
                        for group_id in packageenvironment.group_ids:
                            group_to_environments[group_id['name']].append(dc)

                    dc_environments.append(dc)

            if comps.groups:
                for group in comps.groups:
                    group_dict = PackageGroup.libcomps_to_dict(group)
                    group_dict['digest'] = dict_digest(group_dict)
                    packagegroup = PackageGroup(**group_dict)
                    dc = DeclarativeContent(content=packagegroup)
                    dc.extra_data = defaultdict(list)

                    if packagegroup.packages:
                        for package in packagegroup.packages:
                            pkgname_to_groups[package['name']].append(dc)

                    # cross-link the group with the categories/environments naming it
                    if dc.content.id in group_to_categories:
                        for dc_category in group_to_categories[dc.content.id]:
                            dc.extra_data['category_relations'].append(dc_category)
                            dc_category.extra_data['packagegroups'].append(dc)

                    if dc.content.id in group_to_environments:
                        for dc_environment in group_to_environments[dc.content.id]:
                            dc.extra_data['environment_relations'].append(dc_environment)
                            dc_environment.extra_data['packagegroups'].append(dc)

                    if dc.content.id in optionalgroup_to_environments:
                        for dc_environment in optionalgroup_to_environments[dc.content.id]:
                            dc.extra_data['env_relations_optional'].append(dc_environment)
                            dc_environment.extra_data['optionalgroups'].append(dc)

                    # groups are sent at the very end, after packages were related to them
                    dc_groups.append(dc)

            for dc_category in dc_categories:
                comps_pb.increment()
                await self.put(dc_category)

            for dc_environment in dc_environments:
                comps_pb.increment()
                await self.put(dc_environment)

        # to preserve order, downloaders are created after all repodata urls are identified
        package_repodata_downloaders = []
        for repodata_type in PACKAGE_REPODATA:
            downloader = self.remote.get_downloader(
                url=package_repodata_urls[repodata_type])
            package_repodata_downloaders.append(downloader.run())

        downloaders.append(package_repodata_downloaders)

        # asyncio.gather is used to preserve the order of results for package repodata
        pending = [
            asyncio.gather(*downloaders_group) for downloaders_group in downloaders
        ]

        while pending:
            done, pending = await asyncio.wait(
                pending, return_when=asyncio.FIRST_COMPLETED)
            for downloader in done:
                results = downloader.result()
                if results[0].url == package_repodata_urls['primary']:
                    primary_xml_path = results[0].path
                    filelists_xml_path = results[1].path
                    other_xml_path = results[2].path
                    metadata_pb.done += 3
                    metadata_pb.save()

                    packages = await RpmFirstStage.parse_repodata(
                        primary_xml_path, filelists_xml_path, other_xml_path)
                    packages_pb.total = len(packages)
                    packages_pb.state = 'running'
                    packages_pb.save()

                    for pkg in packages.values():
                        package = Package(**Package.createrepo_to_dict(pkg))
                        artifact = Artifact(size=package.size_package)
                        checksum_type = getattr(
                            CHECKSUM_TYPES, package.checksum_type.upper())
                        setattr(artifact, checksum_type, package.pkgId)
                        url = urljoin(remote_url, package.location_href)
                        filename = os.path.basename(package.location_href)
                        da = DeclarativeArtifact(
                            artifact=artifact,
                            url=url,
                            relative_path=filename,
                            remote=self.remote,
                            deferred_download=self.deferred_download)
                        dc = DeclarativeContent(content=package, d_artifacts=[da])
                        dc.extra_data = defaultdict(list)

                        # find if a package relates to a modulemd
                        if dc.content.nevra in nevra_to_module:
                            dc.content.is_modular = True
                            for dc_modulemd in nevra_to_module[dc.content.nevra]:
                                dc.extra_data['modulemd_relation'].append(dc_modulemd)
                                dc_modulemd.extra_data['package_relation'].append(dc)

                        if dc.content.name in pkgname_to_groups:
                            for dc_group in pkgname_to_groups[dc.content.name]:
                                dc.extra_data['group_relations'].append(dc_group)
                                dc_group.extra_data['related_packages'].append(dc)

                        packages_pb.increment()
                        await self.put(dc)

                elif results[0].url == updateinfo_url:
                    updateinfo_xml_path = results[0].path
                    metadata_pb.increment()

                    updates = await RpmFirstStage.parse_updateinfo(updateinfo_xml_path)

                    errata_pb.total = len(updates)
                    errata_pb.state = 'running'
                    errata_pb.save()

                    for update in updates:
                        update_record = UpdateRecord(
                            **UpdateRecord.createrepo_to_dict(update))
                        update_record.digest = RpmFirstStage.hash_update_record(update)
                        future_relations = {
                            'collections': defaultdict(list),
                            'references': []
                        }

                        for collection in update.collections:
                            coll_dict = UpdateCollection.createrepo_to_dict(collection)
                            coll = UpdateCollection(**coll_dict)

                            for package in collection.packages:
                                pkg_dict = UpdateCollectionPackage.createrepo_to_dict(
                                    package)
                                pkg = UpdateCollectionPackage(**pkg_dict)
                                future_relations['collections'][coll].append(pkg)

                        for reference in update.references:
                            reference_dict = UpdateReference.createrepo_to_dict(reference)
                            ref = UpdateReference(**reference_dict)
                            future_relations['references'].append(ref)

                        errata_pb.increment()
                        dc = DeclarativeContent(content=update_record)
                        dc.extra_data = future_relations
                        await self.put(dc)

        # now send modules down the pipeline since all relations have been set up
        for modulemd in modulemd_list:
            modulemd_pb.increment()
            await self.put(modulemd)

        for dc_group in dc_groups:
            comps_pb.increment()
            await self.put(dc_group)

    packages_pb.state = 'completed'
    errata_pb.state = 'completed'
    modulemd_pb.state = 'completed'
    modulemd_defaults_pb.state = 'completed'
    comps_pb.state = 'completed'
    packages_pb.save()
    errata_pb.save()
    modulemd_pb.save()
    modulemd_defaults_pb.save()
    comps_pb.save()
def create_repomd_xml(content, publication, checksum_types, extra_repomdrecords,
                      sub_folder=None, metadata_signing_service=None):
    """
    Creates a repomd.xml file.

    Besides repomd.xml itself, the primary/filelists/other XML and sqlite metadata,
    updateinfo, modules.yaml and comps.xml are written into the working directory and
    published as `PublishedMetadata` of the publication.

    Args:
        content(app.models.Content): content set
        publication(pulpcore.plugin.models.Publication): the publication
        checksum_types(dict): checksum type configuration; its "package" entry is passed
            to `Package.to_createrepo_c()` and the whole mapping to `get_checksum_type()`
            for every metadata record
        extra_repomdrecords(list): list with data relative to repo metadata files
        sub_folder(str): name of the folder for sub repos
        metadata_signing_service (pulpcore.app.models.AsciiArmoredDetachedSigningService):
            A reference to an associated signing service.

    """
    cwd = os.getcwd()
    repodata_path = REPODATA_PATH
    has_modules = False
    has_comps = False
    package_checksum_type = checksum_types.get("package")

    if sub_folder:
        cwd = os.path.join(cwd, sub_folder)
        repodata_path = os.path.join(sub_folder, repodata_path)

    def _publish(file_path):
        """Publish `file_path` under the repodata directory and close its handle."""
        # The handle is scoped to the call so that one descriptor is not leaked per
        # published metadata file.
        with open(file_path, 'rb') as metadata_fd:
            PublishedMetadata.create_from_file(
                relative_path=os.path.join(repodata_path, os.path.basename(file_path)),
                publication=publication,
                file=File(metadata_fd),
            )

    # Prepare metadata files
    repomd_path = os.path.join(cwd, "repomd.xml")
    pri_xml_path = os.path.join(cwd, "primary.xml.gz")
    fil_xml_path = os.path.join(cwd, "filelists.xml.gz")
    oth_xml_path = os.path.join(cwd, "other.xml.gz")
    pri_db_path = os.path.join(cwd, "primary.sqlite")
    fil_db_path = os.path.join(cwd, "filelists.sqlite")
    oth_db_path = os.path.join(cwd, "other.sqlite")
    upd_xml_path = os.path.join(cwd, "updateinfo.xml.gz")
    mod_yml_path = os.path.join(cwd, "modules.yaml")
    comps_xml_path = os.path.join(cwd, "comps.xml")

    pri_xml = cr.PrimaryXmlFile(pri_xml_path)
    fil_xml = cr.FilelistsXmlFile(fil_xml_path)
    oth_xml = cr.OtherXmlFile(oth_xml_path)
    pri_db = cr.PrimarySqlite(pri_db_path)
    fil_db = cr.FilelistsSqlite(fil_db_path)
    oth_db = cr.OtherSqlite(oth_db_path)
    upd_xml = cr.UpdateInfoXmlFile(upd_xml_path)

    packages = Package.objects.filter(pk__in=content)
    total_packages = packages.count()

    pri_xml.set_num_of_pkgs(total_packages)
    fil_xml.set_num_of_pkgs(total_packages)
    oth_xml.set_num_of_pkgs(total_packages)

    # Process all packages
    for package in packages.iterator():
        pkg = package.to_createrepo_c(package_checksum_type)
        pkg_filename = os.path.basename(package.location_href)
        # this can cause an issue when two same RPM package names appears
        # a/name1.rpm b/name1.rpm
        pkg.location_href = os.path.join(
            PACKAGES_DIRECTORY, pkg_filename[0].lower(), pkg_filename)
        pri_xml.add_pkg(pkg)
        fil_xml.add_pkg(pkg)
        oth_xml.add_pkg(pkg)
        pri_db.add_pkg(pkg)
        fil_db.add_pkg(pkg)
        oth_db.add_pkg(pkg)

    # Process update records
    for update_record in UpdateRecord.objects.filter(pk__in=content).iterator():
        upd_xml.add_chunk(cr.xml_dump_updaterecord(update_record.to_createrepo_c()))

    # Process modulemd and modulemd_defaults
    with open(mod_yml_path, 'ab') as mod_yml:
        for modulemd in Modulemd.objects.filter(pk__in=content).iterator():
            mod_yml.write(modulemd._artifacts.get().file.read())
            has_modules = True
        for default in ModulemdDefaults.objects.filter(pk__in=content).iterator():
            mod_yml.write(default._artifacts.get().file.read())
            has_modules = True

    # Process comps
    comps = libcomps.Comps()
    for pkg_grp in PackageGroup.objects.filter(pk__in=content).iterator():
        group = pkg_grp.pkg_grp_to_libcomps()
        comps.groups.append(group)
        has_comps = True
    for pkg_cat in PackageCategory.objects.filter(pk__in=content).iterator():
        cat = pkg_cat.pkg_cat_to_libcomps()
        comps.categories.append(cat)
        has_comps = True
    for pkg_env in PackageEnvironment.objects.filter(pk__in=content).iterator():
        env = pkg_env.pkg_env_to_libcomps()
        comps.environments.append(env)
        has_comps = True
    for pkg_lng in PackageLangpacks.objects.filter(pk__in=content).iterator():
        comps.langpacks = dict_to_strdict(pkg_lng.matches)
        has_comps = True

    # comps.xml is always written, but only referenced from repomd.xml when has_comps
    comps.toxml_f(comps_xml_path, xml_options={
        "default_explicit": True,
        "empty_groups": True,
        "uservisible_explicit": True
    })

    pri_xml.close()
    fil_xml.close()
    oth_xml.close()
    upd_xml.close()

    repomd = cr.Repomd()

    # (record type, path of the file, sqlite db that stores the record's checksum)
    repomdrecords = [("primary", pri_xml_path, pri_db),
                     ("filelists", fil_xml_path, fil_db),
                     ("other", oth_xml_path, oth_db),
                     ("primary_db", pri_db_path, None),
                     ("filelists_db", fil_db_path, None),
                     ("other_db", oth_db_path, None),
                     ("updateinfo", upd_xml_path, None)]

    if has_modules:
        repomdrecords.append(("modules", mod_yml_path, None))

    if has_comps:
        repomdrecords.append(("group", comps_xml_path, None))

    repomdrecords.extend(extra_repomdrecords)

    sqlite_files = ("primary_db", "filelists_db", "other_db")
    for name, path, db_to_update in repomdrecords:
        record = cr.RepomdRecord(name, path)
        checksum_type = get_checksum_type(name, checksum_types)
        if name in sqlite_files:
            record_bz = record.compress_and_fill(checksum_type, cr.BZ2)
            record_bz.type = name
            record_bz.rename_file()
            path = record_bz.location_href.split('/')[-1]
            repomd.set_record(record_bz)
        else:
            record.fill(checksum_type)
            if db_to_update:
                db_to_update.dbinfo_update(record.checksum)
                db_to_update.close()
            record.rename_file()
            path = record.location_href.split('/')[-1]
            repomd.set_record(record)

        if sub_folder:
            path = os.path.join(sub_folder, path)

        _publish(path)

    with open(repomd_path, "w") as repomd_f:
        repomd_f.write(repomd.xml_dump())

    if metadata_signing_service:
        signing_service = AsciiArmoredDetachedSigningService.objects.get(
            pk=metadata_signing_service.pk)
        sign_results = signing_service.sign(repomd_path)

        # publish a signed file
        _publish(sign_results['file'])

        # publish a detached signature
        _publish(sign_results['signature'])

        # publish a public key required for further verification
        _publish(sign_results['key'])
    else:
        _publish(repomd_path)
def sanity_check_repodata(myurl, repo_type):
    """
    Sanity check the repodata for a given repository.

    The repository is loaded with librepo (local, checksums verified), the parts required
    for the repository type are looked up in repomd.xml, comps is parsed for yum
    repositories, updateinfo is searched for empty ``<id/>`` tags and finally DNF is run
    against the repository.

    Args:
        myurl (str): A path to a repodata directory.
        repo_type (str): This should be set to 'yum' for Yum repositories, 'module' for module
            repositories, or 'source' for source repositories.
    Raises:
        RepodataException: If the repodata is not valid or does not exist.
        ValueError: If repo_type is not an acceptable value.
    """
    if repo_type not in ('module', 'source', 'yum'):
        raise ValueError('repo_type must be one of module, source, or yum.')

    with tempfile.TemporaryDirectory(prefix='bodhi_repotest_') as tmpdir:
        os.mkdir(os.path.join(tmpdir, 'lrodir'))

        h = librepo.Handle()
        h.setopt(librepo.LRO_REPOTYPE, librepo.LR_YUMREPO)
        h.setopt(librepo.LRO_DESTDIR, os.path.join(tmpdir, 'lrodir'))

        if myurl[-1] != '/':
            myurl += '/'
        if myurl.endswith('repodata/'):
            # Drop only the trailing component: str.replace() would also remove every
            # other 'repodata/' occurring earlier in the path.
            myurl = myurl[:-len('repodata/')]

        h.setopt(librepo.LRO_URLS, [myurl])
        h.setopt(librepo.LRO_LOCAL, True)
        h.setopt(librepo.LRO_CHECKSUM, True)
        h.setopt(librepo.LRO_IGNOREMISSING, False)
        r = librepo.Result()
        try:
            h.perform(r)
        except librepo.LibrepoException as e:
            _rc, msg, _general_msg = e.args
            raise RepodataException(msg) from e

        repo_info = r.getinfo(librepo.LRR_YUM_REPO)
        reqparts = ['filelists', 'primary', 'repomd', 'updateinfo']
        # Source and module repos don't have DRPMs.
        if repo_type == 'yum':
            reqparts.append('prestodelta')
            reqparts.append('group')
        elif repo_type == 'module':
            reqparts.append('modules')

        missing = [part for part in reqparts if part not in repo_info]
        if missing:
            raise RepodataException(f'Required parts not in repomd.xml: {", ".join(missing)}')

        # Only yum repos have comps
        if repo_type == 'yum':
            # Test comps
            comps = libcomps.Comps()
            try:
                comps.fromxml_f(repo_info['group'])
            except Exception as e:
                raise RepodataException('Comps file unable to be parsed') from e
            if len(comps.groups) < 1:
                raise RepodataException('Comps file empty')

        # Test updateinfo
        # a zero exit status from zgrep means the pattern was found
        ret = subprocess.call(['zgrep', '<id/>', repo_info['updateinfo']])
        if not ret:
            raise RepodataException('updateinfo.xml.gz contains empty ID tags')

        # Now call out to DNF to check if the repo is usable
        # "tests" is a list of tuples with (dnf args, expected output) to run.
        # For every test, DNF is run with the arguments, and if the expected output is not found,
        # an error is raised.
        tests = []

        if repo_type in ('yum', 'source'):
            tests.append((['list', 'available'], 'testrepo'))
        else:  # repo_type == 'module', verified above
            tests.append((['module', 'list'], '.*'))

        for dnfargs, expout in tests:
            # Make sure every DNF test runs in a new temp dir
            testdir = tempfile.mkdtemp(dir=tmpdir)
            output = sanity_check_repodata_dnf(testdir, myurl, *dnfargs)
            # ".*" stands for "any non-empty output"; anything else must occur verbatim
            if (expout == ".*" and len(output.strip()) != 0) or (expout in output):
                continue
            raise RepodataException(
                "DNF did not return expected output when running test!"
                + f" Test: {dnfargs}, expected: {expout}, output: {output}")
def generate_repo_metadata(
    content,
    publication,
    checksum_types,
    extra_repomdrecords,
    sub_folder=None,
    metadata_signing_service=None,
):
    """
    Creates a repomd.xml file.

    Besides repomd.xml itself, the primary/filelists/other XML metadata (plus the sqlite
    variants when ``publication.sqlite_metadata`` is set), updateinfo, modules.yaml and
    comps.xml are written into the working directory and published as
    `PublishedMetadata` of the publication.

    Args:
        content(app.models.Content): content set
        publication(pulpcore.plugin.models.Publication): the publication
        checksum_types(dict): checksum type configuration; its "package" entry, when set,
            overrides the checksum type the packages are published with
        extra_repomdrecords(list): list with data relative to repo metadata files
        sub_folder(str): name of the folder for sub repos
        metadata_signing_service (pulpcore.app.models.AsciiArmoredDetachedSigningService):
            A reference to an associated signing service.

    Raises:
        ValueError: if the requested package checksum type, or the built-in checksum type
            of a package that has to be used instead, is not in
            ``settings.ALLOWED_CONTENT_CHECKSUMS``.

    """
    cwd = os.getcwd()
    repodata_path = REPODATA_PATH
    has_modules = False
    has_comps = False
    package_checksum_type = checksum_types.get("package")

    if sub_folder:
        cwd = os.path.join(cwd, sub_folder)
        repodata_path = os.path.join(sub_folder, repodata_path)

    if package_checksum_type and package_checksum_type not in settings.ALLOWED_CONTENT_CHECKSUMS:
        raise ValueError(
            "Repository contains disallowed package checksum type '{}', "
            "thus can't be published. {}".format(package_checksum_type, ALLOWED_CHECKSUM_ERROR_MSG)
        )

    # Prepare metadata files
    repomd_path = os.path.join(cwd, "repomd.xml")
    pri_xml_path = os.path.join(cwd, "primary.xml.gz")
    fil_xml_path = os.path.join(cwd, "filelists.xml.gz")
    oth_xml_path = os.path.join(cwd, "other.xml.gz")
    upd_xml_path = os.path.join(cwd, "updateinfo.xml.gz")
    mod_yml_path = os.path.join(cwd, "modules.yaml")
    comps_xml_path = os.path.join(cwd, "comps.xml")

    pri_xml = cr.PrimaryXmlFile(pri_xml_path)
    fil_xml = cr.FilelistsXmlFile(fil_xml_path)
    oth_xml = cr.OtherXmlFile(oth_xml_path)
    upd_xml = cr.UpdateInfoXmlFile(upd_xml_path)

    if publication.sqlite_metadata:
        pri_db_path = os.path.join(cwd, "primary.sqlite")
        fil_db_path = os.path.join(cwd, "filelists.sqlite")
        oth_db_path = os.path.join(cwd, "other.sqlite")
        pri_db = cr.PrimarySqlite(pri_db_path)
        fil_db = cr.FilelistsSqlite(fil_db_path)
        oth_db = cr.OtherSqlite(oth_db_path)

    packages = Package.objects.filter(pk__in=content)

    # We want to support publishing with a different checksum type than the one built-in to the
    # package itself, so we need to get the correct checksums somehow if there is an override.
    # We must also take into consideration that if the package has not been downloaded the only
    # checksum that is available is the one built-in.
    #
    # Since this lookup goes from Package->Content->ContentArtifact->Artifact, performance is a
    # challenge. We use ContentArtifact as our starting point because it enables us to work with
    # simple foreign keys and avoid messing with the many-to-many relationship, which doesn't
    # work with select_related() and performs poorly with prefetch_related(). This is fine
    # because we know that Packages should only ever have one artifact per content.
    contentartifact_qs = (
        ContentArtifact.objects.filter(content__in=packages.only("pk"))
        .select_related(
            # content__rpm_package is a bit of a hack, exploiting the way django sets up model
            # inheritance, but it works and is unlikely to break. All content artifacts being
            # accessed here have an associated Package since they originally came from the
            # Package queryset.
            "artifact",
            "content__rpm_package",
        )
        .only("artifact", "content__rpm_package__checksum_type", "content__rpm_package__pkgId")
    )

    # The requested override is normalised once and never reassigned: a package that has to
    # fall back to its built-in checksum must not change the type used for the packages
    # processed after it.
    override_checksum_type = package_checksum_type.lower() if package_checksum_type else None

    pkg_to_hash = {}
    for ca in contentartifact_qs.iterator():
        pkg_checksum_type = override_checksum_type
        pkgid = None
        if pkg_checksum_type:
            pkgid = getattr(ca.artifact, pkg_checksum_type, None)

        if not pkgid:
            # no override, or the artifact cannot provide the requested checksum:
            # use the checksum built-in to the package
            rpm_package = ca.content.rpm_package
            if rpm_package.checksum_type not in settings.ALLOWED_CONTENT_CHECKSUMS:
                raise ValueError(
                    "Package {} as content unit {} contains forbidden checksum type '{}', "
                    "thus can't be published. {}".format(
                        rpm_package.nevra,
                        ca.content.pk,
                        rpm_package.checksum_type,
                        ALLOWED_CHECKSUM_ERROR_MSG,
                    )
                )
            pkg_checksum_type = rpm_package.checksum_type
            pkgid = rpm_package.pkgId

        pkg_to_hash[ca.content_id] = (pkg_checksum_type, pkgid)

    # TODO: this is meant to be a !! *temporary* !! fix for
    # https://github.com/pulp/pulp_rpm/issues/2407
    pkg_pks_to_ignore = set()
    latest_build_time_by_nevra = defaultdict(list)
    for pkg in packages.only(
        "pk", "name", "epoch", "version", "release", "arch", "time_build"
    ).iterator():
        latest_build_time_by_nevra[pkg.nevra].append((pkg.time_build, pkg.pk))
    for nevra, pkg_data in latest_build_time_by_nevra.items():
        # sort the packages by when they were built
        if len(pkg_data) > 1:
            pkg_data.sort(key=lambda p: p[0], reverse=True)
            # everything but the most recently built package is left out
            pkg_pks_to_ignore.update(entry[1] for entry in pkg_data[1:])
            log.warning(
                "Duplicate packages found competing for NEVRA {nevra}, selected the one with "
                "the most recent build time, excluding {others} others.".format(
                    nevra=nevra, others=len(pkg_data[1:])
                )
            )

    total_packages = packages.count() - len(pkg_pks_to_ignore)

    pri_xml.set_num_of_pkgs(total_packages)
    fil_xml.set_num_of_pkgs(total_packages)
    oth_xml.set_num_of_pkgs(total_packages)

    # Process all packages
    for package in packages.order_by("name", "evr").iterator():
        if package.pk in pkg_pks_to_ignore:  # Temporary!
            continue
        pkg = package.to_createrepo_c()

        # rewrite the checksum and checksum type with the desired ones
        (checksum, pkgId) = pkg_to_hash[package.pk]
        pkg.checksum_type = checksum
        pkg.pkgId = pkgId

        pkg_filename = os.path.basename(package.location_href)
        # this can cause an issue when two same RPM package names appears
        # a/name1.rpm b/name1.rpm
        pkg.location_href = os.path.join(
            PACKAGES_DIRECTORY, pkg_filename[0].lower(), pkg_filename
        )
        pri_xml.add_pkg(pkg)
        fil_xml.add_pkg(pkg)
        oth_xml.add_pkg(pkg)
        if publication.sqlite_metadata:
            pri_db.add_pkg(pkg)
            fil_db.add_pkg(pkg)
            oth_db.add_pkg(pkg)

    # Process update records
    for update_record in UpdateRecord.objects.filter(pk__in=content).iterator():
        upd_xml.add_chunk(cr.xml_dump_updaterecord(update_record.to_createrepo_c()))

    # Process modulemd, modulemd_defaults and obsoletes
    with open(mod_yml_path, "ab") as mod_yml:
        for modulemd in Modulemd.objects.filter(pk__in=content).iterator():
            mod_yml.write(modulemd.snippet.encode())
            has_modules = True
        for default in ModulemdDefaults.objects.filter(pk__in=content).iterator():
            mod_yml.write(default.snippet.encode())
            has_modules = True
        for obsolete in ModulemdObsolete.objects.filter(pk__in=content).iterator():
            mod_yml.write(obsolete.snippet.encode())
            has_modules = True

    # Process comps
    comps = libcomps.Comps()
    for pkg_grp in PackageGroup.objects.filter(pk__in=content).iterator():
        group = pkg_grp.pkg_grp_to_libcomps()
        comps.groups.append(group)
        has_comps = True
    for pkg_cat in PackageCategory.objects.filter(pk__in=content).iterator():
        cat = pkg_cat.pkg_cat_to_libcomps()
        comps.categories.append(cat)
        has_comps = True
    for pkg_env in PackageEnvironment.objects.filter(pk__in=content).iterator():
        env = pkg_env.pkg_env_to_libcomps()
        comps.environments.append(env)
        has_comps = True
    for pkg_lng in PackageLangpacks.objects.filter(pk__in=content).iterator():
        comps.langpacks = dict_to_strdict(pkg_lng.matches)
        has_comps = True

    # comps.xml is always written, but only referenced from repomd.xml when has_comps
    comps.toxml_f(
        comps_xml_path,
        xml_options={
            "default_explicit": True,
            "empty_groups": True,
            "empty_packages": True,
            "uservisible_explicit": True,
        },
    )

    pri_xml.close()
    fil_xml.close()
    oth_xml.close()
    upd_xml.close()

    repomd = cr.Repomd()
    # If the repository is empty, use a revision of 0
    # See: https://pulp.plan.io/issues/9402
    if not content.exists():
        repomd.revision = "0"

    # (record type, path of the file, sqlite db that stores the record's checksum)
    if publication.sqlite_metadata:
        repomdrecords = [
            ("primary", pri_xml_path, pri_db),
            ("filelists", fil_xml_path, fil_db),
            ("other", oth_xml_path, oth_db),
            ("primary_db", pri_db_path, None),
            ("filelists_db", fil_db_path, None),
            ("other_db", oth_db_path, None),
            ("updateinfo", upd_xml_path, None),
        ]
    else:
        repomdrecords = [
            ("primary", pri_xml_path, None),
            ("filelists", fil_xml_path, None),
            ("other", oth_xml_path, None),
            ("updateinfo", upd_xml_path, None),
        ]

    if has_modules:
        repomdrecords.append(("modules", mod_yml_path, None))

    if has_comps:
        repomdrecords.append(("group", comps_xml_path, None))

    repomdrecords.extend(extra_repomdrecords)

    sqlite_files = ("primary_db", "filelists_db", "other_db")
    for name, path, db_to_update in repomdrecords:
        record = cr.RepomdRecord(name, path)
        checksum_type = cr_checksum_type_from_string(
            get_checksum_type(name, checksum_types, default=publication.metadata_checksum_type)
        )
        if name in sqlite_files:
            record_bz = record.compress_and_fill(checksum_type, cr.BZ2)
            record_bz.type = name
            record_bz.rename_file()
            path = record_bz.location_href.split("/")[-1]
            repomd.set_record(record_bz)
        else:
            record.fill(checksum_type)
            if db_to_update:
                db_to_update.dbinfo_update(record.checksum)
                db_to_update.close()
            record.rename_file()
            path = record.location_href.split("/")[-1]
            repomd.set_record(record)

        if sub_folder:
            path = os.path.join(sub_folder, path)

        with open(path, "rb") as repodata_fd:
            PublishedMetadata.create_from_file(
                relative_path=os.path.join(repodata_path, os.path.basename(path)),
                publication=publication,
                file=File(repodata_fd),
            )

    with open(repomd_path, "w") as repomd_f:
        repomd_f.write(repomd.xml_dump())

    if metadata_signing_service:
        signing_service = AsciiArmoredDetachedSigningService.objects.get(
            pk=metadata_signing_service
        )
        sign_results = signing_service.sign(repomd_path)

        # publish a signed file
        with open(sign_results["file"], "rb") as signed_file_fd:
            PublishedMetadata.create_from_file(
                relative_path=os.path.join(
                    repodata_path, os.path.basename(sign_results["file"])
                ),
                publication=publication,
                file=File(signed_file_fd),
            )

        # publish a detached signature
        with open(sign_results["signature"], "rb") as signature_fd:
            PublishedMetadata.create_from_file(
                relative_path=os.path.join(
                    repodata_path, os.path.basename(sign_results["signature"])
                ),
                publication=publication,
                file=File(signature_fd),
            )

        # publish a public key required for further verification
        pubkey_name = "repomd.xml.key"
        with open(pubkey_name, "wb+") as f:
            f.write(signing_service.public_key.encode("utf-8"))
            f.flush()
            PublishedMetadata.create_from_file(
                relative_path=os.path.join(repodata_path, pubkey_name),
                publication=publication,
                file=File(f),
            )
    else:
        with open(repomd_path, "rb") as repomd_fd:
            PublishedMetadata.create_from_file(
                relative_path=os.path.join(repodata_path, os.path.basename(repomd_path)),
                publication=publication,
                file=File(repomd_fd),
            )