def content_handler(self, path):
    """Serve config.repo and repomd.xml.key."""
    if path == self.repository_config_file_name:
        repository, publication = self.get_repository_and_publication()
        if not publication:
            return
        base_url = "{}/".format(
            urlpath_sanitize(
                settings.CONTENT_ORIGIN, settings.CONTENT_PATH_PREFIX, self.base_path
            )
        )
        val = textwrap.dedent(
            f"""\
            [{self.name}]
            name={self.name}
            enabled=1
            baseurl={base_url}
            gpgcheck={publication.gpgcheck}
            repo_gpgcheck={publication.repo_gpgcheck}
            """
        )
        signing_service = repository.metadata_signing_service
        if signing_service:
            gpgkey_path = urlpath_sanitize(
                base_url,
                "/repodata/repomd.xml.key",
            )
            val += f"gpgkey={gpgkey_path}\n"
        return Response(body=val)

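# For illustration only: assuming hypothetical settings CONTENT_ORIGIN="https://pulp.example.com",
# CONTENT_PATH_PREFIX="/pulp/content/", base_path="myrepo", gpgcheck/repo_gpgcheck of 0, and a
# metadata signing service configured, the handler above would serve a config.repo body like:
#
#   [myrepo]
#   name=myrepo
#   enabled=1
#   baseurl=https://pulp.example.com/pulp/content/myrepo/
#   gpgcheck=0
#   repo_gpgcheck=0
#   gpgkey=https://pulp.example.com/pulp/content/myrepo/repodata/repomd.xml.key
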
def parse(self):
    """Parse repository metadata."""
    required_metadata_found = set()
    for record in self.data.repomd.records:
        self.checksum_types[record.type] = getattr(CHECKSUM_TYPES, record.checksum_type.upper())
        record.checksum_type = getattr(CHECKSUM_TYPES, record.checksum_type.upper())

        if record.type in PACKAGE_REPODATA:
            required_metadata_found.add(record.type)
            self.data.package_repodata_urls[record.type] = urlpath_sanitize(
                self.data.remote_url, record.location_href
            )
        elif record.type in UPDATE_REPODATA:
            self.data.updateinfo_url = urlpath_sanitize(self.data.remote_url, record.location_href)
        elif record.type in COMPS_REPODATA:
            self.data.comps_url = urlpath_sanitize(self.data.remote_url, record.location_href)
        elif record.type in MODULAR_REPODATA:
            self.data.modules_url = urlpath_sanitize(self.data.remote_url, record.location_href)
        elif record.type in SKIP_REPODATA:
            pass
        else:
            self._set_repomd_file(record)

    missing_types = set(PACKAGE_REPODATA) - required_metadata_found
    if missing_types:
        raise FileNotFoundError(
            _("XML file(s): {filenames} not found").format(filenames=", ".join(missing_types))
        )

async def parse_distribution_tree(self):
    """Parse content from the file treeinfo if present."""
    if self.treeinfo:
        d_artifacts = [
            DeclarativeArtifact(
                artifact=Artifact(),
                url=urlpath_sanitize(self.data.remote_url, self.treeinfo["filename"]),
                relative_path=".treeinfo",
                remote=self.remote,
                deferred_download=False,
            )
        ]
        for path, checksum in self.treeinfo["download"]["images"].items():
            artifact = Artifact(**checksum)
            da = DeclarativeArtifact(
                artifact=artifact,
                url=urlpath_sanitize(self.data.remote_url, path),
                relative_path=path,
                remote=self.remote,
                deferred_download=self.deferred_download,
            )
            d_artifacts.append(da)

        distribution_tree = DistributionTree(**self.treeinfo["distribution_tree"])
        dc = DeclarativeContent(content=distribution_tree, d_artifacts=d_artifacts)
        dc.extra_data = self.treeinfo
        await self.put(dc)

def content_handler(self, path):
    """Serve config.repo and public.key."""
    if path == self.repository_config_file_name:
        base_url = f"{settings.CONTENT_ORIGIN}{settings.CONTENT_PATH_PREFIX}{self.base_path}/"
        publication = self.publication.cast()
        val = textwrap.dedent(
            f"""\
            [{self.name}]
            enabled=1
            baseurl={base_url}
            gpgcheck={publication.gpgcheck}
            repo_gpgcheck={publication.repo_gpgcheck}
            """
        )

        repository_pk = self.publication.repository.pk
        repository = RpmRepository.objects.get(pk=repository_pk)
        signing_service = repository.metadata_signing_service
        if signing_service:
            gpgkey_path = urlpath_sanitize(
                settings.CONTENT_ORIGIN, settings.CONTENT_PATH_PREFIX
            )
            gpgkey_path = urllib.parse.urljoin(gpgkey_path, self.base_path, True)
            gpgkey_path += "/repodata/public.key"
            val += f"gpgkey={gpgkey_path}\n"

        return Response(body=val)

def get_treeinfo_data(remote, remote_url):
    """
    Get Treeinfo data from remote.
    """
    treeinfo_serialized = {}
    namespaces = [".treeinfo", "treeinfo"]
    for namespace in namespaces:
        downloader = remote.get_downloader(
            url=urlpath_sanitize(remote_url, namespace),
            silence_errors_for_response_status_codes={403, 404},
        )

        try:
            result = downloader.fetch()
        except FileNotFoundError:
            continue

        treeinfo = PulpTreeInfo()
        treeinfo.load(f=result.path)
        treeinfo_parsed = treeinfo.parsed_sections()
        sha256 = result.artifact_attributes["sha256"]
        treeinfo_serialized = TreeinfoData(treeinfo_parsed).to_dict(hash=sha256, filename=namespace)
        break

    return treeinfo_serialized

async def _run(self, extra_data=None):
    """
    Download, validate, and compute digests on the `url`. This is a coroutine.

    This method provides the same return object type as documented in
    :meth:`~pulpcore.plugin.download.BaseDownloader._run`.
    """
    if self.sles_auth_token:
        auth_param = f"?{self.sles_auth_token}"
        url = urlpath_sanitize(self.url) + auth_param
    else:
        url = self.url

    async with self.session.get(
        url, proxy=self.proxy, proxy_auth=self.proxy_auth, auth=self.auth
    ) as response:
        self.raise_for_status(response)
        to_return = await self._handle_response(response)
        await response.release()
        self.response_headers = response.headers

    if self._close_session_on_finalize:
        self.session.close()
    return to_return

async def run(self):
    """Build `DeclarativeContent` from the repodata."""
    progress_data = dict(message="Downloading Metadata Files", code="sync.downloading.metadata")
    with ProgressReport(**progress_data) as metadata_pb:
        self.data.metadata_pb = metadata_pb

        downloader = self.remote.get_downloader(
            url=urlpath_sanitize(self.data.remote_url, "repodata/repomd.xml")
        )
        result = await downloader.run()
        metadata_pb.increment()

        repomd_path = result.path
        self.data.repomd = cr.Repomd(repomd_path)

        self.repository.last_sync_revision_number = self.data.repomd.revision
        self.repository.last_sync_repomd_checksum = get_sha256(repomd_path)

        await self.parse_distribution_tree()
        await self.parse_repository_metadata()
        await self.parse_modules_metadata()
        await self.parse_packages_components()
        await self.parse_content()

        # now send modules down the pipeline since all relations have been set up
        for modulemd in self.data.modulemd_list:
            await self.put(modulemd)

        for dc_group in self.data.dc_groups:
            await self.put(dc_group)

async def _parse_packages(self, packages):
    """Turn parsed package metadata into `DeclarativeContent` and send it down the pipeline."""
    progress_data = {
        "message": "Parsed Packages",
        "code": "sync.parsing.packages",
        "total": len(packages),
    }

    with ProgressReport(**progress_data) as packages_pb:
        while True:
            try:
                (_, pkg) = packages.popitem(last=False)
            except KeyError:
                break
            package = Package(**Package.createrepo_to_dict(pkg))
            del pkg

            artifact = Artifact(size=package.size_package)
            checksum_type = getattr(CHECKSUM_TYPES, package.checksum_type.upper())
            setattr(artifact, checksum_type, package.pkgId)
            url = urlpath_sanitize(self.data.remote_url, package.location_href)
            filename = os.path.basename(package.location_href)
            da = DeclarativeArtifact(
                artifact=artifact,
                url=url,
                relative_path=filename,
                remote=self.remote,
                deferred_download=self.deferred_download,
            )
            dc = DeclarativeContent(content=package, d_artifacts=[da])
            dc.extra_data = defaultdict(list)

            # find if a package relates to a modulemd
            if dc.content.nevra in self.data.nevra_to_module.keys():
                dc.content.is_modular = True
                for dc_modulemd in self.data.nevra_to_module[dc.content.nevra]:
                    dc.extra_data["modulemd_relation"].append(dc_modulemd)
                    dc_modulemd.extra_data["package_relation"].append(dc)

            if dc.content.name in self.data.pkgname_to_groups.keys():
                for dc_group in self.data.pkgname_to_groups[dc.content.name]:
                    dc.extra_data["group_relations"].append(dc_group)
                    dc_group.extra_data["related_packages"].append(dc)

            packages_pb.increment()
            await self.put(dc)

def __init__(
    self,
    *args,
    silence_errors_for_response_status_codes=None,
    sles_auth_token=None,
    urlencode=True,
    **kwargs,
):
    """
    Initialize the downloader.
    """
    self.sles_auth_token = sles_auth_token
    if silence_errors_for_response_status_codes is None:
        silence_errors_for_response_status_codes = set()
    self.silence_errors_for_response_status_codes = silence_errors_for_response_status_codes

    super().__init__(*args, **kwargs)

    new_url = self.url
    if urlencode:
        # Some upstream-repos (eg, Amazon) require url-encoded paths for things like "libc++"
        # Let's make them happy.
        # We can't urlencode the whole url, because BasicAuth is still A Thing and we would
        # break username/passwords in the url.
        # So we need to urlencode **only the path** and **nothing else** .
        # We can't use _replace() and urlunparse(), because urlunparse() "helpfully" undoes
        # the urlencode we just did in the path.
        # We can't use urljoin(), because urljoin() "helpfully" treats a number of schemes
        # (like, say, uln:) as "can't take relative paths", and throws away everything
        # **except** the path-portion
        # So, we have a pretty ugly workaround.
        parsed = urlparse(self.url)
        # two pieces of the URL: pre- and post-path
        (before_path, after_path) = self.url.split(parsed.path)
        new_path = quote(unquote(parsed.path), safe=":/")  # fix the path
        new_url = "{}{}{}".format(before_path, new_path, after_path)  # rebuild

    if self.sles_auth_token:
        auth_param = f"?{self.sles_auth_token}"
        self.url = urlpath_sanitize(new_url) + auth_param
    else:
        self.url = new_url

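# A minimal, standalone illustration of the path-only quoting used above (not part of the
# downloader; the URL is hypothetical). Only the path component is re-encoded, so "+" in
# "libc++" becomes "%2B", and unquote() runs first so an already-encoded path is not
# double-encoded.
from urllib.parse import quote, unquote, urlparse

url = "https://example.com/repo/libc++/os/Packages/"  # hypothetical repository URL
parsed = urlparse(url)
before_path, after_path = url.split(parsed.path)
encoded_path = quote(unquote(parsed.path), safe=":/")
print(before_path + encoded_path + after_path)
# -> https://example.com/repo/libc%2B%2B/os/Packages/
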
def _set_repomd_file(self, record):
    """Build a `DeclarativeContent` for a repo metadata file and collect it in `self.repomd_dcs`."""
    if "_zck" not in record.type and record.type not in PACKAGE_DB_REPODATA:
        file_data = {record.checksum_type: record.checksum, "size": record.size}
        da = DeclarativeArtifact(
            artifact=Artifact(**file_data),
            url=urlpath_sanitize(self.data.remote_url, record.location_href),
            relative_path=record.location_href,
            remote=self.remote,
            deferred_download=False,
        )
        repo_metadata_file = RepoMetadataFile(
            data_type=record.type,
            checksum_type=record.checksum_type,
            checksum=record.checksum,
            relative_path=record.location_href,
        )
        dc = DeclarativeContent(content=repo_metadata_file, d_artifacts=[da])
        self.repomd_dcs.append(dc)

def get_repomd_file(remote, url):
    """
    Check if repodata exists.

    Args:
        remote(RpmRemote or UlnRemote): An RpmRemote or UlnRemote to download with.
        url(str): A remote repository URL

    Returns:
        pulpcore.plugin.download.DownloadResult: downloaded repomd.xml

    """
    downloader = remote.get_downloader(url=urlpath_sanitize(url, "repodata/repomd.xml"))

    try:
        result = downloader.fetch()
    except ClientResponseError as exc:
        if 404 == exc.status:
            return
        raise  # don't silently swallow HTTP errors other than 404
    except FileNotFoundError:
        return

    return result

def test_urlpath_sanitize(self):
    """Test urljoin-replacement."""
    # arbitrary number of args become one single-slash-separated string
    a_expected = "a"
    ab_expected = "a/b"
    abc_expected = "a/b/c"
    # a /a a/ /a/
    self.assertEqual(a_expected, urlpath_sanitize("a"))
    self.assertEqual(a_expected, urlpath_sanitize("/a"))
    self.assertEqual(a_expected, urlpath_sanitize("a/"))
    self.assertEqual(a_expected, urlpath_sanitize("/a/"))
    # a b : a/ b : /a b : a b/ : a /b : a /b/ : a/ /b
    self.assertEqual(ab_expected, urlpath_sanitize("a", "b"))
    self.assertEqual(ab_expected, urlpath_sanitize("a/", "b"))
    self.assertEqual(ab_expected, urlpath_sanitize("/a", "b"))
    self.assertEqual(ab_expected, urlpath_sanitize("a", "b/"))
    self.assertEqual(ab_expected, urlpath_sanitize("a", "/b"))
    self.assertEqual(ab_expected, urlpath_sanitize("a", "/b/"))
    self.assertEqual(ab_expected, urlpath_sanitize("a/", "/b"))
    self.assertEqual(ab_expected, urlpath_sanitize("a/", "", "/b"))
    self.assertEqual(ab_expected, urlpath_sanitize("a/", "/", "/b"))
    # a b c : a /b/ /c : /a/ /b/ /c/
    self.assertEqual(abc_expected, urlpath_sanitize("a", "b", "c"))
    self.assertEqual(abc_expected, urlpath_sanitize("a", "/b/", "/c"))
    self.assertEqual(abc_expected, urlpath_sanitize("/a/", "/b/", "/c/"))

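# The assertions above pin down the expected behavior of urlpath_sanitize(). What follows is
# only a minimal sketch of an implementation consistent with those assertions; the real helper
# in pulp_rpm may differ in details.
def urlpath_sanitize(*args):
    """
    Join an arbitrary number of path fragments into one single-slash-separated string.

    Empty fragments and leading/trailing slashes are dropped, so
    urlpath_sanitize("a/", "", "/b") == "a/b".
    """
    return "/".join(fragment.strip("/") for fragment in args if fragment.strip("/"))
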
def synchronize(remote_pk, repository_pk, mirror, skip_types, optimize):
    """
    Sync content from the remote repository.

    Create a new version of the repository that is synchronized with the remote.

    Args:
        remote_pk (str): The remote PK.
        repository_pk (str): The repository PK.
        mirror (bool): Mirror mode.
        skip_types (list): List of content to skip.
        optimize (bool): Optimize mode.

    Raises:
        ValueError: If the remote does not specify a url to sync.

    """
    try:
        remote = RpmRemote.objects.get(pk=remote_pk)
    except ObjectDoesNotExist:
        remote = UlnRemote.objects.get(pk=remote_pk)
    repository = RpmRepository.objects.get(pk=repository_pk)

    if not remote.url:
        raise ValueError(_("A remote must have a url specified to synchronize."))

    log.info(
        _("Synchronizing: repository={r} remote={p}").format(r=repository.name, p=remote.name)
    )

    deferred_download = remote.policy != Remote.IMMEDIATE  # Interpret download policy

    with tempfile.TemporaryDirectory("."):
        remote_url = fetch_remote_url(remote)

    if optimize and is_optimized_sync(repository, remote, remote_url):
        return

    with tempfile.TemporaryDirectory("."):
        treeinfo = get_treeinfo_data(remote, remote_url)

    if treeinfo:
        treeinfo["repositories"] = {}
        for repodata in set(treeinfo["download"]["repodatas"]):
            if repodata == DIST_TREE_MAIN_REPO_PATH:
                treeinfo["repositories"].update({repodata: None})
                continue
            name = f"{repodata}-{treeinfo['hash']}"
            sub_repo, created = RpmRepository.objects.get_or_create(name=name, sub_repo=True)
            if created:
                sub_repo.save()
            directory = treeinfo["repo_map"][repodata]
            treeinfo["repositories"].update({directory: str(sub_repo.pk)})
            path = f"{repodata}/"
            new_url = urlpath_sanitize(remote_url, path)

            with tempfile.TemporaryDirectory("."):
                repodata_exists = get_repomd_file(remote, new_url)

            if repodata_exists:
                if optimize and is_optimized_sync(sub_repo, remote, new_url):
                    continue
                stage = RpmFirstStage(
                    remote,
                    sub_repo,
                    deferred_download,
                    skip_types=skip_types,
                    new_url=new_url,
                )
                dv = RpmDeclarativeVersion(first_stage=stage, repository=sub_repo)
                dv.create()
                sub_repo.last_sync_remote = remote
                sub_repo.last_sync_repo_version = sub_repo.latest_version().number
                sub_repo.save()

    first_stage = RpmFirstStage(
        remote,
        repository,
        deferred_download,
        skip_types=skip_types,
        treeinfo=treeinfo,
        new_url=remote_url,
    )
    dv = RpmDeclarativeVersion(first_stage=first_stage, repository=repository, mirror=mirror)
    version = dv.create()
    if version:
        repository.last_sync_remote = remote
        repository.last_sync_repo_version = version.number
        repository.save()
    return version