def _build_additions(self):
    """
    Generate the content to be added.

    Yields:
        PendingContent: The next gem to add, carrying both its ``.gem``
            artifact and its ``.gemspec.rz`` artifact.
    """
    feed_url = urlparse(self._importer.feed_url)
    base_dir = os.path.dirname(feed_url.path)
    for key in self._keys_to_add:
        # Instantiate the content and artifacts based on the key values.
        gem = GemContent(name=key.name, version=key.version)
        name_version = key.name + '-' + key.version
        gem_relative_path = os.path.join('gems', name_version + '.gem')
        gem_url = urlunparse(
            feed_url._replace(path=os.path.join(base_dir, gem_relative_path)))
        spec_relative_path = os.path.join(
            'quick/Marshal.4.8', name_version + '.gemspec.rz')
        spec_url = urlunparse(
            feed_url._replace(path=os.path.join(base_dir, spec_relative_path)))
        # Now that we know what we want to add, hand it to "core" with the
        # API objects.
        yield PendingContent(
            gem,
            artifacts={
                PendingArtifact(Artifact(), gem_url, gem_relative_path),
                PendingArtifact(Artifact(), spec_url, spec_relative_path),
            })
async def parse_distribution_tree(self):
    """Parse content from the file treeinfo if present."""
    if not self.treeinfo:
        return
    # The .treeinfo file itself is always downloaded immediately.
    declarative_artifacts = [
        DeclarativeArtifact(
            artifact=Artifact(),
            url=urljoin(self.data.remote_url, self.treeinfo["filename"]),
            relative_path=".treeinfo",
            remote=self.remote,
            deferred_download=False,
        )
    ]
    # Image files follow the remote's download policy; their checksums are
    # known from the treeinfo data.
    for image_path, image_checksum in self.treeinfo["download"]["images"].items():
        declarative_artifacts.append(
            DeclarativeArtifact(
                artifact=Artifact(**image_checksum),
                url=urljoin(self.data.remote_url, image_path),
                relative_path=image_path,
                remote=self.remote,
                deferred_download=self.deferred_download,
            )
        )
    tree = DistributionTree(**self.treeinfo["distribution_tree"])
    dc = DeclarativeContent(content=tree, d_artifacts=declarative_artifacts)
    dc.extra_data = self.treeinfo
    await self.put(dc)
class CookbookContentHandlerTestCase(TestCase):
    """Test the CookbookContentHandler."""

    def setUp(self):
        """Create two identical on-demand content units, each with a RemoteArtifact."""
        self.remote = CookbookRemote.objects.create(name="remote")
        self.c1 = CookbookPackageContent.objects.create(
            name="c1", version="1.0.0", dependencies={})
        self.ca1 = ContentArtifact.objects.create(
            artifact=None, content=self.c1, relative_path=self.c1.relative_path())
        self.ra1 = RemoteArtifact.objects.create(
            content_artifact=self.ca1, remote=self.remote)
        self.c1_prime = CookbookPackageContent.objects.create(
            name="c1", version="1.0.0", dependencies={})
        self.ca1_prime = ContentArtifact.objects.create(
            artifact=None,
            content=self.c1_prime,
            relative_path=self.c1_prime.relative_path())
        self.ra1_prime = RemoteArtifact.objects.create(
            content_artifact=self.ca1_prime, remote=self.remote)
        # Freshly created units must not carry a digest-based content id yet.
        self.assertEqual(self.c1.content_id_type, CookbookPackageContent.UUID)
        self.assertEqual(self.c1_prime.content_id_type, CookbookPackageContent.UUID)

    @patch.object(Handler, "_save_artifact", return_value=Artifact(sha256="1"))
    def test_save_artifact(self, save_artifact_mock):
        """Verify the 'on_demand' policy case."""
        handler = CookbookContentHandler()
        new_artifact = handler._save_artifact(
            None, RemoteArtifact.objects.get(pk=self.ra1.pk))
        refreshed_c1 = CookbookPackageContent.objects.get(pk=self.c1.pk)
        self.assertIsNotNone(new_artifact)
        # The content unit was upgraded with the artifact's digest.
        self.assertEqual(refreshed_c1.content_id, "1")

    @patch.object(Handler, "_save_artifact", return_value=Artifact(sha256="1"))
    def test_save_artifact_other_content_exists(self, save_artifact_mock):
        """When save tries to 'upgrade' the cookbook with the digest, failure is ignored."""
        # A unit with the same digest-based identity already exists.
        CookbookPackageContent.objects.create(
            name="c1",
            version="1.0.0",
            content_id_type=CookbookPackageContent.SHA256,
            content_id="1",
            dependencies={},
        )
        handler = CookbookContentHandler()
        new_artifact = handler._save_artifact(
            None, RemoteArtifact.objects.get(pk=self.ra1.pk))
        refreshed_c1 = CookbookPackageContent.objects.get(pk=self.c1.pk)
        self.assertIsNotNone(new_artifact)
        # The original unit keeps its UUID identity untouched.
        self.assertEqual(refreshed_c1.content_id_type, CookbookPackageContent.UUID)
        self.assertEqual(refreshed_c1.content_id, str(self.c1.content_id))
def test_content_associated_using_repo_key(self):
    """Batch query matches existing units by repo key and attaches stored artifacts."""
    stage = QueryExistingRepoContentAndArtifacts(
        new_version=self.new_version_all_content())
    # c1: Existing content unit with Artifact
    c1 = CookbookPackageContent(name="c1", version="1.0.0", dependencies={})
    # c2: content unit does not exist in DB
    c2 = CookbookPackageContent(name="c2", version="1.0.0", dependencies={})
    # c3: content unit does exist, has a content_artifact association,
    # but no artifact (i.e. is a non-immediate content unit)
    c3 = CookbookPackageContent(name="c3", version="1.0.0", dependencies={})
    declarative_artifacts = [
        DeclarativeArtifact(
            artifact=Artifact(),
            url=url,
            relative_path=content.relative_path(),
            remote=self.remote,
        )
        for content, url in ((c1, "http://a1"), (c2, "http://a2"), (c3, "http://a3"))
    ]
    batch = [
        DeclarativeContent(content=content, d_artifacts=[da])
        for content, da in zip((c1, c2, c3), declarative_artifacts)
    ]
    stage._process_batch(batch)
    # c1 was found: pk and stored artifact are attached.
    self.assertEqual(batch[0].content.content_id, "1")
    self.assertEqual(batch[0].content.pk, self.c1.pk)
    self.assertEqual(batch[0].d_artifacts[0].artifact.pk, self.a1.pk)
    # c2 is unknown: content and artifact remain unsaved.
    self.assertIsNone(batch[1].content.pk)
    self.assertTrue(batch[1].d_artifacts[0].artifact._state.adding)
    # c3 exists but its artifact was never downloaded.
    self.assertEqual(batch[2].content.pk, self.c3.pk)
    self.assertTrue(batch[2].d_artifacts[0].artifact._state.adding)
async def _add_role_versions(self, role_future, metadata):
    """Emit a `DeclarativeContent` for every version of the given role."""
    role = await role_future
    namespace = metadata['namespace']
    name = metadata['name']
    for version in metadata['summary_fields']['versions']:
        version_name = version['name']
        download_url = GITHUB_URL % (
            metadata['github_user'],
            metadata['github_repo'],
            version_name,
        )
        role_version = AnsibleRoleVersion(version=version_name, role=role)
        relative_path = "%s/%s/%s.tar.gz" % (namespace, name, version_name)
        d_artifact = DeclarativeArtifact(
            artifact=Artifact(),
            url=download_url,
            relative_path=relative_path,
            remote=self.remote,
            deferred_download=self.deferred_download,
        )
        await self.put(
            DeclarativeContent(content=role_version, d_artifacts=[d_artifact])
        )
def __init__(self, content):
    """
    Args:
        content (PendingContent): The associated pending content.
    """
    # Seed the base class with an empty in-memory Artifact and blank
    # url/relative-path placeholders; only the content link matters here.
    super().__init__(Artifact(), '', '')
    self.content = content
async def run(self):
    """
    Build and emit `DeclarativeContent` from the ansible metadata.
    """
    with ProgressBar(message='Parsing Role Metadata') as pb:
        async for metadata in self._fetch_roles():
            namespace = metadata['namespace']
            name = metadata['name']
            for version in metadata['summary_fields']['versions']:
                version_name = version['name']
                url = GITHUB_URL % (
                    metadata['github_user'],
                    metadata['github_repo'],
                    version_name,
                )
                role = Role(version=version_name, name=name, namespace=namespace)
                relative_path = "%s/%s/%s.tar.gz" % (namespace, name, version_name)
                d_artifact = DeclarativeArtifact(
                    artifact=Artifact(),
                    url=url,
                    relative_path=relative_path,
                    remote=self.remote,
                    deferred_download=self.deferred_download,
                )
                d_content = DeclarativeContent(content=role, d_artifacts=[d_artifact])
                pb.increment()
                await self.put(d_content)
async def run(self):
    """
    Build and emit `DeclarativeContent` from the Manifest data.

    Args:
        in_q (asyncio.Queue): Unused because the first stage doesn't read
            from an input queue.
        out_q (asyncio.Queue): The out_q to send `DeclarativeContent` objects to
    """
    downloader = self.remote.get_downloader(url=self.remote.url)
    download_result = await downloader.run()
    # Use ProgressReport to report progress
    metadata = self.get_json_data(download_result.path)
    package = Package(name=metadata["name"], version=metadata["version"])
    tarball_url = metadata["dist"]["tarball"]
    declarative_artifact = DeclarativeArtifact(
        Artifact(),  # make Artifact in memory-only
        tarball_url,
        tarball_url.split("/")[-1],
        self.remote,
        deferred_download=self.deferred_download,
    )
    await self.put(
        DeclarativeContent(content=package, d_artifacts=[declarative_artifact])
    )
def create_manifest(self, list_dc, manifest_data):
    """
    Create an Image Manifest from manifest data in a ManifestList.

    Args:
        list_dc (pulpcore.plugin.stages.DeclarativeContent): dc for a ManifestList
        manifest_data (dict): Data about a single new ImageManifest.

    Returns:
        pulpcore.plugin.stages.DeclarativeContent: dc for the new Manifest.
    """
    digest = manifest_data['digest']
    manifest_url = urljoin(
        self.remote.url,
        '/v2/{name}/manifests/{digest}'.format(
            name=self.remote.namespaced_upstream_name, digest=digest),
    )
    declarative_artifact = DeclarativeArtifact(
        artifact=Artifact(),
        url=manifest_url,
        relative_path=digest,
        remote=self.remote,
        extra_data={'headers': V2_ACCEPT_HEADERS},
    )
    media_type = manifest_data['mediaType']
    manifest = Manifest(
        digest=digest,
        # Only V2 manifests carry schema version 2; everything else is 1.
        schema_version=2 if media_type == MEDIA_TYPE.MANIFEST_V2 else 1,
        media_type=media_type,
    )
    # does_batch=False opts this dc out of pipeline batching; the relation
    # to the parent list is recorded in extra_data.
    return DeclarativeContent(
        content=manifest,
        d_artifacts=[declarative_artifact],
        extra_data={'relation': list_dc},
        does_batch=False,
    )
async def run(self):
    """
    Build and emit `DeclarativeContent` from the Manifest data.

    Args:
        in_q (asyncio.Queue): Unused because the first stage doesn't read
            from an input queue.
        out_q (asyncio.Queue): The out_q to send `DeclarativeContent` objects to
    """
    with ProgressBar(message='Downloading Metadata') as pb:
        feed_url = urlparse(self.remote.url)
        base_dir = os.path.dirname(feed_url.path)
        downloader = self.remote.get_downloader(url=self.remote.url)
        result = await downloader.run()
        pb.increment()

    with ProgressBar(message='Parsing Metadata') as pb:
        for entry in self.read_my_metadata_file_somehow(result.path):
            picture = entry['picture']
            url = urlunparse(
                feed_url._replace(path=os.path.join(base_dir, picture)))
            unit = Animal(**entry)  # make the content unit in memory-only
            artifact = Artifact()  # make Artifact in memory-only
            da = DeclarativeArtifact(artifact, url, picture, self.remote)
            dc = DeclarativeContent(content=unit, d_artifacts=[da])
            pb.increment()
            await self.put(dc)
def create_blob(self, man_dc, blob_data):
    """
    Create blob.

    Args:
        man_dc (pulpcore.plugin.stages.DeclarativeContent): dc for a ImageManifest
        blob_data (dict): Data about a blob

    Returns:
        pulpcore.plugin.stages.DeclarativeContent: dc for the new Blob.
    """
    # Fall back to 'blobSum' when 'digest' is absent.
    digest = blob_data.get('digest') or blob_data.get('blobSum')
    # NOTE(review): assumes the digest is always 'sha256:'-prefixed — confirm.
    blob_artifact = Artifact(sha256=digest[len("sha256:"):])
    blob = Blob(
        digest=digest,
        media_type=blob_data.get('mediaType', MEDIA_TYPE.REGULAR_BLOB),
    )
    blob_url = urljoin(
        self.remote.url,
        '/v2/{name}/blobs/{digest}'.format(
            name=self.remote.namespaced_upstream_name, digest=digest),
    )
    declarative_artifact = DeclarativeArtifact(
        artifact=blob_artifact,
        url=blob_url,
        relative_path=digest,
        remote=self.remote,
        extra_data={'headers': V2_ACCEPT_HEADERS},
        deferred_download=self.deferred_download
    )
    return DeclarativeContent(content=blob, d_artifacts=[declarative_artifact])
async def run(self):
    """
    Build and emit `DeclarativeContent` from the Manifest data.
    """
    # Interpret download policy
    deferred_download = (self.remote.policy != Remote.IMMEDIATE)

    with ProgressBar(message='Downloading Metadata') as pb:
        feed_url = urlparse(self.remote.url)
        base_dir = os.path.dirname(feed_url.path)
        downloader = self.remote.get_downloader(url=self.remote.url)
        result = await downloader.run()
        pb.increment()

    with ProgressBar(message='Parsing Metadata') as pb:
        manifest = Manifest(result.path)
        for entry in manifest.read():
            url = urlunparse(
                feed_url._replace(path=os.path.join(base_dir, entry.relative_path)))
            file = FileContent(relative_path=entry.relative_path,
                               digest=entry.digest)
            # Size and digest are known from the manifest, enabling
            # validation at download time.
            artifact = Artifact(size=entry.size, sha256=entry.digest)
            da = DeclarativeArtifact(
                artifact=artifact,
                url=url,
                relative_path=entry.relative_path,
                remote=self.remote,
                deferred_download=deferred_download,
            )
            dc = DeclarativeContent(content=file, d_artifacts=[da])
            pb.increment()
            await self.put(dc)
async def run(self):
    """
    Build and emit `DeclarativeContent` from the Manifest data.

    If a cookbook specifier is set in the remote, cookbooks are filtered
    using this specifier.
    """
    with ProgressBar(message="Downloading Metadata", total=1) as pb:
        downloader = self.remote.get_downloader(
            url=urljoin(self.remote.url + "/", "universe"))
        result = await downloader.run()
        pb.increment()

    cookbook_names = self.remote.specifier_cookbook_names()

    with ProgressBar(message="Parsing Metadata") as pb:
        universe = Universe(result.path)
        for entry in universe.read():
            # Skip cookbooks not selected by the specifier (if one is set).
            if cookbook_names and entry.name not in cookbook_names:
                continue
            cookbook = CookbookPackageContent(
                name=entry.name,
                version=entry.version,
                dependencies=entry.dependencies,
            )
            da = DeclarativeArtifact(
                artifact=Artifact(),
                url=entry.download_url,
                relative_path=cookbook.relative_path(),
                remote=self.remote,
                deferred_download=not self.download_artifacts,
            )
            dc = DeclarativeContent(content=cookbook, d_artifacts=[da])
            pb.increment()
            await self.put(dc)
async def __call__(self, in_q, out_q):
    """
    Build and emit `DeclarativeContent` from the Manifest data.

    Args:
        in_q (asyncio.Queue): Unused because the first stage doesn't read
            from an input queue.
        out_q (asyncio.Queue): The out_q to send `DeclarativeContent` objects to
    """
    with ProgressBar(message='Downloading Metadata') as pb:
        feed_url = urlparse(self.remote.url)
        base_dir = os.path.dirname(feed_url.path)
        downloader = self.remote.get_downloader(self.remote.url)
        result = await downloader.run()
        pb.increment()

    with ProgressBar(message='Parsing Metadata') as pb:
        manifest = Manifest(result.path)
        for entry in manifest.read():
            url = urlunparse(
                feed_url._replace(path=os.path.join(base_dir, entry.relative_path)))
            file = FileContent(relative_path=entry.relative_path,
                               digest=entry.digest)
            artifact = Artifact(size=entry.size, sha256=entry.digest)
            da = DeclarativeArtifact(artifact, url, entry.relative_path,
                                     self.remote)
            dc = DeclarativeContent(content=file, d_artifacts=[da])
            pb.increment()
            await out_q.put(dc)
    # None is the sentinel telling the next stage this stage is done.
    await out_q.put(None)
def create_blob(self, blob_data, deferred_download=True):
    """
    Create blob.

    Args:
        blob_data (dict): Data about a blob
        deferred_download (bool): boolean that indicates whether not to download a blob
            immediately. Config blob is downloaded regardless of the remote's settings

    Returns:
        pulpcore.plugin.stages.DeclarativeContent: dc for the new Blob.
    """
    # Fall back to 'blobSum' when 'digest' is absent.
    digest = blob_data.get("digest") or blob_data.get("blobSum")
    blob_artifact = Artifact(sha256=digest[len("sha256:"):])
    blob = Blob(digest=digest,
                media_type=blob_data.get("mediaType", MEDIA_TYPE.REGULAR_BLOB))
    blob_url = urljoin(
        self.remote.url,
        "/v2/{name}/blobs/{digest}".format(
            name=self.remote.namespaced_upstream_name, digest=digest),
    )
    da = DeclarativeArtifact(
        artifact=blob_artifact,
        url=blob_url,
        relative_path=digest,
        remote=self.remote,
        # Defer only when both the caller and the remote's policy allow it.
        deferred_download=deferred_download and self.deferred_download,
    )
    return DeclarativeContent(content=blob, d_artifacts=[da])
def receive_artifact(self, chunk):
    """Handles assembling of Manifest as it's being uploaded."""
    with NamedTemporaryFile("ab") as temp_file:
        size = 0
        hashers = {
            algorithm: getattr(hashlib, algorithm)()
            for algorithm in Artifact.DIGEST_FIELDS
        }
        # Stream the upload into the temp file in ~2 MB subchunks while
        # updating every digest on the fly.
        while True:
            subchunk = chunk.read(2000000)
            if not subchunk:
                break
            temp_file.write(subchunk)
            size += len(subchunk)
            for hasher in hashers.values():
                hasher.update(subchunk)
        temp_file.flush()
        digests = {
            algorithm: hasher.hexdigest()
            for algorithm, hasher in hashers.items()
        }
        artifact = Artifact(file=temp_file.name, size=size, **digests)
        try:
            artifact.save()
        except IntegrityError:
            # An identical artifact is already stored; reuse it.
            artifact = Artifact.objects.get(sha256=artifact.sha256)
        return artifact
async def run(self):
    """
    Parse PackageIndex content units.

    Ensure, that an uncompressed artifact is available.
    """
    with ProgressReport(message="Update PackageIndex units",
                        code="update.packageindex") as pb:
        async for d_content in self.items():
            if isinstance(d_content.content, PackageIndex):
                if not d_content.d_artifacts:
                    raise NoPackageIndexFile()
                content = d_content.content
                has_main_artifact = any(
                    da.artifact.sha256 == content.sha256
                    for da in d_content.d_artifacts
                )
                if not has_main_artifact:
                    # No main_artifact found, uncompress one.
                    filename = _uncompress_artifact(d_content.d_artifacts)
                    da = DeclarativeArtifact(
                        Artifact(sha256=content.sha256),
                        filename,
                        content.relative_path,
                        d_content.d_artifacts[0].remote,
                    )
                    d_content.d_artifacts.append(da)
                    await da.download()
                    da.artifact.save()
                    log.info(
                        "*** Expected: {} *** Uncompressed: {} ***".format(
                            content.sha256, da.artifact.sha256))
                pb.increment()
            await self.put(d_content)
def create_blob(self, man_dc, blob_data):
    """
    Create blob.

    Args:
        man_dc (pulpcore.plugin.stages.DeclarativeContent): dc for a ImageManifest
        blob_data (dict): Data about a blob

    Returns:
        pulpcore.plugin.stages.DeclarativeContent: dc for the new Blob.
    """
    # Fall back to 'blobSum' when 'digest' is absent.
    digest = blob_data.get("digest") or blob_data.get("blobSum")
    blob_artifact = Artifact(sha256=digest[len("sha256:"):])
    blob = Blob(digest=digest,
                media_type=blob_data.get("mediaType", MEDIA_TYPE.REGULAR_BLOB))
    blob_url = urljoin(
        self.remote.url,
        "/v2/{name}/blobs/{digest}".format(
            name=self.remote.namespaced_upstream_name, digest=digest),
    )
    da = DeclarativeArtifact(
        artifact=blob_artifact,
        url=blob_url,
        relative_path=digest,
        remote=self.remote,
        deferred_download=self.deferred_download,
    )
    return DeclarativeContent(content=blob, d_artifacts=[da])
def create_pending_tag(self, tag_name):
    """
    Create `DeclarativeContent` for each tag.

    Each dc contains enough information to be downloaded by an
    ArtifactDownload Stage.

    Args:
        tag_name (str): Name of each tag

    Returns:
        pulpcore.plugin.stages.DeclarativeContent: A Tag DeclarativeContent object
    """
    url = urljoin(
        self.remote.url,
        '/v2/{name}/manifests/{tag}'.format(
            name=self.remote.namespaced_upstream_name, tag=tag_name),
    )
    tag = Tag(name=tag_name)
    # The artifact starts empty; the manifest bytes arrive at download time.
    da = DeclarativeArtifact(
        artifact=Artifact(),
        url=url,
        relative_path=tag_name,
        remote=self.remote,
        extra_data={'headers': V2_ACCEPT_HEADERS},
    )
    return DeclarativeContent(content=tag, d_artifacts=[da])
def _build_additions(self):
    """
    Generate the content to be added.

    This makes a second pass through the manifest. While it does not matter
    a lot for this plugin specifically, many plugins cannot hold the entire
    index of remote content in memory at once. They must reduce that to only
    the natural keys, decide which to retrieve (self.keys_to_add in our
    case), and then re-iterate the index to access each full entry one at a
    time.

    Yields:
        PendingContent: The next content unit to add.
    """
    feed_url = urlparse(self._importer.feed_url)
    base_dir = os.path.dirname(feed_url.path)
    for entry in self._manifest.read():
        # Skip entries we did not decide to add.
        if Key(path=entry.path, digest=entry.digest) not in self._keys_to_add:
            continue
        # Instantiate the content and artifact based on the manifest entry.
        url = urlunparse(
            feed_url._replace(path=os.path.join(base_dir, entry.path)))
        file = FileContent(path=entry.path, digest=entry.digest)
        artifact = Artifact(size=entry.size, sha256=entry.digest)
        # Now that we know what we want to add, hand it to "core" with the
        # API objects.
        yield PendingContent(
            file, artifacts={PendingArtifact(artifact, url, entry.path)})
async def create_pending_blob(self, man_dc, blob_data, out_q):
    """
    Create a pending blob from a layer in the ImageManifest.

    Args:
        man_dc (pulpcore.plugin.stages.DeclarativeContent): dc for an ImageManifest
        blob_data (dict): Data about a single new blob.
        out_q (asyncio.Queue): Queue to put created blob dcs.
            NOTE(review): unused in this body — the dc is returned instead.

    Returns:
        pulpcore.plugin.stages.DeclarativeContent: dc for the new blob.
    """
    digest = blob_data['digest']
    blob_artifact = Artifact(sha256=digest[len("sha256:"):])
    blob = ManifestBlob(digest=digest, media_type=blob_data['mediaType'])
    blob_url = urljoin(
        self.remote.url,
        '/v2/{name}/blobs/{digest}'.format(
            name=self.remote.namespaced_upstream_name, digest=digest),
    )
    da = DeclarativeArtifact(
        artifact=blob_artifact,
        url=blob_url,
        relative_path=digest,
        remote=self.remote,
        extra_data={'headers': V2_ACCEPT_HEADERS},
    )
    return DeclarativeContent(content=blob, d_artifacts=[da])
async def create_pending_manifest(self, list_dc, manifest_data, out_q):
    """
    Create a pending manifest from manifest data in a ManifestList.

    Args:
        list_dc (pulpcore.plugin.stages.DeclarativeContent): dc for a ManifestList
        manifest_data (dict): Data about a single new ImageManifest.
        out_q (asyncio.Queue): Queue to put created ImageManifest dcs.
    """
    digest = manifest_data['digest']
    manifest_url = urljoin(
        self.remote.url,
        '/v2/{name}/manifests/{digest}'.format(
            name=self.remote.namespaced_upstream_name, digest=digest),
    )
    # The digest is known up front, so the download can be validated.
    da = DeclarativeArtifact(
        artifact=Artifact(sha256=digest[len("sha256:"):]),
        url=manifest_url,
        relative_path=digest,
        remote=self.remote,
        extra_data={'headers': V2_ACCEPT_HEADERS},
    )
    manifest = ImageManifest(
        digest=digest,
        schema_version=2,
        media_type=manifest_data['mediaType'],
    )
    man_dc = DeclarativeContent(
        content=manifest,
        d_artifacts=[da],
        extra_data={'relation': list_dc},
    )
    await out_q.put(man_dc)
async def download(self):
    """
    Download content and update the associated Artifact.

    Returns:
        Returns the :class:`~pulpcore.plugin.download.DownloadResult` of the Artifact.
    """
    validation_kwargs = {}
    # Validate against any digests already set on the Artifact.
    expected_digests = {
        digest_name: getattr(self.artifact, digest_name)
        for digest_name in self.artifact.DIGEST_FIELDS
        if getattr(self.artifact, digest_name)
    }
    if expected_digests:
        validation_kwargs['expected_digests'] = expected_digests
    if self.artifact.size:
        validation_kwargs['expected_size'] = self.artifact.size
    downloader = self.remote.get_downloader(url=self.url, **validation_kwargs)
    # Custom downloaders may need extra information to complete the request.
    download_result = await downloader.run(extra_data=self.extra_data)
    # Replace the in-memory Artifact with one carrying the downloaded file
    # and its computed attributes.
    self.artifact = Artifact(**download_result.artifact_attributes,
                             file=download_result.path)
    return download_result
async def run(self):
    """
    Build and emit `DeclarativeContent` from the ansible metadata.
    """
    with ProgressReport(message="Parsing Collection Metadata",
                        code="parsing.metadata") as pb:
        async for metadata in self._fetch_collections():
            collection_version = CollectionVersion(
                namespace=metadata["namespace"]["name"],
                name=metadata["collection"]["name"],
                version=metadata["version"],
            )
            artifact_info = metadata["artifact"]
            # Digest and size are known from the metadata, so the download
            # can be validated.
            d_artifact = DeclarativeArtifact(
                artifact=Artifact(sha256=artifact_info["sha256"],
                                  size=artifact_info["size"]),
                url=metadata["download_url"],
                relative_path=collection_version.relative_path,
                remote=self.remote,
                deferred_download=self.deferred_download,
            )
            d_content = DeclarativeContent(
                content=collection_version, d_artifacts=[d_artifact])
            pb.increment()
            await self.put(d_content)
async def run(self):
    """
    Build and emit `DeclarativeContent` from the ansible metadata.
    """
    async with ProgressReport(
        message="Parsing Role Metadata", code="sync.parsing.metadata"
    ) as pb:
        async for metadata in self._fetch_roles():
            namespace = metadata["namespace"]
            name = metadata["name"]
            for version in metadata["summary_fields"]["versions"]:
                version_name = version["name"]
                url = GITHUB_URL % (
                    metadata["github_user"],
                    metadata["github_repo"],
                    version_name,
                )
                role = Role(version=version_name, name=name, namespace=namespace)
                relative_path = "%s/%s/%s.tar.gz" % (namespace, name, version_name)
                d_artifact = DeclarativeArtifact(
                    artifact=Artifact(),
                    url=url,
                    relative_path=relative_path,
                    remote=self.remote,
                    deferred_download=self.deferred_download,
                )
                d_content = DeclarativeContent(content=role, d_artifacts=[d_artifact])
                await pb.aincrement()
                await self.put(d_content)
async def run(self):
    """
    Build and emit `DeclarativeContent` from the Spec data.
    """
    # Interpret policy to download Artifacts or not
    deferred_download = self.remote.policy != Remote.IMMEDIATE

    with ProgressReport(message="Downloading Metadata") as progress:
        feed_url = urlparse(self.remote.url)
        base_dir = feed_url.path
        specs_url = urlunparse(
            feed_url._replace(path=os.path.join(base_dir, "specs.4.8.gz")))
        downloader = self.remote.get_downloader(url=specs_url)
        result = await downloader.run()
        progress.increment()

    with ProgressReport(message="Parsing Metadata") as progress:
        for key in read_specs(result.path):
            name_version = key.name + "-" + key.version
            gem_relative_path = os.path.join("gems", name_version + ".gem")
            gem_url = urlunparse(
                feed_url._replace(path=os.path.join(base_dir, gem_relative_path)))
            spec_relative_path = os.path.join(
                "quick/Marshal.4.8", name_version + ".gemspec.rz")
            spec_url = urlunparse(
                feed_url._replace(path=os.path.join(base_dir, spec_relative_path)))
            gem = GemContent(name=key.name, version=key.version)
            # Each gem carries two artifacts: the gem and its gemspec.
            da_gem = DeclarativeArtifact(
                artifact=Artifact(),
                url=gem_url,
                relative_path=gem_relative_path,
                remote=self.remote,
                deferred_download=deferred_download,
            )
            da_spec = DeclarativeArtifact(
                artifact=Artifact(),
                url=spec_url,
                relative_path=spec_relative_path,
                remote=self.remote,
                deferred_download=deferred_download,
            )
            dc = DeclarativeContent(content=gem, d_artifacts=[da_gem, da_spec])
            progress.increment()
            await self.put(dc)
async def _read_package_index(self, package_index):
    """
    Parse a package index file of apt Repositories.

    Put DeclarativeContent in the queue accordingly.

    Args:
        package_index: file object containing package paragraphs

    Yields:
        DeclarativeContent: one per valid package paragraph.
    """
    # Interpret policy to download Artifacts or not
    deferred_download = self.remote.policy != Remote.IMMEDIATE
    for package_paragraph in deb822.Packages.iter_paragraphs(package_index):
        try:
            package_relpath = package_paragraph["Filename"]
            package_sha256 = package_paragraph["sha256"]
            if package_relpath.endswith(".deb"):
                package_class = Package
                package_serializer_class = PackageSerializer
            elif package_relpath.endswith(".udeb"):
                package_class = InstallerPackage
                package_serializer_class = InstallerPackageSerializer
            else:
                # Fix: an unrecognized extension previously left
                # package_class unbound, raising UnboundLocalError below
                # (uncaught by the KeyError handler). Skip such paragraphs.
                log.warning(
                    "Ignoring package with unknown file extension. {}".format(
                        package_relpath
                    )
                )
                continue
            try:
                # Reuse an existing unit with the same sha256 if present.
                package_content_unit = package_class.objects.get(
                    sha256=package_sha256
                )
            except ObjectDoesNotExist:
                log.debug(
                    "Downloading package {}".format(package_paragraph["Package"])
                )
                package_dict = package_class.from822(package_paragraph)
                package_dict["relative_path"] = package_relpath
                package_dict["sha256"] = package_sha256
                package_serializer = package_serializer_class(
                    data=package_dict, partial=True
                )
                package_serializer.is_valid(raise_exception=True)
                package_content_unit = package_class(
                    **package_serializer.validated_data
                )
            package_path = os.path.join(self.parsed_url.path, package_relpath)
            package_artifact = Artifact(**_get_checksums(package_paragraph))
            package_da = DeclarativeArtifact(
                artifact=package_artifact,
                url=urlunparse(self.parsed_url._replace(path=package_path)),
                relative_path=package_relpath,
                remote=self.remote,
                deferred_download=deferred_download,
            )
            package_dc = DeclarativeContent(
                content=package_content_unit, d_artifacts=[package_da]
            )
            yield package_dc
        except KeyError:
            # Paragraph is missing 'Filename' or 'sha256'.
            log.warning(
                "Ignoring invalid package paragraph. {}".format(package_paragraph)
            )
async def __call__(self, in_q, out_q):
    """
    Build and emit `DeclarativeContent` from the remote metadata.

    Fetch and parse the remote metadata, use the Project Specifiers on the
    Remote to determine which Python packages should be synced.

    Args:
        in_q (asyncio.Queue): Unused because the first stage doesn't read
            from an input queue.
        out_q (asyncio.Queue): The out_q to send `DeclarativeContent`
            objects to.
    """
    all_specifiers = ProjectSpecifier.objects.filter(remote=self.remote)
    with ProgressBar(message='Fetching Project Metadata') as pb:
        # Group multiple specifiers to the same project together, so that we
        # only have to fetch the metadata once, and can re-use it if there
        # are multiple specifiers.
        for name, specifiers in groupby_unsorted(
                all_specifiers, key=lambda x: x.name):
            # Fetch the metadata from PyPI
            pb.increment()
            try:
                metadata = await self.get_project_metadata(name)
            except ClientResponseError as e:
                # Project doesn't exist, log a message and move on
                log.info(
                    _("HTTP 404 'Not Found' for url '{url}'\n"
                      "Does project '{name}' exist on the remote repository?"
                      ).format(url=e.request_info.url, name=name))
                continue
            specifiers = list(specifiers)
            # Determine which packages from the project match the criteria
            # in the specifiers
            packages = await self.get_relevant_packages(
                metadata=metadata,
                includes=[s for s in specifiers if not s.exclude],
                excludes=[s for s in specifiers if s.exclude],
                prereleases=self.remote.prereleases,
            )
            # For each package, create Declarative objects to pass into the
            # next stage
            for entry in packages:
                url = entry.pop('url')
                artifact = Artifact(sha256=entry.pop('sha256_digest'))
                package = PythonPackageContent(**entry)
                da = DeclarativeArtifact(artifact, url, entry['filename'],
                                         self.remote)
                dc = DeclarativeContent(content=package, d_artifacts=[da])
                await out_q.put(dc)
    # None is the sentinel telling the next stage this stage is done.
    await out_q.put(None)
async def create_artifact(self, pulp2_storage_path, expected_digests=None,
                          expected_size=None, downloaded=True):
    """
    Create a hard link if possible and then create an Artifact.

    If it's not possible to create a hard link, file is copied to the Pulp 3
    storage. For non-downloaded content, artifact with its expected checksum
    and size is created.

    Args:
        pulp2_storage_path (str): Path of the file in the Pulp 2 storage.
        expected_digests (dict): Expected digests keyed by algorithm name.
            Required when ``downloaded`` is False. (Fix: default changed
            from a shared mutable ``{}`` to ``None`` — backward-compatible,
            an explicit empty dict is treated the same way.)
        expected_size (int): Expected file size, if known.
        downloaded (bool): Whether the content is present on disk.

    Returns:
        Artifact: The created (unsaved) artifact.

    Raises:
        ValueError: If no digests are provided for on-demand content.
    """
    expected_digests = expected_digests or {}
    if not downloaded:
        if not expected_digests:
            raise ValueError(
                _('No digest is provided for on_demand content creation. Pulp 2 '
                  'storage path: {}'.format(pulp2_storage_path)))
        artifact = Artifact(**expected_digests)
        artifact.size = expected_size
        return artifact

    artifact = Artifact.init_and_validate(
        pulp2_storage_path,
        expected_digests=expected_digests,
        expected_size=expected_size)
    pulp3_storage_relative_path = storage.get_artifact_path(artifact.sha256)
    pulp3_storage_path = os.path.join(
        settings.MEDIA_ROOT, pulp3_storage_relative_path)
    os.makedirs(os.path.dirname(pulp3_storage_path), exist_ok=True)

    is_copied = False
    try:
        os.link(pulp2_storage_path, pulp3_storage_path)
    except FileExistsError:
        # File is already present in the Pulp 3 storage; nothing to do.
        pass
    except OSError:
        # E.g. a cross-device link; fall back to copying.
        _logger.debug(
            _('Hard link cannot be created, file will be copied.'))
        shutil.copy2(pulp2_storage_path, pulp3_storage_path)
        is_copied = True

    if not expected_digests:
        expected_digests = {'sha256': artifact.sha256}

    if is_copied:
        # recalculate checksums to ensure that after being copied a file is
        # still fine
        artifact = Artifact.init_and_validate(
            file=pulp3_storage_path,
            expected_digests=expected_digests,
            expected_size=expected_size)
    else:
        # a hard link has been created or a file has already been in the
        # pulp 3 storage, so artifact's path can be just updated and no
        # checksum recalculation is needed.
        artifact.file = pulp3_storage_path
    return artifact
def _save_artifact_blocking(artifact_attributes):
    """Save a new Artifact, or return the matching already-stored one."""
    artifact = Artifact(**artifact_attributes)
    try:
        artifact.save()
    except IntegrityError:
        # A matching artifact exists; look it up by the remaining attributes
        # (the 'file' key would not match a stored row) and refresh it.
        del artifact_attributes["file"]
        artifact = Artifact.objects.get(**artifact_attributes)
        artifact.touch()
    return artifact