Example #1
    async def parse_distribution_tree(self):
        """Parse content from the file treeinfo if present."""
        if self.treeinfo:
            d_artifacts = [
                DeclarativeArtifact(
                    artifact=Artifact(),
                    url=urljoin(self.data.remote_url,
                                self.treeinfo["filename"]),
                    relative_path=".treeinfo",
                    remote=self.remote,
                    deferred_download=False,
                )
            ]
            for path, checksum in self.treeinfo["download"]["images"].items():
                artifact = Artifact(**checksum)
                da = DeclarativeArtifact(
                    artifact=artifact,
                    url=urljoin(self.data.remote_url, path),
                    relative_path=path,
                    remote=self.remote,
                    deferred_download=self.deferred_download,
                )
                d_artifacts.append(da)

            distribution_tree = DistributionTree(
                **self.treeinfo["distribution_tree"])
            dc = DeclarativeContent(content=distribution_tree,
                                    d_artifacts=d_artifacts)
            dc.extra_data = self.treeinfo
            await self.put(dc)
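
All of the snippets on this page are methods of pulpcore sync-pipeline stages. As a point of reference, here is a minimal sketch of the imports they typically rely on; exact module paths can vary between pulpcore releases, and each plugin imports its own content models (Role, Blob, Package, ...) on top of these:

from urllib.parse import urljoin, urlparse, urlunparse

# Artifact, Remote and ProgressReport come from the plugin models API;
# the Stage base class and the declarative wrappers come from the stages API.
from pulpcore.plugin.models import Artifact, ProgressReport, Remote
from pulpcore.plugin.stages import DeclarativeArtifact, DeclarativeContent, Stage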
Example #2
    def test_content_associated_using_repo_key(self):
        stage = QueryExistingRepoContentAndArtifacts(
            new_version=self.new_version_all_content())

        # c1: Existing content unit with Artifact
        c1 = CookbookPackageContent(name="c1",
                                    version="1.0.0",
                                    dependencies={})
        # c2: content unit does not exist in DB
        c2 = CookbookPackageContent(name="c2",
                                    version="1.0.0",
                                    dependencies={})
        # c3: content unit does exist, has a content_artifact association,
        # but no artifact (i.e. is a non-immediate content unit)
        c3 = CookbookPackageContent(name="c3",
                                    version="1.0.0",
                                    dependencies={})

        d_c1_d_a1 = DeclarativeArtifact(
            artifact=Artifact(),
            url="http://a1",
            relative_path=c1.relative_path(),
            remote=self.remote,
        )
        d_c2_d_a2 = DeclarativeArtifact(
            artifact=Artifact(),
            url="http://a2",
            relative_path=c2.relative_path(),
            remote=self.remote,
        )
        d_c3_d_a3 = DeclarativeArtifact(
            artifact=Artifact(),
            url="http://a3",
            relative_path=c3.relative_path(),
            remote=self.remote,
        )

        batch = [
            DeclarativeContent(content=c1, d_artifacts=[d_c1_d_a1]),
            DeclarativeContent(content=c2, d_artifacts=[d_c2_d_a2]),
            DeclarativeContent(content=c3, d_artifacts=[d_c3_d_a3]),
        ]

        stage._process_batch(batch)

        self.assertEqual(batch[0].content.content_id, "1")
        self.assertEqual(batch[0].content.pk, self.c1.pk)
        self.assertEqual(batch[0].d_artifacts[0].artifact.pk, self.a1.pk)

        self.assertIsNone(batch[1].content.pk)
        self.assertTrue(batch[1].d_artifacts[0].artifact._state.adding)

        self.assertEqual(batch[2].content.pk, self.c3.pk)
        self.assertTrue(batch[2].d_artifacts[0].artifact._state.adding)
Example #3
 async def run(self):
     """
     Build and emit `DeclarativeContent` from the ansible metadata.
     """
     with ProgressBar(message='Parsing Role Metadata') as pb:
         async for metadata in self._fetch_roles():
             for version in metadata['summary_fields']['versions']:
                 url = GITHUB_URL % (
                     metadata['github_user'],
                     metadata['github_repo'],
                     version['name'],
                 )
                 role = Role(version=version['name'],
                             name=metadata['name'],
                             namespace=metadata['namespace'])
                 relative_path = "%s/%s/%s.tar.gz" % (
                     metadata['namespace'],
                     metadata['name'],
                     version['name'],
                 )
                 d_artifact = DeclarativeArtifact(
                     artifact=Artifact(),
                     url=url,
                     relative_path=relative_path,
                     remote=self.remote,
                     deferred_download=self.deferred_download,
                 )
                 d_content = DeclarativeContent(
                     content=role,
                     d_artifacts=[d_artifact],
                 )
                 pb.increment()
                 await self.put(d_content)
Example #4
    async def run(self):
        """
        Build and emit `DeclarativeContent` from the Manifest data.
        """
        downloader = self.remote.get_downloader(url=self.remote.url)
        result = await downloader.run()
        # Use ProgressReport to report progress
        data = self.get_json_data(result.path)
        package = Package(name=data["name"], version=data["version"])
        artifact = Artifact()  # make Artifact in memory-only
        url = data["dist"]["tarball"]
        da = DeclarativeArtifact(
            artifact,
            url,
            url.split("/")[-1],
            self.remote,
            deferred_download=self.deferred_download,
        )
        dc = DeclarativeContent(content=package, d_artifacts=[da])
        await self.put(dc)
Example #5
    def create_manifest(self, list_dc, manifest_data):
        """
        Create an Image Manifest from manifest data in a ManifestList.

        Args:
            list_dc (pulpcore.plugin.stages.DeclarativeContent): dc for a ManifestList
            manifest_data (dict): Data about a single new ImageManifest.
        """
        digest = manifest_data['digest']
        relative_url = '/v2/{name}/manifests/{digest}'.format(
            name=self.remote.namespaced_upstream_name, digest=digest)
        manifest_url = urljoin(self.remote.url, relative_url)
        da = DeclarativeArtifact(artifact=Artifact(),
                                 url=manifest_url,
                                 relative_path=digest,
                                 remote=self.remote,
                                 extra_data={'headers': V2_ACCEPT_HEADERS})
        manifest = Manifest(
            digest=manifest_data['digest'],
            schema_version=2
            if manifest_data['mediaType'] == MEDIA_TYPE.MANIFEST_V2 else 1,
            media_type=manifest_data['mediaType'],
        )
        man_dc = DeclarativeContent(
            content=manifest,
            d_artifacts=[da],
            extra_data={'relation': list_dc},
            does_batch=False,
        )
        return man_dc
Example #6
    def create_blob(self, man_dc, blob_data):
        """
        Create blob.

        Args:
            man_dc (pulpcore.plugin.stages.DeclarativeContent): dc for a ImageManifest
            blob_data (dict): Data about a blob

        """
        digest = blob_data.get("digest") or blob_data.get("blobSum")
        blob_artifact = Artifact(sha256=digest[len("sha256:"):])
        blob = Blob(digest=digest,
                    media_type=blob_data.get("mediaType",
                                             MEDIA_TYPE.REGULAR_BLOB))
        relative_url = "/v2/{name}/blobs/{digest}".format(
            name=self.remote.namespaced_upstream_name, digest=digest)
        blob_url = urljoin(self.remote.url, relative_url)
        da = DeclarativeArtifact(
            artifact=blob_artifact,
            url=blob_url,
            relative_path=digest,
            remote=self.remote,
            deferred_download=self.deferred_download,
        )
        blob_dc = DeclarativeContent(content=blob, d_artifacts=[da])

        return blob_dc
Example #7
    def create_blob(self, man_dc, blob_data):
        """
        Create blob.

        Args:
            man_dc (pulpcore.plugin.stages.DeclarativeContent): dc for a ImageManifest
            blob_data (dict): Data about a blob

        """
        digest = blob_data.get('digest') or blob_data.get('blobSum')
        blob_artifact = Artifact(sha256=digest[len("sha256:"):])
        blob = Blob(
            digest=digest,
            media_type=blob_data.get('mediaType', MEDIA_TYPE.REGULAR_BLOB),
        )
        relative_url = '/v2/{name}/blobs/{digest}'.format(
            name=self.remote.namespaced_upstream_name,
            digest=digest,
        )
        blob_url = urljoin(self.remote.url, relative_url)
        da = DeclarativeArtifact(
            artifact=blob_artifact,
            url=blob_url,
            relative_path=digest,
            remote=self.remote,
            extra_data={'headers': V2_ACCEPT_HEADERS},
            deferred_download=self.deferred_download
        )
        blob_dc = DeclarativeContent(
            content=blob,
            d_artifacts=[da],
        )

        return blob_dc
Example #8
 async def _add_role_versions(self, role_future, metadata):
     role = await role_future
     for version in metadata['summary_fields']['versions']:
         url = GITHUB_URL % (
             metadata['github_user'],
             metadata['github_repo'],
             version['name'],
         )
         role_version = AnsibleRoleVersion(version=version['name'],
                                           role=role)
         relative_path = "%s/%s/%s.tar.gz" % (
             metadata['namespace'],
             metadata['name'],
             version['name'],
         )
         d_artifact = DeclarativeArtifact(
             artifact=Artifact(),
             url=url,
             relative_path=relative_path,
             remote=self.remote,
             deferred_download=self.deferred_download,
         )
         d_content = DeclarativeContent(
             content=role_version,
             d_artifacts=[d_artifact],
         )
         await self.put(d_content)
Example #9
    async def create_pending_manifest(self, list_dc, manifest_data, out_q):
        """
        Create a pending manifest from manifest data in a ManifestList.

        Args:
            list_dc (pulpcore.plugin.stages.DeclarativeContent): dc for a ManifestList
            manifest_data (dict): Data about a single new ImageManifest.
            out_q (asyncio.Queue): Queue to put created ImageManifest dcs.
        """
        digest = manifest_data['digest']
        relative_url = '/v2/{name}/manifests/{digest}'.format(
            name=self.remote.namespaced_upstream_name, digest=digest)
        manifest_url = urljoin(self.remote.url, relative_url)
        manifest_artifact = Artifact(sha256=digest[len("sha256:"):])
        da = DeclarativeArtifact(artifact=manifest_artifact,
                                 url=manifest_url,
                                 relative_path=digest,
                                 remote=self.remote,
                                 extra_data={'headers': V2_ACCEPT_HEADERS})
        manifest = ImageManifest(
            digest=manifest_data['digest'],
            schema_version=2,
            media_type=manifest_data['mediaType'],
        )
        man_dc = DeclarativeContent(content=manifest,
                                    d_artifacts=[da],
                                    extra_data={'relation': list_dc})
        await out_q.put(man_dc)
Example #10
    async def create_and_process_tagged_manifest_list(self, tag_dc,
                                                      manifest_list_data):
        """
        Create a ManifestList and nested ImageManifests from the Tag artifact.

        Args:
            tag_dc (pulpcore.plugin.stages.DeclarativeContent): dc for a Tag
            manifest_list_data (dict): Data about a ManifestList
        """
        tag_dc.content = ManifestListTag(name=tag_dc.content.name)
        digest = "sha256:{digest}".format(
            digest=tag_dc.d_artifacts[0].artifact.sha256)
        relative_url = '/v2/{name}/manifests/{digest}'.format(
            name=self.remote.namespaced_upstream_name,
            digest=digest,
        )
        url = urljoin(self.remote.url, relative_url)
        manifest_list = ManifestList(
            digest=digest,
            schema_version=manifest_list_data['schemaVersion'],
            media_type=manifest_list_data['mediaType'],
        )
        da = DeclarativeArtifact(artifact=tag_dc.d_artifacts[0].artifact,
                                 url=url,
                                 relative_path=digest,
                                 remote=self.remote,
                                 extra_data={'headers': V2_ACCEPT_HEADERS})
        list_dc = DeclarativeContent(content=manifest_list, d_artifacts=[da])
        for manifest in manifest_list_data.get('manifests'):
            await self.create_pending_manifest(list_dc, manifest)
        list_dc.extra_data['relation'] = tag_dc
        list_dc.extra_data['processed'] = True
        tag_dc.extra_data['processed'] = True
        await self.put(list_dc)
Example #11
    def _parse_modulemd_default_names(self, modulemd_index):
        modulemd_default_names = parse_defaults(modulemd_index)

        # Parsing module-defaults happens all at one time, and from here on no useful
        # work happens. So just report that it finished this stage.
        modulemd_defaults_pb_data = {
            "message": "Parsed Modulemd-defaults",
            "code": "parsing.modulemd_defaults",
        }
        with ProgressReport(
                **modulemd_defaults_pb_data) as modulemd_defaults_pb:
            modulemd_defaults_total = len(modulemd_default_names)
            modulemd_defaults_pb.total = modulemd_defaults_total
            modulemd_defaults_pb.done = modulemd_defaults_total

        for default in modulemd_default_names:
            artifact = default.pop("artifact")
            relative_path = "{}{}snippet".format(
                default[PULP_MODULEDEFAULTS_ATTR.MODULE],
                default[PULP_MODULEDEFAULTS_ATTR.STREAM])
            da = DeclarativeArtifact(artifact=artifact,
                                     relative_path=relative_path,
                                     url=self.data.modules_url)
            default_content = ModulemdDefaults(**default)
            self.default_content_dcs.append(
                DeclarativeContent(content=default_content, d_artifacts=[da]))

        # delete list now that we're done with it for memory savings
        del modulemd_default_names
Example #12
    def create_tag(self, saved_artifact, url):
        """
        Create `DeclarativeContent` for each tag.

        Each dc contains enough information to be downloaded by an ArtifactDownloader stage.

        Args:
            saved_artifact (pulpcore.plugin.models.Artifact): Saved artifact for the tag
            url (str): URL of the tag; its last path component is used as the tag name

        Returns:
            pulpcore.plugin.stages.DeclarativeContent: A Tag DeclarativeContent object

        """
        tag_name = url.split('/')[-1]
        relative_url = '/v2/{name}/manifests/{tag}'.format(
            name=self.remote.namespaced_upstream_name,
            tag=tag_name,
        )
        url = urljoin(self.remote.url, relative_url)
        tag = Tag(name=tag_name)
        da = DeclarativeArtifact(
            artifact=saved_artifact,
            url=url,
            relative_path=tag_name,
            remote=self.remote,
            extra_data={'headers': V2_ACCEPT_HEADERS}
        )
        tag_dc = DeclarativeContent(content=tag, d_artifacts=[da])
        return tag_dc
Example #13
    async def run(self):
        """
        Parse PackageIndex content units.

        Ensure that an uncompressed artifact is available.
        """
        with ProgressReport(message="Update PackageIndex units",
                            code="update.packageindex") as pb:
            async for d_content in self.items():
                if isinstance(d_content.content, PackageIndex):
                    if not d_content.d_artifacts:
                        raise NoPackageIndexFile()

                    content = d_content.content
                    if not [
                            da for da in d_content.d_artifacts
                            if da.artifact.sha256 == content.sha256
                    ]:
                        # No artifact with the expected sha256 found; uncompress one
                        filename = _uncompress_artifact(d_content.d_artifacts)
                        da = DeclarativeArtifact(
                            Artifact(sha256=content.sha256),
                            filename,
                            content.relative_path,
                            d_content.d_artifacts[0].remote,
                        )
                        d_content.d_artifacts.append(da)
                        await da.download()
                        da.artifact.save()
                        log.info(
                            "*** Expected: {} *** Uncompressed: {} ***".format(
                                content.sha256, da.artifact.sha256))

                    pb.increment()
                await self.put(d_content)
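
The helper `_uncompress_artifact` is referenced above but not shown. Below is a purely hypothetical sketch of what such a helper might do, assuming the compressed index artifact's file is readable and that returning a file:// URL lets the subsequent `da.download()` call fetch the decompressed copy; it is not the plugin's actual implementation:

import bz2
import gzip
import lzma
import shutil
import tempfile

# Map common index compression suffixes onto their stdlib openers.
_OPENERS = {".gz": gzip.open, ".xz": lzma.open, ".bz2": bz2.open}


def _uncompress_artifact(d_artifacts):
    # Hypothetical sketch: decompress the first compressed index artifact into a
    # temporary file and return it as a file:// URL for a DeclarativeArtifact.
    for d_artifact in d_artifacts:
        for suffix, opener in _OPENERS.items():
            if d_artifact.relative_path.endswith(suffix):
                with opener(d_artifact.artifact.file) as f_in:
                    with tempfile.NamedTemporaryFile(delete=False) as f_out:
                        shutil.copyfileobj(f_in, f_out)
                return "file://{}".format(f_out.name)
    raise ValueError("No compressed package index artifact found")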
Example #14
    async def run(self):
        """
        Build and emit `DeclarativeContent` from the Manifest data.
        """
        deferred_download = (self.remote.policy != Remote.IMMEDIATE)  # Interpret download policy
        with ProgressBar(message='Downloading Metadata') as pb:
            parsed_url = urlparse(self.remote.url)
            root_dir = os.path.dirname(parsed_url.path)
            downloader = self.remote.get_downloader(url=self.remote.url)
            result = await downloader.run()
            pb.increment()

        with ProgressBar(message='Parsing Metadata') as pb:
            manifest = Manifest(result.path)
            for entry in manifest.read():
                path = os.path.join(root_dir, entry.relative_path)
                url = urlunparse(parsed_url._replace(path=path))
                file = FileContent(relative_path=entry.relative_path, digest=entry.digest)
                artifact = Artifact(size=entry.size, sha256=entry.digest)
                da = DeclarativeArtifact(
                    artifact=artifact,
                    url=url,
                    relative_path=entry.relative_path,
                    remote=self.remote,
                    deferred_download=deferred_download,
                )
                dc = DeclarativeContent(content=file, d_artifacts=[da])
                pb.increment()
                await self.put(dc)
Example #15
    async def __call__(self, in_q, out_q):
        """
        Build and emit `DeclarativeContent` from the Manifest data.

        Args:
            in_q (asyncio.Queue): Unused because the first stage doesn't read from an input queue.
            out_q (asyncio.Queue): The out_q to send `DeclarativeContent` objects to
        """
        with ProgressBar(message='Downloading Metadata') as pb:
            parsed_url = urlparse(self.remote.url)
            root_dir = os.path.dirname(parsed_url.path)
            downloader = self.remote.get_downloader(self.remote.url)
            result = await downloader.run()
            pb.increment()

        with ProgressBar(message='Parsing Metadata') as pb:
            manifest = Manifest(result.path)
            for entry in manifest.read():
                path = os.path.join(root_dir, entry.relative_path)
                url = urlunparse(parsed_url._replace(path=path))
                file = FileContent(relative_path=entry.relative_path,
                                   digest=entry.digest)
                artifact = Artifact(size=entry.size, sha256=entry.digest)
                da = DeclarativeArtifact(artifact, url, entry.relative_path,
                                         self.remote)
                dc = DeclarativeContent(content=file, d_artifacts=[da])
                pb.increment()
                await out_q.put(dc)
        await out_q.put(None)
Example #16
    async def run(self):
        """
        Build and emit `DeclarativeContent` from the ansible metadata.
        """
        with ProgressReport(message="Parsing Collection Metadata",
                            code="parsing.metadata") as pb:
            async for metadata in self._fetch_collections():

                url = metadata["download_url"]

                collection_version = CollectionVersion(
                    namespace=metadata["namespace"]["name"],
                    name=metadata["collection"]["name"],
                    version=metadata["version"],
                )

                artifact = metadata["artifact"]

                d_artifact = DeclarativeArtifact(
                    artifact=Artifact(sha256=artifact["sha256"],
                                      size=artifact["size"]),
                    url=url,
                    relative_path=collection_version.relative_path,
                    remote=self.remote,
                    deferred_download=self.deferred_download,
                )
                d_content = DeclarativeContent(content=collection_version,
                                               d_artifacts=[d_artifact])
                pb.increment()
                await self.put(d_content)
Example #17
 async def run(self):
     """
     Build and emit `DeclarativeContent` from the ansible metadata.
     """
     async with ProgressReport(
         message="Parsing Role Metadata", code="sync.parsing.metadata"
     ) as pb:
         async for metadata in self._fetch_roles():
             for version in metadata["summary_fields"]["versions"]:
                 url = GITHUB_URL % (
                     metadata["github_user"],
                     metadata["github_repo"],
                     version["name"],
                 )
                 role = Role(
                     version=version["name"],
                     name=metadata["name"],
                     namespace=metadata["namespace"],
                 )
                 relative_path = "%s/%s/%s.tar.gz" % (
                     metadata["namespace"],
                     metadata["name"],
                     version["name"],
                 )
                 d_artifact = DeclarativeArtifact(
                     artifact=Artifact(),
                     url=url,
                     relative_path=relative_path,
                     remote=self.remote,
                     deferred_download=self.deferred_download,
                 )
                 d_content = DeclarativeContent(content=role, d_artifacts=[d_artifact])
                 await pb.aincrement()
                 await self.put(d_content)
Example #18
    def create_blob(self, blob_data, deferred_download=True):
        """
        Create blob.

        Args:
            blob_data (dict): Data about a blob
            deferred_download (bool): indicates whether downloading the blob should be deferred.
                The config blob is downloaded immediately, regardless of the remote's settings

        """
        digest = blob_data.get("digest") or blob_data.get("blobSum")
        blob_artifact = Artifact(sha256=digest[len("sha256:"):])
        blob = Blob(digest=digest,
                    media_type=blob_data.get("mediaType",
                                             MEDIA_TYPE.REGULAR_BLOB))
        relative_url = "/v2/{name}/blobs/{digest}".format(
            name=self.remote.namespaced_upstream_name, digest=digest)
        blob_url = urljoin(self.remote.url, relative_url)
        da = DeclarativeArtifact(
            artifact=blob_artifact,
            url=blob_url,
            relative_path=digest,
            remote=self.remote,
            deferred_download=deferred_download and self.deferred_download,
        )
        blob_dc = DeclarativeContent(content=blob, d_artifacts=[da])

        return blob_dc
Example #19
    async def create_pending_blob(self, man_dc, blob_data, out_q):
        """
        Create a pending blob from a layer in the ImageManifest.

        Args:
            man_dc (pulpcore.plugin.stages.DeclarativeContent): dc for an ImageManifest
            blob_data (dict): Data about a single new blob.
            out_q (asyncio.Queue): Queue to put created blob dcs.

        """
        digest = blob_data['digest']
        blob_artifact = Artifact(sha256=digest[len("sha256:"):])
        blob = ManifestBlob(
            digest=digest,
            media_type=blob_data['mediaType'],
        )
        relative_url = '/v2/{name}/blobs/{digest}'.format(
            name=self.remote.namespaced_upstream_name,
            digest=blob_data['digest'],
        )
        blob_url = urljoin(self.remote.url, relative_url)
        da = DeclarativeArtifact(artifact=blob_artifact,
                                 url=blob_url,
                                 relative_path=blob_data['digest'],
                                 remote=self.remote,
                                 extra_data={'headers': V2_ACCEPT_HEADERS})
        blob_dc = DeclarativeContent(
            content=blob,
            d_artifacts=[da],
        )
        return blob_dc
Example #20
    async def run(self):
        """
        Build and emit `DeclarativeContent` from the Manifest data.
        """
        with ProgressBar(message='Downloading Metadata') as pb:
            parsed_url = urlparse(self.remote.url)
            root_dir = os.path.dirname(parsed_url.path)
            downloader = self.remote.get_downloader(url=self.remote.url)
            result = await downloader.run()
            pb.increment()

        with ProgressBar(message='Parsing Metadata') as pb:
            for entry in self.read_my_metadata_file_somehow(result.path):
                path = os.path.join(root_dir, entry['picture'])
                url = urlunparse(parsed_url._replace(path=path))
                unit = Animal(**entry)  # make the content unit in memory-only
                artifact = Artifact()  # make Artifact in memory-only
                da = DeclarativeArtifact(artifact, url, entry['picture'], self.remote)
                dc = DeclarativeContent(content=unit, d_artifacts=[da])
                pb.increment()
                await self.put(dc)
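
`read_my_metadata_file_somehow` is a placeholder in the snippet above. A hypothetical implementation, living on the same stage class, might look like this, assuming the downloaded metadata is a JSON array whose objects carry the fields the Animal model expects plus a 'picture' key with the artifact's relative path:

import json

    def read_my_metadata_file_somehow(self, path):
        """Hypothetical parser: return a list of dicts, one per content unit."""
        with open(path) as fp:
            return json.load(fp)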
Example #21
    async def _pre_save(self, batch):
        """
        Relate manifest to tag before saving tag.
        We need to do it in the pre_save hook because of Tag's uniqueness constraint.

        Args:
            batch (list of :class:`~pulpcore.plugin.stages.DeclarativeContent`): The batch of
                :class:`~pulpcore.plugin.stages.DeclarativeContent` objects to be saved.

        """
        for dc in batch:
            if type(dc.content) == Tag:
                related_man_id = dc.extra_data.get('tag_rel')
                # find manifest by id
                # We are relying on the order of the processed DC
                # Manifests should have passed through ContentSaver stage already
                man = Manifest.objects.filter(digest=related_man_id).first()
                artifact = man._artifacts.get()
                # add manifest's artifact
                da = DeclarativeArtifact(
                    artifact=artifact,
                    url=NOT_USED,
                    relative_path=dc.content.name,
                    remote=NOT_USED,
                    deferred_download=False)
                dc.d_artifacts.append(da)
                dc.content.tagged_manifest = man
Example #22
    def create_tagged_manifest_list(self, tag_dc, manifest_list_data):
        """
        Create a ManifestList.

        Args:
            tag_dc (pulpcore.plugin.stages.DeclarativeContent): dc for a Tag
            manifest_list_data (dict): Data about a ManifestList

        """
        digest = "sha256:{digest}".format(digest=tag_dc.d_artifacts[0].artifact.sha256)
        relative_url = '/v2/{name}/manifests/{digest}'.format(
            name=self.remote.namespaced_upstream_name,
            digest=digest,
        )
        url = urljoin(self.remote.url, relative_url)
        manifest_list = Manifest(
            digest=digest,
            schema_version=manifest_list_data['schemaVersion'],
            media_type=manifest_list_data['mediaType'],
        )
        da = DeclarativeArtifact(
            artifact=tag_dc.d_artifacts[0].artifact,
            url=url,
            relative_path=digest,
            remote=self.remote,
            extra_data={'headers': V2_ACCEPT_HEADERS}
        )
        list_dc = DeclarativeContent(content=manifest_list, d_artifacts=[da])

        return list_dc
Example #23
    async def run(self):
        """
        Build and emit `DeclarativeContent` from the Manifest data.

        If a cookbook specifier is set in the remote, cookbooks are filtered
        using this specifier.

        """
        with ProgressBar(message="Downloading Metadata", total=1) as pb:
            downloader = self.remote.get_downloader(url=urljoin(self.remote.url + "/", "universe"))
            result = await downloader.run()
            pb.increment()

        cookbook_names = self.remote.specifier_cookbook_names()

        with ProgressBar(message="Parsing Metadata") as pb:
            universe = Universe(result.path)
            for entry in universe.read():
                if cookbook_names and entry.name not in cookbook_names:
                    continue
                cookbook = CookbookPackageContent(
                    name=entry.name, version=entry.version, dependencies=entry.dependencies
                )
                artifact = Artifact()
                da = DeclarativeArtifact(
                    artifact=artifact,
                    url=entry.download_url,
                    relative_path=cookbook.relative_path(),
                    remote=self.remote,
                    deferred_download=not self.download_artifacts,
                )
                dc = DeclarativeContent(content=cookbook, d_artifacts=[da])
                pb.increment()
                await self.put(dc)
Example #24
    def create_tagged_manifest(self, tag_dc, manifest_data, raw_data):
        """
        Create an Image Manifest.

        Args:
            tag_dc (pulpcore.plugin.stages.DeclarativeContent): dc for a Tag
            manifest_data (dict): Data about a single new ImageManifest.
            raw_data: (str): The raw JSON representation of the ImageManifest.

        """
        media_type = manifest_data.get('mediaType', MEDIA_TYPE.MANIFEST_V1)
        if media_type in (MEDIA_TYPE.MANIFEST_V2, MEDIA_TYPE.MANIFEST_OCI):
            digest = "sha256:{digest}".format(digest=tag_dc.d_artifacts[0].artifact.sha256)
        else:
            digest = self._calculate_digest(raw_data)
        manifest = Manifest(
            digest=digest,
            schema_version=manifest_data['schemaVersion'],
            media_type=media_type
        )
        relative_url = '/v2/{name}/manifests/{digest}'.format(
            name=self.remote.namespaced_upstream_name,
            digest=digest,
        )
        url = urljoin(self.remote.url, relative_url)
        da = DeclarativeArtifact(
            artifact=tag_dc.d_artifacts[0].artifact,
            url=url,
            relative_path=digest,
            remote=self.remote,
            extra_data={'headers': V2_ACCEPT_HEADERS}
        )
        man_dc = DeclarativeContent(content=manifest, d_artifacts=[da])
        return man_dc
Example #25
    def queue_dc(self, delays=[], artifact_path=None):
        """Put a DeclarativeContent instance into `in_q`

        For each `delay` in `delays`, associate a DeclarativeArtifact
        with download duration `delay` to the content unit. `delay ==
        None` means that the artifact is already present (pk is set)
        and no download is required. `artifact_path != None` means
        that the Artifact already has a file associated with it and a
        download does not need to be scheduled.
        """
        das = []
        for delay in delays:
            artifact = mock.Mock()
            artifact.pk = uuid4()
            artifact._state.adding = delay is not None
            artifact.DIGEST_FIELDS = []
            artifact.file = artifact_path
            remote = mock.Mock()
            remote.get_downloader = DownloaderMock
            das.append(
                DeclarativeArtifact(artifact=artifact,
                                    url=str(delay),
                                    relative_path="path",
                                    remote=remote))
        dc = DeclarativeContent(content=mock.Mock(), d_artifacts=das)
        self.in_q.put_nowait(dc)
Example #26
    async def run(self):
        """
        Build and emit `DeclarativeContent` from the Spec data.
        """
        # Interpret policy to download Artifacts or not
        deferred_download = self.remote.policy != Remote.IMMEDIATE

        with ProgressReport(message="Downloading Metadata") as progress:
            parsed_url = urlparse(self.remote.url)
            root_dir = parsed_url.path
            specs_path = os.path.join(root_dir, "specs.4.8.gz")
            specs_url = urlunparse(parsed_url._replace(path=specs_path))
            downloader = self.remote.get_downloader(url=specs_url)
            result = await downloader.run()
            progress.increment()

        with ProgressReport(message="Parsing Metadata") as progress:
            for key in read_specs(result.path):
                relative_path = os.path.join(
                    "gems", key.name + "-" + key.version + ".gem")
                path = os.path.join(root_dir, relative_path)
                url = urlunparse(parsed_url._replace(path=path))

                spec_relative_path = os.path.join(
                    "quick/Marshal.4.8",
                    key.name + "-" + key.version + ".gemspec.rz")
                spec_path = os.path.join(root_dir, spec_relative_path)
                spec_url = urlunparse(parsed_url._replace(path=spec_path))
                gem = GemContent(name=key.name, version=key.version)
                da_gem = DeclarativeArtifact(
                    artifact=Artifact(),
                    url=url,
                    relative_path=relative_path,
                    remote=self.remote,
                    deferred_download=deferred_download,
                )
                da_spec = DeclarativeArtifact(
                    artifact=Artifact(),
                    url=spec_url,
                    relative_path=spec_relative_path,
                    remote=self.remote,
                    deferred_download=deferred_download,
                )
                dc = DeclarativeContent(content=gem,
                                        d_artifacts=[da_gem, da_spec])
                progress.increment()
                await self.put(dc)
Example #27
    async def _read_package_index(self, package_index):
        """
        Parse a package index file of apt Repositories.

        Put DeclarativeContent in the queue accordingly.

        Args:
            package_index: file object containing package paragraphs

        """
        # Interpret policy to download Artifacts or not
        deferred_download = self.remote.policy != Remote.IMMEDIATE

        for package_paragraph in deb822.Packages.iter_paragraphs(package_index):
            try:
                package_relpath = package_paragraph["Filename"]
                package_sha256 = package_paragraph["sha256"]
                if package_relpath.endswith(".deb"):
                    package_class = Package
                    package_serializer_class = PackageSerializer
                elif package_relpath.endswith(".udeb"):
                    package_class = InstallerPackage
                    package_serializer_class = InstallerPackageSerializer
                try:
                    package_content_unit = package_class.objects.get(
                        sha256=package_sha256
                    )
                except ObjectDoesNotExist:
                    log.debug(
                        "Downloading package {}".format(package_paragraph["Package"])
                    )
                    package_dict = package_class.from822(package_paragraph)
                    package_dict["relative_path"] = package_relpath
                    package_dict["sha256"] = package_sha256
                    package_serializer = package_serializer_class(
                        data=package_dict, partial=True
                    )
                    package_serializer.is_valid(raise_exception=True)
                    package_content_unit = package_class(
                        **package_serializer.validated_data
                    )
                package_path = os.path.join(self.parsed_url.path, package_relpath)
                package_artifact = Artifact(**_get_checksums(package_paragraph))
                package_da = DeclarativeArtifact(
                    artifact=package_artifact,
                    url=urlunparse(self.parsed_url._replace(path=package_path)),
                    relative_path=package_relpath,
                    remote=self.remote,
                    deferred_download=deferred_download,
                )
                package_dc = DeclarativeContent(
                    content=package_content_unit, d_artifacts=[package_da]
                )
                yield package_dc
            except KeyError:
                log.warning(
                    "Ignoring invalid package paragraph. {}".format(package_paragraph)
                )
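
`_get_checksums` is referenced above but not shown. A hedged sketch of what such a helper might do, assuming the deb822 paragraph exposes checksum fields under names like 'SHA256' and 'MD5sum' and that Artifact accepts the matching lowercase digest keyword arguments:

def _get_checksums(unit_dict):
    """Hypothetical sketch: map deb822 checksum fields onto Artifact digest kwargs."""
    field_map = {"md5": "MD5sum", "sha1": "SHA1", "sha256": "SHA256", "sha512": "SHA512"}
    # Keep only the checksums actually present in the package paragraph.
    return {
        digest_name: unit_dict[field]
        for digest_name, field in field_map.items()
        if field in unit_dict
    }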
Example #28
    async def __call__(self, in_q, out_q):
        """
        Build and emit `DeclarativeContent` from the remote metadata.

        Fetch and parse the remote metadata, use the Project Specifiers on the Remote
        to determine which Python packages should be synced.

        Args:
            in_q (asyncio.Queue): Unused because the first stage doesn't read from an input queue.
            out_q (asyncio.Queue): The out_q to send `DeclarativeContent` objects to.

        """
        ps = ProjectSpecifier.objects.filter(remote=self.remote)

        with ProgressBar(message='Fetching Project Metadata') as pb:
            # Group multiple specifiers to the same project together, so that we only have to fetch
            # the metadata once, and can re-use it if there are multiple specifiers.
            for name, project_specifiers in groupby_unsorted(
                    ps, key=lambda x: x.name):
                # Fetch the metadata from PyPI
                pb.increment()
                try:
                    metadata = await self.get_project_metadata(name)
                except ClientResponseError as e:
                    # Project doesn't exist, log a message and move on
                    log.info(
                        _("HTTP 404 'Not Found' for url '{url}'\n"
                          "Does project '{name}' exist on the remote repository?"
                          ).format(url=e.request_info.url, name=name))
                    continue
                project_specifiers = list(project_specifiers)

                # Determine which packages from the project match the criteria in the specifiers
                packages = await self.get_relevant_packages(
                    metadata=metadata,
                    includes=[
                        specifier for specifier in project_specifiers
                        if not specifier.exclude
                    ],
                    excludes=[
                        specifier for specifier in project_specifiers
                        if specifier.exclude
                    ],
                    prereleases=self.remote.prereleases)

                # For each package, create Declarative objects to pass into the next stage
                for entry in packages:
                    url = entry.pop('url')

                    artifact = Artifact(sha256=entry.pop('sha256_digest'))
                    package = PythonPackageContent(**entry)

                    da = DeclarativeArtifact(artifact, url, entry['filename'],
                                             self.remote)
                    dc = DeclarativeContent(content=package, d_artifacts=[da])

                    await out_q.put(dc)
        await out_q.put(None)
Example #29
 def _create_manifest_declarative_artifact(self, relative_url, saved_artifact, digest):
     url = urljoin(self.remote.url, relative_url)
     da = DeclarativeArtifact(
         artifact=saved_artifact,
         url=url,
         relative_path=digest,
         remote=self.remote,
         extra_data={"headers": V2_ACCEPT_HEADERS},
     )
     return da
Example #30
    async def migrate_to_pulp3(self, batch, pb=None):
        """
        Docker specific implementation of DeclarativeContent creation for migrating
        docker content to Pulp 3.

        Args:
            batch: A batch of Pulp2Content objects to migrate to Pulp 3
        """

        for pulp2content in batch:
            pulp_2to3_detail_content = pulp2content.detail_model
            pulp3content = pulp_2to3_detail_content.create_pulp3_content()
            future_relations = {'pulp2content': pulp2content}
            # store digests for future pulp3 content relations
            if pulp_2to3_detail_content.type == 'docker_manifest':

                future_relations['blob_rel'] = pulp_2to3_detail_content.blobs
                future_relations[
                    'config_blob_rel'] = pulp_2to3_detail_content.config_blob

            if pulp_2to3_detail_content.type == 'docker_manifest_list':

                future_relations[
                    'man_rel'] = pulp_2to3_detail_content.listed_manifests

            if pulp_2to3_detail_content.type == 'docker_tag':

                future_relations[
                    'tag_rel'] = pulp_2to3_detail_content.tagged_manifest

            if pulp_2to3_detail_content.type == 'docker_tag':
                # dc without an artifact; the artifact will be assigned in the _pre_save hook
                dc = DeclarativeContent(content=pulp3content)
            else:
                artifact = await self.create_artifact(
                    pulp2content.pulp2_storage_path,
                    pulp_2to3_detail_content.expected_digests,
                    pulp_2to3_detail_content.expected_size)
                da = DeclarativeArtifact(
                    artifact=artifact,
                    url=NOT_USED,
                    relative_path=pulp_2to3_detail_content.
                    relative_path_for_content_artifact,
                    remote=NOT_USED,
                    deferred_download=False)
                dc = DeclarativeContent(content=pulp3content,
                                        d_artifacts=[da],
                                        does_batch=False)

            dc.extra_data = future_relations
            await self.put(dc)
            if pb:
                pb.increment()
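
Each of the first stages above is meant to feed pulpcore's declarative sync pipeline. A hedged sketch of the usual wiring inside a sync task, where MyRemote and MyFirstStage are placeholders for the plugin-specific classes shown in the examples and the exact DeclarativeVersion signature can differ between pulpcore versions:

from pulpcore.plugin.models import Repository
from pulpcore.plugin.stages import DeclarativeVersion


def synchronize(remote_pk, repository_pk):
    """Create a new repository version by running the declarative content pipeline."""
    remote = MyRemote.objects.get(pk=remote_pk)            # placeholder Remote subclass
    repository = Repository.objects.get(pk=repository_pk)
    first_stage = MyFirstStage(remote)                     # any first stage like the ones above
    DeclarativeVersion(first_stage, repository).create()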