Example #1
    def post(self, request):
        """Upload an RPM package."""
        serializer = OneShotUploadSerializer(
            data=request.data, context={'request': request})
        serializer.is_valid(raise_exception=True)

        artifact = Artifact.init_and_validate(request.data['file'])

        if 'repository' in request.data:
            repository = serializer.validated_data['repository']
        else:
            repository = None

        try:
            artifact.save()
        except IntegrityError:
            # if artifact already exists, let's use it
            artifact = Artifact.objects.get(sha256=artifact.sha256)

        async_result = enqueue_with_reservation(
            one_shot_upload, [artifact],
            kwargs={
                'artifact': artifact,
                'repository': repository,
            })
        return OperationPostponedResponse(async_result, request)
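The try/except IntegrityError above is a get-or-create keyed on the artifact's sha256: the unique digest constraint makes a duplicate save fail cleanly, and the existing row is fetched instead. A stdlib-only sketch of the same pattern, with an illustrative table that is not Pulp's schema:

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE artifact (sha256 TEXT PRIMARY KEY, path TEXT)")

def save_or_fetch(sha256, path):
    try:
        with conn:
            conn.execute("INSERT INTO artifact VALUES (?, ?)", (sha256, path))
    except sqlite3.IntegrityError:
        pass  # the artifact already exists; reuse the stored row
    return conn.execute(
        "SELECT sha256, path FROM artifact WHERE sha256 = ?", (sha256,)
    ).fetchone()

print(save_or_fetch("eeff", "/tmp/a"))  # ('eeff', '/tmp/a') -- inserted
print(save_or_fetch("eeff", "/tmp/b"))  # ('eeff', '/tmp/a') -- existing row wins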
Example #2
    async def run(self):
        """
        Build and emit `DeclarativeContent` from the Spec data.
        """
        # Interpret policy to download Artifacts or not
        deferred_download = self.remote.policy != Remote.IMMEDIATE

        async with ProgressReport(message="Downloading Metadata") as progress:
            parsed_url = urlparse(self.remote.url)
            root_dir = parsed_url.path
            specs_path = os.path.join(root_dir, "specs.4.8.gz")
            specs_url = urlunparse(parsed_url._replace(path=specs_path))
            downloader = self.remote.get_downloader(url=specs_url)
            result = await downloader.run()
            await progress.aincrement()

        async with ProgressReport(message="Parsing Metadata") as progress:
            for key in read_specs(result.path):
                relative_path = os.path.join(
                    "gems", key.name + "-" + key.version + ".gem")
                path = os.path.join(root_dir, relative_path)
                url = urlunparse(parsed_url._replace(path=path))

                spec_relative_path = os.path.join(
                    "quick/Marshal.4.8",
                    key.name + "-" + key.version + ".gemspec.rz")
                spec_path = os.path.join(root_dir, spec_relative_path)
                spec_url = urlunparse(parsed_url._replace(path=spec_path))
                gem = GemContent(name=key.name, version=key.version)
                da_gem = DeclarativeArtifact(
                    artifact=Artifact(),
                    url=url,
                    relative_path=relative_path,
                    remote=self.remote,
                    deferred_download=deferred_download,
                )
                da_spec = DeclarativeArtifact(
                    artifact=Artifact(),
                    url=spec_url,
                    relative_path=spec_relative_path,
                    remote=self.remote,
                    deferred_download=deferred_download,
                )
                dc = DeclarativeContent(content=gem,
                                        d_artifacts=[da_gem, da_spec])
                await progress.aincrement()
                await self.put(dc)
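Note how the stage never concatenates URL strings: the remote URL is parsed once, and only its path component is swapped per file, preserving scheme and host. A self-contained sketch of that rewriting, with a made-up host and gem name:

import os
from urllib.parse import urlparse, urlunparse

remote_url = "https://rubygems.example.org/base"
parsed_url = urlparse(remote_url)
root_dir = parsed_url.path  # "/base"

relative_path = os.path.join("gems", "rake-13.0.6.gem")
path = os.path.join(root_dir, relative_path)
print(urlunparse(parsed_url._replace(path=path)))
# https://rubygems.example.org/base/gems/rake-13.0.6.gem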
Example #3
    async def _read_package_index(self, package_index):
        """
        Parse a package index file of an apt repository.

        Put DeclarativeContent in the queue accordingly.

        Args:
            package_index: file object containing package paragraphs

        """
        # Interpret policy to download Artifacts or not
        deferred_download = self.remote.policy != Remote.IMMEDIATE

        for package_paragraph in deb822.Packages.iter_paragraphs(package_index):
            try:
                package_relpath = package_paragraph["Filename"]
                package_sha256 = package_paragraph["sha256"]
                if package_relpath.endswith(".deb"):
                    package_class = Package
                    package_serializer_class = PackageSerializer
                elif package_relpath.endswith(".udeb"):
                    package_class = InstallerPackage
                    package_serializer_class = InstallerPackageSerializer
                else:
                    # neither a binary nor an installer package; skip it so the
                    # classes above are never referenced unbound
                    continue
                try:
                    package_content_unit = package_class.objects.get(
                        sha256=package_sha256
                    )
                except ObjectDoesNotExist:
                    log.debug(
                        "Downloading package {}".format(package_paragraph["Package"])
                    )
                    package_dict = package_class.from822(package_paragraph)
                    package_dict["relative_path"] = package_relpath
                    package_dict["sha256"] = package_sha256
                    package_serializer = package_serializer_class(
                        data=package_dict, partial=True
                    )
                    package_serializer.is_valid(raise_exception=True)
                    package_content_unit = package_class(
                        **package_serializer.validated_data
                    )
                package_path = os.path.join(self.parsed_url.path, package_relpath)
                package_artifact = Artifact(**_get_checksums(package_paragraph))
                package_da = DeclarativeArtifact(
                    artifact=package_artifact,
                    url=urlunparse(self.parsed_url._replace(path=package_path)),
                    relative_path=package_relpath,
                    remote=self.remote,
                    deferred_download=deferred_download,
                )
                package_dc = DeclarativeContent(
                    content=package_content_unit, d_artifacts=[package_da]
                )
                yield package_dc
            except KeyError:
                log.warning(
                    "Ignoring invalid package paragraph. {}".format(package_paragraph)
                )
Example #4
    async def __call__(self, in_q, out_q):
        """
        Build and emit `DeclarativeContent` from the remote metadata.

        Fetch and parse the remote metadata, use the Project Specifiers on the Remote
        to determine which Python packages should be synced.

        Args:
            in_q (asyncio.Queue): Unused because the first stage doesn't read from an input queue.
            out_q (asyncio.Queue): The out_q to send `DeclarativeContent` objects to.

        """
        ps = ProjectSpecifier.objects.filter(remote=self.remote)

        with ProgressBar(message='Fetching Project Metadata') as pb:
            # Group multiple specifiers to the same project together, so that we only have to fetch
            # the metadata once, and can re-use it if there are multiple specifiers.
            for name, project_specifiers in groupby_unsorted(
                    ps, key=lambda x: x.name):
                # Fetch the metadata from PyPI
                pb.increment()
                try:
                    metadata = await self.get_project_metadata(name)
                except ClientResponseError as e:
                    # Project doesn't exist, log a message and move on
                    log.info(
                        _("HTTP 404 'Not Found' for url '{url}'\n"
                          "Does project '{name}' exist on the remote repository?"
                          ).format(url=e.request_info.url, name=name))
                    continue
                project_specifiers = list(project_specifiers)

                # Determine which packages from the project match the criteria in the specifiers
                packages = await self.get_relevant_packages(
                    metadata=metadata,
                    includes=[
                        specifier for specifier in project_specifiers
                        if not specifier.exclude
                    ],
                    excludes=[
                        specifier for specifier in project_specifiers
                        if specifier.exclude
                    ],
                    prereleases=self.remote.prereleases)

                # For each package, create Declarative objects to pass into the next stage
                for entry in packages:
                    url = entry.pop('url')

                    artifact = Artifact(sha256=entry.pop('sha256_digest'))
                    package = PythonPackageContent(**entry)

                    da = DeclarativeArtifact(artifact, url, entry['filename'],
                                             self.remote)
                    dc = DeclarativeContent(content=package, d_artifacts=[da])

                    await out_q.put(dc)
        await out_q.put(None)
Example #5
    async def create_artifact(self,
                              pulp2_storage_path,
                              expected_digests=None,
                              expected_size=None):
        """
        Create a hard link if possible and then create an Artifact.

        If it's not possible to create a hard link, the file is copied to the Pulp 3 storage.
        """
        expected_digests = expected_digests or {}
        artifact = None
        if not expected_digests.get('sha256'):
            # TODO: all checksums are calculated for the pulp 2 storage path, is it ok?
            artifact = Artifact.init_and_validate(pulp2_storage_path,
                                                  size=expected_size)

        sha256digest = expected_digests.get('sha256') or artifact.sha256

        pulp3_storage_relative_path = storage.get_artifact_path(sha256digest)
        pulp3_storage_path = os.path.join(settings.MEDIA_ROOT,
                                          pulp3_storage_relative_path)
        os.makedirs(os.path.dirname(pulp3_storage_path), exist_ok=True)

        is_copied = False
        try:
            os.link(pulp2_storage_path, pulp3_storage_path)
        except FileExistsError:
            pass
        except OSError:
            _logger.debug('Hard link cannot be created, file will be copied.')
            shutil.copy2(pulp2_storage_path, pulp3_storage_path)
            is_copied = True

        expected_digests = {'sha256': sha256digest}

        if is_copied:
            # recalculate checksums to ensure that after being copied a file is still fine
            artifact = Artifact.init_and_validate(
                file=pulp3_storage_path,
                expected_digests=expected_digests,
                expected_size=expected_size)
        else:
            # a hard link has been created or the file already exists in the pulp 3 storage,
            # so the artifact's path can simply be updated; no checksum recalculation is needed.
            if artifact is None:
                artifact = Artifact(sha256=sha256digest, size=expected_size)
            artifact.file = pulp3_storage_path

        return artifact
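The core move here is link-or-copy: a hard link is free and instant when source and destination share a filesystem, and a cross-device OSError degrades to a real copy. A minimal stdlib sketch of that fallback, using demo temporary paths:

import os
import shutil
import tempfile

def link_or_copy(src, dst):
    """Hard-link src to dst, copying instead if linking is impossible.

    Returns True if the file was copied (and so should be re-verified).
    """
    os.makedirs(os.path.dirname(dst), exist_ok=True)
    try:
        os.link(src, dst)
    except FileExistsError:
        return False  # destination already present, nothing to do
    except OSError:
        # e.g. src and dst live on different filesystems
        shutil.copy2(src, dst)
        return True
    return False

src = tempfile.NamedTemporaryFile(delete=False).name
dst = os.path.join(tempfile.mkdtemp(), "artifact", "copy")
print(link_or_copy(src, dst))  # False: a hard link was created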
Example #6
    def downloaded(self, downloader):
        """
        The artifact (file) has been downloaded.
        A new _stored_model is created (and assigned) for the downloaded file.

        Args:
            downloader (BaseDownloader): The downloader that successfully completed.
        """
        self._stored_model = Artifact(file=downloader.path, **downloader.artifact_attributes)
Example #7
    def create(self, request, path):
        """
        Dispatch a Collection creation task.
        """
        distro = get_object_or_404(AnsibleDistribution, base_path=path)
        serializer = CollectionOneShotSerializer(data=request.data,
                                                 context={"request": request})
        serializer.is_valid(raise_exception=True)

        expected_digests = {}
        if serializer.validated_data["sha256"]:
            expected_digests["sha256"] = serializer.validated_data["sha256"]
        try:
            artifact = Artifact.init_and_validate(
                serializer.validated_data["file"],
                expected_digests=expected_digests)
        except DigestValidationError:
            raise serializers.ValidationError(
                _("The provided sha256 value does not match the sha256 of the uploaded file."
                  ))

        try:
            artifact.save()
        except IntegrityError:
            raise serializers.ValidationError(_("Artifact already exists."))

        kwargs = {}

        if serializer.validated_data["expected_namespace"]:
            kwargs["expected_namespace"] = serializer.validated_data[
                "expected_namespace"]

        if serializer.validated_data["expected_name"]:
            kwargs["expected_name"] = serializer.validated_data[
                "expected_name"]

        if serializer.validated_data["expected_version"]:
            kwargs["expected_version"] = serializer.validated_data[
                "expected_version"]

        async_result = self._dispatch_import_collection_task(
            artifact.pk, distro.repository, **kwargs)
        CollectionImport.objects.create(task_id=async_result.id)

        data = {
            "task": reverse(
                "collection-imports-detail",
                kwargs={"path": path, "pk": async_result.id},
                request=None,
            )
        }
        return Response(data, status=http_status.HTTP_202_ACCEPTED)
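The three `if serializer.validated_data[...]` blocks forward only the fields the caller actually supplied, so the task keeps its own defaults otherwise. The same selection can be written as one comprehension; a small sketch with made-up values:

validated_data = {"expected_namespace": "my_ns", "expected_name": None,
                  "expected_version": "1.0.0"}

kwargs = {
    field: validated_data[field]
    for field in ("expected_namespace", "expected_name", "expected_version")
    if validated_data[field]
}
print(kwargs)  # {'expected_namespace': 'my_ns', 'expected_version': '1.0.0'}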
Example #8
 def setUp(self):
     """Setup database fixtures."""
     self.package1 = Package(
         package_name='aegir',
         version='0.1-edda0',
         architecture='sea',
         maintainer='Utgardloki',
         description='A sea jötunn associated with the ocean.',
     )
     self.package1.save()
     self.artifact1 = Artifact(
         size=42,
         md5='aabb',
         sha1='ccdd',
         sha256='eeff',
         file=SimpleUploadedFile('test_filename', b'test content'),
     )
     self.artifact1.save()
     ContentArtifact(artifact=self.artifact1, content=self.package1).save()
Example #10
    def put(self, request, path, pk=None):
        """Handles creation of Uploads."""
        _, repository = self.get_dr_push(request, path)

        digest = request.query_params["digest"]
        upload = models.Upload.objects.get(pk=pk, repository=repository)

        if upload.sha256 == digest[len("sha256:"):]:
            try:
                artifact = Artifact(
                    file=upload.file.name,
                    md5=upload.md5,
                    sha1=upload.sha1,
                    sha256=upload.sha256,
                    sha384=upload.sha384,
                    sha512=upload.sha512,
                    size=upload.file.size,
                )
                artifact.save()
            except IntegrityError:
                artifact = Artifact.objects.get(sha256=artifact.sha256)
            try:
                blob = models.Blob(digest=digest,
                                   media_type=models.MEDIA_TYPE.REGULAR_BLOB)
                blob.save()
            except IntegrityError:
                blob = models.Blob.objects.get(digest=digest)
            try:
                blob_artifact = ContentArtifact(artifact=artifact,
                                                content=blob,
                                                relative_path=digest)
                blob_artifact.save()
            except IntegrityError:
                pass

            with repository.new_version() as new_version:
                new_version.add_content(models.Blob.objects.filter(pk=blob.pk))

            upload.delete()

            return BlobResponse(blob, path, 201, request)
        else:
            raise Exception("The digest did not match")
Example #11
 def _to_d_artifact(self, relative_path, data=None):
     artifact = Artifact(**_get_checksums(data or {}))
     url_path = os.path.join(self.parsed_url.path, relative_path)
     return DeclarativeFailsafeArtifact(
         artifact,
         urlunparse(self.parsed_url._replace(path=url_path)),
         relative_path,
         self.remote,
         deferred_download=False,
     )
Example #12
    def generate():
        for key in delta.additions:
            relative_path = os.path.join('gems',
                                         key.name + '-' + key.version + '.gem')
            path = os.path.join(root_dir, relative_path)
            url = urlunparse(parsed_url._replace(path=path))

            spec_relative_path = os.path.join(
                'quick/Marshal.4.8',
                key.name + '-' + key.version + '.gemspec.rz')
            spec_path = os.path.join(root_dir, spec_relative_path)
            spec_url = urlunparse(parsed_url._replace(path=spec_path))

            gem = GemContent(name=key.name, version=key.version)
            content = PendingContent(
                gem,
                artifacts={
                    PendingArtifact(Artifact(), url, relative_path),
                    PendingArtifact(Artifact(), spec_url, spec_relative_path),
                })
            yield content
Example #13
    def _update_content(self, content, downloads):
        """Update the content using the download results."""
        for download_result in downloads:

            d_artifact = next(
                d for d in content.d_artifacts if d.url == download_result.url
            )
            if d_artifact.artifact.pk is None:
                new_artifact = Artifact(**download_result.artifact_attributes,
                                        file=download_result.path)
                d_artifact.artifact = new_artifact
Example #14
 def receive_artifact(self, chunk):
     """Handles assembling of Manifest as it's being uploaded."""
     with NamedTemporaryFile("ab") as temp_file:
         size = 0
         hashers = {}
         for algorithm in Artifact.DIGEST_FIELDS:
             hashers[algorithm] = getattr(hashlib, algorithm)()
         while True:
             subchunk = chunk.read(2000000)
             if not subchunk:
                 break
             temp_file.write(subchunk)
             size += len(subchunk)
             for algorithm in Artifact.DIGEST_FIELDS:
                 hashers[algorithm].update(subchunk)
         temp_file.flush()
         digests = {}
         for algorithm in Artifact.DIGEST_FIELDS:
             digests[algorithm] = hashers[algorithm].hexdigest()
         artifact = Artifact(file=temp_file.name, size=size, **digests)
         try:
             artifact.save()
         except IntegrityError:
             artifact = Artifact.objects.get(sha256=artifact.sha256)
             artifact.touch()
         return artifact
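receive_artifact streams the upload in bounded subchunks and feeds each one to every hasher, so arbitrarily large files are digested in a single pass with constant memory. A stdlib-only sketch of that loop, with an algorithm tuple standing in for Artifact.DIGEST_FIELDS:

import hashlib
import io

def multi_digest(chunk, algorithms=("md5", "sha1", "sha256"), bufsize=2_000_000):
    hashers = {name: hashlib.new(name) for name in algorithms}
    size = 0
    while True:
        subchunk = chunk.read(bufsize)
        if not subchunk:
            break
        size += len(subchunk)
        for hasher in hashers.values():
            hasher.update(subchunk)
    return size, {name: h.hexdigest() for name, h in hashers.items()}

size, digests = multi_digest(io.BytesIO(b"test content"))
print(size, digests["sha256"])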
Example #15
    async def __call__(self, in_q, out_q):
        """
        Build and emit `DeclarativeContent` from the Spec data.

        Args:
            in_q (asyncio.Queue): Unused because the first stage doesn't read from an input queue.
            out_q (asyncio.Queue): The out_q to send `DeclarativeContent` objects to

        """
        with ProgressBar(message='Downloading Metadata') as pb:
            parsed_url = urlparse(self.remote.url)
            root_dir = parsed_url.path
            specs_path = os.path.join(root_dir, 'specs.4.8.gz')
            specs_url = urlunparse(parsed_url._replace(path=specs_path))
            downloader = self.remote.get_downloader(url=specs_url)
            result = await downloader.run()
            pb.increment()

        with ProgressBar(message='Parsing Metadata') as pb:
            for key in read_specs(result.path):
                relative_path = os.path.join(
                    'gems', key.name + '-' + key.version + '.gem')
                path = os.path.join(root_dir, relative_path)
                url = urlunparse(parsed_url._replace(path=path))

                spec_relative_path = os.path.join(
                    'quick/Marshal.4.8',
                    key.name + '-' + key.version + '.gemspec.rz')
                spec_path = os.path.join(root_dir, spec_relative_path)
                spec_url = urlunparse(parsed_url._replace(path=spec_path))
                gem = GemContent(name=key.name, version=key.version)
                da_gem = DeclarativeArtifact(Artifact(), url, relative_path,
                                             self.remote)
                da_spec = DeclarativeArtifact(Artifact(), spec_url,
                                              spec_relative_path, self.remote)
                dc = DeclarativeContent(content=gem,
                                        d_artifacts=[da_gem, da_spec])
                pb.increment()
                await out_q.put(dc)
        await out_q.put(None)
Example #16
        def to_d_artifact(data):
            nonlocal release

            artifact = Artifact(**_get_checksums(data))
            relpath = os.path.join(os.path.dirname(release.relative_path), data["Name"])
            urlpath = os.path.join(self.parsed_url.path, relpath)
            return DeclarativeFailsafeArtifact(
                artifact,
                urlunparse(self.parsed_url._replace(path=urlpath)),
                relpath,
                self.remote,
                deferred_download=False,
            )
Example #17
    async def create_artifact(self,
                              pulp2_storage_path,
                              expected_digests=None,
                              expected_size=None,
                              downloaded=True):
        """
        Create a hard link if possible and then create an Artifact.

        If it's not possible to create a hard link, the file is copied to the Pulp 3 storage.
        For non-downloaded content, an Artifact with its expected checksum and size is created.
        """
        expected_digests = expected_digests or {}
        if not downloaded:
            if not expected_digests:
                raise ValueError(
                    _('No digest is provided for on_demand content creation. Pulp 2 '
                      'storage path: {}'.format(pulp2_storage_path)))
            artifact = Artifact(**expected_digests)
            artifact.size = expected_size
            return artifact

        artifact = Artifact.init_and_validate(
            pulp2_storage_path,
            expected_digests=expected_digests,
            expected_size=expected_size)

        pulp3_storage_relative_path = storage.get_artifact_path(
            artifact.sha256)
        pulp3_storage_path = os.path.join(settings.MEDIA_ROOT,
                                          pulp3_storage_relative_path)
        os.makedirs(os.path.dirname(pulp3_storage_path), exist_ok=True)

        is_copied = False
        try:
            os.link(pulp2_storage_path, pulp3_storage_path)
        except FileExistsError:
            pass
        except OSError:
            _logger.debug(
                _('Hard link cannot be created, file will be copied.'))
            shutil.copy2(pulp2_storage_path, pulp3_storage_path)
            is_copied = True

        if not expected_digests:
            expected_digests = {'sha256': artifact.sha256}

        if is_copied:
            # recalculate checksums to ensure that after being copied a file is still fine
            artifact = Artifact.init_and_validate(
                file=pulp3_storage_path,
                expected_digests=expected_digests,
                expected_size=expected_size)
        else:
            # a hard link has been created or the file already exists in the pulp 3 storage,
            # so the artifact's path can simply be updated; no checksum recalculation is needed.
            artifact.file = pulp3_storage_path

        return artifact
Example #18
def _save_artifact_blocking(artifact_attributes):
    saved_artifact = Artifact(**artifact_attributes)
    try:
        saved_artifact.save()
    except IntegrityError:
        del artifact_attributes["file"]
        saved_artifact = Artifact.objects.get(**artifact_attributes)
        saved_artifact.touch()
    return saved_artifact
Example #19
    async def run(self):
        """
        Build and emit `DeclarativeContent` from the Manifest data.
        """
        downloader = self.remote.get_downloader(url=self.remote.url)
        result = await downloader.run()
        data = [self.get_json_data(result.path)]
        dependencies = data[0].get("dependencies")
        to_download = []
        if dependencies:
            to_download.extend(dependencies.items())
            downloaded = []
            while to_download:
                next_batch = []
                for name, version in to_download:
                    new_url = self.remote.url.replace(data[0]["name"], name)
                    new_url = new_url.replace(data[0]["version"],
                                              version.replace("^", ""))
                    downloader = self.remote.get_downloader(url=new_url)
                    result = await downloader.run()
                    new_data = self.get_json_data(result.path)
                    data.append(new_data)
                    next_batch.extend(new_data.get("dependencies", {}).items())
                    downloaded.append((name, version))

                to_download.extend(next_batch)

                for dependency in downloaded:
                    if dependency in to_download:
                        to_download.remove(dependency)

        for pkg in data:
            package = Package(name=pkg["name"], version=pkg["version"])
            artifact = Artifact()  # make Artifact in memory-only
            url = pkg["dist"]["tarball"]
            da = DeclarativeArtifact(
                artifact,
                url,
                url.split("/")[-1],
                self.remote,
                deferred_download=self.deferred_download,
            )
            dc = DeclarativeContent(content=package, d_artifacts=[da])
            await self.put(dc)
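The dependency handling above is a breadth-first worklist: each batch of (name, version) pairs is fetched, their dependencies form the next batch, and already-downloaded pairs are pruned so nothing is walked twice. A minimal sketch with an in-memory registry standing in for the remote:

# made-up registry: each package maps to its dependency list
metadata = {
    ("a", "1"): {"deps": [("b", "1"), ("c", "1")]},
    ("b", "1"): {"deps": [("c", "1")]},
    ("c", "1"): {"deps": []},
}

def walk(root):
    to_download = [root]
    downloaded = []
    while to_download:
        next_batch = []
        for pair in to_download:
            next_batch.extend(metadata[pair]["deps"])
            downloaded.append(pair)
        # keep only dependencies that have not been fetched yet
        to_download = [pair for pair in next_batch if pair not in downloaded]
    return downloaded

print(walk(("a", "1")))  # [('a', '1'), ('b', '1'), ('c', '1')]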
Example #20
    def validate(self, data):
        """Validate the GemContent data."""
        data = super().validate(data)

        if "file" in data:
            if "artifact" in data:
                raise ValidationError(_("Only one of 'file' and 'artifact' may be specified."))
            data["artifact"] = Artifact.init_and_validate(data.pop("file"))
        elif "artifact" not in data:
            raise ValidationError(_("One of 'file' and 'artifact' must be specified."))

        if "request" not in self.context:
            data = self.deferred_validate(data)

        return data
Example #21
    async def _parse_packages(self, packages):
        progress_data = {
            "message": "Parsed Packages",
            "code": "sync.parsing.packages",
            "total": len(packages),
        }

        with ProgressReport(**progress_data) as packages_pb:
            while True:
                try:
                    (_, pkg) = packages.popitem(last=False)
                except KeyError:
                    break
                package = Package(**Package.createrepo_to_dict(pkg))
                del pkg
                artifact = Artifact(size=package.size_package)
                checksum_type = getattr(CHECKSUM_TYPES,
                                        package.checksum_type.upper())
                setattr(artifact, checksum_type, package.pkgId)
                url = urlpath_sanitize(self.data.remote_url,
                                       package.location_href)
                filename = os.path.basename(package.location_href)
                da = DeclarativeArtifact(
                    artifact=artifact,
                    url=url,
                    relative_path=filename,
                    remote=self.remote,
                    deferred_download=self.deferred_download,
                )
                dc = DeclarativeContent(content=package, d_artifacts=[da])
                dc.extra_data = defaultdict(list)

                # find if a package relates to a modulemd
                if dc.content.nevra in self.data.nevra_to_module:
                    dc.content.is_modular = True
                    for dc_modulemd in self.data.nevra_to_module[dc.content.nevra]:
                        dc.extra_data["modulemd_relation"].append(dc_modulemd)
                        dc_modulemd.extra_data["package_relation"].append(dc)

                if dc.content.name in self.data.pkgname_to_groups:
                    for dc_group in self.data.pkgname_to_groups[dc.content.name]:
                        dc.extra_data["group_relations"].append(dc_group)
                        dc_group.extra_data["related_packages"].append(dc)

                packages_pb.increment()
                await self.put(dc)
Example #22
def _create_snippet(snippet_string):
    """
    Create snippet of modulemd[-defaults] as artifact.

    Args:
        snippet_string (string):
            Snippet with modulemd[-defaults] yaml

    Returns:
        Snippet as unsaved Artifact object

    """
    tmp_file = tempfile.NamedTemporaryFile(dir=os.getcwd(), delete=False)
    with open(tmp_file.name, "w") as snippet:
        snippet.write(snippet_string)
    return Artifact.init_and_validate(tmp_file.name)
Example #23
    def post(self, request, path):
        """
        Queues a task that creates a new Collection from an uploaded artifact.
        """
        distro = get_object_or_404(AnsibleDistribution, base_path=path)
        serializer = GalaxyCollectionUploadSerializer(
            data=request.data, context={"request": request})
        serializer.is_valid(raise_exception=True)

        artifact = Artifact.init_and_validate(
            serializer.validated_data["file"])
        artifact.save()

        async_result = self._dispatch_import_collection_task(
            artifact.pk, distro.repository)
        return OperationPostponedResponse(async_result, request)
Example #24
    def put(self, request, path, pk=None):
        """
        Create a blob from uploaded chunks.
        """
        _, repository = self.get_dr_push(request, path)

        digest = request.query_params["digest"]
        upload = models.Upload.objects.get(pk=pk, repository=repository)
        chunks = UploadChunk.objects.filter(upload=upload).order_by("offset")

        with NamedTemporaryFile("ab") as temp_file:
            for chunk in chunks:
                temp_file.write(chunk.file.read())
            temp_file.flush()

            uploaded_file = PulpTemporaryUploadedFile.from_file(
                File(open(temp_file.name, "rb")))

        if uploaded_file.hashers["sha256"].hexdigest() == digest[len("sha256:"):]:
            try:
                artifact = Artifact.init_and_validate(uploaded_file)
                artifact.save()
            except IntegrityError:
                artifact = Artifact.objects.get(sha256=artifact.sha256)
            try:
                blob = models.Blob(digest=digest,
                                   media_type=models.MEDIA_TYPE.REGULAR_BLOB)
                blob.save()
            except IntegrityError:
                blob = models.Blob.objects.get(digest=digest)
            try:
                blob_artifact = ContentArtifact(artifact=artifact,
                                                content=blob,
                                                relative_path=digest)
                blob_artifact.save()
            except IntegrityError:
                pass

            with repository.new_version() as new_version:
                new_version.add_content(models.Blob.objects.filter(pk=blob.pk))

            upload.delete()

            return BlobResponse(blob, path, 201, request)
        else:
            raise Exception("The digest did not match")
Example #25
    def setUp(self):
        with open(self.artifact_path, 'w') as f:
            f.write('Temp Artifact File')
        self.artifact = Artifact.init_and_validate(self.artifact_path)
        self.artifact.save()

        collection = Collection.objects.create(namespace='my_ns',
                                               name='my_name')
        self.collection_version = CollectionVersion.objects.create(
            collection=collection)

        ContentArtifact.objects.create(
            artifact=self.artifact,
            content=self.collection_version,
        )
Example #26
 def generate():
     for entry in manifest.read():
         key = Key(relative_path=entry.relative_path, digest=entry.digest)
         if key not in delta.additions:
             continue
         path = os.path.join(root_dir, entry.relative_path)
         url = urlunparse(parsed_url._replace(path=path))
         file = FileContent(relative_path=entry.relative_path,
                            digest=entry.digest)
         artifact = Artifact(size=entry.size, sha256=entry.digest)
         content = PendingContent(file,
                                  artifacts={
                                      PendingArtifact(
                                          artifact, url,
                                          entry.relative_path)
                                  })
         yield content
Example #27
    async def _read_installer_file_index(self, installer_file_index):
        """
        Parse an installer file index file of an apt repository.

        Put DeclarativeContent in the queue accordingly.

        Args:
            installer_file_index: object of type :class:`InstallerFileIndex`

        """
        # Interpret policy to download Artifacts or not
        deferred_download = self.remote.policy != Remote.IMMEDIATE

        file_list = defaultdict(dict)
        for content_artifact in installer_file_index.contentartifact_set.all():
            algorithm = InstallerFileIndex.FILE_ALGORITHM.get(
                os.path.basename(content_artifact.relative_path)
            )
            if not algorithm:
                continue
            for line in content_artifact.artifact.file:
                digest, filename = line.decode().strip().split(maxsplit=1)
                filename = os.path.normpath(filename)
                if (
                    filename in InstallerFileIndex.FILE_ALGORITHM
                ):  # strangely they may appear here
                    continue
                file_list[filename][algorithm] = digest

        for filename, digests in file_list.items():
            relpath = os.path.join(installer_file_index.relative_path, filename)
            urlpath = os.path.join(self.parsed_url.path, relpath)
            content_unit = GenericContent(
                sha256=digests["sha256"], relative_path=relpath
            )
            d_artifact = DeclarativeArtifact(
                artifact=Artifact(**digests),
                url=urlunparse(self.parsed_url._replace(path=urlpath)),
                relative_path=relpath,
                remote=self.remote,
                deferred_download=deferred_download,
            )
            d_content = DeclarativeContent(
                content=content_unit, d_artifacts=[d_artifact]
            )
            yield d_content
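The first loop above pivots several per-algorithm checksum files into one digest dict per filename, which then seeds each Artifact. A self-contained sketch of that pivot, with sample lines replacing the downloaded index artifacts:

import os
from collections import defaultdict

indices = {
    "sha256": ["eeff  images/netboot/mini.iso"],
    "md5": ["aabb  images/netboot/mini.iso"],
}

file_list = defaultdict(dict)
for algorithm, lines in indices.items():
    for line in lines:
        digest, filename = line.strip().split(maxsplit=1)
        file_list[os.path.normpath(filename)][algorithm] = digest

print(dict(file_list))
# {'images/netboot/mini.iso': {'sha256': 'eeff', 'md5': 'aabb'}}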
Example #28
    async def run(self):
        """
        Build and emit `DeclarativeContent` from the ansible metadata.
        """
        msg = "Parsing CollectionVersion Metadata"
        with ProgressReport(message=msg, code="parsing.metadata") as pb:
            async for metadata in self._fetch_collections():

                url = metadata["download_url"]

                collection_version = CollectionVersion(
                    namespace=metadata["namespace"]["name"],
                    name=metadata["collection"]["name"],
                    version=metadata["version"],
                )

                info = metadata["metadata"]

                info.pop("tags")
                for attr_name, attr_value in info.items():
                    if attr_value is None or attr_name not in collection_version.__dict__:
                        continue
                    setattr(collection_version, attr_name, attr_value)

                artifact = metadata["artifact"]

                d_artifact = DeclarativeArtifact(
                    artifact=Artifact(sha256=artifact["sha256"], size=artifact["size"]),
                    url=url,
                    relative_path=collection_version.relative_path,
                    remote=self.remote,
                    deferred_download=self.deferred_download,
                )

                extradata = dict(
                    docs_blob_url=metadata["docs_blob_url"],
                    deprecated=metadata["deprecated"],
                )

                d_content = DeclarativeContent(
                    content=collection_version,
                    d_artifacts=[d_artifact],
                    extra_data=extradata,
                )
                pb.increment()
                await self.put(d_content)
Example #29
def add_image_from_directory_to_repository(path, repository, tag):
    """
    Create a Manifest and all Blobs from a directory containing an OCI image.

    Args:
        path (str): Path to the directory containing the OCI image
        repository (:class:`pulpcore.plugin.models.Repository`): The destination repository
        tag (str): Tag name for the new image in the repository

    Returns:
        A :class:`pulpcore.plugin.models.RepositoryVersion` that contains the new OCI container
        image and tag.

    """
    manifest_path = "{}manifest.json".format(path)
    manifest_artifact = Artifact.init_and_validate(manifest_path)
    manifest_artifact.save()
    manifest_digest = "sha256:{}".format(manifest_artifact.sha256)
    manifest = Manifest(digest=manifest_digest,
                        schema_version=2,
                        media_type=MEDIA_TYPE.MANIFEST_OCI)
    manifest.save()
    ContentArtifact(artifact=manifest_artifact,
                    content=manifest,
                    relative_path=manifest_digest).save()
    tag = Tag(name=tag, tagged_manifest=manifest)
    tag.save()
    ContentArtifact(artifact=manifest_artifact,
                    content=tag,
                    relative_path=tag.name).save()
    with repository.new_version() as new_repo_version:
        new_repo_version.add_content(Manifest.objects.filter(pk=manifest.pk))
        new_repo_version.add_content(Tag.objects.filter(pk=tag.pk))
        with open(manifest_artifact.file.path, "r") as manifest_file:
            manifest_json = json.load(manifest_file)
            config_blob = get_or_create_blob(manifest_json["config"], manifest,
                                             path)
            manifest.config_blob = config_blob
            manifest.save()
            new_repo_version.add_content(
                Blob.objects.filter(pk=config_blob.pk))
            for layer in manifest_json["layers"]:
                blob = get_or_create_blob(layer, manifest, path)
                new_repo_version.add_content(Blob.objects.filter(pk=blob.pk))
    return new_repo_version
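Once the Manifest unit exists, the function walks manifest.json a second time to attach the config blob and every layer blob to the new repository version. A stdlib sketch of that walk, with inline JSON standing in for the file on disk:

import json

manifest_json = json.loads("""{
  "config": {"digest": "sha256:eeff"},
  "layers": [{"digest": "sha256:aabb"}, {"digest": "sha256:ccdd"}]
}""")

# visit the config blob first, then each layer blob
for blob in [manifest_json["config"], *manifest_json["layers"]]:
    print(blob["digest"])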
Example #30
def import_collection_from_path(path):
    """
    Import a single collection by path.

    This method will not fail if the Artifact already exists.

    Args:
        path: The path to the tarball to import.

    """
    artifact = Artifact.init_and_validate(path)

    try:
        artifact.save()
    except IntegrityError:
        artifact = Artifact.objects.get(sha256=artifact.sha256)

    import_collection(artifact.pk)
Example #31
    async def run(self):
        """
        Build and emit `DeclarativeContent` from the Manifest data.
        """
        remote_url = self.remote.url
        if not remote_url.endswith('/index.yaml'):
            remote_url += '/index.yaml'

        # TODO Skip reading generator to list?
        index_yaml = []
        with ProgressReport(message="Downloading Index",
                            code="downloading.metadata") as pb:
            downloader = self.remote.get_downloader(url=remote_url)
            result = await downloader.run()
            index_yaml = list(self.read_index_yaml(result.path))
            pb.increment()

        with ProgressReport(message="Parsing Entries",
                            code="parsing.metadata") as pb:
            pb.total = len(index_yaml)
            pb.save()

            for entry in index_yaml:
                content_entry = dict(
                    filter(lambda e: e[0] not in ('url',), entry.items()))

                unit = ChartContent(**content_entry)
                artifact = Artifact(sha256=entry['digest'])

                da = DeclarativeArtifact(
                    artifact,
                    urljoin(remote_url, entry['url']),
                    "{}-{}.tgz".format(entry['name'], entry['version']),
                    self.remote,
                    deferred_download=self.deferred_download,
                )
                dc = DeclarativeContent(content=unit, d_artifacts=[da])
                pb.increment()
                await self.put(dc)
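One detail worth calling out in the entry filtering: a one-element tuple needs its trailing comma. ('url',) is a tuple, while ('url') is just the string, which silently turns the membership test into a substring check on key names. A two-line demonstration with a made-up entry:

entry = {"name": "nginx", "version": "1.0.0", "url": "nginx-1.0.0.tgz", "u": "x"}
print({k: v for k, v in entry.items() if k not in ("url",)})  # drops only 'url'
print({k: v for k, v in entry.items() if k not in ("url")})   # also drops 'u'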
Example #33
    def create_manifest(self, list_dc, manifest_data):
        """
        Create an Image Manifest from manifest data in a ManifestList.

        Args:
            list_dc (pulpcore.plugin.stages.DeclarativeContent): dc for a ManifestList
            manifest_data (dict): Data about a single new ImageManifest.

        """
        digest = manifest_data["digest"]
        relative_url = "/v2/{name}/manifests/{digest}".format(
            name=self.remote.namespaced_upstream_name, digest=digest)
        manifest_url = urljoin(self.remote.url, relative_url)
        da = DeclarativeArtifact(
            artifact=Artifact(),
            url=manifest_url,
            relative_path=digest,
            remote=self.remote,
            extra_data={"headers": V2_ACCEPT_HEADERS},
        )
        manifest = Manifest(
            digest=manifest_data["digest"],
            schema_version=(
                2
                if manifest_data["mediaType"] in (MEDIA_TYPE.MANIFEST_V2, MEDIA_TYPE.MANIFEST_OCI)
                else 1
            ),
            media_type=manifest_data["mediaType"],
        )
        platform = {}
        p = manifest_data["platform"]
        platform["architecture"] = p["architecture"]
        platform["os"] = p["os"]
        platform["features"] = p.get("features", "")
        platform["variant"] = p.get("variant", "")
        platform["os.version"] = p.get("os.version", "")
        platform["os.features"] = p.get("os.features", "")
        man_dc = DeclarativeContent(
            content=manifest,
            d_artifacts=[da],
            extra_data={
                "relation": list_dc,
                "platform": platform
            },
        )
        return man_dc
Example #34
class PendingArtifact(Pending):
    """
    Represents an artifact related to content that is contained within
    the remote repository.

    Attributes:
        url (str): The URL used to download the artifact.
        relative_path (str): The relative path within the content.
        content (PendingContent): The associated pending content.
            This is the reverse relationship.

    Examples:
        >>>
        >>> from pulpcore.plugin.models import Artifact
        >>>
        >>> model = Artifact(...)  # DB model instance.
        >>> download = ...
        >>> ...
        >>> artifact = PendingArtifact(model, 'http://zoo.org/lion.rpm', 'lion.rpm')
        >>>
    """

    __slots__ = (
        'url',
        'relative_path',
        'content',
    )

    def __init__(self, model, url, relative_path, content=None):
        """
        Args:
            model (pulpcore.plugin.models.Artifact): A pending artifact model.
            url (str): The URL used to download the artifact.
            relative_path (str): The relative path within the content.
            content (PendingContent): The associated pending content.
                This is the reverse relationship.
        """
        super().__init__(model)
        self.url = url
        self.relative_path = relative_path
        self.content = content
        if content:
            content.artifacts.add(self)

    @property
    def model(self):
        """
        The model getter.

        Returns:
            pulpcore.plugin.models.Artifact: The pending model.
        """
        return self._model

    @property
    def stored_model(self):
        """
        The stored model getter.

        Returns:
            pulpcore.plugin.models.Artifact: The stored model.
        """
        return self._stored_model

    @stored_model.setter
    def stored_model(self, model):
        """
        The stored model setter.

        Args:
            model (pulpcore.plugin.models.Artifact): The stored model.
        """
        self._stored_model = model

    @property
    def changeset(self):
        """
        The changeset getter.

        Returns:
            pulpcore.plugin.changeset.Changeset: The active changeset.
        """
        return self.content.changeset

    @property
    def remote(self):
        """
        The remote getter.

        Returns:
            pulpcore.plugin.models.Remote: A remote.
        """
        return self.changeset.remote

    @property
    def downloader(self):
        """
        A downloader used to download the artifact.
        The downloader may be a NopDownloader (no-operation) when:
        - The _stored_model is set to a model fetched from the DB.
        - The download policy is deferred.

        Returns:
            asyncio.Future: A download future based on a downloader.
        """
        def done(task):
            try:
                task.result()
            except Exception:
                pass
            else:
                self.downloaded(downloader)
        if self._stored_model:
            downloader = NopDownloader()
            future = asyncio.ensure_future(downloader.run())
        else:
            downloader = self.remote.get_downloader(self.url)
            future = asyncio.ensure_future(downloader.run())
            future.add_done_callback(done)
        return future

    def downloaded(self, downloader):
        """
        The artifact (file) has been downloaded.
        A new _stored_model is created (and assigned) for the downloaded file.

        Args:
            downloader (BaseDownloader): The downloader that successfully completed.
        """
        self._stored_model = Artifact(file=downloader.path, **downloader.artifact_attributes)

    def artifact_q(self):
        """
        Get a query for the actual artifact.

        Returns:
            django.db.models.Q: A query to get the actual artifact.
        """
        q = Q(pk=None)
        for field in Artifact.RELIABLE_DIGEST_FIELDS:
            digest = getattr(self._model, field)
            if digest:
                q |= Q(**{field: digest})
        return q

    def settle(self):
        """
        Ensures that all prerequisite matters pertaining to adding the artifact
        to the DB have been settled.

        Notes:
            Called whenever an artifact has been processed.
        """
        self._settled = True

    def save(self):
        """
        Update the DB:
         - Create (or fetch) the Artifact.
         - Create (or fetch) the ContentArtifact.
         - Create (or update) the RemoteArtifact.
        """
        if self._stored_model:
            try:
                with transaction.atomic():
                    self._stored_model.save()
            except IntegrityError:
                q = self.artifact_q()
                self._stored_model = Artifact.objects.get(q)

        try:
            with transaction.atomic():
                content_artifact = ContentArtifact(
                    relative_path=self.relative_path,
                    content=self.content.stored_model,
                    artifact=self._stored_model)
                content_artifact.save()
        except IntegrityError:
            content_artifact = ContentArtifact.objects.get(
                relative_path=self.relative_path,
                content=self.content.stored_model)
            if self._stored_model:
                content_artifact.artifact = self._stored_model
                content_artifact.save()

        digests = {f: getattr(self._model, f) for f in Artifact.DIGEST_FIELDS}

        try:
            with transaction.atomic():
                remote_artifact = RemoteArtifact(
                    url=self.url,
                    remote=self.remote,
                    content_artifact=content_artifact,
                    size=self._model.size,
                    **digests)
                remote_artifact.save()
        except IntegrityError:
            q_set = RemoteArtifact.objects.filter(
                remote=self.remote,
                content_artifact=content_artifact)
            q_set.update(
                url=self.url,
                size=self._model.size,
                **digests)

    def __hash__(self):
        return hash(self.relative_path)
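artifact_q() starts from Q(pk=None), a predicate that matches no row on its own, and ORs in one clause per digest the pending model actually carries, so any reliable digest can locate the stored artifact. A short sketch of that composition (assumes Django is installed; building Q objects needs no configured project, and the digest values are made up):

from django.db.models import Q

digests = {"sha256": "eeff", "sha1": None, "md5": "aabb"}
q = Q(pk=None)  # matches nothing by itself; OR-ing widens it
for field, digest in digests.items():
    if digest:
        q |= Q(**{field: digest})
print(q)  # (OR: ('pk', None), ('sha256', 'eeff'), ('md5', 'aabb'))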
Example #35
class TestPackage(TestCase):
    """Test Package content type."""

    PACKAGE_PARAGRAPH = 'Package: aegir\n' \
                        'Version: 0.1-edda0\n' \
                        'Architecture: sea\n' \
                        'Maintainer: Utgardloki\n' \
                        'Description: A sea jötunn associated with the ocean.\n' \
                        'MD5sum: aabb\n' \
                        'SHA1: ccdd\n' \
                        'SHA256: eeff\n' \
                        'Filename: pool/a/aegir/aegir_0.1-edda0_sea.deb\n'

    def setUp(self):
        """Setup database fixtures."""
        self.package1 = Package(
            package_name='aegir',
            version='0.1-edda0',
            architecture='sea',
            maintainer='Utgardloki',
            description='A sea jötunn associated with the ocean.',
        )
        self.package1.save()
        self.artifact1 = Artifact(
            size=42,
            md5='aabb',
            sha1='ccdd',
            sha256='eeff',
            file=SimpleUploadedFile('test_filename', b'test content'),
        )
        self.artifact1.save()
        ContentArtifact(artifact=self.artifact1, content=self.package1).save()

    def test_str(self):
        """Test package str."""
        self.assertEqual(str(self.package1), '<Package: aegir_0.1-edda0_sea>')

    def test_filename(self):
        """Test that the pool filename of a package is correct."""
        self.assertEqual(self.package1.filename(),
                         'pool/a/aegir/aegir_0.1-edda0_sea.deb')

    def test_filename_with_component(self):
        """Test that the pool filename of a package with component is correct."""
        self.assertEqual(self.package1.filename('joetunn'),
                         'pool/joetunn/a/aegir/aegir_0.1-edda0_sea.deb')

    def test_to822(self):
        """Test if package transforms correctly into 822dict."""
        package_dict = self.package1.to822('joetunn')
        self.assertEqual(package_dict['package'], self.package1.package_name)
        self.assertEqual(package_dict['version'], self.package1.version)
        self.assertEqual(package_dict['architecture'], self.package1.architecture)
        self.assertEqual(package_dict['maintainer'], self.package1.maintainer)
        self.assertEqual(package_dict['description'], self.package1.description)
        self.assertEqual(package_dict['md5sum'], self.artifact1.md5)
        self.assertEqual(package_dict['sha1'], self.artifact1.sha1)
        self.assertEqual(package_dict['sha256'], self.artifact1.sha256)
        self.assertEqual(package_dict['filename'], self.package1.filename('joetunn'))

    def test_to822_dump(self):
        """Test dump to package index."""
        self.assertEqual(self.package1.to822().dump(), self.PACKAGE_PARAGRAPH)
Example #36
    async def run(self):
        """
        DockerFirstStage.
        """
        future_manifests = []
        tag_list = []
        to_download = []
        man_dcs = {}
        total_blobs = []

        with ProgressBar(message='Downloading tag list', total=1) as pb:
            repo_name = self.remote.namespaced_upstream_name
            relative_url = '/v2/{name}/tags/list'.format(name=repo_name)
            tag_list_url = urljoin(self.remote.url, relative_url)
            list_downloader = self.remote.get_downloader(url=tag_list_url)
            await list_downloader.run(extra_data={'repo_name': repo_name})

            with open(list_downloader.path) as tags_raw:
                tags_dict = json.loads(tags_raw.read())
                tag_list = tags_dict['tags']

            # check for the presence of the pagination link header
            link = list_downloader.response_headers.get('Link')
            await self.handle_pagination(link, repo_name, tag_list)
            whitelist_tags = self.remote.whitelist_tags
            if whitelist_tags:
                tag_list = list(set(tag_list) & set(whitelist_tags.split(',')))
            pb.increment()

        msg = 'Creating Download requests for v2 Tags'
        with ProgressBar(message=msg, total=len(tag_list)) as pb:
            for tag_name in tag_list:
                relative_url = '/v2/{name}/manifests/{tag}'.format(
                    name=self.remote.namespaced_upstream_name,
                    tag=tag_name,
                )
                url = urljoin(self.remote.url, relative_url)
                downloader = self.remote.get_downloader(url=url)
                to_download.append(downloader.run(extra_data={'headers': V2_ACCEPT_HEADERS}))
                pb.increment()

        pb_parsed_tags = ProgressBar(message='Processing v2 Tags', state='running')
        pb_parsed_ml_tags = ProgressBar(message='Parsing Manifest List Tags', state='running')
        pb_parsed_m_tags = ProgressBar(message='Parsing Manifests Tags', state='running')
        global pb_parsed_blobs
        pb_parsed_blobs = ProgressBar(message='Parsing Blobs', state='running')
        pb_parsed_man = ProgressBar(message='Parsing Manifests', state='running')

        for download_tag in asyncio.as_completed(to_download):
            tag = await download_tag
            with open(tag.path) as content_file:
                raw = content_file.read()
            content_data = json.loads(raw)
            mediatype = content_data.get('mediaType')
            tag.artifact_attributes['file'] = tag.path
            saved_artifact = Artifact(**tag.artifact_attributes)
            try:
                saved_artifact.save()
            except IntegrityError:
                del tag.artifact_attributes['file']
                saved_artifact = Artifact.objects.get(**tag.artifact_attributes)
            tag_dc = self.create_tag(mediatype, saved_artifact, tag.url)

            if type(tag_dc.content) is ManifestListTag:
                list_dc = self.create_tagged_manifest_list(
                    tag_dc, content_data)
                await self.put(list_dc)
                pb_parsed_ml_tags.increment()
                tag_dc.extra_data['list_relation'] = list_dc
                for manifest_data in content_data.get('manifests'):
                    man_dc = self.create_manifest(list_dc, manifest_data)
                    future_manifests.append(man_dc.get_or_create_future())
                    man_dcs[man_dc.content.digest] = man_dc
                    await self.put(man_dc)
                    pb_parsed_man.increment()
            elif type(tag_dc.content) is ManifestTag:
                man_dc = self.create_tagged_manifest(tag_dc, content_data)
                await self.put(man_dc)
                pb_parsed_m_tags.increment()
                tag_dc.extra_data['man_relation'] = man_dc
                self.handle_blobs(man_dc, content_data, total_blobs)
            await self.put(tag_dc)
            pb_parsed_tags.increment()

        for pb in (pb_parsed_tags, pb_parsed_ml_tags, pb_parsed_m_tags, pb_parsed_man):
            pb.state = 'completed'
            pb.total = pb.done
            pb.save()

        for manifest_future in asyncio.as_completed(future_manifests):
            man = await manifest_future
            with man._artifacts.get().file.open() as content_file:
                raw = content_file.read()
            content_data = json.loads(raw)
            man_dc = man_dcs[man.digest]
            self.handle_blobs(man_dc, content_data, total_blobs)
        for blob in total_blobs:
            await self.put(blob)

        pb_parsed_blobs.state = 'completed'
        pb_parsed_blobs.total = pb_parsed_blobs.done
        pb_parsed_blobs.save()