Example #1
0
    def create(self, request):
        """
        Create GemContent from an artifact.

        The gem's name and version are taken from ``analyse_gem`` run on the artifact's file
        and added to the serializer input. After the content is saved, two ContentArtifacts
        are created for it: the artifact itself under ``gems/`` and an artifact built from
        the returned spec data under ``quick/Marshal.4.8/``.

        Args:
            request: The incoming request. ``request.data`` must contain an ``artifact``
                entry; it is popped from the data before validation.

        Returns:
            Response: The serialized content with status 201.

        Raises:
            serializers.ValidationError: If ``artifact`` is missing from the request data,
                or if the serializer rejects the data.
        """
        data = request.data
        try:
            artifact = self.get_resource(data.pop('artifact'), Artifact)
        except KeyError:
            raise serializers.ValidationError(
                detail={'artifact': _('This field is required')})

        name, version, spec_data = analyse_gem(artifact.file.name)
        data['name'] = name
        data['version'] = version

        serializer = self.get_serializer(data=data)
        serializer.is_valid(raise_exception=True)
        content = serializer.save()

        # Both relative paths share the "<name>-<version>" stem.
        stem = name + '-' + version
        relative_path = os.path.join('gems', stem + '.gem')
        spec_relative_path = os.path.join('quick/Marshal.4.8', stem + '.gemspec.rz')
        ContentArtifact(artifact=artifact,
                        content=content,
                        relative_path=relative_path).save()
        ContentArtifact(artifact=_artifact_from_data(spec_data),
                        content=content,
                        relative_path=spec_relative_path).save()

        # The success headers describe the created resource, so they must be derived from
        # the serialized instance; the request payload never carries the resource URL.
        headers = self.get_success_headers(serializer.data)
        return Response(serializer.data,
                        status=status.HTTP_201_CREATED,
                        headers=headers)
Example #2
0
    def next_remote_artifact(self, additions):
        """
        Yield a RemoteArtifact for every manifest entry selected by ``additions``.

        For each selected entry an ExampleContent is built and remembered in
        ``self.content_dict`` under the entry's download URL, so it can be looked up later.
        The URL is the feed URL with its path replaced by the entry path joined onto the
        feed's directory.

        Args:
            additions (set of namedtuple Key): Set of Keys corresponding to ExampleContent
                that should be created.

        Yields:
            RemoteArtifact that is needed for the ExampleContent.
        """
        feed = urlparse(self.feed_url)
        feed_dir = os.path.dirname(feed.path)
        for entry in self.read_manifest():
            if Key(path=entry['path'], digest=entry['digest']) not in additions:
                continue
            entry_path = os.path.join(feed_dir, entry['path'])
            url = urlunparse(feed._replace(path=entry_path))
            self.content_dict[url] = ExampleContent(path=entry['path'],
                                                    digest=entry['digest'])
            # The ContentArtifact is created without content on purpose: the content is
            # attached right before writing to the database, which helps deal with race
            # conditions when saving Content.
            unbound_link = ContentArtifact(relative_path=entry['path'])
            yield RemoteArtifact(url=url,
                                 importer=self,
                                 sha256=entry['digest'],
                                 size=entry['size'],
                                 content_artifact=unbound_link)
Example #3
0
 def _save_manifest(self, artifact, manifest_digest, content_type, config_blob=None):
     """
     Save (or reuse) a schema-version-2 Manifest and link it to ``artifact``.

     If saving the Manifest raises IntegrityError, the existing Manifest with the same
     digest is fetched and ``touch()`` is called on it. The same fallback applies to the
     ContentArtifact: an existing row is fetched, and its artifact is filled in only when
     it has none.

     Args:
         artifact: The artifact to associate with the manifest.
         manifest_digest: Digest stored on the Manifest; also used as the relative path.
         content_type: Stored as the manifest's ``media_type``.
         config_blob: Optional config blob stored on the Manifest.

     Returns:
         The saved or pre-existing Manifest.
     """
     manifest = models.Manifest(digest=manifest_digest,
                                schema_version=2,
                                media_type=content_type,
                                config_blob=config_blob)
     try:
         manifest.save()
     except IntegrityError:
         # Insert was rejected: fall back to the stored row.
         manifest = models.Manifest.objects.get(digest=manifest.digest)
         manifest.touch()

     link_lookup = {"content": manifest, "relative_path": manifest.digest}
     try:
         ContentArtifact(artifact=artifact, **link_lookup).save()
     except IntegrityError:
         existing_link = ContentArtifact.objects.get(**link_lookup)
         if not existing_link.artifact:
             # Only fill in a missing artifact; never replace one that is already set.
             existing_link.artifact = artifact
             existing_link.save(update_fields=["artifact"])
     return manifest
Example #4
0
    def next_group(self, additions):
        """
        Yield one download Group per manifest entry selected by ``additions``.

        For each selected entry an ExampleContent is built and stored in
        ``self.content_dict`` under a tuple of its natural key field values, so it can be
        referenced after downloads complete. The emitted object is a
        :class:`pulpcore.plugin.download.asyncio.group.Group` holding that tuple and a
        single-element list with the entry's RemoteArtifact.

        Args:
            additions: Collection of Keys; only entries whose Key is contained are processed.
        """
        feed = urlparse(self.feed_url)
        feed_dir = os.path.dirname(feed.path)
        for entry in self.read_manifest():
            if Key(path=entry['path'], digest=entry['digest']) not in additions:
                continue
            entry_path = os.path.join(feed_dir, entry['path'])
            url = urlunparse(feed._replace(path=entry_path))
            content = ExampleContent(path=entry['path'], digest=entry['digest'])
            content_id = tuple(
                getattr(content, field) for field in content.natural_key_fields())
            self.content_dict[content_id] = content
            # The ContentArtifact is created without content on purpose: the content is
            # attached right before writing to the database, which helps deal with race
            # conditions when saving Content.
            unbound_link = ContentArtifact(relative_path=entry['path'])
            remote_artifact = RemoteArtifact(url=url,
                                             importer=self,
                                             sha256=entry['digest'],
                                             size=entry['size'],
                                             content_artifact=unbound_link)
            yield Group(content_id, [remote_artifact])
Example #5
0
    async def run(self):
        """
        Save the content of each incoming batch and pass every item downstream.

        Per batch, inside one transaction: ``_pre_save`` is awaited, every content unit
        without a primary key is saved, ContentArtifacts for the newly saved units are
        handed to ``ContentArtifact.objects.bulk_get_or_create`` and ``_post_save`` is
        awaited. Afterwards every item of the batch is forwarded with ``self.put``.

        Returns:
            The coroutine for this stage.
        """
        async for batch in self.batches():
            pending_links = []
            with transaction.atomic():
                await self._pre_save(batch)
                for d_content in batch:
                    if d_content.content.pk is not None:
                        # Already has a primary key; nothing to save for this unit.
                        continue
                    try:
                        # Nested atomic block so a failed insert does not abort the
                        # enclosing batch transaction.
                        with transaction.atomic():
                            d_content.content.save()
                    except IntegrityError:
                        # Swap in the stored unit; no ContentArtifacts are queued for it.
                        model = d_content.content.__class__
                        d_content.content = model.objects.get(d_content.content.q())
                    else:
                        pending_links.extend(
                            ContentArtifact(content=d_content.content,
                                            artifact=d_artifact.artifact,
                                            relative_path=d_artifact.relative_path)
                            for d_artifact in d_content.d_artifacts)
                ContentArtifact.objects.bulk_get_or_create(pending_links)
                await self._post_save(batch)
            for d_content in batch:
                await self.put(d_content)
Example #6
0
def get_or_create_blob(layer_json, manifest, path):
    """
    Return the Blob described by a layer entry of manifest.json, creating it if needed.

    When no Blob with the entry's digest exists, an Artifact is initialised from the layer
    file inside ``path`` and saved, followed by a new Blob and the ContentArtifact linking
    the two. Unless the blob's media type is ``MEDIA_TYPE.CONFIG_BLOB_OCI``, a BlobManifest
    relating the blob to ``manifest`` is saved as well.

    Args:
        layer_json (dict): Layer entry. Its "digest" key is always read; "mediaType" is
            read only when the Blob has to be created.
        manifest (class:`pulp_container.app.models.Manifest`): The manifest
        path (str): Path of the directory that contains layer. A trailing path separator
            is optional.

    Returns:
        class:`pulp_container.app.models.Blob`

    """
    import os  # function-scope import; the file's import block is not visible from here

    digest = layer_json["digest"]
    try:
        blob = Blob.objects.get(digest=digest)
    except Blob.DoesNotExist:
        # The layer file name is the digest without its leading "sha256:" prefix
        # (assumes digests always carry that prefix).
        layer_file_name = os.path.join(path, digest[len("sha256:"):])
        layer_artifact = Artifact.init_and_validate(layer_file_name)
        layer_artifact.save()
        blob = Blob(digest=digest, media_type=layer_json["mediaType"])
        blob.save()
        ContentArtifact(artifact=layer_artifact,
                        content=blob,
                        relative_path=digest).save()
    if blob.media_type != MEDIA_TYPE.CONFIG_BLOB_OCI:
        BlobManifest(manifest=manifest, manifest_blob=blob).save()
    return blob
Example #7
0
 def artifact(self, artifact):
     """
     Link ``artifact`` to this content through a new ContentArtifact.

     Nothing happens while this object has no primary key. The relative path is
     ``<role namespace>/<role name>/<version>.tar.gz``.
     """
     if not self.pk:
         return
     relative_path = "{}/{}/{}.tar.gz".format(
         self.role.namespace, self.role.name, self.version)
     ContentArtifact(artifact=artifact,
                     content=self,
                     relative_path=relative_path).save()
Example #8
0
    def put(self, request, path, pk=None):
        """
        Store an uploaded manifest, tag it as ``pk`` and add both to the repository.

        The request body is received as an artifact and parsed as JSON. The manifest is
        saved (or the existing one with the same digest is reused), linked to the artifact
        and to the blobs named in its "layers", and tagged with ``pk``. A new repository
        version then receives the manifest and the tag, after removing tags of that name.

        Returns:
            ManifestResponse with status 201.
        """
        _, repository = self.get_dr_push(request, path)
        artifact = self.receive_artifact(request.META["wsgi.input"])
        with storage.open(artifact.file.name) as artifact_file:
            manifest_bytes = artifact_file.read()
        content_data = json.loads(manifest_bytes)
        config_digest = content_data.get("config").get("digest")
        config_blob = models.Blob.objects.get(digest=config_digest)

        manifest = models.Manifest(
            digest="sha256:{id}".format(id=artifact.sha256),
            schema_version=2,
            media_type=request.content_type,
            config_blob=config_blob,
        )
        try:
            manifest.save()
        except IntegrityError:
            # Insert was rejected: continue with the stored manifest of that digest.
            manifest = models.Manifest.objects.get(digest=manifest.digest)

        try:
            ContentArtifact(artifact=artifact,
                            content=manifest,
                            relative_path=manifest.digest).save()
        except IntegrityError:
            pass

        layer_digests = [layer.get("digest") for layer in content_data.get("layers")]
        # Only blobs that already exist in the database are related to the manifest.
        known_blobs = models.Blob.objects.filter(digest__in=layer_digests)
        relations = [
            models.BlobManifest(manifest=manifest, manifest_blob=blob)
            for blob in known_blobs
        ]
        models.BlobManifest.objects.bulk_create(objs=relations,
                                                ignore_conflicts=True,
                                                batch_size=1000)

        tag = models.Tag(name=pk, tagged_manifest=manifest)
        try:
            tag.save()
        except IntegrityError:
            # The tag row is looked up again by name below, so nothing to do here.
            pass

        with repository.new_version() as new_version:
            new_version.add_content(
                models.Manifest.objects.filter(digest=manifest.digest))
            # Drop every tag of this name first, then add the one for this manifest.
            new_version.remove_content(
                models.Tag.objects.filter(name=tag.name))
            new_version.add_content(
                models.Tag.objects.filter(name=tag.name,
                                          tagged_manifest=manifest))
        return ManifestResponse(manifest, path, request, status=201)
Example #9
0
def add_image_from_directory_to_repository(path, repository, tag):
    """
    Creates a Manifest and all blobs from a directory with OCI image

    The file ``manifest.json`` inside ``path`` is saved as an artifact, and a Manifest and
    a Tag are created from it. Both are linked to that same artifact through
    ContentArtifacts. Manifest, tag, config blob and every layer blob are then added to a
    new repository version.

    Args:
        path (str): Path to directory with the OCI image. A trailing path separator is
            optional.
        repository (class:`pulpcore.plugin.models.Repository`): The destination repository
        tag (str): Tag name for the new image in the repository

    Returns:
        A class:`pulpcore.plugin.models.RepositoryVersion` that contains the new OCI container
        image and tag.

    """
    import os  # function-scope import; the file's import block is not visible from here

    manifest_path = os.path.join(path, "manifest.json")
    manifest_artifact = Artifact.init_and_validate(manifest_path)
    manifest_artifact.save()
    manifest_digest = "sha256:{}".format(manifest_artifact.sha256)
    manifest = Manifest(digest=manifest_digest,
                        schema_version=2,
                        media_type=MEDIA_TYPE.MANIFEST_OCI)
    manifest.save()
    ContentArtifact(artifact=manifest_artifact,
                    content=manifest,
                    relative_path=manifest_digest).save()

    # Keep the ``tag`` argument (a name) distinct from the Tag model instance.
    tag_content = Tag(name=tag, tagged_manifest=manifest)
    tag_content.save()
    ContentArtifact(artifact=manifest_artifact,
                    content=tag_content,
                    relative_path=tag_content.name).save()

    with repository.new_version() as new_repo_version:
        new_repo_version.add_content(Manifest.objects.filter(pk=manifest.pk))
        new_repo_version.add_content(Tag.objects.filter(pk=tag_content.pk))
        # Only the parsing needs the file handle; release it before the database work.
        with open(manifest_artifact.file.path, "r") as manifest_file:
            manifest_json = json.load(manifest_file)
        config_blob = get_or_create_blob(manifest_json["config"], manifest, path)
        manifest.config_blob = config_blob
        manifest.save()
        new_repo_version.add_content(Blob.objects.filter(pk=config_blob.pk))
        for layer in manifest_json["layers"]:
            blob = get_or_create_blob(layer, manifest, path)
            new_repo_version.add_content(Blob.objects.filter(pk=blob.pk))
    return new_repo_version
Example #10
0
 def artifact(self, artifact):
     """
     Set the artifact for this FileContent.

     A ContentArtifact linking ``artifact`` to this content at ``self.relative_path`` is
     saved. Nothing happens while this object has no primary key.
     """
     if not self.pk:
         return
     ContentArtifact(artifact=artifact,
                     content=self,
                     relative_path=self.relative_path).save()
Example #11
0
 def artifact(self, artifact):
     """
     Set the artifact for this Ansible Role version.

     A ContentArtifact linking ``artifact`` to this content is saved under
     ``<role namespace>/<role name>/<version>.tar.gz``. Nothing happens while this object
     has no primary key.
     """
     if not self.pk:
         return
     relative_path = "{namespace}/{name}/{version}.tar.gz".format(
         namespace=self.role.namespace,
         name=self.role.name,
         version=self.version)
     ContentArtifact(artifact=artifact,
                     content=self,
                     relative_path=relative_path).save()
Example #12
0
    def save(self):
        """
        Update the DB:
         - Create (or fetch) the Artifact.
         - Create (or fetch) the ContentArtifact.
         - Create (or update) the RemoteArtifact.

        Each insert runs in its own ``transaction.atomic()`` block. When an insert raises
        ``IntegrityError`` the existing row is fetched (Artifact, ContentArtifact) or
        updated (RemoteArtifact) instead.
        """
        # Step 1: only attempted when a stored artifact model is set on this object.
        if self._stored_model:
            try:
                with transaction.atomic():
                    self._stored_model.save()
            except IntegrityError:
                # Insert rejected: continue with the stored row matching artifact_q().
                q = self.artifact_q()
                self._stored_model = Artifact.objects.get(q)

        # Step 2: link the content to the artifact; ``artifact`` is whatever
        # ``_stored_model`` currently holds, which may be falsy.
        try:
            with transaction.atomic():
                content_artifact = ContentArtifact(
                    relative_path=self.relative_path,
                    content=self.content.stored_model,
                    artifact=self._stored_model)
                content_artifact.save()
        except IntegrityError:
            # The existing link is looked up by relative path and content only.
            content_artifact = ContentArtifact.objects.get(
                relative_path=self.relative_path,
                content=self.content.stored_model)
            if self._stored_model:
                # Point the existing link at the stored artifact.
                content_artifact.artifact = self._stored_model
                content_artifact.save()

        # Digest values are read from ``self._model``, not from ``self._stored_model``.
        digests = {f: getattr(self._model, f) for f in Artifact.DIGEST_FIELDS}

        # Step 3: record where the artifact can be fetched from.
        try:
            with transaction.atomic():
                remote_artifact = RemoteArtifact(
                    url=self.url,
                    remote=self.remote,
                    content_artifact=content_artifact,
                    size=self._model.size,
                    **digests)
                remote_artifact.save()
        except IntegrityError:
            # Insert rejected: refresh url, size and digests on the rows matching this
            # remote and content artifact.
            q_set = RemoteArtifact.objects.filter(
                remote=self.remote,
                content_artifact=content_artifact)
            q_set.update(
                url=self.url,
                size=self._model.size,
                **digests)
Example #13
0
    def put(self, request, path, pk=None):
        """
        Create a blob from uploaded chunks.

        The chunks of upload ``pk`` are concatenated in offset order into a temporary
        file. When the sha256 of the result matches the ``digest`` query parameter, an
        Artifact, a Blob and the ContentArtifact linking them are saved (existing rows are
        reused), the blob is added to a new repository version and the upload is deleted.

        Returns:
            BlobResponse with status 201.

        Raises:
            Exception: If the computed sha256 does not match the requested digest.
        """
        _, repository = self.get_dr_push(request, path)

        digest = request.query_params["digest"]
        upload = models.Upload.objects.get(pk=pk, repository=repository)
        ordered_chunks = UploadChunk.objects.filter(upload=upload).order_by("offset")

        with NamedTemporaryFile("ab") as temp_file:
            for chunk in ordered_chunks:
                temp_file.write(chunk.file.read())
            temp_file.flush()

            # Must happen while the temporary file still exists.
            uploaded_file = PulpTemporaryUploadedFile.from_file(
                File(open(temp_file.name, "rb")))

        if uploaded_file.hashers["sha256"].hexdigest() != digest[len("sha256:"):]:
            raise Exception("The digest did not match")

        try:
            artifact = Artifact.init_and_validate(uploaded_file)
            artifact.save()
        except IntegrityError:
            artifact = Artifact.objects.get(sha256=artifact.sha256)

        blob = models.Blob(digest=digest,
                           media_type=models.MEDIA_TYPE.REGULAR_BLOB)
        try:
            blob.save()
        except IntegrityError:
            blob = models.Blob.objects.get(digest=digest)

        try:
            ContentArtifact(artifact=artifact,
                            content=blob,
                            relative_path=digest).save()
        except IntegrityError:
            # The link already exists; nothing to do.
            pass

        with repository.new_version() as new_version:
            new_version.add_content(models.Blob.objects.filter(pk=blob.pk))

        upload.delete()

        return BlobResponse(blob, path, 201, request)
Example #14
0
 def setUp(self):
     """Create one saved Package and one saved Artifact, and link them together."""
     package_fields = {
         "package_name": "aegir",
         "version": "0.1-edda0",
         "architecture": "sea",
         "maintainer": "Utgardloki",
         "description": "A sea jötunn associated with the ocean.",
     }
     self.package1 = Package(**package_fields)
     self.package1.save()

     artifact_fields = {
         "size": 42,
         "md5": "aabb",
         "sha1": "ccdd",
         "sha256": "eeff",
         "file": SimpleUploadedFile("test_filename", b"test content"),
     }
     self.artifact1 = Artifact(**artifact_fields)
     self.artifact1.save()

     # No relative_path is given for the link.
     link = ContentArtifact(artifact=self.artifact1, content=self.package1)
     link.save()
Example #15
0
 def setUp(self):
     """Create one saved Package and one saved Artifact, and link them together."""
     package_fields = dict(
         package_name='aegir',
         version='0.1-edda0',
         architecture='sea',
         maintainer='Utgardloki',
         description='A sea jötunn associated with the ocean.',
     )
     self.package1 = Package(**package_fields)
     self.package1.save()

     artifact_fields = dict(
         size=42,
         md5='aabb',
         sha1='ccdd',
         sha256='eeff',
         file=SimpleUploadedFile('test_filename', b'test content'),
     )
     self.artifact1 = Artifact(**artifact_fields)
     self.artifact1.save()

     # No relative_path is given for the link.
     link = ContentArtifact(artifact=self.artifact1, content=self.package1)
     link.save()
Example #16
0
    async def run(self):
        """
        Save the content of each incoming batch and pass every item downstream.

        Per batch, inside one transaction: ``_pre_save`` is awaited, every content unit
        still flagged as "adding" is saved, ContentArtifacts for the newly saved units are
        handed to ``ContentArtifact.objects.bulk_get_or_create`` and ``_post_save`` is
        awaited. Afterwards every item of the batch is forwarded with ``self.put``.

        Returns:
            The coroutine for this stage.
        """
        async for batch in self.batches():
            pending_links = []
            with transaction.atomic():
                await self._pre_save(batch)

                for d_content in batch:
                    if not d_content.content._state.adding:
                        # Not a first-time save; leave this unit untouched.
                        continue
                    try:
                        # Nested atomic block so a failed insert does not abort the
                        # enclosing batch transaction.
                        with transaction.atomic():
                            d_content.content.save()
                    except IntegrityError as e:
                        model = d_content.content.__class__
                        try:
                            d_content.content = model.objects.get(d_content.content.q())
                        except ObjectDoesNotExist:
                            # No stored unit explains the failure: surface the original error.
                            raise e
                    else:
                        for d_artifact in d_content.d_artifacts:
                            # An artifact still flagged as "adding" is linked as None
                            # (on-demand synced artifacts).
                            is_saved = not d_artifact.artifact._state.adding
                            pending_links.append(ContentArtifact(
                                content=d_content.content,
                                artifact=d_artifact.artifact if is_saved else None,
                                relative_path=d_artifact.relative_path,
                            ))
                ContentArtifact.objects.bulk_get_or_create(pending_links)
                await self._post_save(batch)
            for d_content in batch:
                await self.put(d_content)
Example #17
0
    def put(self, request, path, pk=None):
        """
        Turn the finished upload ``pk`` into a blob of the repository.

        When the upload's sha256 matches the ``digest`` query parameter, an Artifact built
        from the upload's file and digests, a Blob and the ContentArtifact linking them
        are saved (existing rows are reused), the blob is added to a new repository
        version and the upload is deleted.

        Returns:
            BlobResponse with status 201.

        Raises:
            Exception: If the upload's sha256 does not match the requested digest.
        """
        _, repository = self.get_dr_push(request, path)

        digest = request.query_params["digest"]
        upload = models.Upload.objects.get(pk=pk, repository=repository)

        if upload.sha256 != digest[len("sha256:"):]:
            raise Exception("The digest did not match")

        artifact = Artifact(
            file=upload.file.name,
            md5=upload.md5,
            sha1=upload.sha1,
            sha256=upload.sha256,
            sha384=upload.sha384,
            sha512=upload.sha512,
            size=upload.file.size,
        )
        try:
            artifact.save()
        except IntegrityError:
            artifact = Artifact.objects.get(sha256=artifact.sha256)

        blob = models.Blob(digest=digest,
                           media_type=models.MEDIA_TYPE.REGULAR_BLOB)
        try:
            blob.save()
        except IntegrityError:
            blob = models.Blob.objects.get(digest=digest)

        try:
            ContentArtifact(artifact=artifact,
                            content=blob,
                            relative_path=digest).save()
        except IntegrityError:
            # The link already exists; nothing to do.
            pass

        with repository.new_version() as new_version:
            new_version.add_content(models.Blob.objects.filter(pk=blob.pk))

        upload.delete()

        return BlobResponse(blob, path, 201, request)
Example #18
0
    def create_content_artifacts(self, dc):
        """
        Relate the content of ``dc`` to each of its artifacts and record their remotes.

        For every declarative artifact a ContentArtifact is saved (or the existing one is
        fetched), then a RemoteArtifact carrying the URL, size, digests and remote is
        saved for it. A RemoteArtifact that cannot be inserted is silently skipped.

        Args:
            dc (class:`~pulpcore.plugin.stages.DeclarativeContent`): Object containing Content and
                                                                     Artifacts to relate.
        """
        digest_names = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512')
        for da in dc.d_artifacts:
            link_fields = {
                'content': dc.content,
                'artifact': da.artifact,
                'relative_path': da.relative_path,
            }
            content_artifact = ContentArtifact(**link_fields)
            try:
                content_artifact.save()
            except IntegrityError:
                # Insert rejected: use the stored link with the same three fields.
                content_artifact = ContentArtifact.objects.get(**link_fields)

            remote_fields = {'url': da.url, 'size': da.artifact.size}
            for digest_name in digest_names:
                remote_fields[digest_name] = getattr(da.artifact, digest_name)
            remote_fields['remote'] = da.remote

            try:
                RemoteArtifact(content_artifact=content_artifact, **remote_fields).save()
            except IntegrityError:
                pass
Example #19
0
    def create(self, validated_data):
        """
        Create a Package together with the ContentArtifact for its artifact.

        The ``artifact`` entry is removed from ``validated_data`` before the remaining
        fields are used to create the Package. The artifact is then linked to the package
        under the package's ``filename``.

        Args:
            validated_data (dict): Data used to create the Package

        Returns:
            models.Package: The created Package

        """
        package_artifact = validated_data.pop('artifact')
        new_package = Package.objects.create(**validated_data)

        ContentArtifact(artifact=package_artifact,
                        content=new_package,
                        relative_path=new_package.filename).save()
        return new_package
Example #20
0
    def put(self, request, path, pk=None):
        """
        Store an uploaded manifest, tag it as ``pk`` and dispatch the repository update.

        The request body is received as an artifact and parsed as JSON. The manifest is
        saved (or the existing one with the same digest is reused), linked to the artifact
        and to the blobs named in its "layers", and tagged with ``pk``. An
        ``add_and_remove`` task is then dispatched. It adds the tag and the manifest, and
        removes tags of the same name in the latest repository version that point at a
        different manifest.

        The task is polled up to three times, one second apart.

        Returns:
            ManifestResponse with status 201 once the task has completed.

        Raises:
            Throttled: If the task is still waiting or running after the polling window.
            Exception: Carrying the task's error if the task ended in any other state.
        """
        _, repository = self.get_dr_push(request, path)
        chunk = request.META["wsgi.input"]
        artifact = self.receive_artifact(chunk)
        with storage.open(artifact.file.name) as artifact_file:
            raw_data = artifact_file.read()
        content_data = json.loads(raw_data)
        config_layer = content_data.get("config")
        config_blob = models.Blob.objects.get(digest=config_layer.get("digest"))

        manifest = models.Manifest(
            digest="sha256:{id}".format(id=artifact.sha256),
            schema_version=2,
            media_type=request.content_type,
            config_blob=config_blob,
        )
        try:
            manifest.save()
        except IntegrityError:
            manifest = models.Manifest.objects.get(digest=manifest.digest)
        ca = ContentArtifact(artifact=artifact, content=manifest, relative_path=manifest.digest)
        try:
            ca.save()
        except IntegrityError:
            # The link already exists; nothing to do.
            pass

        layer_digests = [layer.get("digest") for layer in content_data.get("layers")]
        # Only blobs that already exist in the database are related to the manifest.
        blobs_qs = models.Blob.objects.filter(digest__in=layer_digests)
        thru = [models.BlobManifest(manifest=manifest, manifest_blob=blob) for blob in blobs_qs]
        models.BlobManifest.objects.bulk_create(objs=thru, ignore_conflicts=True, batch_size=1000)

        tag = models.Tag(name=pk, tagged_manifest=manifest)
        try:
            tag.save()
        except IntegrityError:
            tag = models.Tag.objects.get(name=tag.name, tagged_manifest=manifest)

        # Filter on the tag's *name*: passing the Tag instance itself would compare the
        # name column against the instance's string form and match nothing.
        tags_to_remove = models.Tag.objects.filter(
            pk__in=repository.latest_version().content.all(), name=tag.name
        ).exclude(tagged_manifest=manifest)
        dispatched_task = dispatch(
            add_and_remove,
            [repository],
            kwargs={
                "repository_pk": str(repository.pk),
                "add_content_units": [str(tag.pk), str(manifest.pk)],
                # flat=True yields bare primary keys instead of 1-tuples.
                "remove_content_units": [
                    str(tag_pk) for tag_pk in tags_to_remove.values_list("pk", flat=True)
                ],
            },
        )

        # Wait a small amount of time
        # NOTE(review): time.sleep blocks the request handler for up to three seconds.
        for _attempt in range(3):
            time.sleep(1)
            task = Task.objects.get(pk=dispatched_task.pk)
            if task.state == "completed":
                task.delete()
                return ManifestResponse(manifest, path, request, status=201)
            elif task.state in ["waiting", "running"]:
                continue
            else:
                error = task.error
                task.delete()
                raise Exception(str(error))
        raise Throttled()
Example #21
0
    def put(self, request, path, pk=None):
        """
        Create a blob from uploaded chunks.

        If the upload ``pk`` still exists, its chunks are concatenated in offset order and
        the sha256 of the result is compared with the ``digest`` query parameter. On a
        match, an Artifact, a Blob and the ContentArtifact linking them are saved (existing
        rows are reused), the upload is deleted and an ``add_and_remove`` task adding the
        blob to the repository is dispatched and polled up to three times, one second
        apart.

        If the upload no longer exists, the request is answered from the state of the most
        recent ``add_and_remove`` task that reserved ``upload:<pk>``.

        Returns:
            BlobResponse with status 201 once the task has completed.

        Raises:
            models.Upload.DoesNotExist: If neither the upload nor a task for it exists.
            Throttled: If the task is still waiting or running.
            Exception: If the digest does not match, or carrying the task's error if the
                task ended in any other state.
        """
        _, repository = self.get_dr_push(request, path)

        digest = request.query_params["digest"]
        # Try to see if the client came back after we told it to backoff with the ``Throttled``
        # exception. In that case we answer based on the task state, or make it backoff again.
        # This mechanism seems to work with podman but not with docker. However we let the task run
        # anyway, since all clients will look with a HEAD request before attempting to upload a blob
        # again.
        try:
            upload = models.Upload.objects.get(pk=pk, repository=repository)
        except models.Upload.DoesNotExist as e_upload:
            # Upload has been deleted => task has started or even finished
            task = Task.objects.filter(
                name__endswith="add_and_remove",
                reserved_resources_record__resource=f"upload:{pk}",
            ).last()
            if task is None:
                # ``.last()`` returns None instead of raising when nothing matches.
                # No upload and no task for it => the upload probably never existed
                # return 404
                raise e_upload

            if task.state == "completed":
                task.delete()
                blob = models.Blob.objects.get(digest=digest)
                return BlobResponse(blob, path, 201, request)
            elif task.state in ["waiting", "running"]:
                raise Throttled()
            else:
                error = task.error
                task.delete()
                raise Exception(str(error))

        chunks = UploadChunk.objects.filter(upload=upload).order_by("offset")

        with NamedTemporaryFile("ab") as temp_file:
            for chunk in chunks:
                temp_file.write(chunk.file.read())
            temp_file.flush()

            # Must happen while the temporary file still exists.
            # NOTE(review): the handle returned by open() is never closed explicitly.
            uploaded_file = PulpTemporaryUploadedFile.from_file(File(open(temp_file.name, "rb")))

        if uploaded_file.hashers["sha256"].hexdigest() == digest[len("sha256:") :]:
            try:
                artifact = Artifact.init_and_validate(uploaded_file)
                artifact.save()
            except IntegrityError:
                artifact = Artifact.objects.get(sha256=artifact.sha256)
            try:
                blob = models.Blob(digest=digest, media_type=models.MEDIA_TYPE.REGULAR_BLOB)
                blob.save()
            except IntegrityError:
                blob = models.Blob.objects.get(digest=digest)
            try:
                blob_artifact = ContentArtifact(
                    artifact=artifact, content=blob, relative_path=digest
                )
                blob_artifact.save()
            except IntegrityError:
                # The link already exists; nothing to do.
                pass

            upload.delete()

            # "upload:<pk>" is reserved so a returning client can find this task even
            # though the upload row is gone (see the lookup above).
            dispatched_task = dispatch(
                add_and_remove,
                [f"upload:{pk}", repository],
                kwargs={
                    "repository_pk": str(repository.pk),
                    "add_content_units": [str(blob.pk)],
                    "remove_content_units": [],
                },
            )

            # Wait a small amount of time
            for _attempt in range(3):
                time.sleep(1)
                task = Task.objects.get(pk=dispatched_task.pk)
                if task.state == "completed":
                    task.delete()
                    return BlobResponse(blob, path, 201, request)
                elif task.state in ["waiting", "running"]:
                    continue
                else:
                    error = task.error
                    task.delete()
                    raise Exception(str(error))
            raise Throttled()
        else:
            raise Exception("The digest did not match")
Example #22
0
 def artifact(self, artifact):
     """
     Link ``artifact`` to this content through a new ContentArtifact.

     The link uses ``self.relative_path``. Nothing happens while this object has no
     primary key.
     """
     if not self.pk:
         return
     ContentArtifact(artifact=artifact,
                     content=self,
                     relative_path=self.relative_path).save()
Example #23
0
    async def __call__(self, in_q, out_q):
        """
        Save unsaved content units along with their ContentArtifacts and RemoteArtifacts.

        Items are collected from `in_q` into a batch for as long as the queue has something
        immediately available; the coroutine only waits on the queue when the batch is empty.
        Within one transaction, each content unit without a primary key is saved and a
        ContentArtifact plus a RemoteArtifact is bulk-created for each of its declarative
        artifacts. Every item of the batch is then forwarded to `out_q`. A `None` item signals
        the end of the stream: once its batch is handled, `None` is put on `out_q` and the
        coroutine finishes.

        Args:
            in_q (:class:`asyncio.Queue`): The queue to receive
                :class:`~pulpcore.plugin.stages.DeclarativeContent` objects from.
            out_q (:class:`asyncio.Queue`): The queue to put
                :class:`~pulpcore.plugin.stages.DeclarativeContent` into.

        Returns:
            The coroutine for this stage.
        """
        digest_fields = ('md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512')
        batch = []
        shutdown = False
        while True:
            # Keep draining while items are immediately available; block on the queue only
            # when there is nothing to process yet.
            try:
                batch.append(in_q.get_nowait())
                continue
            except asyncio.QueueEmpty:
                if not (batch or shutdown):
                    batch.append(await in_q.get())
                    continue

            new_content_artifacts = []
            remote_data_by_key = {}

            with transaction.atomic():
                for d_content in batch:
                    if d_content is None:
                        # End-of-stream marker; finish this batch, then stop.
                        shutdown = True
                        continue
                    unit = d_content.content
                    if unit.pk is not None:
                        # Already has a primary key; nothing to save.
                        continue
                    unit.save()
                    for d_artifact in d_content.d_artifacts:
                        artifact = d_artifact.artifact
                        new_content_artifacts.append(
                            ContentArtifact(
                                content=unit,
                                artifact=artifact,
                                relative_path=d_artifact.relative_path,
                            )
                        )
                        remote_data = {'url': d_artifact.url, 'size': artifact.size}
                        remote_data.update(
                            (name, getattr(artifact, name)) for name in digest_fields
                        )
                        remote_data['remote'] = d_artifact.remote
                        # Remember the remote data until the ContentArtifact has been created.
                        key = str(unit.pk) + d_artifact.relative_path
                        remote_data_by_key[key] = remote_data

                created = ContentArtifact.objects.bulk_create(new_content_artifacts)
                new_remote_artifacts = [
                    RemoteArtifact(
                        content_artifact=ca,
                        **remote_data_by_key.pop(str(ca.content.pk) + ca.relative_path)
                    )
                    for ca in created
                ]
                RemoteArtifact.objects.bulk_create(new_remote_artifacts)

            for d_content in batch:
                if d_content is not None:
                    await out_q.put(d_content)
            if shutdown:
                break
            batch = []
        await out_q.put(None)
Example #24
0
            def process_batch():
                """
                Save the content in ``batch`` and create or update its ContentArtifacts.

                ``batch`` and ``self`` are taken from the enclosing scope. Everything runs
                inside one ``transaction.atomic()`` block, bracketed by ``self._pre_save(batch)``
                and ``self._post_save(batch)``. Note that ``batch`` is sorted in place.

                For each item of the batch:

                * Content not yet in the database (``_state.adding`` is true) is saved. On
                  success, a new ContentArtifact is queued for each of its declarative
                  artifacts. On ``IntegrityError`` the content is replaced by the row looked
                  up through ``content.q()``; if that lookup finds nothing, the original
                  ``IntegrityError`` is re-raised.
                * For content that already exists (including content swapped in after an
                  ``IntegrityError``), the existing ContentArtifacts matching a declarative
                  artifact whose artifact is already saved are collected so that their
                  ``artifact`` reference can be updated.

                Raises:
                    IntegrityError: When saving content fails and no matching existing
                        content can be found.
                """
                # New ContentArtifacts to create at the end.
                content_artifact_bulk = []
                # Accumulated (OR-ed) query selecting existing ContentArtifacts to update.
                to_update_ca_query = ContentArtifact.objects.none()
                # Existing ContentArtifacts, modified in memory, for the bulk_update call.
                to_update_ca_bulk = []
                # Maps (content pk, relative_path) -> artifact to assign.
                to_update_ca_artifact = {}
                with transaction.atomic():
                    self._pre_save(batch)
                    # Process the batch in dc.content.natural_keys order.
                    # This prevents deadlocks when we're processing the same/similar content
                    # in concurrent workers.
                    batch.sort(key=lambda x: "".join(
                        map(str, x.content.natural_key())))
                    for d_content in batch:
                        # Are we saving to the database for the first time?
                        content_already_saved = not d_content.content._state.adding
                        if not content_already_saved:
                            try:
                                # Nested atomic block; presumably so a failed save can be
                                # caught without breaking the outer transaction - see the
                                # Django transaction docs.
                                with transaction.atomic():
                                    d_content.content.save()
                            except IntegrityError as e:
                                # The save failed; fall back to the content that is already
                                # stored, located via the content's own q() lookup.
                                try:
                                    d_content.content = d_content.content.__class__.objects.get(
                                        d_content.content.q())
                                except ObjectDoesNotExist:
                                    # No existing row explains the failure; surface the
                                    # original IntegrityError.
                                    raise e
                            else:
                                # The save succeeded: queue a new ContentArtifact for every
                                # declarative artifact of this content.
                                for d_artifact in d_content.d_artifacts:
                                    if not d_artifact.artifact._state.adding:
                                        artifact = d_artifact.artifact
                                    else:
                                        # set to None for on-demand synced artifacts
                                        artifact = None
                                    content_artifact = ContentArtifact(
                                        content=d_content.content,
                                        artifact=artifact,
                                        relative_path=d_artifact.relative_path,
                                    )
                                    content_artifact_bulk.append(
                                        content_artifact)
                                # Newly saved content needs no update check below.
                                continue
                        # When the Content already exists, check if ContentArtifacts need to be
                        # updated
                        for d_artifact in d_content.d_artifacts:
                            if not d_artifact.artifact._state.adding:
                                # the artifact is already present in the database; update references
                                # Creating one large query and one large dictionary
                                to_update_ca_query |= ContentArtifact.objects.filter(
                                    content=d_content.content,
                                    relative_path=d_artifact.relative_path,
                                )
                                key = (d_content.content.pk,
                                       d_artifact.relative_path)
                                to_update_ca_artifact[
                                    key] = d_artifact.artifact
                    # Query db once and update each object in memory for bulk_update call
                    for content_artifact in to_update_ca_query.iterator():
                        key = (content_artifact.content_id,
                               content_artifact.relative_path)
                        # Maybe remove dict elements after to reduce memory?
                        content_artifact.artifact = to_update_ca_artifact[key]
                        to_update_ca_bulk.append(content_artifact)

                    # Sort the lists we're about to do bulk updates/creates on.
                    # We know to_update_ca_bulk entries already are in the DB, so we can enforce
                    # order just using pulp_id.
                    to_update_ca_bulk.sort(key=lambda x: x.pulp_id)
                    content_artifact_bulk.sort(
                        key=lambda x: ContentArtifact.sort_key(x))

                    # Only the "artifact" field of the existing rows is written back.
                    ContentArtifact.objects.bulk_update(
                        to_update_ca_bulk, ["artifact"])
                    # bulk_get_or_create is a project-defined manager method; presumably it
                    # tolerates rows that already exist - verify against its definition.
                    ContentArtifact.objects.bulk_get_or_create(
                        content_artifact_bulk)
                    self._post_save(batch)
Example #25
0
    async def __call__(self, in_q, out_q):
        """
        Save unsaved content units batch by batch and pass every unit downstream.

        For each batch produced by `self.batches(in_q)`, a single transaction runs
        `self._pre_save(batch)`, saves each content unit that has no primary key, bulk-creates
        a ContentArtifact and a RemoteArtifact for each declarative artifact of those units,
        and finally runs `self._post_save(batch)`. The batch is then forwarded to `out_q`.
        Once all batches are exhausted, `None` is put on `out_q`.

        Args:
            in_q (:class:`asyncio.Queue`): The queue to receive
                :class:`~pulpcore.plugin.stages.DeclarativeContent` objects from.
            out_q (:class:`asyncio.Queue`): The queue to put
                :class:`~pulpcore.plugin.stages.DeclarativeContent` into.

        Returns:
            The coroutine for this stage.
        """
        async for batch in self.batches(in_q):
            pending_content_artifacts = []
            remote_kwargs_by_key = {}

            with transaction.atomic():
                await self._pre_save(batch)
                for d_content in batch:
                    unit = d_content.content
                    if unit.pk is not None:
                        # Already has a primary key; nothing to save for this unit.
                        continue
                    unit.save()
                    for d_artifact in d_content.d_artifacts:
                        artifact = d_artifact.artifact
                        path = d_artifact.relative_path
                        pending_content_artifacts.append(
                            ContentArtifact(
                                content=unit,
                                artifact=artifact,
                                relative_path=path,
                            )
                        )
                        # Kept until the ContentArtifact exists and can be referenced.
                        remote_kwargs_by_key[str(unit.pk) + path] = {
                            'url': d_artifact.url,
                            'size': artifact.size,
                            'md5': artifact.md5,
                            'sha1': artifact.sha1,
                            'sha224': artifact.sha224,
                            'sha256': artifact.sha256,
                            'sha384': artifact.sha384,
                            'sha512': artifact.sha512,
                            'remote': d_artifact.remote,
                        }

                created = ContentArtifact.objects.bulk_create(pending_content_artifacts)
                remote_artifacts = [
                    RemoteArtifact(
                        content_artifact=ca,
                        **remote_kwargs_by_key.pop(str(ca.content.pk) + ca.relative_path)
                    )
                    for ca in created
                ]
                RemoteArtifact.objects.bulk_create(remote_artifacts)
                await self._post_save(batch)

            for d_content in batch:
                await out_q.put(d_content)
        await out_q.put(None)