def post(self, request):
    """Upload an RPM package."""
    serializer = OneShotUploadSerializer(
        data=request.data, context={'request': request})
    serializer.is_valid(raise_exception=True)

    artifact = Artifact.init_and_validate(request.data['file'])

    if 'repository' in request.data:
        repository = serializer.validated_data['repository']
    else:
        repository = None

    try:
        artifact.save()
    except IntegrityError:
        # if artifact already exists, let's use it
        artifact = Artifact.objects.get(sha256=artifact.sha256)

    async_result = enqueue_with_reservation(
        one_shot_upload, [artifact],
        kwargs={
            'artifact': artifact,
            'repository': repository,
        })
    return OperationPostponedResponse(async_result, request)
async def run(self):
    """
    Build and emit `DeclarativeContent` from the Spec data.
    """
    # Interpret policy to download Artifacts or not
    deferred_download = self.remote.policy != Remote.IMMEDIATE

    async with ProgressReport(message="Downloading Metadata") as progress:
        parsed_url = urlparse(self.remote.url)
        root_dir = parsed_url.path
        specs_path = os.path.join(root_dir, "specs.4.8.gz")
        specs_url = urlunparse(parsed_url._replace(path=specs_path))
        downloader = self.remote.get_downloader(url=specs_url)
        result = await downloader.run()
        await progress.aincrement()

    async with ProgressReport(message="Parsing Metadata") as progress:
        for key in read_specs(result.path):
            relative_path = os.path.join("gems", key.name + "-" + key.version + ".gem")
            path = os.path.join(root_dir, relative_path)
            url = urlunparse(parsed_url._replace(path=path))

            spec_relative_path = os.path.join(
                "quick/Marshal.4.8", key.name + "-" + key.version + ".gemspec.rz")
            spec_path = os.path.join(root_dir, spec_relative_path)
            spec_url = urlunparse(parsed_url._replace(path=spec_path))

            gem = GemContent(name=key.name, version=key.version)
            da_gem = DeclarativeArtifact(
                artifact=Artifact(),
                url=url,
                relative_path=relative_path,
                remote=self.remote,
                deferred_download=deferred_download,
            )
            da_spec = DeclarativeArtifact(
                artifact=Artifact(),
                url=spec_url,
                relative_path=spec_relative_path,
                remote=self.remote,
                deferred_download=deferred_download,
            )
            dc = DeclarativeContent(content=gem, d_artifacts=[da_gem, da_spec])
            await progress.aincrement()
            await self.put(dc)
async def _read_package_index(self, package_index):
    """
    Parse a package index file of apt Repositories.

    Put DeclarativeContent in the queue accordingly.

    Args:
        package_index: file object containing package paragraphs

    """
    # Interpret policy to download Artifacts or not
    deferred_download = self.remote.policy != Remote.IMMEDIATE
    for package_paragraph in deb822.Packages.iter_paragraphs(package_index):
        try:
            package_relpath = package_paragraph["Filename"]
            package_sha256 = package_paragraph["sha256"]
            if package_relpath.endswith(".deb"):
                package_class = Package
                package_serializer_class = PackageSerializer
            elif package_relpath.endswith(".udeb"):
                package_class = InstallerPackage
                package_serializer_class = InstallerPackageSerializer
            else:
                # skip anything that is neither a binary nor an installer package,
                # rather than proceeding with an undefined package_class
                continue
            try:
                package_content_unit = package_class.objects.get(
                    sha256=package_sha256
                )
            except ObjectDoesNotExist:
                log.debug(
                    "Downloading package {}".format(package_paragraph["Package"])
                )
                package_dict = package_class.from822(package_paragraph)
                package_dict["relative_path"] = package_relpath
                package_dict["sha256"] = package_sha256
                package_serializer = package_serializer_class(
                    data=package_dict, partial=True
                )
                package_serializer.is_valid(raise_exception=True)
                package_content_unit = package_class(
                    **package_serializer.validated_data
                )
            package_path = os.path.join(self.parsed_url.path, package_relpath)
            package_artifact = Artifact(**_get_checksums(package_paragraph))
            package_da = DeclarativeArtifact(
                artifact=package_artifact,
                url=urlunparse(self.parsed_url._replace(path=package_path)),
                relative_path=package_relpath,
                remote=self.remote,
                deferred_download=deferred_download,
            )
            package_dc = DeclarativeContent(
                content=package_content_unit, d_artifacts=[package_da]
            )
            yield package_dc
        except KeyError:
            log.warning(
                "Ignoring invalid package paragraph. {}".format(package_paragraph)
            )
async def __call__(self, in_q, out_q):
    """
    Build and emit `DeclarativeContent` from the remote metadata.

    Fetch and parse the remote metadata, use the Project Specifiers on the Remote
    to determine which Python packages should be synced.

    Args:
        in_q (asyncio.Queue): Unused because the first stage doesn't read from an input queue.
        out_q (asyncio.Queue): The out_q to send `DeclarativeContent` objects to.

    """
    ps = ProjectSpecifier.objects.filter(remote=self.remote)

    with ProgressBar(message='Fetching Project Metadata') as pb:
        # Group multiple specifiers for the same project together, so that we only have to
        # fetch the metadata once, and can re-use it if there are multiple specifiers.
        for name, project_specifiers in groupby_unsorted(ps, key=lambda x: x.name):
            # Fetch the metadata from PyPI
            pb.increment()
            try:
                metadata = await self.get_project_metadata(name)
            except ClientResponseError as e:
                # Project doesn't exist, log a message and move on
                log.info(_(
                    "HTTP 404 'Not Found' for url '{url}'\n"
                    "Does project '{name}' exist on the remote repository?"
                ).format(url=e.request_info.url, name=name))
                continue
            project_specifiers = list(project_specifiers)

            # Determine which packages from the project match the criteria in the specifiers
            packages = await self.get_relevant_packages(
                metadata=metadata,
                includes=[
                    specifier for specifier in project_specifiers if not specifier.exclude
                ],
                excludes=[
                    specifier for specifier in project_specifiers if specifier.exclude
                ],
                prereleases=self.remote.prereleases,
            )

            # For each package, create Declarative objects to pass into the next stage
            for entry in packages:
                url = entry.pop('url')
                artifact = Artifact(sha256=entry.pop('sha256_digest'))
                package = PythonPackageContent(**entry)
                da = DeclarativeArtifact(artifact, url, entry['filename'], self.remote)
                dc = DeclarativeContent(content=package, d_artifacts=[da])
                await out_q.put(dc)
    await out_q.put(None)
async def create_artifact(self, pulp2_storage_path, expected_digests=None, expected_size=None):
    """
    Create a hard link if possible and then create an Artifact.

    If it's not possible to create a hard link, the file is copied to the Pulp 3 storage.
    """
    # avoid a mutable default argument
    expected_digests = expected_digests or {}
    artifact = None
    if not expected_digests.get('sha256'):
        # TODO: all checksums are calculated for the pulp 2 storage path, is it ok?
        artifact = Artifact.init_and_validate(pulp2_storage_path, size=expected_size)

    sha256digest = expected_digests.get('sha256') or artifact.sha256

    pulp3_storage_relative_path = storage.get_artifact_path(sha256digest)
    pulp3_storage_path = os.path.join(settings.MEDIA_ROOT, pulp3_storage_relative_path)
    os.makedirs(os.path.dirname(pulp3_storage_path), exist_ok=True)

    is_copied = False
    try:
        os.link(pulp2_storage_path, pulp3_storage_path)
    except FileExistsError:
        pass
    except OSError:
        _logger.debug('Hard link cannot be created, file will be copied.')
        shutil.copy2(pulp2_storage_path, pulp3_storage_path)
        is_copied = True

    expected_digests = {'sha256': sha256digest}

    if is_copied:
        # recalculate checksums to ensure that after being copied a file is still fine
        artifact = Artifact.init_and_validate(
            file=pulp3_storage_path,
            expected_digests=expected_digests,
            expected_size=expected_size)
    elif artifact is None:
        # sha256 was provided, so no checksums were calculated above; validate the file
        # now at its pulp 3 location (otherwise `artifact` would be unbound here).
        artifact = Artifact.init_and_validate(
            file=pulp3_storage_path,
            expected_digests=expected_digests,
            expected_size=expected_size)
    else:
        # a hard link has been created or a file has already been in the pulp 3 storage, so
        # artifact's path can be just updated and no checksum recalculation is needed.
        artifact.file = pulp3_storage_path
    return artifact
def downloaded(self, downloader):
    """
    The artifact (file) has been downloaded.

    A new _stored_model is created (and assigned) for the downloaded file.

    Args:
        downloader (BaseDownloader): The downloader that successfully completed.

    """
    self._stored_model = Artifact(file=downloader.path, **downloader.artifact_attributes)
def create(self, request, path):
    """
    Dispatch a Collection creation task.
    """
    distro = get_object_or_404(AnsibleDistribution, base_path=path)
    serializer = CollectionOneShotSerializer(data=request.data, context={"request": request})
    serializer.is_valid(raise_exception=True)

    expected_digests = {}
    if serializer.validated_data["sha256"]:
        expected_digests["sha256"] = serializer.validated_data["sha256"]
    try:
        artifact = Artifact.init_and_validate(
            serializer.validated_data["file"], expected_digests=expected_digests)
    except DigestValidationError:
        raise serializers.ValidationError(
            _("The provided sha256 value does not match the sha256 of the uploaded file."))

    try:
        artifact.save()
    except IntegrityError:
        raise serializers.ValidationError(_("Artifact already exists."))

    kwargs = {}
    if serializer.validated_data["expected_namespace"]:
        kwargs["expected_namespace"] = serializer.validated_data["expected_namespace"]
    if serializer.validated_data["expected_name"]:
        kwargs["expected_name"] = serializer.validated_data["expected_name"]
    if serializer.validated_data["expected_version"]:
        kwargs["expected_version"] = serializer.validated_data["expected_version"]

    async_result = self._dispatch_import_collection_task(
        artifact.pk, distro.repository, **kwargs)
    CollectionImport.objects.create(task_id=async_result.id)

    data = {
        "task": reverse(
            "collection-imports-detail",
            kwargs={"path": path, "pk": async_result.id},
            request=None,
        )
    }
    return Response(data, status=http_status.HTTP_202_ACCEPTED)
def setUp(self):
    """Setup database fixtures."""
    self.package1 = Package(
        package_name='aegir',
        version='0.1-edda0',
        architecture='sea',
        maintainer='Utgardloki',
        description='A sea jötunn associated with the ocean.',
    )
    self.package1.save()
    self.artifact1 = Artifact(
        size=42,
        md5='aabb',
        sha1='ccdd',
        sha256='eeff',
        file=SimpleUploadedFile('test_filename', b'test content'),
    )
    self.artifact1.save()
    ContentArtifact(artifact=self.artifact1, content=self.package1).save()
def put(self, request, path, pk=None):
    """Handles creation of Uploads."""
    _, repository = self.get_dr_push(request, path)

    digest = request.query_params["digest"]
    upload = models.Upload.objects.get(pk=pk, repository=repository)

    if upload.sha256 == digest[len("sha256:"):]:
        try:
            artifact = Artifact(
                file=upload.file.name,
                md5=upload.md5,
                sha1=upload.sha1,
                sha256=upload.sha256,
                sha384=upload.sha384,
                sha512=upload.sha512,
                size=upload.file.size,
            )
            artifact.save()
        except IntegrityError:
            artifact = Artifact.objects.get(sha256=artifact.sha256)
        try:
            blob = models.Blob(digest=digest, media_type=models.MEDIA_TYPE.REGULAR_BLOB)
            blob.save()
        except IntegrityError:
            blob = models.Blob.objects.get(digest=digest)
        try:
            blob_artifact = ContentArtifact(
                artifact=artifact, content=blob, relative_path=digest)
            blob_artifact.save()
        except IntegrityError:
            pass

        with repository.new_version() as new_version:
            new_version.add_content(models.Blob.objects.filter(pk=blob.pk))

        upload.delete()
        return BlobResponse(blob, path, 201, request)
    else:
        raise Exception("The digest did not match")
def _to_d_artifact(self, relative_path, data=None):
    artifact = Artifact(**_get_checksums(data or {}))
    url_path = os.path.join(self.parsed_url.path, relative_path)
    return DeclarativeFailsafeArtifact(
        artifact,
        urlunparse(self.parsed_url._replace(path=url_path)),
        relative_path,
        self.remote,
        deferred_download=False,
    )
def generate():
    for key in delta.additions:
        relative_path = os.path.join('gems', key.name + '-' + key.version + '.gem')
        path = os.path.join(root_dir, relative_path)
        url = urlunparse(parsed_url._replace(path=path))

        spec_relative_path = os.path.join(
            'quick/Marshal.4.8', key.name + '-' + key.version + '.gemspec.rz')
        spec_path = os.path.join(root_dir, spec_relative_path)
        spec_url = urlunparse(parsed_url._replace(path=spec_path))

        gem = GemContent(name=key.name, version=key.version)
        content = PendingContent(
            gem,
            artifacts={
                PendingArtifact(Artifact(), url, relative_path),
                PendingArtifact(Artifact(), spec_url, spec_relative_path),
            })
        yield content
def _update_content(self, content, downloads):
    """Update the content using the download results."""
    for download_result in downloads:

        def url_lookup(x):
            return x.url == download_result.url

        d_artifact = list(filter(url_lookup, content.d_artifacts))[0]
        if d_artifact.artifact.pk is None:
            new_artifact = Artifact(
                **download_result.artifact_attributes, file=download_result.path)
            d_artifact.artifact = new_artifact
def receive_artifact(self, chunk):
    """Handles assembling of Manifest as it's being uploaded."""
    with NamedTemporaryFile("ab") as temp_file:
        size = 0
        hashers = {}
        for algorithm in Artifact.DIGEST_FIELDS:
            hashers[algorithm] = getattr(hashlib, algorithm)()
        while True:
            subchunk = chunk.read(2000000)
            if not subchunk:
                break
            temp_file.write(subchunk)
            size += len(subchunk)
            for algorithm in Artifact.DIGEST_FIELDS:
                hashers[algorithm].update(subchunk)
        temp_file.flush()
        digests = {}
        for algorithm in Artifact.DIGEST_FIELDS:
            digests[algorithm] = hashers[algorithm].hexdigest()
        artifact = Artifact(file=temp_file.name, size=size, **digests)
        try:
            artifact.save()
        except IntegrityError:
            artifact = Artifact.objects.get(sha256=artifact.sha256)
            artifact.touch()
        return artifact
async def __call__(self, in_q, out_q):
    """
    Build and emit `DeclarativeContent` from the Spec data.

    Args:
        in_q (asyncio.Queue): Unused because the first stage doesn't read from an input queue.
        out_q (asyncio.Queue): The out_q to send `DeclarativeContent` objects to

    """
    with ProgressBar(message='Downloading Metadata') as pb:
        parsed_url = urlparse(self.remote.url)
        root_dir = parsed_url.path
        specs_path = os.path.join(root_dir, 'specs.4.8.gz')
        specs_url = urlunparse(parsed_url._replace(path=specs_path))
        downloader = self.remote.get_downloader(url=specs_url)
        result = await downloader.run()
        pb.increment()

    with ProgressBar(message='Parsing Metadata') as pb:
        for key in read_specs(result.path):
            relative_path = os.path.join('gems', key.name + '-' + key.version + '.gem')
            path = os.path.join(root_dir, relative_path)
            url = urlunparse(parsed_url._replace(path=path))

            spec_relative_path = os.path.join(
                'quick/Marshal.4.8', key.name + '-' + key.version + '.gemspec.rz')
            spec_path = os.path.join(root_dir, spec_relative_path)
            spec_url = urlunparse(parsed_url._replace(path=spec_path))

            gem = GemContent(name=key.name, version=key.version)
            da_gem = DeclarativeArtifact(Artifact(), url, relative_path, self.remote)
            da_spec = DeclarativeArtifact(
                Artifact(), spec_url, spec_relative_path, self.remote)
            dc = DeclarativeContent(content=gem, d_artifacts=[da_gem, da_spec])
            pb.increment()
            await out_q.put(dc)
    await out_q.put(None)
def to_d_artifact(data):
    nonlocal release
    artifact = Artifact(**_get_checksums(data))
    relpath = os.path.join(os.path.dirname(release.relative_path), data["Name"])
    urlpath = os.path.join(self.parsed_url.path, relpath)
    return DeclarativeFailsafeArtifact(
        artifact,
        urlunparse(self.parsed_url._replace(path=urlpath)),
        relpath,
        self.remote,
        deferred_download=False,
    )
async def create_artifact(self, pulp2_storage_path, expected_digests=None,
                          expected_size=None, downloaded=True):
    """
    Create a hard link if possible and then create an Artifact.

    If it's not possible to create a hard link, the file is copied to the Pulp 3 storage.
    For non-downloaded content, an artifact with its expected checksum and size is created.
    """
    # avoid a mutable default argument
    expected_digests = expected_digests or {}
    if not downloaded:
        if not expected_digests:
            raise ValueError(
                _('No digest is provided for on_demand content creation. Pulp 2 '
                  'storage path: {}'.format(pulp2_storage_path)))
        artifact = Artifact(**expected_digests)
        artifact.size = expected_size
        return artifact

    artifact = Artifact.init_and_validate(
        pulp2_storage_path, expected_digests=expected_digests, expected_size=expected_size)
    pulp3_storage_relative_path = storage.get_artifact_path(artifact.sha256)
    pulp3_storage_path = os.path.join(settings.MEDIA_ROOT, pulp3_storage_relative_path)
    os.makedirs(os.path.dirname(pulp3_storage_path), exist_ok=True)

    is_copied = False
    try:
        os.link(pulp2_storage_path, pulp3_storage_path)
    except FileExistsError:
        pass
    except OSError:
        _logger.debug(_('Hard link cannot be created, file will be copied.'))
        shutil.copy2(pulp2_storage_path, pulp3_storage_path)
        is_copied = True

    if not expected_digests:
        expected_digests = {'sha256': artifact.sha256}

    if is_copied:
        # recalculate checksums to ensure that after being copied a file is still fine
        artifact = Artifact.init_and_validate(
            file=pulp3_storage_path,
            expected_digests=expected_digests,
            expected_size=expected_size)
    else:
        # a hard link has been created or a file has already been in the pulp 3 storage, so
        # artifact's path can be just updated and no checksum recalculation is needed.
        artifact.file = pulp3_storage_path
    return artifact
def _save_artifact_blocking(artifact_attributes):
    saved_artifact = Artifact(**artifact_attributes)
    try:
        saved_artifact.save()
    except IntegrityError:
        del artifact_attributes["file"]
        saved_artifact = Artifact.objects.get(**artifact_attributes)
        saved_artifact.touch()
    return saved_artifact
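# A usage sketch for _save_artifact_blocking above (all values hypothetical; the
# attribute names mirror pulpcore's Artifact fields as produced by a finished
# downloader). It shows why the except branch deletes "file" first: the
# remaining digest/size attributes are what uniquely identify the row that
# already exists in the database.
artifact_attributes = {
    "file": "/tmp/downloaded.bin",  # hypothetical temporary file path
    "size": 1024,
    "sha256": "0" * 64,  # hypothetical digest value
}
saved = _save_artifact_blocking(artifact_attributes)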
async def run(self):
    """
    Build and emit `DeclarativeContent` from the Manifest data.
    """
    downloader = self.remote.get_downloader(url=self.remote.url)
    result = await downloader.run()
    data = [self.get_json_data(result.path)]
    dependencies = data[0].get("dependencies")
    to_download = []
    if dependencies:
        to_download.extend(dependencies.items())

    downloaded = []
    while to_download:
        next_batch = []
        for name, version in to_download:
            new_url = self.remote.url.replace(data[0]["name"], name)
            new_url = new_url.replace(data[0]["version"], version.replace("^", ""))
            downloader = self.remote.get_downloader(url=new_url)
            result = await downloader.run()
            new_data = self.get_json_data(result.path)
            data.append(new_data)
            next_batch.extend(new_data.get("dependencies", {}).items())
            downloaded.append((name, version))
        to_download.extend(next_batch)
        for dependency in downloaded:
            if dependency in to_download:
                to_download.remove(dependency)

    for pkg in data:
        package = Package(name=pkg["name"], version=pkg["version"])
        artifact = Artifact()  # make Artifact in memory-only
        url = pkg["dist"]["tarball"]
        da = DeclarativeArtifact(
            artifact,
            url,
            url.split("/")[-1],
            self.remote,
            deferred_download=self.deferred_download,
        )
        dc = DeclarativeContent(content=package, d_artifacts=[da])
        await self.put(dc)
def validate(self, data):
    """Validate the GemContent data."""
    data = super().validate(data)

    if "file" in data:
        if "artifact" in data:
            raise ValidationError(_("Only one of 'file' and 'artifact' may be specified."))
        data["artifact"] = Artifact.init_and_validate(data.pop("file"))
    elif "artifact" not in data:
        raise ValidationError(_("One of 'file' and 'artifact' must be specified."))

    if "request" not in self.context:
        data = self.deferred_validate(data)

    return data
async def _parse_packages(self, packages):
    progress_data = {
        "message": "Parsed Packages",
        "code": "sync.parsing.packages",
        "total": len(packages),
    }

    with ProgressReport(**progress_data) as packages_pb:
        while True:
            try:
                (_, pkg) = packages.popitem(last=False)
            except KeyError:
                break
            package = Package(**Package.createrepo_to_dict(pkg))
            del pkg
            artifact = Artifact(size=package.size_package)
            checksum_type = getattr(CHECKSUM_TYPES, package.checksum_type.upper())
            setattr(artifact, checksum_type, package.pkgId)
            url = urlpath_sanitize(self.data.remote_url, package.location_href)
            filename = os.path.basename(package.location_href)
            da = DeclarativeArtifact(
                artifact=artifact,
                url=url,
                relative_path=filename,
                remote=self.remote,
                deferred_download=self.deferred_download,
            )
            dc = DeclarativeContent(content=package, d_artifacts=[da])
            dc.extra_data = defaultdict(list)

            # find if a package relates to a modulemd
            if dc.content.nevra in self.data.nevra_to_module.keys():
                dc.content.is_modular = True
                for dc_modulemd in self.data.nevra_to_module[dc.content.nevra]:
                    dc.extra_data["modulemd_relation"].append(dc_modulemd)
                    dc_modulemd.extra_data["package_relation"].append(dc)

            if dc.content.name in self.data.pkgname_to_groups.keys():
                for dc_group in self.data.pkgname_to_groups[dc.content.name]:
                    dc.extra_data["group_relations"].append(dc_group)
                    dc_group.extra_data["related_packages"].append(dc)

            packages_pb.increment()
            await self.put(dc)
def _create_snippet(snippet_string):
    """
    Create snippet of modulemd[-defaults] as artifact.

    Args:
        snippet_string (string): Snippet with modulemd[-defaults] yaml

    Returns:
        Snippet as unsaved Artifact object

    """
    tmp_file = tempfile.NamedTemporaryFile(dir=os.getcwd(), delete=False)
    with open(tmp_file.name, "w") as snippet:
        snippet.write(snippet_string)
    return Artifact.init_and_validate(tmp_file.name)
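# A usage sketch for _create_snippet above (the yaml content is a made-up,
# minimal modulemd-defaults document; only the 'document:' header follows the
# real format). The helper returns an unsaved Artifact, so the caller is
# expected to call save() on it.
snippet = _create_snippet("---\ndocument: modulemd-defaults\nversion: 1\n...\n")
snippet.save()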
def post(self, request, path):
    """
    Queues a task that creates a new Collection from an uploaded artifact.
    """
    distro = get_object_or_404(AnsibleDistribution, base_path=path)
    serializer = GalaxyCollectionUploadSerializer(
        data=request.data, context={"request": request})
    serializer.is_valid(raise_exception=True)

    artifact = Artifact.init_and_validate(serializer.validated_data["file"])
    artifact.save()

    async_result = self._dispatch_import_collection_task(artifact.pk, distro.repository)
    return OperationPostponedResponse(async_result, request)
def put(self, request, path, pk=None):
    """
    Create a blob from uploaded chunks.
    """
    _, repository = self.get_dr_push(request, path)

    digest = request.query_params["digest"]
    upload = models.Upload.objects.get(pk=pk, repository=repository)
    chunks = UploadChunk.objects.filter(upload=upload).order_by("offset")

    with NamedTemporaryFile("ab") as temp_file:
        for chunk in chunks:
            temp_file.write(chunk.file.read())
        temp_file.flush()

        uploaded_file = PulpTemporaryUploadedFile.from_file(File(open(temp_file.name, "rb")))

    if uploaded_file.hashers["sha256"].hexdigest() == digest[len("sha256:"):]:
        try:
            artifact = Artifact.init_and_validate(uploaded_file)
            artifact.save()
        except IntegrityError:
            artifact = Artifact.objects.get(sha256=artifact.sha256)
        try:
            blob = models.Blob(digest=digest, media_type=models.MEDIA_TYPE.REGULAR_BLOB)
            blob.save()
        except IntegrityError:
            blob = models.Blob.objects.get(digest=digest)
        try:
            blob_artifact = ContentArtifact(
                artifact=artifact, content=blob, relative_path=digest)
            blob_artifact.save()
        except IntegrityError:
            pass

        with repository.new_version() as new_version:
            new_version.add_content(models.Blob.objects.filter(pk=blob.pk))

        upload.delete()
        return BlobResponse(blob, path, 201, request)
    else:
        raise Exception("The digest did not match")
def setUp(self):
    with open(self.artifact_path, 'w') as f:
        f.write('Temp Artifact File')
    self.artifact = Artifact.init_and_validate(self.artifact_path)
    self.artifact.save()

    collection = Collection.objects.create(namespace='my_ns', name='my_name')
    self.collection_version = CollectionVersion.objects.create(collection=collection)
    self.collection_version.save()

    content_artifact = ContentArtifact.objects.create(
        artifact=self.artifact,
        content=self.collection_version,
    )
    content_artifact.save()
def generate():
    for entry in manifest.read():
        key = Key(relative_path=entry.relative_path, digest=entry.digest)
        if key not in delta.additions:
            continue
        path = os.path.join(root_dir, entry.relative_path)
        url = urlunparse(parsed_url._replace(path=path))
        file = FileContent(relative_path=entry.relative_path, digest=entry.digest)
        artifact = Artifact(size=entry.size, sha256=entry.digest)
        content = PendingContent(
            file,
            artifacts={
                PendingArtifact(artifact, url, entry.relative_path)
            })
        yield content
async def _read_installer_file_index(self, installer_file_index):
    """
    Parse an installer file index file of apt Repositories.

    Put DeclarativeContent in the queue accordingly.

    Args:
        installer_file_index: object of type :class:`InstallerFileIndex`

    """
    # Interpret policy to download Artifacts or not
    deferred_download = self.remote.policy != Remote.IMMEDIATE
    file_list = defaultdict(dict)
    for content_artifact in installer_file_index.contentartifact_set.all():
        algorithm = InstallerFileIndex.FILE_ALGORITHM.get(
            os.path.basename(content_artifact.relative_path)
        )
        if not algorithm:
            continue
        for line in content_artifact.artifact.file:
            digest, filename = line.decode().strip().split(maxsplit=1)
            filename = os.path.normpath(filename)
            if filename in InstallerFileIndex.FILE_ALGORITHM:  # strangely they may appear here
                continue
            file_list[filename][algorithm] = digest

    for filename, digests in file_list.items():
        relpath = os.path.join(installer_file_index.relative_path, filename)
        urlpath = os.path.join(self.parsed_url.path, relpath)
        content_unit = GenericContent(sha256=digests["sha256"], relative_path=relpath)
        d_artifact = DeclarativeArtifact(
            artifact=Artifact(**digests),
            url=urlunparse(self.parsed_url._replace(path=urlpath)),
            relative_path=relpath,
            remote=self.remote,
            deferred_download=deferred_download,
        )
        d_content = DeclarativeContent(content=content_unit, d_artifacts=[d_artifact])
        yield d_content
async def run(self):
    """
    Build and emit `DeclarativeContent` from the ansible metadata.
    """
    msg = "Parsing CollectionVersion Metadata"
    with ProgressReport(message=msg, code="parsing.metadata") as pb:
        async for metadata in self._fetch_collections():
            url = metadata["download_url"]

            collection_version = CollectionVersion(
                namespace=metadata["namespace"]["name"],
                name=metadata["collection"]["name"],
                version=metadata["version"],
            )

            info = metadata["metadata"]
            info.pop("tags")
            for attr_name, attr_value in info.items():
                if attr_value is None or attr_name not in collection_version.__dict__:
                    continue
                setattr(collection_version, attr_name, attr_value)

            artifact = metadata["artifact"]
            d_artifact = DeclarativeArtifact(
                artifact=Artifact(sha256=artifact["sha256"], size=artifact["size"]),
                url=url,
                relative_path=collection_version.relative_path,
                remote=self.remote,
                deferred_download=self.deferred_download,
            )

            extradata = dict(
                docs_blob_url=metadata["docs_blob_url"],
                deprecated=metadata["deprecated"],
            )

            d_content = DeclarativeContent(
                content=collection_version,
                d_artifacts=[d_artifact],
                extra_data=extradata,
            )
            pb.increment()
            await self.put(d_content)
def add_image_from_directory_to_repository(path, repository, tag):
    """
    Creates a Manifest and all blobs from a directory containing an OCI image.

    Args:
        path (str): Path to the directory containing the OCI image
        repository (class:`pulpcore.plugin.models.Repository`): The destination repository
        tag (str): Tag name for the new image in the repository

    Returns:
        A class:`pulpcore.plugin.models.RepositoryVersion` that contains the new OCI container
        image and tag.

    """
    manifest_path = "{}manifest.json".format(path)
    manifest_artifact = Artifact.init_and_validate(manifest_path)
    manifest_artifact.save()
    manifest_digest = "sha256:{}".format(manifest_artifact.sha256)
    manifest = Manifest(
        digest=manifest_digest, schema_version=2, media_type=MEDIA_TYPE.MANIFEST_OCI)
    manifest.save()
    ContentArtifact(
        artifact=manifest_artifact, content=manifest, relative_path=manifest_digest).save()

    tag = Tag(name=tag, tagged_manifest=manifest)
    tag.save()
    ContentArtifact(artifact=manifest_artifact, content=tag, relative_path=tag.name).save()

    with repository.new_version() as new_repo_version:
        new_repo_version.add_content(Manifest.objects.filter(pk=manifest.pk))
        new_repo_version.add_content(Tag.objects.filter(pk=tag.pk))

        with open(manifest_artifact.file.path, "r") as manifest_file:
            manifest_json = json.load(manifest_file)

        config_blob = get_or_create_blob(manifest_json["config"], manifest, path)
        manifest.config_blob = config_blob
        manifest.save()
        new_repo_version.add_content(Blob.objects.filter(pk=config_blob.pk))

        for layer in manifest_json["layers"]:
            blob = get_or_create_blob(layer, manifest, path)
            new_repo_version.add_content(Blob.objects.filter(pk=blob.pk))
    return new_repo_version
def import_collection_from_path(path):
    """
    Import a single collection by path.

    This method will not fail if the Artifact already exists.

    Args:
        path: The path to the tarball to import.

    """
    artifact = Artifact.init_and_validate(path)

    try:
        artifact.save()
    except IntegrityError:
        artifact = Artifact.objects.get(sha256=artifact.sha256)

    import_collection(artifact.pk)
async def run(self):
    """
    Build and emit `DeclarativeContent` from the Manifest data.
    """
    remote_url = self.remote.url
    if not remote_url.endswith('/index.yaml'):
        remote_url += '/index.yaml'

    # TODO Skip reading generator to list?
    index_yaml = []
    with ProgressReport(message="Downloading Index", code="downloading.metadata") as pb:
        downloader = self.remote.get_downloader(url=remote_url)
        result = await downloader.run()
        index_yaml = list(self.read_index_yaml(result.path))
        pb.increment()

    with ProgressReport(message="Parsing Entries", code="parsing.metadata") as pb:
        pb.total = len(index_yaml)
        pb.save()
        for entry in index_yaml:
            # drop 'url' from the content data; note ('url',) must be a tuple,
            # a bare ('url') would be a plain string and match any substring key
            content_entry = dict(
                filter(lambda e: e[0] not in ('url',), entry.items()))
            unit = ChartContent(**content_entry)
            artifact = Artifact(sha256=entry['digest'])
            da = DeclarativeArtifact(
                artifact,
                urljoin(remote_url, entry['url']),
                "{}-{}.tgz".format(entry['name'], entry['version']),
                self.remote,
                deferred_download=self.deferred_download,
            )
            dc = DeclarativeContent(content=unit, d_artifacts=[da])
            pb.increment()
            await self.put(dc)
def create_manifest(self, list_dc, manifest_data):
    """
    Create an Image Manifest from manifest data in a ManifestList.

    Args:
        list_dc (pulpcore.plugin.stages.DeclarativeContent): dc for a ManifestList
        manifest_data (dict): Data about a single new ImageManifest.

    """
    digest = manifest_data["digest"]
    relative_url = "/v2/{name}/manifests/{digest}".format(
        name=self.remote.namespaced_upstream_name, digest=digest)
    manifest_url = urljoin(self.remote.url, relative_url)
    da = DeclarativeArtifact(
        artifact=Artifact(),
        url=manifest_url,
        relative_path=digest,
        remote=self.remote,
        extra_data={"headers": V2_ACCEPT_HEADERS},
    )
    manifest = Manifest(
        digest=manifest_data["digest"],
        schema_version=2
        if manifest_data["mediaType"] in (MEDIA_TYPE.MANIFEST_V2, MEDIA_TYPE.MANIFEST_OCI)
        else 1,
        media_type=manifest_data["mediaType"],
    )
    platform = {}
    p = manifest_data["platform"]
    platform["architecture"] = p["architecture"]
    platform["os"] = p["os"]
    platform["features"] = p.get("features", "")
    platform["variant"] = p.get("variant", "")
    platform["os.version"] = p.get("os.version", "")
    platform["os.features"] = p.get("os.features", "")
    man_dc = DeclarativeContent(
        content=manifest,
        d_artifacts=[da],
        extra_data={"relation": list_dc, "platform": platform},
    )
    return man_dc
class PendingArtifact(Pending):
    """
    Represents an artifact related to content that is contained within
    the remote repository.

    Attributes:
        url (str): The URL used to download the artifact.
        relative_path (str): The relative path within the content.
        content (PendingContent): The associated pending content.
            This is the reverse relationship.

    Examples:
        >>>
        >>> from pulpcore.plugin.models import Artifact
        >>>
        >>> model = Artifact(...)  # DB model instance.
        >>> download = ...
        >>> ...
        >>> artifact = PendingArtifact(model, 'http://zoo.org/lion.rpm', 'lion.rpm')
        >>>
    """

    __slots__ = (
        'url',
        'relative_path',
        'content',
    )

    def __init__(self, model, url, relative_path, content=None):
        """
        Args:
            model (pulpcore.plugin.models.Artifact): A pending artifact model.
            url (str): The URL used to download the artifact.
            relative_path (str): The relative path within the content.
            content (PendingContent): The associated pending content.
                This is the reverse relationship.
        """
        super().__init__(model)
        self.url = url
        self.relative_path = relative_path
        self.content = content
        if content:
            content.artifacts.add(self)

    @property
    def model(self):
        """
        The model getter.

        Returns:
            pulpcore.plugin.models.Artifact: The pending model.
        """
        return self._model

    @property
    def stored_model(self):
        """
        The stored model getter.

        Returns:
            pulpcore.plugin.models.Artifact: The stored model.
        """
        return self._stored_model

    @stored_model.setter
    def stored_model(self, model):
        """
        The stored model setter.

        Args:
            model (pulpcore.plugin.models.Artifact): The stored model.
        """
        self._stored_model = model

    @property
    def changeset(self):
        """
        The changeset getter.

        Returns:
            pulpcore.plugin.changeset.Changeset: The active changeset.
        """
        return self.content.changeset

    @property
    def remote(self):
        """
        The remote getter.

        Returns:
            pulpcore.plugin.models.Remote: A remote.
        """
        return self.changeset.remote

    @property
    def downloader(self):
        """
        A downloader used to download the artifact.

        The downloader may be a NopDownloader (no-operation) when:
        - The _stored_model is set to a model fetched from the DB.
        - The download policy is deferred.

        Returns:
            asyncio.Future: A download future based on a downloader.
        """
        def done(task):
            try:
                task.result()
            except Exception:
                pass
            else:
                self.downloaded(downloader)

        if self._stored_model:
            downloader = NopDownloader()
            future = asyncio.ensure_future(downloader.run())
        else:
            downloader = self.remote.get_downloader(self.url)
            future = asyncio.ensure_future(downloader.run())
            future.add_done_callback(done)
        return future

    def downloaded(self, downloader):
        """
        The artifact (file) has been downloaded.

        A new _stored_model is created (and assigned) for the downloaded file.

        Args:
            downloader (BaseDownloader): The downloader that successfully completed.
        """
        self._stored_model = Artifact(file=downloader.path, **downloader.artifact_attributes)

    def artifact_q(self):
        """
        Get a query for the actual artifact.

        Returns:
            django.db.models.Q: A query to get the actual artifact.
        """
        q = Q(pk=None)
        for field in Artifact.RELIABLE_DIGEST_FIELDS:
            digest = getattr(self._model, field)
            if digest:
                q |= Q(**{field: digest})
        return q

    def settle(self):
        """
        Ensures that all prerequisite matters pertaining to adding the artifact
        to the DB have been settled.

        Notes:
            Called whenever an artifact has been processed.
        """
        self._settled = True

    def save(self):
        """
        Update the DB:
         - Create (or fetch) the Artifact.
         - Create (or fetch) the ContentArtifact.
         - Create (or update) the RemoteArtifact.
        """
        if self._stored_model:
            try:
                with transaction.atomic():
                    self._stored_model.save()
            except IntegrityError:
                q = self.artifact_q()
                self._stored_model = Artifact.objects.get(q)

        try:
            with transaction.atomic():
                content_artifact = ContentArtifact(
                    relative_path=self.relative_path,
                    content=self.content.stored_model,
                    artifact=self._stored_model)
                content_artifact.save()
        except IntegrityError:
            content_artifact = ContentArtifact.objects.get(
                relative_path=self.relative_path,
                content=self.content.stored_model)
            if self._stored_model:
                content_artifact.artifact = self._stored_model
                content_artifact.save()

        digests = {f: getattr(self._model, f) for f in Artifact.DIGEST_FIELDS}

        try:
            with transaction.atomic():
                remote_artifact = RemoteArtifact(
                    url=self.url,
                    remote=self.remote,
                    content_artifact=content_artifact,
                    size=self._model.size,
                    **digests)
                remote_artifact.save()
        except IntegrityError:
            q_set = RemoteArtifact.objects.filter(
                remote=self.remote,
                content_artifact=content_artifact)
            q_set.update(
                url=self.url,
                size=self._model.size,
                **digests)

    def __hash__(self):
        return hash(self.relative_path)
class TestPackage(TestCase):
    """Test Package content type."""

    PACKAGE_PARAGRAPH = 'Package: aegir\n' \
                        'Version: 0.1-edda0\n' \
                        'Architecture: sea\n' \
                        'Maintainer: Utgardloki\n' \
                        'Description: A sea jötunn associated with the ocean.\n' \
                        'MD5sum: aabb\n' \
                        'SHA1: ccdd\n' \
                        'SHA256: eeff\n' \
                        'Filename: pool/a/aegir/aegir_0.1-edda0_sea.deb\n'

    def setUp(self):
        """Setup database fixtures."""
        self.package1 = Package(
            package_name='aegir',
            version='0.1-edda0',
            architecture='sea',
            maintainer='Utgardloki',
            description='A sea jötunn associated with the ocean.',
        )
        self.package1.save()
        self.artifact1 = Artifact(
            size=42,
            md5='aabb',
            sha1='ccdd',
            sha256='eeff',
            file=SimpleUploadedFile('test_filename', b'test content'),
        )
        self.artifact1.save()
        ContentArtifact(artifact=self.artifact1, content=self.package1).save()

    def test_str(self):
        """Test package str."""
        self.assertEqual(str(self.package1), '<Package: aegir_0.1-edda0_sea>')

    def test_filename(self):
        """Test that the pool filename of a package is correct."""
        self.assertEqual(self.package1.filename(), 'pool/a/aegir/aegir_0.1-edda0_sea.deb')

    def test_filename_with_component(self):
        """Test that the pool filename of a package with component is correct."""
        self.assertEqual(
            self.package1.filename('joetunn'),
            'pool/joetunn/a/aegir/aegir_0.1-edda0_sea.deb')

    def test_to822(self):
        """Test if package transforms correctly into 822dict."""
        package_dict = self.package1.to822('joetunn')
        self.assertEqual(package_dict['package'], self.package1.package_name)
        self.assertEqual(package_dict['version'], self.package1.version)
        self.assertEqual(package_dict['architecture'], self.package1.architecture)
        self.assertEqual(package_dict['maintainer'], self.package1.maintainer)
        self.assertEqual(package_dict['description'], self.package1.description)
        self.assertEqual(package_dict['md5sum'], self.artifact1.md5)
        self.assertEqual(package_dict['sha1'], self.artifact1.sha1)
        self.assertEqual(package_dict['sha256'], self.artifact1.sha256)
        self.assertEqual(package_dict['filename'], self.package1.filename('joetunn'))

    def test_to822_dump(self):
        """Test dump to package index."""
        self.assertEqual(self.package1.to822().dump(), self.PACKAGE_PARAGRAPH)
async def run(self):
    """
    DockerFirstStage.
    """
    future_manifests = []
    tag_list = []
    to_download = []
    man_dcs = {}
    total_blobs = []

    with ProgressBar(message='Downloading tag list', total=1) as pb:
        repo_name = self.remote.namespaced_upstream_name
        relative_url = '/v2/{name}/tags/list'.format(name=repo_name)
        tag_list_url = urljoin(self.remote.url, relative_url)
        list_downloader = self.remote.get_downloader(url=tag_list_url)
        await list_downloader.run(extra_data={'repo_name': repo_name})

        with open(list_downloader.path) as tags_raw:
            tags_dict = json.loads(tags_raw.read())
            tag_list = tags_dict['tags']

        # check for the presence of the pagination link header
        link = list_downloader.response_headers.get('Link')
        await self.handle_pagination(link, repo_name, tag_list)
        whitelist_tags = self.remote.whitelist_tags
        if whitelist_tags:
            tag_list = list(set(tag_list) & set(whitelist_tags.split(',')))
        pb.increment()

    msg = 'Creating Download requests for v2 Tags'
    with ProgressBar(message=msg, total=len(tag_list)) as pb:
        for tag_name in tag_list:
            relative_url = '/v2/{name}/manifests/{tag}'.format(
                name=self.remote.namespaced_upstream_name,
                tag=tag_name,
            )
            url = urljoin(self.remote.url, relative_url)
            downloader = self.remote.get_downloader(url=url)
            to_download.append(downloader.run(extra_data={'headers': V2_ACCEPT_HEADERS}))
            pb.increment()

    pb_parsed_tags = ProgressBar(message='Processing v2 Tags', state='running')
    pb_parsed_ml_tags = ProgressBar(message='Parsing Manifest List Tags', state='running')
    pb_parsed_m_tags = ProgressBar(message='Parsing Manifests Tags', state='running')
    global pb_parsed_blobs
    pb_parsed_blobs = ProgressBar(message='Parsing Blobs', state='running')
    pb_parsed_man = ProgressBar(message='Parsing Manifests', state='running')

    for download_tag in asyncio.as_completed(to_download):
        tag = await download_tag
        with open(tag.path) as content_file:
            raw = content_file.read()
        content_data = json.loads(raw)
        mediatype = content_data.get('mediaType')
        tag.artifact_attributes['file'] = tag.path
        saved_artifact = Artifact(**tag.artifact_attributes)
        try:
            saved_artifact.save()
        except IntegrityError:
            del tag.artifact_attributes['file']
            saved_artifact = Artifact.objects.get(**tag.artifact_attributes)
        tag_dc = self.create_tag(mediatype, saved_artifact, tag.url)

        if type(tag_dc.content) is ManifestListTag:
            list_dc = self.create_tagged_manifest_list(tag_dc, content_data)
            await self.put(list_dc)
            pb_parsed_ml_tags.increment()
            tag_dc.extra_data['list_relation'] = list_dc
            for manifest_data in content_data.get('manifests'):
                man_dc = self.create_manifest(list_dc, manifest_data)
                future_manifests.append(man_dc.get_or_create_future())
                man_dcs[man_dc.content.digest] = man_dc
                await self.put(man_dc)
                pb_parsed_man.increment()
        elif type(tag_dc.content) is ManifestTag:
            man_dc = self.create_tagged_manifest(tag_dc, content_data)
            await self.put(man_dc)
            pb_parsed_m_tags.increment()
            tag_dc.extra_data['man_relation'] = man_dc
            self.handle_blobs(man_dc, content_data, total_blobs)
        await self.put(tag_dc)
        pb_parsed_tags.increment()

    pb_parsed_tags.state = 'completed'
    pb_parsed_tags.total = pb_parsed_tags.done
    pb_parsed_tags.save()
    pb_parsed_ml_tags.state = 'completed'
    pb_parsed_ml_tags.total = pb_parsed_ml_tags.done
    pb_parsed_ml_tags.save()
    pb_parsed_m_tags.state = 'completed'
    pb_parsed_m_tags.total = pb_parsed_m_tags.done
    pb_parsed_m_tags.save()
    pb_parsed_man.state = 'completed'
    pb_parsed_man.total = pb_parsed_man.done
    pb_parsed_man.save()

    for manifest_future in asyncio.as_completed(future_manifests):
        man = await manifest_future
        with man._artifacts.get().file.open() as content_file:
            raw = content_file.read()
        content_data = json.loads(raw)
        man_dc = man_dcs[man.digest]
        self.handle_blobs(man_dc, content_data, total_blobs)
    for blob in total_blobs:
        await self.put(blob)

    pb_parsed_blobs.state = 'completed'
    pb_parsed_blobs.total = pb_parsed_blobs.done
    pb_parsed_blobs.save()