def create(self, request, importer_pk):
    """Import a Pulp export into Pulp."""
    try:
        importer = PulpImporter.objects.get(pk=importer_pk)
    except PulpImporter.DoesNotExist:
        raise Http404

    serializer = PulpImportSerializer(data=request.data, context={"request": request})
    serializer.is_valid(raise_exception=True)

    path = serializer.validated_data.get("path")
    toc = serializer.validated_data.get("toc")
    task_group = TaskGroup.objects.create(description=f"Import of {path}")

    dispatch(
        pulp_import,
        exclusive_resources=[importer],
        task_group=task_group,
        kwargs={"importer_pk": importer.pk, "path": path, "toc": toc},
    )
    return TaskGroupOperationResponse(task_group, request)
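# Usage sketch (not part of the viewset above): driving the import endpoint from
# a client. The base URL, importer HREF, and credentials are hypothetical
# placeholders. The 202 response carries a "task_group" HREF that can be polled
# for per-repository progress.
import requests

BASE = "https://pulp.example.com"  # assumption: your Pulp API root
IMPORTER_HREF = "/pulp/api/v3/importers/core/pulp/<importer-uuid>/"  # hypothetical

response = requests.post(
    BASE + IMPORTER_HREF + "imports/",
    json={"toc": "/var/lib/pulp/imports/export-toc.json"},  # or {"path": ...}
    auth=("admin", "password"),
)
task_group_href = response.json()["task_group"]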
def delete(self, request, format=None):
    """
    Cleans up all the Content and Artifact orphans in the system.
    """
    task = dispatch(orphan_cleanup)
    return OperationPostponedResponse(task, request)
def reclaim(self, request):
    """
    Triggers an asynchronous space reclaim operation.
    """
    serializer = ReclaimSpaceSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)

    repos = serializer.validated_data.get("repo_hrefs", [])
    keeplist = serializer.validated_data.get("repo_versions_keeplist", [])
    reclaim_repo_pks = []
    keeplist_rv_pks = []
    for repo in repos:
        reclaim_repo_pks.append(repo.pk)
    for rv in keeplist:
        # also lock the keeplist versions' repositories as shared resources
        repos.append(rv.repository)
        keeplist_rv_pks.append(rv.pk)

    task = dispatch(
        reclaim_space,
        shared_resources=repos,
        kwargs={
            "repo_pks": reclaim_repo_pks,
            "keeplist_rv_pks": keeplist_rv_pks,
        },
    )
    return OperationPostponedResponse(task, request)
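# Example payload for the reclaim endpoint above (a sketch; HREFs are
# hypothetical placeholders). repo_hrefs selects repositories whose downloaded
# artifacts may be reclaimed; repo_versions_keeplist protects the artifacts of
# specific versions from reclamation.
reclaim_payload = {
    "repo_hrefs": ["/pulp/api/v3/repositories/file/file/<repo-uuid>/"],
    "repo_versions_keeplist": [
        "/pulp/api/v3/repositories/file/file/<repo-uuid>/versions/1/"
    ],
}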
def destroy(self, request, repository_pk, number):
    """
    Queues a task to handle deletion of a RepositoryVersion.
    """
    version = self.get_object()
    task = dispatch(
        tasks.repository.delete_version,
        [version.repository],
        kwargs={"pk": version.pk},
    )
    return OperationPostponedResponse(task, request)
def modify(self, request, pk):
    """
    Queues a task that creates a new RepositoryVersion by adding and removing content units.
    """
    add_content_units = {}
    remove_content_units = {}

    repository = self.get_object()
    serializer = self.get_serializer(data=request.data)
    serializer.is_valid(raise_exception=True)

    if "base_version" in request.data:
        base_version_pk = self.get_resource(request.data["base_version"], RepositoryVersion).pk
    else:
        base_version_pk = None

    if "add_content_units" in request.data:
        for url in request.data["add_content_units"]:
            add_content_units[NamedModelViewSet.extract_pk(url)] = url

        content_units_pks = set(add_content_units.keys())
        existing_content_units = Content.objects.filter(pk__in=content_units_pks)
        # refresh timestamps so orphan-cleanup does not race the add
        existing_content_units.touch()

        self.verify_content_units(existing_content_units, add_content_units)

        add_content_units = list(add_content_units.keys())

    if "remove_content_units" in request.data:
        if "*" in request.data["remove_content_units"]:
            remove_content_units = ["*"]
        else:
            for url in request.data["remove_content_units"]:
                remove_content_units[NamedModelViewSet.extract_pk(url)] = url
            content_units_pks = set(remove_content_units.keys())
            existing_content_units = Content.objects.filter(pk__in=content_units_pks)
            self.verify_content_units(existing_content_units, remove_content_units)
            remove_content_units = list(remove_content_units.keys())

    task = dispatch(
        tasks.repository.add_and_remove,
        exclusive_resources=[repository],
        kwargs={
            "repository_pk": pk,
            "base_version_pk": base_version_pk,
            "add_content_units": add_content_units,
            "remove_content_units": remove_content_units,
        },
    )
    return OperationPostponedResponse(task, request)
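# Example payload for the modify endpoint above (a sketch; HREFs are
# hypothetical placeholders). Passing "*" in remove_content_units removes all
# content not simultaneously being added.
modify_payload = {
    "base_version": "/pulp/api/v3/repositories/file/file/<repo-uuid>/versions/2/",
    "add_content_units": ["/pulp/api/v3/content/file/files/<content-uuid>/"],
    "remove_content_units": ["*"],
}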
def delete(self, request, format=None):
    """
    Cleans up all the Content and Artifact orphans in the system.
    """
    deprecation_logger.warning(
        "The `DELETE /pulp/api/v3/orphans/` call is deprecated. Use "
        "`POST /pulp/api/v3/orphans/cleanup/` instead."
    )
    task = dispatch(orphan_cleanup)
    return OperationPostponedResponse(task, request)
def purge(self, request):
    """
    Purge task-records for tasks in 'final' states.
    """
    serializer = PurgeSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)
    task = dispatch(
        purge,
        args=[serializer.data["finished_before"], list(serializer.data["states"])],
    )
    return OperationPostponedResponse(task, request)
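# Example payload for the purge endpoint above (a sketch). finished_before is an
# ISO-8601 timestamp; only task records already in a final state are eligible,
# and the states list narrows which final states get purged.
purge_payload = {
    "finished_before": "2021-12-31T00:00:00Z",
    "states": ["completed", "failed", "canceled"],
}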
def post(self, request):
    """
    Repair artifacts.
    """
    serializer = RepairSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)

    verify_checksums = serializer.validated_data["verify_checksums"]

    task = dispatch(repair_all_artifacts, [], args=[verify_checksums])
    return OperationPostponedResponse(task, request)
def destroy(self, request, pk, **kwargs):
    """
    Delete a model instance.
    """
    instance = self.get_object()
    serializer = self.get_serializer(instance)
    app_label = instance._meta.app_label
    task = dispatch(
        tasks.base.general_delete,
        exclusive_resources=self.async_reserved_resources(instance),
        args=(pk, app_label, serializer.__class__.__name__),
    )
    return OperationPostponedResponse(task, request)
def cleanup(self, request):
    """
    Triggers an asynchronous orphan cleanup operation.
    """
    serializer = OrphansCleanupSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)

    content_pks = serializer.validated_data.get("content_hrefs", None)

    task = dispatch(orphan_cleanup, [], kwargs={"content_pks": content_pks})
    return OperationPostponedResponse(task, request)
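# Example payload for the cleanup endpoint above (a sketch; the HREF is a
# hypothetical placeholder). Omitting content_hrefs cleans up all orphans.
cleanup_payload = {
    "content_hrefs": ["/pulp/api/v3/content/file/files/<content-uuid>/"],
}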
def update(self, request, pk, **kwargs):
    """
    Dispatches a task to update a model instance, honoring partial updates.
    """
    partial = kwargs.pop("partial", False)
    instance = self.get_object()
    serializer = self.get_serializer(instance, data=request.data, partial=partial)
    serializer.is_valid(raise_exception=True)
    app_label = instance._meta.app_label
    task = dispatch(
        tasks.base.general_update,
        exclusive_resources=self.async_reserved_resources(instance),
        args=(pk, app_label, serializer.__class__.__name__),
        kwargs={"data": request.data, "partial": partial},
    )
    return OperationPostponedResponse(task, request)
def create(self, request, *args, **kwargs):
    """
    Dispatches a task with reservation for creating an instance.
    """
    serializer = self.get_serializer(data=request.data)
    serializer.is_valid(raise_exception=True)
    app_label = self.queryset.model._meta.app_label
    task = dispatch(
        tasks.base.general_create,
        exclusive_resources=self.async_reserved_resources(None),
        args=(app_label, serializer.__class__.__name__),
        kwargs={"data": request.data},
    )
    return OperationPostponedResponse(task, request)
def commit(self, request, pk):
    """
    Queues a task that creates an Artifact; once committed, the Upload is deleted
    and cannot be re-used.
    """
    serializer = self.get_serializer(data=request.data)
    serializer.is_valid(raise_exception=True)
    sha256 = serializer.validated_data["sha256"]

    upload = self.get_object()
    task = dispatch(
        tasks.upload.commit,
        exclusive_resources=[upload],
        args=(upload.pk, sha256),
    )
    return OperationPostponedResponse(task, request)
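# Usage sketch of the whole upload lifecycle ending in the commit above. URLs
# and credentials are hypothetical placeholders; the chunking details are
# assumptions based on the documented Content-Range upload convention.
import hashlib
import requests

BASE = "https://pulp.example.com"
AUTH = ("admin", "password")

with open("big.file", "rb") as f:
    data = f.read()

upload_href = requests.post(
    BASE + "/pulp/api/v3/uploads/", json={"size": len(data)}, auth=AUTH
).json()["pulp_href"]

# A single chunk covering the whole file; large files would PUT several ranges.
requests.put(
    BASE + upload_href,
    headers={"Content-Range": f"bytes 0-{len(data) - 1}/{len(data)}"},
    files={"file": data},
    auth=AUTH,
)

# Commit dispatches the task above; the Upload is deleted once the Artifact exists.
requests.post(
    BASE + upload_href + "commit/",
    json={"sha256": hashlib.sha256(data).hexdigest()},
    auth=AUTH,
)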
def create(self, request, importer_pk):
    """Import a Pulp export into Pulp."""
    try:
        importer = PulpImporter.objects.get(pk=importer_pk)
    except PulpImporter.DoesNotExist:
        raise Http404

    serializer = PulpImportSerializer(data=request.data, context={"request": request})
    serializer.is_valid(raise_exception=True)

    path = serializer.validated_data.get("path")
    toc = serializer.validated_data.get("toc")
    task = dispatch(
        pulp_import,
        [importer],
        kwargs={"importer_pk": importer.pk, "path": path, "toc": toc},
    )
    return OperationPostponedResponse(task, request)
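# Usage sketch for the single-task variant above: the 202 response exposes a
# "task" HREF that a client can poll until the import reaches a final state.
# Base URL, importer HREF, and credentials are hypothetical placeholders.
import time
import requests

BASE = "https://pulp.example.com"
AUTH = ("admin", "password")
IMPORTER_HREF = "/pulp/api/v3/importers/core/pulp/<importer-uuid>/"

task_href = requests.post(
    BASE + IMPORTER_HREF + "imports/",
    json={"path": "/var/lib/pulp/imports/export.tar.gz"},
    auth=AUTH,
).json()["task"]

while True:
    task = requests.get(BASE + task_href, auth=AUTH).json()
    if task["state"] in ("completed", "failed", "canceled"):
        break
    time.sleep(2)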
def repair(self, request, repository_pk, number):
    """
    Queues a task to repair corrupted artifacts corresponding to a RepositoryVersion.
    """
    version = self.get_object()
    serializer = RepairSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)

    verify_checksums = serializer.validated_data["verify_checksums"]

    task = dispatch(
        tasks.repository.repair_version,
        shared_resources=[version.repository],
        args=[version.pk, verify_checksums],
    )
    return OperationPostponedResponse(task, request)
def destroy(self, request, pk, **kwargs):
    """
    Queues a task to delete an AlternateContentSource and the repositories behind its paths.
    """
    acs = self.get_object()
    reservations = []
    instance_ids = []

    for path in acs.paths.all():
        if path.repository_id:
            instance_ids.append((str(path.repository_id), "core", "RepositorySerializer"))
    reservations.append(acs)
    instance_ids.append((str(acs.pk), "core", "AlternateContentSourceSerializer"))

    async_result = dispatch(
        tasks.base.general_multi_delete,
        exclusive_resources=reservations,
        args=(instance_ids,),
    )
    return OperationPostponedResponse(async_result, request)
def modify(self, request, pk):
    """
    Queues a task that creates a new RepositoryVersion by adding and removing content units.
    """
    add_content_units = []
    remove_content_units = []
    repository = self.get_object()
    serializer = self.get_serializer(data=request.data)
    serializer.is_valid(raise_exception=True)

    if "base_version" in request.data:
        base_version_pk = self.get_resource(request.data["base_version"], RepositoryVersion).pk
    else:
        base_version_pk = None

    if "add_content_units" in request.data:
        for url in request.data["add_content_units"]:
            content = self.get_resource(url, Content)
            try:
                content.touch()
            except DatabaseError:
                # content has since been removed; call get_resource again to raise
                # a proper exception.
                content = self.get_resource(url, Content)
            add_content_units.append(content.pk)

    if "remove_content_units" in request.data:
        for url in request.data["remove_content_units"]:
            if url == "*":
                remove_content_units = [url]
                break
            else:
                content = self.get_resource(url, Content)
                remove_content_units.append(content.pk)

    task = dispatch(
        tasks.repository.add_and_remove,
        [repository],
        kwargs={
            "repository_pk": pk,
            "base_version_pk": base_version_pk,
            "add_content_units": add_content_units,
            "remove_content_units": remove_content_units,
        },
    )
    return OperationPostponedResponse(task, request)
def pulp_import(importer_pk, path, toc):
    """
    Import a Pulp export into Pulp.

    Args:
        importer_pk (str): Primary key of PulpImporter to do the import
        path (str): Path to the export to be imported
        toc (str): Path to a table-of-contents file describing a chunked export, if any
    """

    def _compute_hash(filename):
        sha256_hash = hashlib.sha256()
        with open(filename, "rb") as f:
            # Read and update hash string value in blocks of 4K
            for byte_block in iter(lambda: f.read(4096), b""):
                sha256_hash.update(byte_block)
        return sha256_hash.hexdigest()

    def validate_toc(toc_filename):
        """
        Check validity of table-of-contents file.

        table-of-contents must:
          * exist
          * be valid JSON
          * point to chunked-export-files that exist 'next to' the 'toc' file
          * point to chunks whose checksums match the checksums stored in the 'toc' file

        Args:
            toc_filename (str): The user-provided toc-file-path to be validated.

        Raises:
            ValidationError: If toc is not a valid JSON table-of-contents file,
            or when toc points to chunked-export-files that can't be found in the same
            directory as the toc-file, or the checksums of the chunks do not match the
            checksums stored in toc.
        """
        with open(toc_filename) as json_file:
            # Valid JSON?
            the_toc = json.load(json_file)
            if not the_toc.get("files", None) or not the_toc.get("meta", None):
                raise ValidationError(_("Missing 'files' or 'meta' keys in table-of-contents!"))

            base_dir = os.path.dirname(toc_filename)
            # Points at chunks that exist?
            missing_files = []
            for f in sorted(the_toc["files"].keys()):
                if not os.path.isfile(os.path.join(base_dir, f)):
                    missing_files.append(f)
            if missing_files:
                raise ValidationError(
                    _("Missing import-chunks named in table-of-contents: {}.").format(
                        str(missing_files)
                    )
                )

            errs = []
            # validate the sha256 of the toc-entries
            # gather errors for reporting at the end
            chunks = sorted(the_toc["files"].keys())
            data = dict(message="Validating Chunks", code="validate.chunks", total=len(chunks))
            with ProgressReport(**data) as pb:
                for chunk in pb.iter(chunks):
                    a_hash = _compute_hash(os.path.join(base_dir, chunk))
                    if not a_hash == the_toc["files"][chunk]:
                        err_str = "File {} expected checksum : {}, computed checksum : {}".format(
                            chunk, the_toc["files"][chunk], a_hash
                        )
                        errs.append(err_str)

            # if there are any errors, report and fail
            if errs:
                raise ValidationError(_("Import chunk hash mismatch: {}.").format(str(errs)))

        return the_toc

    def validate_and_assemble(toc_filename):
        """Validate checksums of, and reassemble, chunks in table-of-contents file."""
        the_toc = validate_toc(toc_filename)
        toc_dir = os.path.dirname(toc_filename)
        result_file = os.path.join(toc_dir, the_toc["meta"]["file"])

        # if we have only one entry in "files", it must be the full .tar.gz - return it
        if len(the_toc["files"]) == 1:
            return os.path.join(toc_dir, list(the_toc["files"].keys())[0])

        # We have multiple chunks.
        # reassemble into one file 'next to' the toc and return the resulting full-path
        chunk_size = int(the_toc["meta"]["chunk_size"])
        offset = 0
        block_size = 1024
        blocks_per_chunk = int(chunk_size / block_size)

        # sorting-by-filename is REALLY IMPORTANT here
        # keys are of the form <base-export-name>.00..<base-export-name>.NN,
        # and must be reassembled IN ORDER
        the_chunk_files = sorted(the_toc["files"].keys())

        data = dict(
            message="Recombining Chunks", code="recombine.chunks", total=len(the_chunk_files)
        )
        with ProgressReport(**data) as pb:
            for chunk in pb.iter(the_chunk_files):
                # For each chunk, add it to the reconstituted tar.gz, picking up where
                # the previous chunk left off
                subprocess.run(
                    [
                        "dd",
                        "if={}".format(os.path.join(toc_dir, chunk)),
                        "of={}".format(result_file),
                        "bs={}".format(str(block_size)),
                        "seek={}".format(str(offset)),
                    ],
                )
                offset += blocks_per_chunk

                # To keep from taking up All The Disk, we delete each chunk after it has
                # been added to the recombined file.
                try:
                    subprocess.run(["rm", "-f", os.path.join(toc_dir, chunk)])
                except OSError:
                    log.warning(
                        _("Failed to remove chunk {} after recombining. Continuing.").format(
                            os.path.join(toc_dir, chunk)
                        ),
                        exc_info=True,
                    )

        combined_hash = _compute_hash(result_file)
        if combined_hash != the_toc["meta"]["global_hash"]:
            raise ValidationError(
                _("Mismatch between combined .tar.gz checksum [{}] and originating [{}].").format(
                    combined_hash, the_toc["meta"]["global_hash"]
                )
            )
        # if we get this far, then: the chunk-files all existed, they all pass
        # checksum validation, and there exists a combined .tar.gz, which *also*
        # passes checksum-validation. Let the rest of the import process do its
        # thing on the new combined-file.
        return result_file

    if toc:
        log.info(_("Validating TOC {}.").format(toc))
        path = validate_and_assemble(toc)

    log.info(_("Importing {}.").format(path))
    current_task = Task.current()
    importer = PulpImporter.objects.get(pk=importer_pk)
    the_import = PulpImport.objects.create(
        importer=importer, task=current_task, params={"path": path}
    )
    CreatedResource.objects.create(content_object=the_import)

    task_group = TaskGroup.objects.create(description=f"Import of {path}")
    Task.objects.filter(pk=current_task.pk).update(task_group=task_group)
    current_task.refresh_from_db()
    CreatedResource.objects.create(content_object=task_group)

    with tempfile.TemporaryDirectory() as temp_dir:
        with tarfile.open(path, "r:gz") as tar:
            tar.extractall(path=temp_dir)

        # Check version info
        with open(os.path.join(temp_dir, VERSIONS_FILE)) as version_file:
            version_json = json.load(version_file)
            _check_versions(version_json)

        # Artifacts
        ar_result = _import_file(os.path.join(temp_dir, ARTIFACT_FILE), ArtifactResource)
        data = dict(
            message="Importing Artifacts", code="import.artifacts", total=len(ar_result.rows)
        )
        with ProgressReport(**data) as pb:
            for row in pb.iter(ar_result.rows):
                artifact = Artifact.objects.get(pk=row.object_id)
                base_path = os.path.join("artifact", artifact.sha256[0:2], artifact.sha256[2:])
                src = os.path.join(temp_dir, base_path)
                dest = os.path.join(settings.MEDIA_ROOT, base_path)

                if not default_storage.exists(dest):
                    with open(src, "rb") as f:
                        default_storage.save(dest, f)

        with open(os.path.join(temp_dir, REPO_FILE), "r") as repo_data_file:
            data = json.load(repo_data_file)
            gpr = GroupProgressReport(
                message="Importing repository versions",
                code="import.repo.versions",
                total=len(data),
                done=0,
                task_group=task_group,
            )
            gpr.save()

            for src_repo in data:
                try:
                    dest_repo = _destination_repo(importer, src_repo["name"])
                except Repository.DoesNotExist:
                    log.warning(
                        _("Could not find destination repo for {}. Skipping.").format(
                            src_repo["name"]
                        )
                    )
                    continue

                dispatch(
                    import_repository_version,
                    [dest_repo],
                    args=[importer.pk, dest_repo.pk, src_repo["name"], path],
                    task_group=task_group,
                )

    task_group.finish()