Example 1
def pulp_import(importer_pk, path):
    """
    Import a Pulp export into Pulp.

    Args:
        importer_pk (str): Primary key of PulpImporter to do the import
        path (str): Path to the export to be imported
    """

    def destination_repo(source_repo_name):
        """Find the destination repository based on source repo's name."""
        if importer.repo_mapping and importer.repo_mapping.get(source_repo_name):
            dest_repo_name = importer.repo_mapping[source_repo_name]
        else:
            dest_repo_name = source_repo_name
        return Repository.objects.get(name=dest_repo_name)

    log.info(_("Importing {}.").format(path))
    importer = PulpImporter.objects.get(pk=importer_pk)
    pulp_import = PulpImport.objects.create(importer=importer,
                                            task=Task.current(),
                                            params={"path": path})
    CreatedResource.objects.create(content_object=pulp_import)

    task_group = TaskGroup.objects.create(description=f"Import of {path}")
    CreatedResource.objects.create(content_object=task_group)

    with tempfile.TemporaryDirectory() as temp_dir:
        with tarfile.open(path, "r:gz") as tar:
            tar.extractall(path=temp_dir)

        # Artifacts
        ar_result = _import_file(os.path.join(temp_dir, ARTIFACT_FILE), ArtifactResource)
        for row in ar_result.rows:
            artifact = Artifact.objects.get(pk=row.object_id)
            base_path = os.path.join('artifact', artifact.sha256[0:2], artifact.sha256[2:])
            src = os.path.join(temp_dir, base_path)
            dest = os.path.join(settings.MEDIA_ROOT, base_path)

            if not default_storage.exists(dest):
                with open(src, 'rb') as f:
                    default_storage.save(dest, f)

        with open(os.path.join(temp_dir, REPO_FILE), "r") as repo_data_file:
            data = json.load(repo_data_file)

            for src_repo in data:
                try:
                    dest_repo = destination_repo(src_repo["name"])
                except Repository.DoesNotExist:
                    log.warning(_("Could not find destination repo for {}. "
                                  "Skipping.").format(src_repo["name"]))
                    continue

                enqueue_with_reservation(
                    import_repository_version,
                    [dest_repo],
                    args=[dest_repo.pk, src_repo['pulp_id'], path],
                    task_group=task_group,
                )
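
The `artifact/<sha256[:2]>/<sha256[2:]>` layout used in the artifact loop above is how artifacts are addressed on disk. A minimal standalone sketch of that convention (the helper name is illustrative, not part of the example):

import os

def artifact_relative_path(sha256_hex):
    # Mirrors the base_path construction above:
    # 'artifact/<first two hex digits>/<remaining digits>'
    return os.path.join("artifact", sha256_hex[:2], sha256_hex[2:])

# e.g. artifact_relative_path("ab12cd" + "0" * 58) -> "artifact/ab/12cd000...0"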
Example 2
def fs_publication_export(exporter_pk, publication_pk):
    """
    Export a publication to the file system.

    Args:
        exporter_pk (str): FilesystemExporter pk
        publication_pk (str): Publication pk
    """
    exporter = Exporter.objects.get(pk=exporter_pk).cast()
    publication = Publication.objects.get(pk=publication_pk).cast()
    export = FilesystemExport.objects.create(
        exporter=exporter,
        params={"publication": publication_pk},
        task=Task.current(),
    )
    ExportedResource.objects.create(export=export, content_object=publication)
    CreatedResource.objects.create(content_object=export)

    log.info(
        _("Exporting: file_system_exporter={exporter}, publication={publication}, path=path"
          ).format(exporter=exporter.name,
                   publication=publication.pk,
                   path=exporter.path))

    content_artifacts = ContentArtifact.objects.filter(
        pk__in=publication.published_artifact.values_list(
            "content_artifact__pk", flat=True))

    if publication.pass_through:
        content_artifacts |= ContentArtifact.objects.filter(
            content__in=publication.repository_version.content)

    _export_to_file_system(exporter.path, content_artifacts)
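
The example delegates the actual writing to _export_to_file_system, whose body is not shown here. A minimal sketch of what such a helper could do, assuming it copies each artifact file to its relative_path under the destination (relative_path and artifact are ContentArtifact fields; the copy strategy itself is an assumption):

import os
import shutil

def _export_to_file_system_sketch(path, content_artifacts):
    """Hypothetical helper: lay out artifact files under `path` by relative_path."""
    for ca in content_artifacts:
        if ca.artifact is None:
            continue  # on-demand content has no local file to write out
        dest = os.path.join(path, ca.relative_path)
        os.makedirs(os.path.dirname(dest), exist_ok=True)
        with ca.artifact.file.open("rb") as src, open(dest, "wb") as out:
            shutil.copyfileobj(src, out)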
Example 3
def fs_repo_version_export(exporter_pk, repo_version_pk):
    """
    Export a repository version to the file system.

    Args:
        exporter_pk (str): FilesystemExporter pk
        repo_version_pk (str): RepositoryVersion pk
    """
    exporter = Exporter.objects.get(pk=exporter_pk).cast()
    repo_version = RepositoryVersion.objects.get(pk=repo_version_pk)
    export = FilesystemExport.objects.create(
        exporter=exporter,
        params={"repository_version": repo_version_pk},
        task=Task.current(),
    )
    ExportedResource.objects.create(export=export, content_object=repo_version)
    CreatedResource.objects.create(content_object=export)

    log.info(
        _("Exporting: file_system_exporter={exporter}, repo_version={repo_version}, path=path"
          ).format(exporter=exporter.name,
                   repo_version=repo_version.pk,
                   path=exporter.path))

    content_artifacts = ContentArtifact.objects.filter(
        content__in=repo_version.content)

    _export_to_file_system(exporter.path, content_artifacts)
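
Like the publication variant above, this task is meant to be enqueued rather than called inline. A hedged caller-side sketch (the choice of reserved resources is an assumption):

# Hypothetical call site: reserve the exporter so concurrent exports to the same
# path cannot interleave, then hand over the primary keys as strings.
enqueue_with_reservation(
    fs_repo_version_export,
    [exporter],
    args=[str(exporter.pk), str(repo_version.pk)],
)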
Example 4
def pulp_export(pulp_exporter):
    """
    Create a PulpExport to export pulp_exporter.repositories

    1) Spit out all Artifacts, ArtifactResource.json, and RepositoryResource.json
    2) Spit out all *resource JSONs in per-repo-version directories
    3) Compute and store the sha256 and filename of the resulting tar.gz

    Args:
        pulp_exporter (models.PulpExporter): PulpExporter instance

    Raises:
        ValidationError: When path is not in the ALLOWED_EXPORT_PATHS setting,
            OR path exists and is not a directory
    """

    from pulpcore.app.serializers.exporter import ExporterSerializer
    ExporterSerializer.validate_path(pulp_exporter.path, check_is_dir=True)

    repositories = pulp_exporter.repositories.all()
    export = PulpExport.objects.create(exporter=pulp_exporter, task=Task.current(), params=None)
    tarfile_fp = export.export_tarfile_path()
    os.makedirs(pulp_exporter.path, exist_ok=True)

    with tarfile.open(tarfile_fp, 'w:gz') as tar:
        export.tarfile = tar
        CreatedResource.objects.create(content_object=export)

        artifacts = []
        repo_versions = []
        # Gather up the versions and artifacts
        for repo in repositories:
            version = repo.latest_version()
            # Check version-content to make sure we're not being asked to export an on_demand repo
            content_artifacts = ContentArtifact.objects.filter(content__in=version.content)
            if content_artifacts.filter(artifact=None).exists():
                raise RuntimeError(_("Remote artifacts cannot be exported."))

            repo_versions.append(version)
            artifacts.extend(version.artifacts.all())

        from pulpcore.app.importexport import export_artifacts, export_content
        # Export the top-level entities (artifacts and repositories)
        export_artifacts(export, artifacts, pulp_exporter.last_export)
        # Export the repository-version data, per-version
        for version in repo_versions:
            export_content(export, version, pulp_exporter.last_export)
            ExportedResource.objects.create(export=export, content_object=version)

    sha256_hash = hashlib.sha256()
    with open(tarfile_fp, "rb") as f:
        # Read and update hash string value in blocks of 4K
        for byte_block in iter(lambda: f.read(4096), b""):
            sha256_hash.update(byte_block)
        export.sha256 = sha256_hash.hexdigest()
    export.filename = tarfile_fp
    export.save()
    pulp_exporter.last_export = export
    pulp_exporter.save()
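
The block-wise sha256 loop at the end of this example recurs in several of the other tasks in this listing. A small reusable helper of roughly this shape (the name is an assumption) captures the pattern:

import hashlib

def sha256_of_file(path, block_size=4096):
    """Hex sha256 of a file, read in fixed-size blocks to bound memory use."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for block in iter(lambda: f.read(block_size), b""):
            digest.update(block)
    return digest.hexdigest()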
Example 5
def create_profile_db_and_connection():
    """
    Create a profile db from this task's UUID and a sqlite3 connection to that database.

    The database produced has three tables with the following SQL format:

    The `stages` table stores info about the pipeline itself and has 3 fields:
    * uuid - the uuid of the stage
    * name - the name of the stage
    * num - the number of the stage starting at 0

    The `traffic` table stores 3 fields:
    * uuid - the uuid of the stage this queue feeds into
    * waiting_time - the amount of time the item is waiting in the queue before it enters the stage.
    * service_time - the service time the item spent in the stage.

    The `system` table stores 3 fields:
    * uuid - The uuid of stage this queue feeds into
    * length - The number of items in this queue, measured just before each arrival.
    * interarrival_time - The amount of time since the last arrival.
    """
    debug_data_dir = "/var/lib/pulp/debug/"
    pathlib.Path(debug_data_dir).mkdir(parents=True, exist_ok=True)
    current_task = Task.current()
    if current_task:
        db_path = debug_data_dir + str(current_task.pk)
    else:
        db_path = debug_data_dir + str(uuid.uuid4())

    import sqlite3

    global CONN
    CONN = sqlite3.connect(db_path)
    c = CONN.cursor()

    # Create table
    c.execute(
        """CREATE TABLE stages
                 (uuid varchar(36), name text, num int)"""
    )

    # Create table
    c.execute(
        """CREATE TABLE traffic
                 (uuid varchar(36), waiting_time real, service_time real)"""
    )

    # Create table
    c.execute(
        """CREATE TABLE system
                 (uuid varchar(36), length int, interarrival_time real)"""
    )

    return CONN
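
Once a pipeline run has filled these tables, the profile database can be summarized with plain SQL. A minimal sketch using the schema created above (the db path is a placeholder):

import sqlite3

conn = sqlite3.connect("/var/lib/pulp/debug/<task-uuid>")  # placeholder path
query = """
    SELECT s.num, s.name, AVG(t.waiting_time), AVG(t.service_time)
    FROM stages s JOIN traffic t ON t.uuid = s.uuid
    GROUP BY s.uuid ORDER BY s.num
"""
for num, name, avg_wait, avg_service in conn.execute(query):
    print(f"stage {num} ({name}): wait {avg_wait:.3f}s, service {avg_service:.3f}s")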
Example 6
def dispatch(func, resources, args=None, kwargs=None, task_group=None):
    """
    Enqueue a message to Pulp workers with a reservation.

    This method provides normal enqueue functionality, while also requesting necessary locks for
    serialized urls. No two tasks that claim the same resource can execute concurrently. It
    accepts resources which it transforms into a list of urls (one for each resource).

    This method creates a :class:`pulpcore.app.models.Task` object and returns it.

    The values in `args` and `kwargs` must be JSON serializable, but may contain instances of
    ``uuid.UUID``.

    Args:
        func (callable): The function to be run by RQ when the necessary locks are acquired.
        resources (list): A list of resources this task needs exclusive access to while running.
                          Each resource can be either a `str` or a `django.models.Model` instance.
        args (tuple): The positional arguments to pass on to the task.
        kwargs (dict): The keyword arguments to pass on to the task.
        task_group (pulpcore.app.models.TaskGroup): A TaskGroup to add the created Task to.

    Returns (pulpcore.app.models.Task): The Pulp Task that was created.

    Raises:
        ValueError: When `resources` is an unsupported type.
    """
    if settings.USE_NEW_WORKER_TYPE:
        args_as_json = json.dumps(args, cls=UUIDEncoder)
        kwargs_as_json = json.dumps(kwargs, cls=UUIDEncoder)
        resources = _validate_and_get_resources(resources)
        with transaction.atomic():
            task = Task.objects.create(
                state=TASK_STATES.WAITING,
                logging_cid=(get_guid() or ""),
                task_group=task_group,
                name=f"{func.__module__}.{func.__name__}",
                args=args_as_json,
                kwargs=kwargs_as_json,
                parent_task=Task.current(),
                reserved_resources_record=resources,
            )
        # Notify workers
        with db_connection.connection.cursor() as cursor:
            cursor.execute("NOTIFY pulp_worker_wakeup")
        return task
    else:
        RQ_job_id = _enqueue_with_reservation(func,
                                              resources=resources,
                                              args=args,
                                              kwargs=kwargs,
                                              task_group=task_group)
        return Task.objects.get(pk=RQ_job_id.id)
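
A typical call site passes the task function, the resources to lock, and JSON-serializable arguments. A minimal hedged sketch (the function and resource chosen here are just for illustration):

# Hypothetical call site: run fs_publication_export with exclusive access to the exporter.
task = dispatch(
    fs_publication_export,
    [exporter],                                     # str or Model instances to reserve
    args=(str(exporter.pk), str(publication.pk)),   # must be JSON serializable; UUIDs are allowed
)
# `task` is the created pulpcore.app.models.Task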
Example 7
    def save(self, *args, **kwargs):
        """
        Auto-set the task_id if running inside a task

        If the task_id is already set it will not be updated. If it is unset and this is running
        inside of a task it will be auto-set prior to saving.

        args (list): positional arguments to be passed on to the real save
        kwargs (dict): keyword arguments to be passed on to the real save
        """
        now = timezone.now()

        if not self.task_id:
            self.task = Task.current()

        if self._using_context_manager and self._last_save_time:
            if now - self._last_save_time >= datetime.timedelta(
                    milliseconds=BATCH_INTERVAL):
                super().save(*args, **kwargs)
                self._last_save_time = now
        else:
            super().save(*args, **kwargs)
            self._last_save_time = now
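
This throttled save is what makes the ProgressReport pattern used in the other examples cheap. It is used roughly like this (the message/code values and per-item work are illustrative):

data = dict(message="Doing work", code="do.work", total=len(items))
with ProgressReport(**data) as pb:     # entering the context enables batched saves
    for item in pb.iter(items):        # each iteration updates progress; saves are rate-limited
        process(item)                  # hypothetical per-item work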
Example 8
def pulp_import(importer_pk, path, toc):
    """
    Import a Pulp export into Pulp.

    Args:
        importer_pk (str): Primary key of PulpImporter to do the import
        path (str): Path to the export to be imported
        toc (str): Path to the table-of-contents file for a chunked export, if any
    """
    def _compute_hash(filename):
        sha256_hash = hashlib.sha256()
        with open(filename, "rb") as f:
            # Read and update hash string value in blocks of 4K
            for byte_block in iter(lambda: f.read(4096), b""):
                sha256_hash.update(byte_block)
            return sha256_hash.hexdigest()

    def validate_toc(toc_filename):
        """
        Check validity of table-of-contents file.

        table-of-contents must:
          * exist
          * be valid JSON
          * point to chunked-export-files that exist 'next to' the 'toc' file
          * point to chunks whose checksums match the checksums stored in the 'toc' file

        Args:
            toc_filename (str): The user-provided toc-file-path to be validated.

        Raises:
            ValidationError: If toc is not a valid JSON table-of-contents file,
            or when toc points to chunked-export-files that can't be found in the same
            directory as the toc-file, or the checksums of the chunks do not match the
            checksums stored in toc.
        """
        with open(toc_filename) as json_file:
            # Valid JSON?
            the_toc = json.load(json_file)
            if not the_toc.get("files", None) or not the_toc.get("meta", None):
                raise ValidationError(
                    _("Missing 'files' or 'meta' keys in table-of-contents!"))

            base_dir = os.path.dirname(toc_filename)
            # Points at chunks that exist?
            missing_files = []
            for f in sorted(the_toc["files"].keys()):
                if not os.path.isfile(os.path.join(base_dir, f)):
                    missing_files.append(f)
            if missing_files:
                raise ValidationError(
                    _("Missing import-chunks named in table-of-contents: {}.".
                      format(str(missing_files))))

            errs = []
            # validate the sha256 of the toc-entries
            # gather errors for reporting at the end
            chunks = sorted(the_toc["files"].keys())
            data = dict(message="Validating Chunks",
                        code="validate.chunks",
                        total=len(chunks))
            with ProgressReport(**data) as pb:
                for chunk in pb.iter(chunks):
                    a_hash = _compute_hash(os.path.join(base_dir, chunk))
                    if not a_hash == the_toc["files"][chunk]:
                        err_str = "File {} expected checksum : {}, computed checksum : {}".format(
                            chunk, the_toc["files"][chunk], a_hash)
                        errs.append(err_str)

            # if there are any errors, report and fail
            if errs:
                raise ValidationError(
                    _("Import chunk hash mismatch: {}).").format(str(errs)))

        return the_toc

    def validate_and_assemble(toc_filename):
        """Validate checksums of, and reassemble, chunks in table-of-contents file."""
        the_toc = validate_toc(toc_filename)
        toc_dir = os.path.dirname(toc_filename)
        result_file = os.path.join(toc_dir, the_toc["meta"]["file"])

        # if we have only one entry in "files", it must be the full .tar.gz - return it
        if len(the_toc["files"]) == 1:
            return os.path.join(toc_dir, list(the_toc["files"].keys())[0])

        # We have multiple chunks.
        # reassemble into one file 'next to' the toc and return the resulting full-path
        chunk_size = int(the_toc["meta"]["chunk_size"])
        offset = 0
        block_size = 1024
        blocks_per_chunk = int(chunk_size / block_size)

        # sorting-by-filename is REALLY IMPORTANT here
        # keys are of the form <base-export-name>.00..<base-export-name>.NN,
        # and must be reassembled IN ORDER
        the_chunk_files = sorted(the_toc["files"].keys())

        data = dict(message="Recombining Chunks",
                    code="recombine.chunks",
                    total=len(the_chunk_files))
        with ProgressReport(**data) as pb:
            for chunk in pb.iter(the_chunk_files):
                # For each chunk, add it to the reconstituted tar.gz, picking up where the previous
                # chunk left off
                subprocess.run([
                    "dd",
                    "if={}".format(os.path.join(toc_dir, chunk)),
                    "of={}".format(result_file),
                    "bs={}".format(str(block_size)),
                    "seek={}".format(str(offset)),
                ], )
                offset += blocks_per_chunk
                # To keep from taking up All The Disk, we delete each chunk after it has been added
                # to the recombined file.
                try:
                    subprocess.run(["rm", "-f", os.path.join(toc_dir, chunk)])
                except OSError:
                    log.warning(
                        _("Failed to remove chunk {} after recombining. Continuing."
                          ).format(os.path.join(toc_dir, chunk)),
                        exc_info=True,
                    )

        combined_hash = _compute_hash(result_file)
        if combined_hash != the_toc["meta"]["global_hash"]:
            raise ValidationError(
                _("Mismatch between combined .tar.gz checksum [{}] and originating [{}])."
                  ).format(combined_hash, the_toc["meta"]["global_hash"]))
        # if we get this far, then: the chunk-files all existed, they all pass checksum validation,
        # and there exists a combined .tar.gz, which *also* passes checksum-validation.
        # Let the rest of the import process do its thing on the new combined-file.
        return result_file

    if toc:
        log.info(_("Validating TOC {}.").format(toc))
        path = validate_and_assemble(toc)

    log.info(_("Importing {}.").format(path))
    current_task = Task.current()
    importer = PulpImporter.objects.get(pk=importer_pk)
    the_import = PulpImport.objects.create(importer=importer,
                                           task=current_task,
                                           params={"path": path})
    CreatedResource.objects.create(content_object=the_import)

    task_group = TaskGroup.objects.create(description=f"Import of {path}")
    Task.objects.filter(pk=current_task.pk).update(task_group=task_group)
    current_task.refresh_from_db()
    CreatedResource.objects.create(content_object=task_group)

    with tempfile.TemporaryDirectory() as temp_dir:
        with tarfile.open(path, "r:gz") as tar:
            tar.extractall(path=temp_dir)

        # Check version info
        with open(os.path.join(temp_dir, VERSIONS_FILE)) as version_file:
            version_json = json.load(version_file)
            _check_versions(version_json)

        # Artifacts
        ar_result = _import_file(os.path.join(temp_dir, ARTIFACT_FILE),
                                 ArtifactResource)
        data = dict(message="Importing Artifacts",
                    code="import.artifacts",
                    total=len(ar_result.rows))
        with ProgressReport(**data) as pb:
            for row in pb.iter(ar_result.rows):
                artifact = Artifact.objects.get(pk=row.object_id)
                base_path = os.path.join("artifact", artifact.sha256[0:2],
                                         artifact.sha256[2:])
                src = os.path.join(temp_dir, base_path)
                dest = os.path.join(settings.MEDIA_ROOT, base_path)

                if not default_storage.exists(dest):
                    with open(src, "rb") as f:
                        default_storage.save(dest, f)

        with open(os.path.join(temp_dir, REPO_FILE), "r") as repo_data_file:
            data = json.load(repo_data_file)
            gpr = GroupProgressReport(
                message="Importing repository versions",
                code="import.repo.versions",
                total=len(data),
                done=0,
                task_group=task_group,
            )
            gpr.save()

            for src_repo in data:
                try:
                    dest_repo = _destination_repo(importer, src_repo["name"])
                except Repository.DoesNotExist:
                    log.warning(
                        _("Could not find destination repo for {}. Skipping.").
                        format(src_repo["name"]))
                    continue

                dispatch(
                    import_repository_version,
                    [dest_repo],
                    args=[importer.pk, dest_repo.pk, src_repo["name"], path],
                    task_group=task_group,
                )

    task_group.finish()
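
For reference, the table-of-contents file consumed by validate_toc() has the shape written by the chunked-export task later in this listing. An illustrative instance (names and hashes are made up):

example_toc = {
    "meta": {
        "chunk_size": 1073741824,          # bytes per chunk; 0 for a single-file export
        "file": "export.tar.gz",           # name of the reassembled archive
        "global_hash": "<sha256 of the full archive>",
    },
    "files": {                             # chunk name -> sha256; suffixes sort in order
        "export.tar.gz.0000": "<sha256 of chunk 0>",
        "export.tar.gz.0001": "<sha256 of chunk 1>",
    },
}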
Example 9
def pulp_export(the_export):
    """
    Create a PulpExport to export pulp_exporter.repositories.

    1) Spit out all Artifacts, ArtifactResource.json, and RepositoryResource.json
    2) Spit out all *resource JSONs in per-repo-version directories
    3) Compute and store the sha256 and filename of the resulting tar.gz/chunks

    Args:
        the_export (models.PulpExport): PulpExport instance

    Raises:
        ValidationError: When path is not in the ALLOWED_EXPORT_PATHS setting,
            OR path exists and is not a directory
    """
    try:
        pulp_exporter = the_export.exporter
        the_export.task = Task.current()

        tarfile_fp = the_export.export_tarfile_path()

        path = Path(pulp_exporter.path)
        if not path.is_dir():
            path.mkdir(mode=0o775, parents=True)

        rslts = {}
        if the_export.validated_chunk_size:
            # write it into chunks
            with subprocess.Popen(
                [
                    "split",
                    "-a",
                    "4",
                    "-b",
                    str(the_export.validated_chunk_size),
                    "-d",
                    "-",
                    tarfile_fp + ".",
                ],
                stdin=subprocess.PIPE,
            ) as split_process:
                try:
                    with tarfile.open(tarfile_fp, "w|gz", fileobj=split_process.stdin) as tar:
                        _do_export(pulp_exporter, tar, the_export)
                except Exception:
                    # no matter what went wrong, we can't trust the files we (may have) created.
                    # Delete the ones we can find and pass the problem up.
                    for pathname in glob(tarfile_fp + ".*"):
                        os.remove(pathname)
                    raise
            # compute the hashes
            global_hash = hashlib.sha256()
            paths = sorted([str(Path(p)) for p in glob(tarfile_fp + ".*")])
            for a_file in paths:
                a_hash = _compute_hash(a_file, global_hash)
                rslts[a_file] = a_hash
            tarfile_hash = global_hash.hexdigest()

        else:
            # write into the file
            try:
                with tarfile.open(tarfile_fp, "w:gz") as tar:
                    _do_export(pulp_exporter, tar, the_export)
            except Exception:
                # no matter what went wrong, we can't trust the file we created.
                # Delete it if it exists and pass the problem up.
                if os.path.exists(tarfile_fp):
                    os.remove(tarfile_fp)
                raise
            # compute the hash
            tarfile_hash = _compute_hash(tarfile_fp)
            rslts[tarfile_fp] = tarfile_hash

        # store the outputfile/hash info
        the_export.output_file_info = rslts

        # write outputfile/hash info to a file 'next to' the output file(s)
        output_file_info_path = tarfile_fp.replace(".tar.gz", "-toc.json")
        with open(output_file_info_path, "w") as outfile:
            if the_export.validated_chunk_size:
                chunk_size = the_export.validated_chunk_size
            else:
                chunk_size = 0
            chunk_toc = {
                "meta": {
                    "chunk_size": chunk_size,
                    "file": os.path.basename(tarfile_fp),
                    "global_hash": tarfile_hash,
                },
                "files": {},
            }
            # Build a toc with just filenames (not the path on the exporter-machine)
            for a_path in rslts.keys():
                chunk_toc["files"][os.path.basename(a_path)] = rslts[a_path]
            json.dump(chunk_toc, outfile)

        # store toc info
        toc_hash = _compute_hash(output_file_info_path)
        the_export.output_file_info[output_file_info_path] = toc_hash
        the_export.toc_info = {"file": output_file_info_path, "sha256": toc_hash}
    finally:
        # whatever may have happened, make sure we save the export
        the_export.save()

    # If an exception was thrown, we'll never get here - which is good, because we don't want a
    # 'failed' export to be the last_export we derive the next incremental from
    # mark it as 'last'
    pulp_exporter.last_export = the_export
    # save the exporter
    pulp_exporter.save()
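
_compute_hash() is called here both with and without a running global digest. Its body is not shown, but a helper of roughly this shape (an assumption) would satisfy both call sites:

import hashlib

def _compute_hash(filename, global_hash=None):
    """Hex sha256 of one file, optionally also feeding a shared running digest."""
    digest = hashlib.sha256()
    with open(filename, "rb") as f:
        for block in iter(lambda: f.read(4096), b""):
            digest.update(block)
            if global_hash is not None:
                global_hash.update(block)
    return digest.hexdigest()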
Example 10
def pulp_import(importer_pk, path):
    """
    Import a Pulp export into Pulp.

    Args:
        importer_pk (str): Primary key of PulpImporter to do the import
        path (str): Path to the export to be imported
    """
    def import_file(fpath, resource_class):
        log.info(_("Importing file {}.").format(fpath))
        with open(fpath, "r") as json_file:
            data = Dataset().load(json_file.read(), format="json")
            resource = resource_class()
            return resource.import_data(data, raise_errors=True)

    def destination_repo(source_repo_name):
        """Find the destination repository based on source repo's name."""
        if importer.repo_mapping and importer.repo_mapping.get(
                source_repo_name):
            dest_repo_name = importer.repo_mapping[source_repo_name]
        else:
            dest_repo_name = source_repo_name
        return Repository.objects.get(name=dest_repo_name)

    def repo_version_path(temp_dir, src_repo):
        """Find the repo version path in the export based on src_repo json."""
        src_repo_version = int(src_repo["next_version"]) - 1
        return os.path.join(
            temp_dir, f"repository-{src_repo['pulp_id']}_{src_repo_version}")

    log.info(_("Importing {}.").format(path))
    importer = PulpImporter.objects.get(pk=importer_pk)
    pulp_import = PulpImport.objects.create(importer=importer,
                                            task=Task.current(),
                                            params={"path": path})
    CreatedResource.objects.create(content_object=pulp_import)

    with tempfile.TemporaryDirectory() as temp_dir:
        with tarfile.open(path, "r|gz") as tar:
            tar.extractall(path=temp_dir)

        # Artifacts
        ar_result = import_file(os.path.join(temp_dir, ARTIFACT_FILE),
                                ArtifactResource)
        for row in ar_result.rows:
            artifact = Artifact.objects.get(pk=row.object_id)
            base_path = os.path.join('artifact', artifact.sha256[0:2],
                                     artifact.sha256[2:])
            src = os.path.join(temp_dir, base_path)
            dest = os.path.join(settings.MEDIA_ROOT, base_path)

            if not default_storage.exists(dest):
                with open(src, 'rb') as f:
                    default_storage.save(dest, f)

        # Repo Versions
        with open(os.path.join(temp_dir, REPO_FILE), "r") as repo_data_file:
            data = json.load(repo_data_file)

            for src_repo in data:
                try:
                    dest_repo = destination_repo(src_repo["name"])
                except Repository.DoesNotExist:
                    log.warning(
                        _("Could not find destination repo for {}. "
                          "Skipping.").format(src_repo["name"]))
                    continue

                rv_path = repo_version_path(temp_dir, src_repo)

                # Untyped Content
                content_path = os.path.join(rv_path, CONTENT_FILE)
                c_result = import_file(content_path, ContentResource)
                content = Content.objects.filter(
                    pk__in=[r.object_id for r in c_result.rows])

                # Content Artifacts
                ca_path = os.path.join(rv_path, CA_FILE)
                import_file(ca_path, ContentArtifactResource)

                # Content
                plugin_name = src_repo["pulp_type"].split('.')[0]
                cfg = get_plugin_config(plugin_name)
                for res_class in cfg.exportable_classes:
                    filename = f"{res_class.__module__}.{res_class.__name__}.json"
                    import_file(os.path.join(rv_path, filename), res_class)

                # Create the repo version
                with dest_repo.new_version() as new_version:
                    new_version.set_content(content)

    return importer
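
To make the per-version directory naming concrete, here is what repo_version_path() above resolves for a sample entry from the repo data file (all values are made up):

# Given a src_repo entry shaped like this:
src_repo = {"name": "foo", "pulp_id": "1234-abcd", "pulp_type": "file.file", "next_version": 3}

# repo_version_path(temp_dir, src_repo) would point at the per-version directory
# of the extracted export:
#   <temp_dir>/repository-1234-abcd_2        (i.e. next_version - 1)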
Example 11
def pulp_export(the_export):
    """
    Create a PulpExport to export pulp_exporter.repositories.

    1) Spit out all Artifacts, ArtifactResource.json, and RepositoryResource.json
    2) Spit out all *resource JSONs in per-repo-version directories
    3) Compute and store the sha256 and filename of the resulting tar.gz/chunks

    Args:
        the_export (models.PulpExport): PulpExport instance

    Raises:
        ValidationError: When path is not in the ALLOWED_EXPORT_PATHS setting,
            OR path exists and is not a directory
    """

    pulp_exporter = the_export.exporter
    the_export.task = Task.current()

    tarfile_fp = the_export.export_tarfile_path()
    os.makedirs(pulp_exporter.path, exist_ok=True)
    rslts = {}

    if the_export.validated_chunk_size:
        # write it into chunks
        with subprocess.Popen(
            [
                "split",
                "-a",
                "4",
                "-b",
                str(the_export.validated_chunk_size),
                "-d",
                "-",
                tarfile_fp + ".",
            ],
            stdin=subprocess.PIPE,
        ) as split_process:
            with tarfile.open(tarfile_fp, "w|gz", fileobj=split_process.stdin) as tar:
                _do_export(pulp_exporter, tar, the_export)

        # compute the hashes
        paths = [str(Path(p)) for p in glob(tarfile_fp + ".*")]
        for a_file in paths:
            a_hash = _compute_hash(a_file)
            rslts[a_file] = a_hash
    else:
        # write into the file
        with tarfile.open(tarfile_fp, "w:gz") as tar:
            _do_export(pulp_exporter, tar, the_export)
        # compute the hash
        tarfile_hash = _compute_hash(tarfile_fp)
        rslts[tarfile_fp] = tarfile_hash

    # store the outputfile/hash info
    the_export.output_file_info = rslts
    # save the export
    the_export.save()
    # mark it as 'last'
    pulp_exporter.last_export = the_export
    # save the exporter
    pulp_exporter.save()
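
The `split -a 4 -d` invocation above leaves four-digit, zero-padded chunk files next to the archive path; when those files are gathered for hashing (or for reassembly by the import task earlier in this listing), sorting by filename preserves chunk order. A short hedged illustration:

from glob import glob

tarfile_fp = "/exports/export.tar.gz"          # illustrative path
# `split -a 4 -d - <tarfile_fp>.` writes chunks named
#   /exports/export.tar.gz.0000, /exports/export.tar.gz.0001, ...
chunk_paths = sorted(glob(tarfile_fp + ".*"))  # zero-padded suffixes keep string sort == chunk order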