def import_repository_version(importer_pk, destination_repo_pk, source_repo_name, tar_path):
    """
    Import a repository version from a Pulp export.

    The export tar (opened as a gzip-compressed archive) is read in three passes: once for
    the repository metadata file, once for the files of the matching repository version,
    and once for the optional content-mapping file. Exported resources are then imported
    and attached to a new version of the destination repository (or of the repositories
    named in the content mapping, if one is present).

    Args:
        importer_pk (str): Importer we are working with
        destination_repo_pk (str): Primary key of Repository to import into.
        source_repo_name (str): Name of the Repository in the export.
        tar_path (str): A path to export tar.

    Raises:
        ValidationError: If the source repository is not listed in the export, if its
            type does not match the destination repository's type, or if the export
            contains no repository-version directory for the source repository.
    """
    dest_repo = Repository.objects.get(pk=destination_repo_pk)
    importer = PulpImporter.objects.get(pk=importer_pk)

    pb = ProgressReport(
        message=f"Importing content for {dest_repo.name}",
        code="import.repo.version.content",
        state=TASK_STATES.RUNNING,
    )
    pb.save()

    with tempfile.TemporaryDirectory() as temp_dir:
        # Extract the repo file for the repo info
        with tarfile.open(tar_path, "r:gz") as tar:
            tar.extract(REPO_FILE, path=temp_dir)

        with open(os.path.join(temp_dir, REPO_FILE), "r") as repo_data_file:
            data = json.load(repo_data_file)

        # Fail with a descriptive error instead of a bare StopIteration when the
        # requested repository is not part of the export.
        src_repo = next((repo for repo in data if repo["name"] == source_repo_name), None)
        if src_repo is None:
            raise ValidationError(
                _("Repository {} not found in export {}").format(source_repo_name, tar_path)
            )

        if dest_repo.pulp_type != src_repo["pulp_type"]:
            raise ValidationError(
                _(
                    "Repository type mismatch: {src_repo} ({src_type}) vs {dest_repo} "
                    "({dest_type})."
                ).format(
                    src_repo=src_repo["name"],
                    src_type=src_repo["pulp_type"],
                    dest_repo=dest_repo.name,
                    dest_type=dest_repo.pulp_type,
                )
            )

        # Extract the repo version files. The repository name is escaped so that
        # regex metacharacters in it are matched literally, and the pattern is
        # compiled once rather than per archive member.
        rv_pattern = re.compile(fr"(^repository-{re.escape(source_repo_name)}_[0-9]+)/.+")
        rv_name = ""
        with tarfile.open(tar_path, "r:gz") as tar:
            for mem in tar.getmembers():
                match = rv_pattern.search(mem.name)
                if match:
                    rv_name = match.group(1)
                    tar.extract(mem, path=temp_dir)

        if not rv_name:
            # rv_name is empty on this branch, so report the repository we looked for.
            raise ValidationError(
                _("No RepositoryVersion found for {}").format(source_repo_name)
            )

        rv_path = os.path.join(temp_dir, rv_name)

        # Content
        plugin_name = src_repo["pulp_type"].split(".")[0]
        cfg = get_plugin_config(plugin_name)

        resulting_content_ids = []
        for res_class in cfg.exportable_classes:
            filename = f"{res_class.__module__}.{res_class.__name__}.json"
            a_result = _import_file(os.path.join(rv_path, filename), res_class, do_raise=False)
            # django import-export can have a problem with concurrent-imports that are
            # importing the same 'thing' (e.g., a Package that exists in two different
            # repo-versions that are being imported at the same time). We will try an import
            # that will simply record errors as they happen (rather than failing with an
            # exception) first. If errors happen, we'll do one retry before we give up on
            # this repo-version's import.
            if a_result.has_errors():
                log.info(
                    _("...{} import-errors encountered importing {} from {}, retrying").format(
                        a_result.totals["error"], filename, rv_name
                    )
                )
                # Second attempt, we allow to raise an exception on any problem.
                # This will either succeed, or log a fatal error and fail.
                try:
                    a_result = _import_file(os.path.join(rv_path, filename), res_class)
                except Exception:  # log on ANY exception and then re-raise
                    log.error(
                        _("FATAL import-failure importing {} from {}").format(filename, rv_name)
                    )
                    raise

            resulting_content_ids.extend(
                row.object_id for row in a_result.rows if row.import_type in ("new", "update")
            )

        # Once all content exists, create the ContentArtifact links
        ca_path = os.path.join(rv_path, CA_FILE)
        _import_file(ca_path, ContentArtifactResource)

        # see if we have a content mapping
        mapping_path = f"{rv_name}/{CONTENT_MAPPING_FILE}"
        mapping = {}
        with tarfile.open(tar_path, "r:gz") as tar:
            if mapping_path in tar.getnames():
                tar.extract(mapping_path, path=temp_dir)
                with open(os.path.join(temp_dir, mapping_path), "r") as mapping_file:
                    mapping = json.load(mapping_file)

        if mapping:
            # use the content mapping to map content to repos
            for repo_name, content_ids in mapping.items():
                repo = _destination_repo(importer, repo_name)
                content = Content.objects.filter(upstream_id__in=content_ids)
                with repo.new_version() as new_version:
                    new_version.set_content(content)
        else:
            # just map all the content to our destination repo
            content = Content.objects.filter(pk__in=resulting_content_ids)
            with dest_repo.new_version() as new_version:
                new_version.set_content(content)

        # NOTE(review): when a mapping is used, `content` is the queryset of the last
        # mapped repository only, so the reported totals cover just that repository --
        # confirm this is intended.
        content_count = content.count()
        pb.total = content_count
        pb.done = content_count
        pb.state = TASK_STATES.COMPLETED
        pb.save()

    # Bump the task group's "repo versions imported" counter atomically in the database.
    gpr = TaskGroup.current().group_progress_reports.filter(code="import.repo.versions")
    gpr.update(done=F("done") + 1)
def import_repository_version(importer_pk, destination_repo_pk, source_repo_name, tar_path):
    """
    Import a repository version from a Pulp export.

    The export tar (opened as a gzip-compressed archive) is read in three passes: once for
    the repository metadata file, once for the files under the source repository's
    version directory, and once for the optional content-mapping file. Exported resources
    are then imported and attached to a new version of the destination repository (or of
    the repositories named in the content mapping, if one is present).

    Args:
        importer_pk (str): Importer we are working with
        destination_repo_pk (str): Primary key of Repository to import into.
        source_repo_name (str): Name of the Repository in the export.
        tar_path (str): A path to export tar.

    Raises:
        ValidationError: If the source repository is not listed in the export, or if its
            type does not match the destination repository's type.
    """
    dest_repo = Repository.objects.get(pk=destination_repo_pk)
    importer = PulpImporter.objects.get(pk=importer_pk)

    pb = ProgressReport(
        message=f"Importing content for {dest_repo.name}",
        code="import.repo.version.content",
        state=TASK_STATES.RUNNING,
    )
    pb.save()

    with tempfile.TemporaryDirectory() as temp_dir:
        # Extract the repo file for the repo info
        with tarfile.open(tar_path, "r:gz") as tar:
            tar.extract(REPO_FILE, path=temp_dir)

        with open(os.path.join(temp_dir, REPO_FILE), "r") as repo_data_file:
            data = json.load(repo_data_file)

        # Fail with a descriptive error instead of a bare StopIteration when the
        # requested repository is not part of the export.
        src_repo = next((repo for repo in data if repo["name"] == source_repo_name), None)
        if src_repo is None:
            raise ValidationError(
                _("Repository {} not found in export {}").format(source_repo_name, tar_path)
            )

        # Computed once and reused below; assumes _repo_version_path() returns the same
        # value for the same src_repo on every call -- TODO confirm.
        rv_name = _repo_version_path(src_repo)
        rv_path = os.path.join(temp_dir, rv_name)

        if dest_repo.pulp_type != src_repo["pulp_type"]:
            raise ValidationError(
                _(
                    "Repository type mismatch: {src_repo} ({src_type}) vs {dest_repo} "
                    "({dest_type})."
                ).format(
                    src_repo=src_repo["name"],
                    src_type=src_repo["pulp_type"],
                    dest_repo=dest_repo.name,
                    dest_type=dest_repo.pulp_type,
                )
            )

        # Extract the repo version files. The directory name is escaped so that any
        # regex metacharacters in it are matched literally, and the pattern is compiled
        # once rather than rebuilt for every archive member.
        member_pattern = re.compile(fr"^{re.escape(rv_name)}/.+")
        with tarfile.open(tar_path, "r:gz") as tar:
            for mem in tar.getmembers():
                if member_pattern.match(mem.name):
                    tar.extract(mem, path=temp_dir)

        # Content
        plugin_name = src_repo["pulp_type"].split(".")[0]
        cfg = get_plugin_config(plugin_name)

        resulting_content_ids = []
        for res_class in cfg.exportable_classes:
            filename = f"{res_class.__module__}.{res_class.__name__}.json"
            a_result = _import_file(os.path.join(rv_path, filename), res_class)
            # Only rows that created or updated an object contribute content ids.
            resulting_content_ids.extend(
                row.object_id for row in a_result.rows if row.import_type in ("new", "update")
            )

        # Once all content exists, create the ContentArtifact links
        ca_path = os.path.join(rv_path, CA_FILE)
        _import_file(ca_path, ContentArtifactResource)

        # see if we have a content mapping
        mapping_path = f"{rv_name}/{CONTENT_MAPPING_FILE}"
        mapping = {}
        with tarfile.open(tar_path, "r:gz") as tar:
            if mapping_path in tar.getnames():
                tar.extract(mapping_path, path=temp_dir)
                with open(os.path.join(temp_dir, mapping_path), "r") as mapping_file:
                    mapping = json.load(mapping_file)

        if mapping:
            # use the content mapping to map content to repos
            for repo_name, content_ids in mapping.items():
                repo = _destination_repo(importer, repo_name)
                content = Content.objects.filter(upstream_id__in=content_ids)
                with repo.new_version() as new_version:
                    new_version.set_content(content)
        else:
            # just map all the content to our destination repo
            content = Content.objects.filter(pk__in=resulting_content_ids)
            with dest_repo.new_version() as new_version:
                new_version.set_content(content)

        # NOTE(review): when a mapping is used, `content` is the queryset of the last
        # mapped repository only, so the reported totals cover just that repository --
        # confirm this is intended.
        content_count = content.count()
        pb.total = content_count
        pb.done = content_count
        pb.state = TASK_STATES.COMPLETED
        pb.save()

    # Bump the task group's "repo versions imported" counter atomically in the database.
    gpr = TaskGroup.current().group_progress_reports.filter(code="import.repo.versions")
    gpr.update(done=F("done") + 1)