def create_content(artifact_sha256, filename):
    """
    Creates PythonPackageContent from an artifact.

    Args:
        artifact_sha256: validated artifact
        filename: file name

    Returns:
        queryset of the newly created content
    """
    artifact = Artifact.objects.get(sha256=artifact_sha256)
    metadata = get_project_metadata_from_artifact(filename, artifact)

    data = parse_project_metadata(vars(metadata))
    data['packagetype'] = metadata.packagetype
    data['version'] = metadata.version
    data['filename'] = filename
    data['sha256'] = artifact.sha256

    @transaction.atomic()
    def create():
        content = PythonPackageContent.objects.create(**data)
        ContentArtifact.objects.create(
            artifact=artifact, content=content, relative_path=filename)
        return content

    new_content = create()
    resource = CreatedResource(content_object=new_content)
    resource.save()

    return PythonPackageContent.objects.filter(pk=new_content.pk)

def new_version(self, base_version=None):
    """
    Create a new RepositoryVersion for this Repository.

    Creation of a RepositoryVersion should be done in an RQ Job.

    Args:
        base_version (pulpcore.app.models.RepositoryVersion): an optional repository
            version whose content will be used as the set of content for the new version

    Returns:
        pulpcore.app.models.RepositoryVersion: the created RepositoryVersion
    """
    with transaction.atomic():
        version = RepositoryVersion(
            repository=self, number=int(self.next_version), base_version=base_version)
        version.save()

        if base_version:
            # first remove any content that isn't in the base version
            version.remove_content(version.content.exclude(pk__in=base_version.content))
            # then add any content that's in the base_version but not in version
            version.add_content(base_version.content.exclude(pk__in=version.content))

        if Task.current() and not self.sub_repo:
            resource = CreatedResource(content_object=version)
            resource.save()
        return version

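# Plugin tasks typically consume the returned version through its context-manager
# interface, as the upload tasks later in this section do. A minimal sketch of that
# pattern, assuming a Pulp plugin environment; `add_content_in_new_version` and its
# arguments are hypothetical names, not part of the pulpcore API.
from pulpcore.plugin.models import Repository


def add_content_in_new_version(repository_pk, content_queryset):
    # Entering the version and exiting cleanly finalizes it, so the added
    # content lands atomically in exactly one new repository version.
    repository = Repository.objects.get(pk=repository_pk)
    with repository.new_version() as new_version:
        new_version.add_content(content_queryset)
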
def migrate_repo_distributor(dist_migrator, progress_dist, pulp2dist, repo_version=None):
    """
    Migrate a repo distributor.

    Args:
        dist_migrator(Pulp2to3Distributor): distributor migrator class
        progress_dist: GroupProgressReport queryset for distribution creation
        pulp2dist(Pulp2Distributor): a pre-migrated distributor to migrate
        repo_version(RepositoryVersion): a pulp3 repo version
    """
    publication, distribution, created = dist_migrator.migrate_to_pulp3(
        pulp2dist, repo_version)
    if publication:
        pulp2dist.pulp3_publication = publication
    pulp2dist.pulp3_distribution = distribution
    pulp2dist.is_migrated = True
    pulp2dist.save()
    progress_dist.update(done=F('done') + 1)
    # A CreatedResource is recorded here because publications and repo versions
    # are listed among created resources while distributions were not; remotes
    # are still not listed, which can be confusing.
    # TODO: figure out what to do to make the output consistent
    if created:
        resource = CreatedResource(content_object=distribution)
        resource.save()

async def migrate_repo_distributor(pb, dist_migrator, pulp2dist, repo_version=None):
    """
    Migrate a repo distributor.

    Args:
        pb: progress report to update for distribution creation
        dist_migrator(Pulp2to3Distributor): distributor migrator class
        pulp2dist(Pulp2Distributor): a pre-migrated distributor to migrate
        repo_version(RepositoryVersion): a pulp3 repo version
    """
    publication, distribution, created = await dist_migrator.migrate_to_pulp3(
        pulp2dist, repo_version)
    if publication:
        pulp2dist.pulp3_publication = publication
    if distribution:
        pulp2dist.pulp3_distribution = distribution
    pulp2dist.is_migrated = True
    pulp2dist.save()
    # A CreatedResource is recorded here because publications and repo versions
    # are listed among created resources while distributions were not; remotes
    # are still not listed, which can be confusing.
    # TODO: figure out what to do to make the output consistent
    if created:
        resource = CreatedResource(content_object=distribution)
        resource.save()
        pb.increment()
    else:
        pb.total -= 1
        pb.save()

def tag_image(manifest_pk, tag, repository_pk):
    """
    Create a new repository version out of the passed tag name and the manifest.

    If the tag name is already associated with an existing manifest with the same
    digest, no new content is created. Note that the same tag name cannot be used
    for two different manifests. Because of this, an old Tag object is removed from
    the new repository version when its manifest's digest does not equal the digest
    passed with the POST request.
    """
    manifest = Manifest.objects.get(pk=manifest_pk)
    repository = Repository.objects.get(pk=repository_pk).cast()
    latest_version = repository.latest_version()

    tags_to_remove = Tag.objects.filter(
        pk__in=latest_version.content.all(), name=tag
    ).exclude(tagged_manifest=manifest)
    manifest_tag, created = Tag.objects.get_or_create(name=tag, tagged_manifest=manifest)
    if created:
        resource = CreatedResource(content_object=manifest_tag)
        resource.save()
    else:
        manifest_tag.touch()
    tags_to_add = Tag.objects.filter(pk=manifest_tag.pk).exclude(
        pk__in=latest_version.content.all()
    )

    with repository.new_version() as repository_version:
        repository_version.remove_content(tags_to_remove)
        repository_version.add_content(tags_to_add)

def upload_comps(tmp_file_id, repo_id=None, replace=False):
    """
    Upload a comps.xml file.

    Args:
        tmp_file_id: primary key of the PulpTemporaryFile holding the uploaded comps.xml
        repo_id: repository primary key to associate the incoming comps content with
        replace: if true, replace existing comps-related Content in the specified
            repository with those in the incoming comps.xml file
    """
    temp_file = PulpTemporaryFile.objects.get(pk=tmp_file_id)
    created, all_objs = parse_comps_components(temp_file)

    for content in all_objs:
        crsrc = CreatedResource(content_object=content)
        crsrc.save()

    if repo_id:
        repository = RpmRepository.objects.get(pk=repo_id)
        if repository:
            all_ids = [obj.content_ptr_id for obj in all_objs]
            with repository.new_version() as new_version:
                if replace:
                    # replace any comps-related content in the latest version
                    # with the content from the incoming file
                    latest = repository.latest_version()
                    rmv_ids = latest.content.filter(pulp_type__in=(
                        PackageCategory.get_pulp_type(),
                        PackageEnvironment.get_pulp_type(),
                        PackageGroup.get_pulp_type(),
                        PackageLangpacks.get_pulp_type(),
                    ))
                    new_version.remove_content(Content.objects.filter(pk__in=rmv_ids))
                new_version.add_content(Content.objects.filter(pk__in=all_ids))

def migrate_from_pulp2(migration_plan_pk, validate=False, dry_run=False, skip_corrupted=False):
    """
    Main task to migrate from Pulp 2 to Pulp 3.

    Schedule other tasks based on the specified Migration Plan.

    Args:
        migration_plan_pk (str): The migration plan PK.
        validate (bool): If True, don't migrate unless validation is successful.
        dry_run (bool): If True, nothing is migrated, only validation happens.
        skip_corrupted (bool): If True, corrupted content is skipped during migration
            instead of failing the task.
    """
    # MongoDB connection initialization
    connection.initialize()

    plan = MigrationPlan.objects.get(pk=migration_plan_pk)
    missing_resources = plan.get_missing_resources()

    if (validate or dry_run) and missing_resources:
        raise PlanValidationError(
            "Validation failed: resources missing {}".format(missing_resources)
        )

    if dry_run:
        return

    task_group = TaskGroup(description="Migration Sub-tasks")
    task_group.save()
    GroupProgressReport(
        message="Repo version creation",
        code="create.repo_version",
        task_group=task_group,
    ).save()
    GroupProgressReport(
        message="Distribution creation",
        code="create.distribution",
        task_group=task_group,
    ).save()
    current_task = Task.current()
    current_task.task_group = task_group
    current_task.save()
    resource = CreatedResource(content_object=task_group)
    resource.save()

    # TODO: if plan is empty for a plugin, only migrate downloaded content
    pre_migrate_all_without_content(plan)
    pre_migrate_all_content(plan)
    handle_outdated_resources(plan)
    migrate_repositories(plan)
    migrate_importers(plan)
    migrate_content(plan, skip_corrupted=skip_corrupted)
    create_repoversions_publications_distributions(plan)

    task_group.finish()

def create(*args, **kwargs):
    """
    Creates a :class:`~pulp_docker.app.models.DockerDistribution`.

    Raises:
        ValidationError: If the DockerDistributionSerializer is not valid
    """
    data = kwargs.pop('data', None)
    serializer = DockerDistributionSerializer(data=data)
    serializer.is_valid(raise_exception=True)
    serializer.save()

    resource = CreatedResource(content_object=serializer.instance)
    resource.save()

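# Note on the pattern shared by every task in this section: a CreatedResource row
# is attached to the task that is currently executing (its task foreign key
# defaults to the current task), so saving one is all a task needs to do for the
# object to appear among the task's created resources in the API. A minimal
# sketch; `record_created` is a hypothetical helper name, not a Pulp API.
from pulpcore.plugin.models import CreatedResource


def record_created(obj):
    # `obj` can be any object created inside a task: content, a distribution,
    # a repository version, a task group, etc.
    resource = CreatedResource(content_object=obj)
    resource.save()
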
def one_shot_upload(artifact_pk, filename, repository_pk=None):
    """
    One shot upload for a Helm chart.

    Args:
        artifact_pk: validated artifact
        filename: file name
        repository_pk: optional repository to add the Content to
    """
    chart = {}
    with tempfile.TemporaryDirectory() as td:
        temp_path = os.path.join(td, filename)
        artifact = Artifact.objects.get(pk=artifact_pk)
        shutil.copy2(artifact.file.path, temp_path)
        with tarfile.open(temp_path) as tarball:
            # a chart tarball contains exactly one top-level Chart.yaml
            chart_member = [
                m for m in tarball.getmembers()
                if m.name.endswith('Chart.yaml') and m.name.count('/') == 1
            ]
            if len(chart_member) != 1:
                raise serializers.ValidationError('Unable to find Chart.yaml')
            chart_file = tarball.extractfile(chart_member[0])
            doc = yaml.load(chart_file, Loader=yaml.SafeLoader)
            chart = {
                'name': doc['name'],
                'version': doc['version'],
                'digest': doc['digest'],
                # TODO: Handle multiple URLs better, maybe failover?
                'url': doc['urls'][0],
                'created': doc.get('created'),
                'app_version': doc.get('appVersion'),
                'description': doc.get('description'),
                'icon': doc.get('icon'),
                'keywords': doc.get('keywords', []),
            }

    new_content = ChartContent(**chart)
    new_content.save()

    if repository_pk:
        queryset = ChartContent.objects.filter(pk=new_content.pk)
        repository = ChartRepository.objects.get(pk=repository_pk)
        with repository.new_version() as new_version:
            new_version.add_content(queryset)

    resource = CreatedResource(content_object=new_content)
    resource.save()

def one_shot_upload(artifact_pk, filename, repository_pk=None):
    """
    One shot upload for pulp_python.

    Args:
        artifact_pk: validated artifact
        filename: file name
        repository_pk: optional repository to add the Content to
    """
    # iterate through extensions since splitext does not support things like .tar.gz
    for ext, packagetype in DIST_EXTENSIONS.items():
        if filename.endswith(ext):
            # Copy the file to a temp directory under the user-provided filename;
            # we do this because pkginfo validates that the filename has a valid
            # extension before reading it
            with tempfile.TemporaryDirectory() as td:
                temp_path = os.path.join(td, filename)
                artifact = Artifact.objects.get(pk=artifact_pk)
                shutil.copy2(artifact.file.path, temp_path)
                metadata = DIST_TYPES[packagetype](temp_path)
                metadata.packagetype = packagetype
                break
    else:
        raise serializers.ValidationError(
            _("Extension on {} is not a valid python extension "
              "(.whl, .exe, .egg, .tar.gz, .tar.bz2, .zip)").format(filename))

    data = parse_project_metadata(vars(metadata))
    data['classifiers'] = [{'name': classifier} for classifier in metadata.classifiers]
    data['packagetype'] = metadata.packagetype
    data['version'] = metadata.version
    data['filename'] = filename
    data['_relative_path'] = filename

    new_content = PythonPackageContent.objects.create(
        filename=filename,
        packagetype=metadata.packagetype,
        name=data['name'],
        version=data['version'])
    queryset = PythonPackageContent.objects.filter(pk=new_content.pk)

    if repository_pk:
        repository = Repository.objects.get(pk=repository_pk)
        with RepositoryVersion.create(repository) as new_version:
            new_version.add_content(queryset)

    resource = CreatedResource(content_object=new_content)
    resource.save()

def create_content(artifact_sha256, filename):
    """
    Creates PythonPackageContent from an artifact.

    Args:
        artifact_sha256: validated artifact
        filename: file name

    Returns:
        queryset of the newly created content
    """
    # iterate through extensions since splitext does not support things like .tar.gz
    extensions = list(DIST_EXTENSIONS.keys())
    pkg_type_index = [filename.endswith(ext) for ext in extensions].index(True)
    packagetype = DIST_EXTENSIONS[extensions[pkg_type_index]]

    # Copy the file to a temp location under the user-provided filename; we do this
    # because pkginfo validates that the filename has a valid extension before
    # reading it
    artifact = Artifact.objects.get(sha256=artifact_sha256)
    artifact_file = storage.open(artifact.file.name)
    with tempfile.NamedTemporaryFile('wb', suffix=filename) as temp_file:
        shutil.copyfileobj(artifact_file, temp_file)
        temp_file.flush()
        metadata = DIST_TYPES[packagetype](temp_file.name)
        metadata.packagetype = packagetype

    data = parse_project_metadata(vars(metadata))
    data['packagetype'] = metadata.packagetype
    data['version'] = metadata.version
    data['filename'] = filename
    data['sha256'] = artifact.sha256

    @transaction.atomic()
    def create():
        content = PythonPackageContent.objects.create(**data)
        ContentArtifact.objects.create(
            artifact=artifact, content=content, relative_path=filename)
        return content

    new_content = create()
    resource = CreatedResource(content_object=new_content)
    resource.save()

    return PythonPackageContent.objects.filter(pk=new_content.pk)

def one_shot_upload(artifact_pk, filename, repository_pk=None):
    """
    One shot upload for an RPM package.

    Args:
        artifact_pk: validated artifact for a file
        filename: name of the file
        repository_pk: repository to extend with the new package
    """
    artifact = Artifact.objects.get(pk=artifact_pk)

    # extract metadata from the rpm and prepare a dict in a saveable format
    try:
        new_pkg = _prepare_package(artifact, filename)
    except OSError:
        raise OSError('RPM file cannot be parsed for metadata.')

    pkg, created = Package.objects.get_or_create(**new_pkg)
    if not created:
        raise OSError('RPM package {} already exists.'.format(pkg.filename))

    ContentArtifact.objects.create(
        artifact=artifact,
        content=pkg,
        relative_path=filename
    )
    resource = CreatedResource(content_object=pkg)
    resource.save()

    if repository_pk:
        repository = Repository.objects.get(pk=repository_pk)
        content_to_add = Package.objects.filter(pkgId=pkg.pkgId)

        # create a new repo version with the uploaded package
        with RepositoryVersion.create(repository) as new_version:
            new_version.add_content(content_to_add)

def migrate_from_pulp2(migration_plan_pk, validate=False, dry_run=False):
    """
    Main task to migrate from Pulp 2 to Pulp 3.

    Schedule other tasks based on the specified Migration Plan.

    Args:
        migration_plan_pk (str): The migration plan PK.
        validate (bool): If True, don't migrate unless validation is successful.
        dry_run (bool): If True, nothing is migrated, only validation happens.
    """
    def get_repo_types(plan):
        """
        Create mappings for Pulp 2 repository types.

        The type of a repo is identified by inspecting its content. One mapping is
        repo_id -> repo_type, the other is repo_type -> list of repo_ids. They are
        used later during pre-migration and identification of repos removed from
        Pulp 2.

        Args:
            plan(MigrationPlan): A Migration Plan

        Returns:
            repo_id_to_type(dict): mapping from a pulp 2 repo_id to a plugin/repo type
            type_to_repo_ids(dict): mapping from a plugin/repo type to the list of repo_ids
        """
        repo_id_to_type = {}
        type_to_repo_ids = defaultdict(set)

        # mapping content type -> plugin/repo type, e.g. 'docker_blob' -> 'docker'
        content_type_to_plugin = {}

        for plugin in plan.get_plugin_plans():
            for content_type in plugin.migrator.pulp2_content_models:
                content_type_to_plugin[content_type] = plugin.migrator.pulp2_plugin

            repos = set(plugin.get_repositories())
            repos |= set(plugin.get_importers_repos())
            repos |= set(plugin.get_distributors_repos())

            for repo in repos:
                repo_id_to_type[repo] = plugin.type
            type_to_repo_ids[plugin.type].update(repos)

        # TODO: optimizations.
        # It looks at every content unit at the moment. Potential optimizations:
        #  - This is a big query, paginate?
        #  - Filter by repos from the plan
        #  - Query just one record per repo
        for rec in RepositoryContentUnit.objects().\
                only('repo_id', 'unit_type_id').as_pymongo().no_cache():
            repo_id = rec['repo_id']
            unit_type_id = rec['unit_type_id']

            # the type for this repo is already known, or this content/repo type
            # is not supported
            if repo_id in repo_id_to_type or unit_type_id not in content_type_to_plugin:
                continue
            plugin_name = content_type_to_plugin[unit_type_id]
            repo_id_to_type[repo_id] = plugin_name
            type_to_repo_ids[plugin_name].add(repo_id)

        return repo_id_to_type, type_to_repo_ids

    # MongoDB connection initialization
    connection.initialize()

    plan = MigrationPlan.objects.get(pk=migration_plan_pk)
    missing_resources = plan.get_missing_resources()

    if (validate or dry_run) and missing_resources:
        raise PlanValidationError(
            "Validation failed: resources missing {}".format(missing_resources))

    if dry_run:
        return

    task_group = TaskGroup(description="Migration Sub-tasks")
    task_group.save()
    GroupProgressReport(
        message="Repo version creation",
        code="create.repo_version",
        task_group=task_group).save()
    GroupProgressReport(
        message="Distribution creation",
        code="create.distribution",
        task_group=task_group).save()
    current_task = Task.current()
    current_task.task_group = task_group
    current_task.save()
    resource = CreatedResource(content_object=task_group)
    resource.save()

    # call it here and not inside the steps below to generate the mappings only once
    repo_id_to_type, type_to_repo_ids = get_repo_types(plan)

    # TODO: if plan is empty for a plugin, only migrate downloaded content
    pre_migrate_all_without_content(plan, type_to_repo_ids, repo_id_to_type)
    pre_migrate_all_content(plan)
    mark_removed_resources(plan, type_to_repo_ids)
    delete_old_resources(plan)
    migrate_repositories(plan)
    migrate_importers(plan)
    migrate_content(plan)
    create_repoversions_publications_distributions(plan)

    task_group.finish()

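# For illustration, the two mappings returned by get_repo_types() have this shape
# (the repo ids and plugin types below are hypothetical examples, not real data):
#
#   repo_id_to_type  = {'zoo': 'rpm', 'busybox': 'docker'}
#   type_to_repo_ids = {'rpm': {'zoo'}, 'docker': {'busybox'}}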