def create_repoversions_publications_distributions(plan, parallel=True):
    """
    Create repository versions, publications and distributions for each plugin of the plan.

    A plugin whose plan provides no repo creation setup is handed to
    ``simple_plugin_migration`` and processed in-process. For any other plugin,
    ``complex_repo_migration`` is run once per repository name found in the setup: as a task
    enqueued into the current task group (parallel) or as a direct call (serial).

    In parallel mode the number of repository versions and distributions listed in the setup
    is added to the totals of the group progress reports after the tasks have been enqueued.

    Args:
        plan (MigrationPlan): Migration Plan to use.

    Kwargs:
        parallel (bool): If True, enqueue one task per repository instead of migrating inline.
    """
    for plugin in plan.get_plugin_plans():
        repo_setup = plugin.get_repo_creation_setup()

        if not repo_setup:
            simple_plugin_migration(plugin)
            continue

        if not parallel:
            # Serial (non-parallel)
            for repo_name in repo_setup:
                complex_repo_migration(plugin, repo_setup, repo_name)
            continue

        version_total = 0
        distribution_total = 0
        for repo_name in repo_setup:
            versions = repo_setup[repo_name]['repository_versions']
            version_total += len(versions)
            distribution_total += sum(len(version['dist_repo_ids']) for version in versions)

            # the Pulp 3 repository is the resource reserved for the task
            reserved_repo = Repository.objects.get(name=repo_name).cast()
            enqueue_with_reservation(
                complex_repo_migration,
                [reserved_repo],
                args=[plugin, repo_setup, repo_name],
                task_group=TaskGroup.current(),
            )

        # register the expected amount of work once all the tasks of the plugin are enqueued
        group = TaskGroup.current()
        group.group_progress_reports.filter(code='create.repo_version').update(
            total=F('total') + version_total
        )
        group.group_progress_reports.filter(code='create.distribution').update(
            total=F('total') + distribution_total
        )
def simple_plugin_migration(plugin):
    """Migrate the in-plan repositories and distributors of a given plugin.

    The number of selected distributors and repositories is first added to the totals of the
    'create.distribution' and 'create.repo_version' group progress reports. Then
    ``create_repo_version`` is called for every selected Pulp 2 repository and, after that,
    ``migrate_repo_distributor`` for every selected Pulp 2 distributor.

    Args:
        plugin: Plugin object
    """
    migrators_by_type = plugin.migrator.distributor_migrators

    # only distributors which have neither a Pulp 3 publication nor a distribution yet
    pending_distributors = Pulp2Distributor.objects.filter(
        pulp3_distribution=None,
        pulp3_publication=None,
        not_in_plan=False,
        pulp2_type_id__in=list(plugin.migrator.distributor_migrators.keys()),
    )
    pending_repos = Pulp2Repository.objects.filter(
        pulp2_repo_type=plugin.type,
        not_in_plan=False,
    )

    # progress reports of the current task group, they are updated as the migration goes on
    group = TaskGroup.current()
    progress_dist = group.group_progress_reports.filter(code='create.distribution')
    progress_dist.update(total=F('total') + pending_distributors.count())
    progress_rv = group.group_progress_reports.filter(code='create.repo_version')
    progress_rv.update(total=F('total') + pending_repos.count())

    # Create one repo version for each pulp 2 repo if needed.
    for repo in pending_repos:
        create_repo_version(plugin.migrator, progress_rv, repo)

    for distributor in pending_distributors:
        migrate_repo_distributor(
            migrators_by_type.get(distributor.pulp2_type_id), progress_dist, distributor
        )
def migrate_from_pulp2(
    migration_plan_pk, validate=False, dry_run=False, skip_corrupted=False
):
    """
    Main task to migrate from Pulp 2 to Pulp 3.

    The Migration Plan is loaded and checked for missing resources. Unless it is a dry run, a
    task group with its progress reports is created and attached to the current task, and
    the migration steps are executed one after another.

    Args:
        migration_plan_pk (str): The migration plan PK.
        validate (bool): If True, don't migrate unless validation is successful.
        dry_run (bool): If True, nothing is migrated, only validation happens.
        skip_corrupted (bool): If True, corrupted content is skipped during migration,
                               no task failure.

    Raises:
        PlanValidationError: if `validate` or `dry_run` is set and the plan reports
                             missing resources.
    """
    # MongoDB connection initialization
    connection.initialize()

    plan = MigrationPlan.objects.get(pk=migration_plan_pk)
    missing_resources = plan.get_missing_resources()

    if validate or dry_run:
        if missing_resources:
            raise PlanValidationError(
                "Validation failed: resources missing {}".format(missing_resources)
            )

    if dry_run:
        return

    task_group = TaskGroup(description="Migration Sub-tasks")
    task_group.save()

    # one group progress report per kind of resource created by the sub-tasks
    report_specs = (
        ("Repo version creation", "create.repo_version"),
        ("Distribution creation", "create.distribution"),
    )
    for report_message, report_code in report_specs:
        GroupProgressReport(
            message=report_message,
            code=report_code,
            task_group=task_group,
        ).save()

    # attach the group to the running task and record it as a created resource
    this_task = Task.current()
    this_task.task_group = task_group
    this_task.save()
    CreatedResource(content_object=task_group).save()

    # TODO: if plan is empty for a plugin, only migrate downloaded content

    pre_migrate_all_without_content(plan)
    pre_migrate_all_content(plan)
    handle_outdated_resources(plan)
    migrate_repositories(plan)
    migrate_importers(plan)
    migrate_content(plan, skip_corrupted=skip_corrupted)
    create_repoversions_publications_distributions(plan)

    task_group.finish()
def migrate_from_pulp2(migration_plan_pk, validate=False, dry_run=False):
    """
    Main task to migrate from Pulp 2 to Pulp 3.

    The Migration Plan is loaded and checked for missing resources. Unless it is a dry run, a
    task group with its progress reports is created and attached to the current task, the
    repo type mappings are generated and the migration steps are executed one after another.

    Args:
        migration_plan_pk (str): The migration plan PK.
        validate (bool): If True, don't migrate unless validation is successful.
        dry_run (bool): If True, nothing is migrated, only validation happens.

    Raises:
        PlanValidationError: if `validate` or `dry_run` is set and the plan reports
                             missing resources.
    """

    def get_repo_types(plan):
        """
        Build the mappings between pulp 2 repositories and their types.

        Repositories listed in the plan get the type of their plugin. The type of any other
        repository is identified by inspecting its content: the first record of a supported
        content type decides.

        One mapping is repo_id -> repo_type, the other is repo_type -> list of repo_ids.
        It's used later during pre-migration and identification of removed repos from pulp 2

        Args:
            plan(MigrationPlan): A Migration Plan

        Returns:
            repo_id_to_type(dict): mapping from a pulp 2 repo_id to a plugin/repo type
            type_to_repo_ids(dict): mapping from a plugin/repo type to the list of repo_ids

        """
        repo_id_to_type = {}
        type_to_repo_ids = defaultdict(set)
        # mapping content type -> plugin/repo type, e.g. 'docker_blob' -> 'docker'
        content_type_to_plugin = {}

        for plugin in plan.get_plugin_plans():
            migrator = plugin.migrator
            for content_type in migrator.pulp2_content_models:
                content_type_to_plugin[content_type] = migrator.pulp2_plugin

            # every repo the plan mentions, be it as a repository, an importer or a distributor
            plan_repo_ids = set(plugin.get_repositories()).union(
                plugin.get_importers_repos(),
                plugin.get_distributors_repos(),
            )
            repo_id_to_type.update(dict.fromkeys(plan_repo_ids, plugin.type))
            type_to_repo_ids[plugin.type].update(plan_repo_ids)

        # TODO: optimizations.
        # It looks at each content at the moment. Potential optimizations:
        #  - This is a big query, paginate?
        #  - Filter by repos from the plan
        #  - Query any but one record for a repo
        content_records = (
            RepositoryContentUnit.objects()
            .only('repo_id', 'unit_type_id')
            .as_pymongo()
            .no_cache()
        )
        for record in content_records:
            repo_id = record['repo_id']
            unit_type_id = record['unit_type_id']

            # only a repo of a yet unknown type with content of a supported type is of interest
            if repo_id not in repo_id_to_type and unit_type_id in content_type_to_plugin:
                plugin_name = content_type_to_plugin[unit_type_id]
                repo_id_to_type[repo_id] = plugin_name
                type_to_repo_ids[plugin_name].add(repo_id)

        return repo_id_to_type, type_to_repo_ids

    # MongoDB connection initialization
    connection.initialize()

    plan = MigrationPlan.objects.get(pk=migration_plan_pk)
    missing_resources = plan.get_missing_resources()

    if validate or dry_run:
        if missing_resources:
            raise PlanValidationError(
                "Validation failed: resources missing {}".format(missing_resources)
            )

    if dry_run:
        return

    task_group = TaskGroup(description="Migration Sub-tasks")
    task_group.save()

    # one group progress report per kind of resource created by the sub-tasks
    report_specs = (
        ("Repo version creation", "create.repo_version"),
        ("Distribution creation", "create.distribution"),
    )
    for report_message, report_code in report_specs:
        GroupProgressReport(
            message=report_message,
            code=report_code,
            task_group=task_group,
        ).save()

    # attach the group to the running task and record it as a created resource
    this_task = Task.current()
    this_task.task_group = task_group
    this_task.save()
    CreatedResource(content_object=task_group).save()

    # call it here and not inside steps below to generate mapping only once
    repo_id_to_type, type_to_repo_ids = get_repo_types(plan)

    # TODO: if plan is empty for a plugin, only migrate downloaded content

    pre_migrate_all_without_content(plan, type_to_repo_ids, repo_id_to_type)
    pre_migrate_all_content(plan)
    mark_removed_resources(plan, type_to_repo_ids)
    delete_old_resources(plan)
    migrate_repositories(plan)
    migrate_importers(plan)
    migrate_content(plan)
    create_repoversions_publications_distributions(plan)

    task_group.finish()
def create_repoversions_publications_distributions(plan, parallel=True):
    """
    Create repository versions, publications and distributions for each plugin of the plan.

    A plugin is skipped when it has no in-plan Pulp 2 repository or distributor left with
    `is_migrated=False`. Otherwise, every repository of the plugin's repo creation setup
    which refers to at least one such repository or distributor is handed to
    ``complex_repo_migration``: as a task enqueued into the current task group (parallel) or
    as a direct call (serial).

    The expected number of repository versions and distributions is added to the totals of
    the group progress reports in both modes. ``complex_repo_migration`` decreases the
    distribution total for the distributors it finds already migrated, so the totals have to
    be registered for the serial run as well.

    Args:
        plan (MigrationPlan): Migration Plan to use.

    Kwargs:
        parallel (bool): If True, enqueue one task per repository instead of migrating inline.
    """
    for plugin in plan.get_plugin_plans():
        # verify whether all pulp2 repos and distributors have been migrated
        not_migrated_repos = Pulp2Repository.objects.filter(
            is_migrated=False,
            not_in_plan=False,
            pulp2_repo_type=plugin.type,
        )
        not_migrated_dists = Pulp2Distributor.objects.filter(
            is_migrated=False,
            not_in_plan=False,
            pulp2_type_id__in=plugin.migrator.distributor_migrators.keys(),
        )

        # no need to proceed - everything is migrated
        # exists() checks for a single row instead of loading every object just to test
        # the emptiness; the loaded objects would not be reused by values_list() below
        if not not_migrated_repos.exists() and not not_migrated_dists.exists():
            continue

        not_migrated_repo_ids = not_migrated_repos.values_list('pulp2_repo_id', flat=True)
        not_migrated_repo_ids_dists = not_migrated_dists.values_list('pulp2_repo_id', flat=True)
        repos_ids_to_check = set(not_migrated_repo_ids).union(not_migrated_repo_ids_dists)

        pulp3_repo_setup = plugin.get_repo_creation_setup()

        repo_ver_to_create = 0
        dist_to_create = 0

        for repo_name in pulp3_repo_setup:
            repo_versions = pulp3_repo_setup[repo_name]['repository_versions']
            needs_a_task = False
            for repo_ver in repo_versions:
                repos = set(repo_ver['dist_repo_ids'] + [repo_ver['repo_id']])
                # check whether any resources are not migrated and need a task
                if repos.intersection(repos_ids_to_check):
                    needs_a_task = True
                    dist_to_create += len(repo_ver['dist_repo_ids'])

            if not needs_a_task:
                continue

            repo_ver_to_create += len(repo_versions)
            task_args = [plugin, pulp3_repo_setup, repo_name]
            if parallel:
                # the Pulp 3 repository is the resource reserved for the task
                repo = Repository.objects.get(name=repo_name).cast()
                enqueue_with_reservation(
                    complex_repo_migration,
                    [repo],
                    args=task_args,
                    task_group=TaskGroup.current(),
                )
            else:
                # Serial (non-parallel)
                complex_repo_migration(*task_args)

        # F() makes the updates relative, so the final totals do not depend on whether
        # the migration code has adjusted them before or after this point
        task_group = TaskGroup.current()
        progress_rv = task_group.group_progress_reports.filter(code='create.repo_version')
        progress_rv.update(total=F('total') + repo_ver_to_create)
        progress_dist = task_group.group_progress_reports.filter(code='create.distribution')
        progress_dist.update(total=F('total') + dist_to_create)
def complex_repo_migration(plugin, pulp3_repo_setup, repo_name):
    """Perform a complex migration for a particular repo using the repo setup config.

    The work is done in two passes over the 'repository_versions' of the repo: at first
    ``create_repo_version`` is called for every Pulp 2 repository which is still known, then
    the not yet migrated distributors of every migrated repository are passed to
    ``migrate_repo_distributor`` and associated with that repository.

    Args:
        plugin: Plugin object
        pulp3_repo_setup: Pulp 3 repo setup config for a plugin
        repo_name: Name of the repo to be migrated
    """
    migrators_by_type = plugin.migrator.distributor_migrators
    supported_dist_types = list(plugin.migrator.distributor_migrators.keys())
    versions_setup = pulp3_repo_setup[repo_name]['repository_versions']

    # importer might not be migrated, e.g. config is empty or it's not specified in a MP
    pulp3_remote = None
    importer_repo_id = pulp3_repo_setup[repo_name].get('pulp2_importer_repository_id')
    if importer_repo_id:
        try:
            importer = Pulp2Importer.objects.get(
                pulp2_repo_id=importer_repo_id,
                not_in_plan=False,
            )
            pulp3_remote = importer.pulp3_remote
        except Pulp2Importer.DoesNotExist:
            pass

    # progress reports of the current task group, they are updated as the migration goes on
    group = TaskGroup.current()
    progress_dist = group.group_progress_reports.filter(code='create.distribution')
    progress_rv = group.group_progress_reports.filter(code='create.repo_version')

    # first pass: repository versions
    for version_info in versions_setup:
        try:
            pulp2_repo = Pulp2Repository.objects.get(
                pulp2_repo_id=version_info['repo_id'],
                not_in_plan=False,
            )
        except Pulp2Repository.DoesNotExist:
            # not in Pulp 2 anymore
            continue

        # it's possible to have a random order of the repo versions (after migration
        # re-run, a repo can be changed in pulp 2 and it might not be for the last
        # repo version)
        create_repo_version(progress_rv, pulp2_repo, pulp3_remote)

    # second pass: distributors of the repositories which are migrated by now
    for version_info in versions_setup:
        repo_id = version_info['repo_id']
        dist_repo_ids = version_info['dist_repo_ids']

        try:
            migrated_repo = Pulp2Repository.objects.get(
                pulp2_repo_id=repo_id,
                not_in_plan=False,
                is_migrated=True,
            )
        except Pulp2Repository.DoesNotExist:
            # not in Pulp 2 anymore
            continue

        pending_dists = Pulp2Distributor.objects.filter(
            is_migrated=False,
            not_in_plan=False,
            pulp2_repo_id__in=dist_repo_ids,
            pulp2_type_id__in=supported_dist_types,
        )

        # decrease the number of total because some dists have already been migrated
        already_migrated = len(dist_repo_ids) - len(pending_dists)
        if already_migrated:
            progress_dist.update(total=F('total') - already_migrated)

        for dist in pending_dists:
            migrate_repo_distributor(
                migrators_by_type.get(dist.pulp2_type_id),
                progress_dist,
                dist,
                migrated_repo.pulp3_repository_version,
            )
            # add distributors specified in the complex plan
            # these can be native and not native distributors
            migrated_repo.pulp2_dists.add(dist)
def complex_repo_migration(plugin_type, pulp3_repo_setup, repo_name):
    """Perform a complex migration for a particular repo using the repo setup config.

    The work is done in two passes over the "repository_versions" of the repo: at first
    ``create_repo_version`` is called for every Pulp 2 repository which is still known, then
    the not yet migrated distributors of every migrated repository are passed to
    ``migrate_repo_distributor`` and associated with that repository.

    If the setup of the repo names a signing service, it is looked up and passed along to
    the distributor migration.

    Args:
        plugin_type(str): Plugin type
        pulp3_repo_setup: Pulp 3 repo setup config for a plugin
        repo_name: Name of the repo to be migrated

    Raises:
        SigningService.DoesNotExist: if the requested signing service can't be found.
    """
    from pulp_2to3_migration.app.plugin import PLUGIN_MIGRATORS

    migrators_by_type = PLUGIN_MIGRATORS.get(plugin_type).distributor_migrators
    supported_dist_types = list(migrators_by_type.keys())
    versions_setup = pulp3_repo_setup[repo_name]["repository_versions"]

    signing_service = None
    requested_service = pulp3_repo_setup[repo_name].get("signing_service")
    if requested_service:
        _logger.info("Signing Service %r requested for %r", requested_service, repo_name)
        try:
            signing_service = SigningService.objects.get(name=requested_service)
        except SigningService.DoesNotExist:
            # a missing signing service is reported and the error is propagated
            _logger.warning("Could not find signing-service named %r", requested_service)
            raise

    # importer might not be migrated, e.g. config is empty or it's not specified in a MP
    pulp3_remote = None
    importer_repo_id = pulp3_repo_setup[repo_name].get("pulp2_importer_repository_id")
    if importer_repo_id:
        try:
            importer = Pulp2Importer.objects.get(
                pulp2_repo_id=importer_repo_id,
                not_in_plan=False,
            )
            pulp3_remote = importer.pulp3_remote
        except Pulp2Importer.DoesNotExist:
            pass

    # progress reports of the current task group, they are updated as the migration goes on
    group = TaskGroup.current()
    progress_dist = group.group_progress_reports.filter(code="create.distribution")
    progress_rv = group.group_progress_reports.filter(code="create.repo_version")

    # first pass: repository versions
    for version_info in versions_setup:
        try:
            pulp2_repo = Pulp2Repository.objects.get(
                pulp2_repo_id=version_info["repo_id"],
                not_in_plan=False,
            )
        except Pulp2Repository.DoesNotExist:
            # not in Pulp 2 anymore
            continue

        # it's possible to have a random order of the repo versions (after migration
        # re-run, a repo can be changed in pulp 2 and it might not be for the last
        # repo version)
        create_repo_version(progress_rv, pulp2_repo, pulp3_remote)

    # second pass: distributors of the repositories which are migrated by now
    for version_info in versions_setup:
        repo_id = version_info["repo_id"]
        dist_repo_ids = version_info["dist_repo_ids"]

        try:
            migrated_repo = Pulp2Repository.objects.get(
                pulp2_repo_id=repo_id,
                not_in_plan=False,
                is_migrated=True,
            )
        except Pulp2Repository.DoesNotExist:
            # not in Pulp 2 anymore
            continue

        pending_dists = Pulp2Distributor.objects.filter(
            is_migrated=False,
            not_in_plan=False,
            pulp2_repo_id__in=dist_repo_ids,
            pulp2_type_id__in=supported_dist_types,
        )

        # decrease the number of total because some dists have already been migrated
        already_migrated = len(dist_repo_ids) - len(pending_dists)
        if already_migrated:
            progress_dist.update(total=F("total") - already_migrated)

        for dist in pending_dists:
            migrate_repo_distributor(
                migrators_by_type.get(dist.pulp2_type_id),
                progress_dist,
                dist,
                migrated_repo.pulp3_repository_version,
                signing_service,
            )
            # add distributors specified in the complex plan
            # these can be native and not native distributors
            migrated_repo.pulp2_dists.add(dist)