def pre_migrate_all_without_content(plan):
    """
    Pre-migrate repositories, relations to their contents, importers and distributors.

    Look at the last updated times in the pulp2to3 tables for
    repositories/importers/distributors:
     * pulp2_last_unit_added or pulp2_last_unit_removed for repositories
     * pulp2_last_updated for importers and distributors

    Query empty-never-had-content repos (can't filter them out in any way) and repos
    for which there were:
     * content changes since the last run
     * importer changes since the last run
     * distributor changes since the last run

    Query in order of last_unit_added for the case when pre-migration is interrupted
    before we are done with repositories.

    Args:
        plan(MigrationPlan): A Migration Plan
    """
    _logger.debug('Pre-migrating Pulp 2 repositories')

    with ProgressReport(
        message='Processing Pulp 2 repositories, importers, distributors',
        code='processing.repositories',
        total=0
    ) as pb:
        # Each plugin plan is processed independently; the progress total grows as
        # each plugin's candidate repo set is counted.
        for plugin_plan in plan.get_plugin_plans():
            repos = plugin_plan.get_repositories()
            importers_repos = plugin_plan.get_importers_repos()
            distributors_repos = plugin_plan.get_distributors_repos()

            importer_types = list(plugin_plan.migrator.importer_migrators.keys())
            distributor_migrators = plugin_plan.migrator.distributor_migrators
            distributor_types = list(distributor_migrators.keys())

            # figure out which repos/importers/distributors have been updated since the last run
            # NOTE(review): epoch is a naive UTC datetime (for comparison against MongoDB's
            # naive timestamps), while the Django aggregates below may return tz-aware
            # values — confirm the mixing is intentional.
            epoch = datetime.utcfromtimestamp(0)
            repo_type_q = Q(pulp2_repo_type=plugin_plan.type)
            imp_type_q = Q(pulp2_type_id__in=importer_types)
            dist_type_q = Q(pulp2_type_id__in=distributor_types)

            # Latest pre-migrated timestamps recorded in the pulp2to3 (PostgreSQL) tables;
            # anything in Mongo newer than these needs (re-)pre-migration.
            plugin_pulp2repos = Pulp2Repository.objects.filter(repo_type_q)
            repo_premigrated_last_by_added = plugin_pulp2repos.aggregate(
                Max('pulp2_last_unit_added')
            )['pulp2_last_unit_added__max'] or epoch
            repo_premigrated_last_by_removed = plugin_pulp2repos.aggregate(
                Max('pulp2_last_unit_removed')
            )['pulp2_last_unit_removed__max'] or epoch
            imp_premigrated_last = Pulp2Importer.objects.filter(imp_type_q).aggregate(
                Max('pulp2_last_updated')
            )['pulp2_last_updated__max'] or epoch
            dist_premigrated_last = Pulp2Distributor.objects.filter(dist_type_q).aggregate(
                Max('pulp2_last_updated')
            )['pulp2_last_updated__max'] or epoch

            # Mongo-side filters: repos changed since the last run, empty repos
            # (last_unit_added never set), and importers/distributors updated since then.
            is_content_added_q = mongo_Q(last_unit_added__gte=repo_premigrated_last_by_added)
            is_content_removed_q = mongo_Q(
                last_unit_removed__gte=repo_premigrated_last_by_removed)
            is_new_enough_repo_q = is_content_added_q | is_content_removed_q
            is_empty_repo_q = mongo_Q(last_unit_added__exists=False)
            is_new_enough_imp_q = mongo_Q(last_updated__gte=imp_premigrated_last)
            is_new_enough_dist_q = mongo_Q(last_updated__gte=dist_premigrated_last)
            repo_repo_id_q = mongo_Q(repo_id__in=repos)
            imp_repo_id_q = mongo_Q(repo_id__in=importers_repos)
            dist_repo_id_q = mongo_Q(repo_id__in=distributors_repos)

            # Repos whose importer or distributor changed also need to be revisited,
            # even if their content did not change.
            updated_importers = Importer.objects(
                imp_repo_id_q & is_new_enough_imp_q
            ).only('repo_id')
            updated_imp_repos = set(imp.repo_id for imp in updated_importers)
            updated_distributors = Distributor.objects(
                dist_repo_id_q & is_new_enough_dist_q
            ).only('repo_id')
            updated_dist_repos = set(dist.repo_id for dist in updated_distributors)
            updated_impdist_repos = updated_imp_repos | updated_dist_repos

            mongo_updated_repo_q = repo_repo_id_q & (is_new_enough_repo_q | is_empty_repo_q)
            mongo_updated_imp_dist_repo_q = mongo_Q(repo_id__in=updated_impdist_repos)

            # Ordering by last_unit_added makes an interrupted pre-migration resumable:
            # already-processed (older) repos are skipped by the timestamp filters above.
            mongo_repo_qs = Repository.objects(
                mongo_updated_repo_q | mongo_updated_imp_dist_repo_q
            ).order_by('last_unit_added')

            pb.total += mongo_repo_qs.count()
            pb.save()

            for repo_data in mongo_repo_qs.only('id',
                                                'repo_id',
                                                'last_unit_added',
                                                'last_unit_removed',
                                                'description'):
                repo_id = repo_data.repo_id
                # One transaction per repo so a crash leaves previously processed
                # repos fully pre-migrated.
                with transaction.atomic():
                    if repo_id in repos:
                        pre_migrate_repo(repo_data, plan.repo_id_to_type)
                    if repo_id in importers_repos:
                        pre_migrate_importer(repo_id, importer_types)
                    if repo_id in distributors_repos:
                        pre_migrate_distributor(repo_id, distributor_migrators)
                    pb.increment()
def handle_outdated_resources(plan):
    """
    Marks repositories, importers, distributors which are no longer present in Pulp2.

    Delete Publications and Distributions which are no longer present in Pulp2.

    Args:
        plan(MigrationPlan): A Migration Plan
    """
    plugins = plan.get_plugins()
    RepoSetup.mark_changed_relations(plugins)
    RepoSetup.finalize(plugins)

    for plugin_plan in plan.get_plugin_plans():
        inplan_repos = plugin_plan.get_repositories()

        # filter by repo type and by the repos specified in a plan
        repos_to_consider = plan.type_to_repo_ids[plugin_plan.type]
        repos_to_consider = set(inplan_repos).intersection(repos_to_consider)

        # Anything pre-migrated for this repo type that is NOT in the plan (or no
        # longer exists in Mongo) gets flagged not_in_plan.
        mongo_repo_q = mongo_Q(repo_id__in=repos_to_consider)
        mongo_repo_obj_ids = set(
            str(i.id) for i in Repository.objects(mongo_repo_q).only("id"))

        repo_type_q = Q(pulp2_repo_type=plugin_plan.type)
        inplan_repo_q = Q(pulp2_object_id__in=mongo_repo_obj_ids)
        Pulp2Repository.objects.filter(repo_type_q).exclude(inplan_repo_q).update(
            not_in_plan=True)

        # Mark removed or excluded importers
        inplan_imp_repos = plugin_plan.get_importers_repos()
        mongo_imp_q = mongo_Q(repo_id__in=inplan_imp_repos)
        mongo_imp_obj_ids = set(
            str(i.id) for i in Importer.objects(mongo_imp_q).only("id"))

        imp_types = plugin_plan.migrator.importer_migrators.keys()
        imp_type_q = Q(pulp2_type_id__in=imp_types)
        inplan_imp_q = Q(pulp2_object_id__in=mongo_imp_obj_ids)
        Pulp2Importer.objects.filter(imp_type_q).exclude(inplan_imp_q).update(
            not_in_plan=True)

        # Mark removed or excluded distributors
        inplan_dist_repos = plugin_plan.get_distributors_repos()
        mongo_dist_q = mongo_Q(repo_id__in=inplan_dist_repos)
        mongo_dist_obj_ids = set(
            str(i.id) for i in Distributor.objects(mongo_dist_q).only("id"))

        dist_types = plugin_plan.migrator.distributor_migrators.keys()
        dist_type_q = Q(pulp2_type_id__in=dist_types)
        inplan_dist_q = Q(pulp2_object_id__in=mongo_dist_obj_ids)
        Pulp2Distributor.objects.filter(dist_type_q).exclude(inplan_dist_q).update(
            not_in_plan=True)

    # Delete old Publications/Distributions which are no longer present in Pulp2.
    # It's critical to remove Distributions to avoid base_path overlap.
    # It makes the migration logic easier if we remove old Publications as well.

    # Delete criteria:
    #     - pulp2distributor is no longer in plan
    #     - pulp2repository content changed (repo.is_migrated=False) or it is no longer
    #       in plan
    repos_with_old_distributions_qs = Pulp2Repository.objects.filter(
        Q(is_migrated=False) | Q(not_in_plan=True))

    old_dist_query = Q(pulp3_distribution__isnull=False) | Q(pulp3_publication__isnull=False)
    old_dist_query &= Q(pulp2_repos__in=repos_with_old_distributions_qs) | Q(not_in_plan=True)

    with transaction.atomic():
        pulp2distributors_with_old_distributions_qs = Pulp2Distributor.objects.filter(
            old_dist_query)

        pulp2distributors_with_old_distributions_qs.update(is_migrated=False)

        # If publication is shared by multiple distributions, on the corresponding
        # distributors flip the flag to false so the affected distributions will be
        # updated with the new publication
        Pulp2Distributor.objects.filter(
            pulp3_publication__in=Publication.objects.filter(
                pulp2distributor__in=pulp2distributors_with_old_distributions_qs
            )
        ).update(is_migrated=False)

        # Delete outdated publications
        Publication.objects.filter(
            pulp2distributor__in=pulp2distributors_with_old_distributions_qs
        ).delete()

        # Delete outdated distributions
        Distribution.objects.filter(
            pulp2distributor__in=pulp2distributors_with_old_distributions_qs
        ).delete()

        # Remove relations to the pulp2repository in case the relation changed.
        # Pulp2Distributors with is_migrated=false is handled and re-added properly at
        # migration stage.
        # NOTE: this needs to be removed last, the queries above use this relation.
        not_migrated_dists = Pulp2Distributor.objects.filter(
            is_migrated=False).only("pulp_id")
        Pulp2Distributor.pulp2_repos.through.objects.filter(
            pulp2distributor__in=not_migrated_dists).delete()
def pre_migrate_distributor(repo_id, distributor_migrators):
    """
    Pre-migrate a pulp 2 distributor.

    Creates/refreshes a Pulp2Distributor record per Mongo distributor of a supported
    type. When an existing record's config changed since the last run, outdated
    Pulp 3 Publications/Distributions attached to it are removed so they will be
    recreated at migration time.

    Args:
        repo_id(str): An id of a pulp 2 repository which distributor should be migrated
        distributor_migrators(dict): supported distributor types and their models for
            migration
    """
    distributor_types = list(distributor_migrators.keys())
    mongo_distributor_q = mongo_Q(repo_id=repo_id,
                                  distributor_type_id__in=distributor_types)

    mongo_distributor_qs = Distributor.objects(mongo_distributor_q)
    if not mongo_distributor_qs:
        # Either the distributor no longer exists in Pulp2,
        # or it was filtered out by the Migration Plan,
        # or it has an empty config
        return

    for dist_data in mongo_distributor_qs:
        # Mongo stores naive datetimes; make it tz-aware for PostgreSQL comparison.
        last_updated = dist_data.last_updated and timezone.make_aware(
            dist_data.last_updated, timezone.utc)
        distributor, created = Pulp2Distributor.objects.get_or_create(
            pulp2_object_id=dist_data.id,
            defaults={
                "pulp2_id": dist_data.distributor_id,
                "pulp2_type_id": dist_data.distributor_type_id,
                "pulp2_last_updated": last_updated,
                "pulp2_config": dist_data.config,
                "pulp2_repo_id": repo_id,
                "is_migrated": False,
            },
        )
        if not created:
            # if it was marked as such because it was not present in the migration plan
            distributor.not_in_plan = False
            # check if there were any changes since last time
            if last_updated != distributor.pulp2_last_updated:
                distributor.pulp2_config = dist_data.config
                distributor.pulp2_last_updated = last_updated
                distributor.is_migrated = False

                # Ask the plugin-specific migrator whether the config change
                # invalidates the existing Pulp 3 publication/distribution.
                dist_migrator = distributor_migrators.get(distributor.pulp2_type_id)
                needs_new_publication = dist_migrator.needs_new_publication(distributor)
                needs_new_distribution = dist_migrator.needs_new_distribution(distributor)
                remove_publication = (needs_new_publication
                                      and distributor.pulp3_publication)
                remove_distribution = (needs_new_distribution
                                       and distributor.pulp3_distribution)

                if remove_publication:
                    # check if publication is shared by multiple distributions
                    # on the corresponding distributor flip the flag to false so the
                    # affected distribution will be updated with the new publication
                    pulp2dists = distributor.pulp3_publication.pulp2distributor_set.all()
                    for dist in pulp2dists:
                        if dist.is_migrated:
                            dist.is_migrated = False
                            dist.save()
                    distributor.pulp3_publication.delete()
                    distributor.pulp3_publication = None
                # FIX: guard against a missing distribution — previously a publication
                # removal with no attached distribution raised AttributeError on
                # `None.delete()`.
                if ((remove_publication or remove_distribution)
                        and distributor.pulp3_distribution):
                    distributor.pulp3_distribution.delete()
                    distributor.pulp3_distribution = None
            distributor.save()
def pre_migrate_importer(repo_id, importer_types):
    """
    Pre-migrate a pulp 2 importer.

    Creates/refreshes the Pulp2Importer record for the repo's importer. Importers
    without a feed cannot become Pulp 3 remotes and are skipped with a warning;
    a feed change drops the existing Pulp 3 remote so it will be recreated.

    Args:
        repo_id(str): An id of a pulp 2 repository which importer should be migrated
        importer_types(list): a list of supported importer types
    """
    mongo_importer_q = mongo_Q(repo_id=repo_id, importer_type_id__in=importer_types)

    # importers with empty config are not needed - nothing to migrate
    mongo_importer_q &= mongo_Q(config__exists=True) & mongo_Q(config__ne={})

    mongo_importer_qs = Importer.objects(mongo_importer_q)
    if not mongo_importer_qs:
        # Either the importer no longer exists in Pulp2,
        # or it was filtered out by the Migration Plan,
        # or it has an empty config
        return

    importer_data = mongo_importer_qs.only("id",
                                           "repo_id",
                                           "importer_type_id",
                                           "last_updated",
                                           "config").first()

    if not importer_data.config.get("feed"):
        # Pulp 3 remotes require URL
        msg = "Importer from {repo} cannot be migrated because it does not have a feed".format(
            repo=repo_id)
        # FIX: Logger.warn is a deprecated alias; use warning().
        _logger.warning(msg)
        return

    # Mongo stores naive datetimes; make it tz-aware for PostgreSQL comparison.
    last_updated = importer_data.last_updated and timezone.make_aware(
        importer_data.last_updated, timezone.utc)

    importer, created = Pulp2Importer.objects.get_or_create(
        pulp2_object_id=importer_data.id,
        defaults={
            "pulp2_type_id": importer_data.importer_type_id,
            "pulp2_last_updated": last_updated,
            "pulp2_config": importer_data.config,
            "pulp2_repo_id": repo_id,
            "is_migrated": False,
        },
    )

    if not created:
        # if it was marked as such because it was not present in the migration plan
        importer.not_in_plan = False
        # check if there were any changes since last time
        if last_updated != importer.pulp2_last_updated:
            # remove Remote in case of feed change
            if importer.pulp2_config.get("feed") != importer_data.config.get("feed"):
                if importer.pulp3_remote:
                    importer.pulp3_remote.delete()
                    importer.pulp3_remote = None
                # do not flip is_migrated to False for LCE for at least once migrated
                # importer
            importer.pulp2_last_updated = last_updated
            importer.pulp2_config = importer_data.config
            importer.is_migrated = False
        importer.save()
async def pre_migrate_all_without_content(plan):
    """
    Pre-migrate repositories, relations to their contents, importers and distributors.

    NOTE: MongoDB and Django handle datetime fields differently. MongoDB doesn't care
    about timezones and provides "naive" time, while Django is complaining about time
    without a timezone. The problem is that naive time != time with specified timezone,
    that's why all the time for MongoDB comparisons should be naive and all the time
    for Django/PostgreSQL should be timezone aware.

    Args:
        plan(MigrationPlan): A Migration Plan
    """
    repos = plan.get_repositories()
    importers = plan.get_importers()
    distributors = plan.get_distributors()

    _logger.debug('Pre-migrating Pulp 2 repositories')

    # the latest time we have in the migration tool in Pulp2Repository table
    zero_datetime = timezone.make_aware(datetime(1970, 1, 1), timezone.utc)
    last_added = Pulp2Repository.objects.aggregate(Max('pulp2_last_unit_added'))[
        'pulp2_last_unit_added__max'] or zero_datetime
    last_removed = Pulp2Repository.objects.aggregate(Max('pulp2_last_unit_removed'))[
        'pulp2_last_unit_removed__max'] or zero_datetime
    last_updated = max(last_added, last_removed)
    # strip the timezone for the MongoDB comparisons below (Mongo times are naive)
    last_updated_naive = timezone.make_naive(last_updated, timezone=timezone.utc)

    with ProgressReport(message='Pre-migrating Pulp 2 repositories, importers, distributors',
                        code='premigrating.repositories') as pb:
        # we pre-migrate:
        #  - empty repos (last_unit_added is not set)
        #  - repos which were updated since last migration
        #    (last_unit_added/removed >= last_updated)
        mongo_repo_q = (mongo_Q(last_unit_added__exists=False)
                        | mongo_Q(last_unit_added__gte=last_updated_naive)
                        | mongo_Q(last_unit_removed__gte=last_updated_naive))

        # in case only certain repositories are specified in the migration plan
        if repos:
            mongo_repo_q &= mongo_Q(repo_id__in=repos)

        # filter repo type (pulp2 stores it as a "<plugin>-repo" note)
        repo_types = [f'{type}-repo' for type in plan.get_plugins()]
        mongo_repo_q &= mongo_Q(__raw__={"notes._repo-type": {'$in': repo_types}})

        mongo_repo_qs = Repository.objects(mongo_repo_q)
        pb.total = mongo_repo_qs.count()
        pb.save()

        for repo_data in mongo_repo_qs.only('id',
                                            'repo_id',
                                            'last_unit_added',
                                            'last_unit_removed',
                                            'description',
                                            'notes'):
            # One transaction per repo so partial progress survives interruption.
            with transaction.atomic():
                repo = await pre_migrate_repo(repo_data)
                await pre_migrate_importer(repo, importers)
                await pre_migrate_distributor(repo, distributors)
                await pre_migrate_repocontent(repo)
                pb.increment()
def _flag_not_in_pulp2(model_cls, removed_object_ids):
    """
    Set not_in_pulp2=True on all model_cls records with the given pulp2_object_ids.

    Args:
        model_cls: a pulp2to3 Django model class with pulp2_object_id and not_in_pulp2
            fields (Pulp2Repository, Pulp2Importer or Pulp2Distributor)
        removed_object_ids(set): pulp2_object_ids which no longer exist in MongoDB
    """
    flagged = []
    for record in model_cls.objects.filter(pulp2_object_id__in=removed_object_ids):
        record.not_in_pulp2 = True
        flagged.append(record)
    # batch_size keeps individual UPDATE statements bounded
    model_cls.objects.bulk_update(objs=flagged,
                                  fields=['not_in_pulp2'],
                                  batch_size=1000)


async def mark_removed_resources(plan, type_to_repo_ids):
    """
    Marks repositories, importers and distributors which are no longer present in Pulp2.

    For each resource kind the set of pre-migrated pulp2_object_ids is compared to the
    ids still present in MongoDB; the difference is flagged not_in_pulp2.

    Args:
        plan(MigrationPlan): A Migration Plan
        type_to_repo_ids(dict): A mapping from a pulp 2 repo type to a list of
            pulp 2 repo_ids
    """
    repos = plan.get_repositories()

    # filter by repo type
    repos_to_consider = []
    for repo_type in plan.get_plugins():
        repos_to_consider += type_to_repo_ids[repo_type]

    # in case only certain repositories are specified in the migration plan
    if repos:
        repos_to_consider = set(repos).intersection(repos_to_consider)

    # Mark repositories
    mongo_repo_q = mongo_Q(repo_id__in=repos_to_consider)
    mongo_repo_object_ids = set(
        str(i.id) for i in Repository.objects(mongo_repo_q).only('id'))

    psql_repo_types = plan.get_plugins()
    premigrated_repos = Pulp2Repository.objects.filter(type__in=psql_repo_types)
    premigrated_repo_object_ids = set(
        premigrated_repos.values_list('pulp2_object_id', flat=True))
    removed_repo_object_ids = premigrated_repo_object_ids - mongo_repo_object_ids
    _flag_not_in_pulp2(Pulp2Repository, removed_repo_object_ids)

    # Mark importers
    mongo_imp_object_ids = set(str(i.id) for i in Importer.objects.only('id'))
    premigrated_imps = Pulp2Importer.objects.filter(
        pulp2_repository__type__in=psql_repo_types)
    premigrated_imp_object_ids = set(
        premigrated_imps.values_list('pulp2_object_id', flat=True))
    removed_imp_object_ids = premigrated_imp_object_ids - mongo_imp_object_ids
    _flag_not_in_pulp2(Pulp2Importer, removed_imp_object_ids)

    # Mark distributors
    mongo_dist_object_ids = set(str(i.id) for i in Distributor.objects.only('id'))
    premigrated_dists = Pulp2Distributor.objects.filter(
        pulp2_repository__type__in=psql_repo_types)
    premigrated_dist_object_ids = set(
        premigrated_dists.values_list('pulp2_object_id', flat=True))
    removed_dist_object_ids = premigrated_dist_object_ids - mongo_dist_object_ids
    _flag_not_in_pulp2(Pulp2Distributor, removed_dist_object_ids)