def pre_migrate_all_without_content(plan):
    """
    Pre-migrate Pulp 2 repositories, importers and distributors for every plugin of the plan.

    For each plugin, the newest timestamps already recorded in the pulp2to3 tables are read:
     * pulp2_last_unit_added and pulp2_last_unit_removed for repositories
     * pulp2_last_updated for importers and distributors

    A Pulp 2 repository is then processed when either:
     * it is listed in the plan and never had content (last_unit_added is not set, so it
       cannot be filtered by time) or had content added/removed at or after those timestamps
     * one of its in-plan importers or distributors was updated at or after the recorded
       timestamp

    Repositories are walked in order of last_unit_added for the case when pre-migration is
    interrupted before all repositories are processed.

    Args:
        plan(MigrationPlan): A Migration Plan
    """
    _logger.debug('Pre-migrating Pulp 2 repositories')

    # fallback "last seen" time for tables which have nothing pre-migrated yet
    epoch = datetime.utcfromtimestamp(0)
    repo_fields = ('id', 'repo_id', 'last_unit_added', 'last_unit_removed', 'description')

    def latest(queryset, field):
        """Return the greatest `field` value found in `queryset`, or the epoch if there is none."""
        return queryset.aggregate(Max(field))[field + '__max'] or epoch

    with ProgressReport(
            message='Processing Pulp 2 repositories, importers, distributors',
            code='processing.repositories',
            total=0) as progress:

        for plugin_plan in plan.get_plugin_plans():
            plan_repo_ids = plugin_plan.get_repositories()
            plan_imp_repo_ids = plugin_plan.get_importers_repos()
            plan_dist_repo_ids = plugin_plan.get_distributors_repos()

            importer_types = list(plugin_plan.migrator.importer_migrators.keys())
            distributor_migrators = plugin_plan.migrator.distributor_migrators
            distributor_types = list(distributor_migrators.keys())

            # what is the newest state this plugin has already pre-migrated?
            premigrated_repos = Pulp2Repository.objects.filter(
                Q(pulp2_repo_type=plugin_plan.type))
            last_added = latest(premigrated_repos, 'pulp2_last_unit_added')
            last_removed = latest(premigrated_repos, 'pulp2_last_unit_removed')
            last_imp_update = latest(
                Pulp2Importer.objects.filter(Q(pulp2_type_id__in=importer_types)),
                'pulp2_last_updated')
            last_dist_update = latest(
                Pulp2Distributor.objects.filter(Q(pulp2_type_id__in=distributor_types)),
                'pulp2_last_updated')

            # repos whose importer or distributor changed since then
            changed_importers = Importer.objects(
                mongo_Q(repo_id__in=plan_imp_repo_ids)
                & mongo_Q(last_updated__gte=last_imp_update)).only('repo_id')
            changed_imp_repo_ids = {imp.repo_id for imp in changed_importers}
            changed_distributors = Distributor.objects(
                mongo_Q(repo_id__in=plan_dist_repo_ids)
                & mongo_Q(last_updated__gte=last_dist_update)).only('repo_id')
            changed_dist_repo_ids = {dist.repo_id for dist in changed_distributors}
            changed_impdist_repo_ids = changed_imp_repo_ids | changed_dist_repo_ids

            # repos from the plan which are empty or whose content changed since then
            content_changed_q = (mongo_Q(last_unit_added__gte=last_added)
                                 | mongo_Q(last_unit_removed__gte=last_removed))
            never_had_content_q = mongo_Q(last_unit_added__exists=False)
            plan_repo_q = (mongo_Q(repo_id__in=plan_repo_ids)
                           & (content_changed_q | never_had_content_q))

            selected_repos = Repository.objects(
                plan_repo_q | mongo_Q(repo_id__in=changed_impdist_repo_ids)
            ).order_by('last_unit_added')

            progress.total += selected_repos.count()
            progress.save()

            for repo_data in selected_repos.only(*repo_fields):
                repo_id = repo_data.repo_id
                # a repo and its importer/distributors are pre-migrated all-or-nothing
                with transaction.atomic():
                    if repo_id in plan_repo_ids:
                        pre_migrate_repo(repo_data, plan.repo_id_to_type)
                    if repo_id in plan_imp_repo_ids:
                        pre_migrate_importer(repo_id, importer_types)
                    if repo_id in plan_dist_repo_ids:
                        pre_migrate_distributor(repo_id, distributor_migrators)
                    progress.increment()
Exemple #2
0
def handle_outdated_resources(plan):
    """
    Flag pre-migrated resources which are not covered by the plan and drop stale Pulp 3 objects.

    For every plugin of the plan, pre-migrated repositories, importers and distributors of the
    plugin's types are marked with `not_in_plan=True` unless a matching object is found in
    Pulp 2 (MongoDB) for the repositories listed in the plan. That covers both resources removed
    from Pulp 2 and resources excluded from the plan.

    Afterwards, Publications and Distributions which belong to outdated distributors are
    deleted, and the affected distributors are flagged with `is_migrated=False`.

    Args:
        plan(MigrationPlan): A Migration Plan
    """
    plugins = plan.get_plugins()
    # NOTE(review): RepoSetup is defined outside of this function; presumably these calls
    # reconcile the recorded repository relations for the given plugins -- confirm.
    RepoSetup.mark_changed_relations(plugins)
    RepoSetup.finalize(plugins)

    for plugin_plan in plan.get_plugin_plans():
        inplan_repos = plugin_plan.get_repositories()

        # filter by repo type and by the repos specified in a plan
        repos_to_consider = plan.type_to_repo_ids[plugin_plan.type]
        repos_to_consider = set(inplan_repos).intersection(repos_to_consider)

        # object ids (as strings) of the considered repos which exist in Pulp 2
        mongo_repo_q = mongo_Q(repo_id__in=repos_to_consider)
        mongo_repo_obj_ids = set(
            str(i.id) for i in Repository.objects(mongo_repo_q).only("id"))

        # Mark removed or excluded repositories: everything of this plugin's repo type
        # which is pre-migrated but not among the ids collected above
        repo_type_q = Q(pulp2_repo_type=plugin_plan.type)
        inplan_repo_q = Q(pulp2_object_id__in=mongo_repo_obj_ids)
        Pulp2Repository.objects.filter(repo_type_q).exclude(
            inplan_repo_q).update(not_in_plan=True)

        # Mark removed or excluded importers
        inplan_imp_repos = plugin_plan.get_importers_repos()
        mongo_imp_q = mongo_Q(repo_id__in=inplan_imp_repos)
        mongo_imp_obj_ids = set(
            str(i.id) for i in Importer.objects(mongo_imp_q).only("id"))
        imp_types = plugin_plan.migrator.importer_migrators.keys()

        imp_type_q = Q(pulp2_type_id__in=imp_types)
        inplan_imp_q = Q(pulp2_object_id__in=mongo_imp_obj_ids)
        Pulp2Importer.objects.filter(imp_type_q).exclude(inplan_imp_q).update(
            not_in_plan=True)

        # Mark removed or excluded distributors
        inplan_dist_repos = plugin_plan.get_distributors_repos()
        mongo_dist_q = mongo_Q(repo_id__in=inplan_dist_repos)
        mongo_dist_obj_ids = set(
            str(i.id) for i in Distributor.objects(mongo_dist_q).only("id"))
        dist_types = plugin_plan.migrator.distributor_migrators.keys()

        dist_type_q = Q(pulp2_type_id__in=dist_types)
        inplan_dist_q = Q(pulp2_object_id__in=mongo_dist_obj_ids)
        Pulp2Distributor.objects.filter(dist_type_q).exclude(
            inplan_dist_q).update(not_in_plan=True)

    # Delete old Publications/Distributions which are no longer present in Pulp2.

    # It's critical to remove Distributions to avoid base_path overlap.
    # It makes the migration logic easier if we remove old Publications as well.

    # Delete criteria:
    #     - pulp2distributor is no longer in plan
    #     - pulp2repository content changed (repo.is_migrated=False) or it is no longer in plan

    repos_with_old_distributions_qs = Pulp2Repository.objects.filter(
        Q(is_migrated=False) | Q(not_in_plan=True))

    # a distributor is outdated when it has a Pulp 3 distribution or publication AND
    # it is either related to one of the repos above or is not in the plan itself
    old_dist_query = Q(pulp3_distribution__isnull=False) | Q(
        pulp3_publication__isnull=False)
    old_dist_query &= Q(pulp2_repos__in=repos_with_old_distributions_qs) | Q(
        not_in_plan=True)

    # all the flag flips and deletions below are applied as one unit
    with transaction.atomic():
        # this queryset is reused as a filter by each of the statements below
        pulp2distributors_with_old_distributions_qs = Pulp2Distributor.objects.filter(
            old_dist_query)

        pulp2distributors_with_old_distributions_qs.update(is_migrated=False)

        # If publication is shared by multiple distributions, on the corresponding distributors
        # flip the flag to false so the affected distributions will be updated with the new
        # publication
        Pulp2Distributor.objects.filter(
            pulp3_publication__in=Publication.objects.filter(
                pulp2distributor__in=pulp2distributors_with_old_distributions_qs
            )).update(is_migrated=False)

        # Delete outdated publications
        Publication.objects.filter(
            pulp2distributor__in=pulp2distributors_with_old_distributions_qs
        ).delete()

        # Delete outdated distributions
        Distribution.objects.filter(
            pulp2distributor__in=pulp2distributors_with_old_distributions_qs
        ).delete()

        # Remove relations to the pulp2repository in case the relation changed.
        # Pulp2Distributors with is_migrated=false is handled and re-added properly at
        # migration stage.
        # NOTE: this needs to be removed last, the queries above use this relation.
        not_migrated_dists = Pulp2Distributor.objects.filter(
            is_migrated=False).only("pulp_id")
        Pulp2Distributor.pulp2_repos.through.objects.filter(
            pulp2distributor__in=not_migrated_dists).delete()
Exemple #3
0
def pre_migrate_distributor(repo_id, distributor_migrators):
    """
    Pre-migrate the pulp 2 distributors of one repository.

    Every Pulp 2 distributor of the repository whose type is supported gets a Pulp2Distributor
    record. For an already known distributor the `not_in_plan` mark is cleared, and if its
    `last_updated` time differs from the recorded one, the stored config is refreshed, the
    record is flagged as not migrated, and the Pulp 3 publication/distribution which the
    migrator reports as outdated are deleted.

    Nothing happens if the repository has no distributor of a supported type in Pulp 2.

    Args:
        repo_id(str): An id of a pulp 2 repository which distributor should be migrated
        distributor_migrators(dict): supported distributor types and their models for migration
    """
    distributor_types = list(distributor_migrators.keys())
    mongo_distributor_q = mongo_Q(repo_id=repo_id,
                                  distributor_type_id__in=distributor_types)

    # The queryset is empty when the distributor no longer exists in Pulp2 or is of an
    # unsupported type; the loop then simply does nothing.
    for dist_data in Distributor.objects(mongo_distributor_q):
        # MongoDB provides naive time, PostgreSQL records are compared as timezone-aware ones
        last_updated = dist_data.last_updated and timezone.make_aware(
            dist_data.last_updated, timezone.utc)

        distributor, created = Pulp2Distributor.objects.get_or_create(
            pulp2_object_id=dist_data.id,
            defaults={
                "pulp2_id": dist_data.distributor_id,
                "pulp2_type_id": dist_data.distributor_type_id,
                "pulp2_last_updated": last_updated,
                "pulp2_config": dist_data.config,
                "pulp2_repo_id": repo_id,
                "is_migrated": False,
            },
        )

        if created:
            # a fresh record already carries the current state
            continue

        # if it was marked as such because it was not present in the migration plan
        distributor.not_in_plan = False

        if last_updated != distributor.pulp2_last_updated:
            distributor.pulp2_config = dist_data.config
            distributor.pulp2_last_updated = last_updated
            distributor.is_migrated = False

            # the migrator decides, based on the refreshed config, what has to be re-created
            dist_migrator = distributor_migrators.get(distributor.pulp2_type_id)
            needs_new_publication = dist_migrator.needs_new_publication(distributor)
            needs_new_distribution = dist_migrator.needs_new_distribution(distributor)
            remove_publication = (needs_new_publication
                                  and distributor.pulp3_publication)
            remove_distribution = (needs_new_distribution
                                   and distributor.pulp3_distribution)

            if remove_publication:
                # check if publication is shared by multiple distributions
                # on the corresponding distributor flip the flag to false so the affected
                # distribution will be updated with the new publication
                pulp2dists = distributor.pulp3_publication.pulp2distributor_set.all()
                for dist in pulp2dists:
                    if dist.is_migrated:
                        dist.is_migrated = False
                        dist.save()
                distributor.pulp3_publication.delete()
                distributor.pulp3_publication = None

            # A removed publication invalidates the distribution as well. The distributor
            # may have a publication but no distribution, so guard against None here.
            if ((remove_publication or remove_distribution)
                    and distributor.pulp3_distribution is not None):
                distributor.pulp3_distribution.delete()
                distributor.pulp3_distribution = None

        distributor.save()
Exemple #4
0
def pre_migrate_importer(repo_id, importer_types):
    """
    Pre-migrate a pulp 2 importer.

    The first Pulp 2 importer of the repository which has a supported type and a non-empty
    config gets a Pulp2Importer record. Importers without a feed are skipped with a warning.
    For an already known importer the `not_in_plan` mark is cleared, and if its `last_updated`
    time differs from the recorded one, the stored config is refreshed and the record is
    flagged as not migrated; the Pulp 3 remote is deleted as well if the feed changed.

    Args:
        repo_id(str): An id of a pulp 2 repository which importer should be migrated
        importer_types(list): a list of supported importer types
    """
    mongo_importer_q = mongo_Q(repo_id=repo_id,
                               importer_type_id__in=importer_types)

    # importers with empty config are not needed - nothing to migrate
    mongo_importer_q &= mongo_Q(config__exists=True) & mongo_Q(config__ne={})

    importer_data = Importer.objects(mongo_importer_q).only(
        "id", "repo_id", "importer_type_id", "last_updated", "config").first()
    if importer_data is None:
        # Either the importer no longer exists in Pulp2,
        # or it is of an unsupported type,
        # or it has an empty config
        return

    if not importer_data.config.get("feed"):
        # Pulp 3 remotes require URL
        _logger.warning(
            "Importer from %s cannot be migrated because it does not have a feed",
            repo_id)
        return

    # MongoDB provides naive time, PostgreSQL records are compared as timezone-aware ones
    last_updated = importer_data.last_updated and timezone.make_aware(
        importer_data.last_updated, timezone.utc)

    importer, created = Pulp2Importer.objects.get_or_create(
        pulp2_object_id=importer_data.id,
        defaults={
            "pulp2_type_id": importer_data.importer_type_id,
            "pulp2_last_updated": last_updated,
            "pulp2_config": importer_data.config,
            "pulp2_repo_id": repo_id,
            "is_migrated": False,
        },
    )

    if created:
        # a fresh record already carries the current state
        return

    # if it was marked as such because it was not present in the migration plan
    importer.not_in_plan = False

    # check if there were any changes since last time
    if last_updated != importer.pulp2_last_updated:
        # remove Remote in case of feed change
        if importer.pulp2_config.get("feed") != importer_data.config.get("feed"):
            if importer.pulp3_remote:
                importer.pulp3_remote.delete()
            importer.pulp3_remote = None

        # NOTE(review): the original carried a remark about not flipping is_migrated for
        # "LCE" on an importer migrated at least once; the flag is reset unconditionally
        # here, as before -- confirm whether that remark is still relevant.
        importer.pulp2_last_updated = last_updated
        importer.pulp2_config = importer_data.config
        importer.is_migrated = False

    importer.save()
async def pre_migrate_all_without_content(plan):
    """
    Pre-migrate repositories, relations to their contents, importers and distributors.

    NOTE: MongoDB hands out "naive" datetimes (no timezone), while Django expects
    timezone-aware ones, and a naive time never equals an aware one. Hence every value used in
    a MongoDB comparison has to be naive and every value used with Django/PostgreSQL has to be
    timezone aware.

    Args:
        plan(MigrationPlan): A Migration Plan
    """
    plan_repos = plan.get_repositories()
    plan_importers = plan.get_importers()
    plan_distributors = plan.get_distributors()

    _logger.debug('Pre-migrating Pulp 2 repositories')

    # newest content change already recorded in the Pulp2Repository table (epoch if none)
    unix_epoch = timezone.make_aware(datetime(1970, 1, 1), timezone.utc)
    added_stats = Pulp2Repository.objects.aggregate(Max('pulp2_last_unit_added'))
    newest_added = added_stats['pulp2_last_unit_added__max'] or unix_epoch
    removed_stats = Pulp2Repository.objects.aggregate(Max('pulp2_last_unit_removed'))
    newest_removed = removed_stats['pulp2_last_unit_removed__max'] or unix_epoch
    newest_change = max(newest_added, newest_removed)
    newest_change_naive = timezone.make_naive(newest_change, timezone=timezone.utc)

    with ProgressReport(message='Pre-migrating Pulp 2 repositories, importers, distributors',
                        code='premigrating.repositories') as progress:
        # candidates for pre-migration: repos which never had content ...
        repo_q = mongo_Q(last_unit_added__exists=False)
        # ... and repos with content added or removed since the newest recorded change
        repo_q = repo_q | mongo_Q(last_unit_added__gte=newest_change_naive)
        repo_q = repo_q | mongo_Q(last_unit_removed__gte=newest_change_naive)

        # narrow down to the repositories named in the migration plan, if any
        if plan_repos:
            repo_q &= mongo_Q(repo_id__in=plan_repos)

        # only repositories of the plugins which are being migrated
        wanted_repo_types = [f'{plugin}-repo' for plugin in plan.get_plugins()]
        repo_q &= mongo_Q(__raw__={"notes._repo-type": {'$in': wanted_repo_types}})

        mongo_repos = Repository.objects(repo_q)
        progress.total = mongo_repos.count()
        progress.save()

        needed_fields = ('id', 'repo_id', 'last_unit_added', 'last_unit_removed',
                         'description', 'notes')
        for repo_data in mongo_repos.only(*needed_fields):
            # a repo with its importer, distributors and content relations is
            # pre-migrated all-or-nothing
            with transaction.atomic():
                repo = await pre_migrate_repo(repo_data)
                await pre_migrate_importer(repo, plan_importers)
                await pre_migrate_distributor(repo, plan_distributors)
                await pre_migrate_repocontent(repo)
            progress.increment()
Exemple #6
0
def _mark_not_in_pulp2(model, premigrated_qs, mongo_object_ids):
    """
    Set `not_in_pulp2` on the records of `model` whose Pulp 2 object is gone.

    Args:
        model: a pre-migrated model class which has `pulp2_object_id` and `not_in_pulp2` fields
        premigrated_qs: a queryset of `model` records to check
        mongo_object_ids(set): object ids (as strings) which still exist in Pulp 2
    """
    premigrated_object_ids = set(
        premigrated_qs.values_list('pulp2_object_id', flat=True))
    removed_object_ids = premigrated_object_ids - mongo_object_ids

    # one UPDATE statement; there is no need to load the records just to flip a flag
    model.objects.filter(
        pulp2_object_id__in=removed_object_ids).update(not_in_pulp2=True)


async def mark_removed_resources(plan, type_to_repo_ids):
    """
    Marks repositories, importers and distributors which are no longer present in Pulp2.

    Pre-migrated records of the plugins from the plan are compared by their Pulp 2 object id
    with what is currently found in MongoDB; the ones which are missing there get
    `not_in_pulp2=True`.

    Args:
        plan(MigrationPlan): A Migration Plan
        type_to_repo_ids(dict): A mapping from a pulp 2 repo type to a list of pulp 2 repo_ids
    """
    repos = plan.get_repositories()
    psql_repo_types = plan.get_plugins()

    # filter by repo type
    repos_to_consider = []
    for repo_type in psql_repo_types:
        repos_to_consider += type_to_repo_ids[repo_type]

    # in case only certain repositories are specified in the migration plan
    if repos:
        repos_to_consider = set(repos).intersection(repos_to_consider)

    # Mark repositories
    mongo_repo_q = mongo_Q(repo_id__in=repos_to_consider)
    mongo_repo_object_ids = {
        str(repo.id) for repo in Repository.objects(mongo_repo_q).only('id')}
    _mark_not_in_pulp2(
        Pulp2Repository,
        Pulp2Repository.objects.filter(type__in=psql_repo_types),
        mongo_repo_object_ids)

    # Mark importers (compared against all the importers present in Pulp 2)
    mongo_imp_object_ids = {str(imp.id) for imp in Importer.objects.only('id')}
    _mark_not_in_pulp2(
        Pulp2Importer,
        Pulp2Importer.objects.filter(pulp2_repository__type__in=psql_repo_types),
        mongo_imp_object_ids)

    # Mark distributors (compared against all the distributors present in Pulp 2)
    mongo_dist_object_ids = {str(dist.id) for dist in Distributor.objects.only('id')}
    _mark_not_in_pulp2(
        Pulp2Distributor,
        Pulp2Distributor.objects.filter(pulp2_repository__type__in=psql_repo_types),
        mongo_dist_object_ids)