Beispiel #1
0
    def create(self, profile_hash, repo_id, profile, applicability,
               all_profiles_hash):
        """
        Instantiate a RepoProfileApplicability from the given values, call save() on it and
        hand it back to the caller.

        :param profile_hash:      Hash of a profile that belongs to the profile set of a consumer
        :type  profile_hash:      basestring
        :param repo_id:           ID of the repository the applicability data refers to
        :type  repo_id:           basestring
        :param profile:           The complete profile from which profile_hash was derived
        :type  profile:           object
        :param applicability:     Dictionary mapping unit type IDs to lists of applicable unit IDs
        :type  applicability:     dict
        :param all_profiles_hash: Hash of the set of profiles the applicability data refers to
        :type  all_profiles_hash: basestring
        :return:                  The newly created RepoProfileApplicability object
        :rtype:                   pulp.server.db.model.consumer.RepoProfileApplicability
        """
        # A distinct local name keeps the ``applicability`` argument from being rebound.
        new_rpa = RepoProfileApplicability(
            profile_hash=profile_hash,
            repo_id=repo_id,
            profile=profile,
            applicability=applicability,
            all_profiles_hash=all_profiles_hash)
        new_rpa.save()
        return new_rpa
Beispiel #2
0
    def batch_regenerate_applicability(repo_id, existing_applicability_ids):
        """
        Recalculate and store applicability data for one batch of existing applicability
        records.

        :param repo_id: ID of the repository applicability is calculated against
        :type repo_id: str
        :param existing_applicability_ids: Object IDs of the applicability records to process
        :type existing_applicability_ids: tuple of dicts in form of {"_id": ObjectID('mongo-id')}
        """
        object_ids = [entry['_id'] for entry in existing_applicability_ids]
        documents = RepoProfileApplicability.get_collection().find(
            {"_id": {"$in": object_ids}})
        for document in documents:
            # Wrap the raw MongoDB document in a RepoProfileApplicability object
            rpa = RepoProfileApplicability(**dict(document))
            profile_hash = rpa['profile_hash']
            unit_profile = UnitProfile.get_collection().find_one(
                {'profile_hash': profile_hash}, fields=['id', 'content_type'])
            # Unit profiles change whenever packages are installed or removed on consumers, so
            # the record may point at a UnitProfile that is gone. That is harmless: Pulp's
            # monthly cleanup task identifies such dangling references and removes them.
            if unit_profile is not None:
                # Recalculate applicability for this unit profile / repository pair
                ApplicabilityRegenerationManager.regenerate_applicability(
                    profile_hash, unit_profile['content_type'], unit_profile['id'], repo_id,
                    rpa)
Beispiel #3
0
    def filter(self, query_params):
        """
        Run the given MongoDB query against the RepoProfileApplicability collection and return
        the matching documents as RepoProfileApplicability objects.

        :param query_params: MongoDB query dictionary selecting RepoProfileApplicability
                             documents
        :type  query_params: dict
        :return:             RepoProfileApplicability objects matching the query
        :rtype:              list
        """
        matches = []
        for document in RepoProfileApplicability.get_collection().find(query_params):
            matches.append(RepoProfileApplicability(**dict(document)))
        return matches
Beispiel #4
0
    def regenerate_applicability_for_repos(repo_criteria):
        """
        Regenerate and save applicability data affected by given updated repositories.

        Every RepoProfileApplicability record of each selected repository is recalculated.
        Records whose profile hash no longer matches any UnitProfile are skipped.

        :param repo_criteria: The repo selection criteria
        :type repo_criteria: dict
        """
        repo_criteria = Criteria.from_dict(repo_criteria)
        repo_query_manager = managers.repo_query_manager()

        # Process repo criteria; only the repository IDs are needed
        repo_criteria.fields = ["id"]
        repo_ids = [r["id"] for r in repo_query_manager.find_by_criteria(repo_criteria)]

        for repo_id in repo_ids:
            # Find all existing applicabilities for given repo_id
            existing_applicabilities = RepoProfileApplicability.get_collection().find(
                {"repo_id": repo_id})
            for existing_applicability in existing_applicabilities:
                # Convert the raw MongoDB document to a RepoProfileApplicability object
                existing_applicability = RepoProfileApplicability(**dict(existing_applicability))
                profile_hash = existing_applicability["profile_hash"]
                unit_profile = UnitProfile.get_collection().find_one(
                    {"profile_hash": profile_hash}, fields=["id", "content_type"]
                )
                if unit_profile is None:
                    # find_one() returns None when no UnitProfile carries this hash any more
                    # (e.g. the consumer's profile changed). Skip the dangling record instead
                    # of failing with a TypeError on the subscripts below.
                    continue

                # Regenerate applicability data for given unit_profile and repo id
                ApplicabilityRegenerationManager.regenerate_applicability(
                    profile_hash, unit_profile["content_type"], unit_profile["id"], repo_id,
                    existing_applicability
                )
Beispiel #5
0
    def remove_orphans():
        """
        Delete orphaned RepoProfileApplicability objects.

        Such objects become orphaned over time as repositories are deleted or as consumer
        profiles change. Two passes are made: the first removes objects whose repo_id is not
        present in the Repo collection, the second removes objects whose profile_hash is not
        present in the UnitProfile collection.
        """
        applicability_collection = RepoProfileApplicability.get_collection()

        # Pass 1: repo_ids referenced by applicability objects vs. repo ids that exist in Pulp
        referenced_repo_ids = applicability_collection.distinct('repo_id')
        known_repo_ids = Repo.get_collection().distinct('id')
        orphaned_repo_ids = list(set(referenced_repo_ids) - set(known_repo_ids))
        if orphaned_repo_ids:
            applicability_collection.remove({'repo_id': {'$in': orphaned_repo_ids}})

        # Pass 2: profile hashes referenced by the remaining applicability objects vs. the
        # profile hashes of the current UnitProfiles
        referenced_hashes = applicability_collection.distinct('profile_hash')
        known_hashes = UnitProfile.get_collection().distinct('profile_hash')
        orphaned_hashes = list(set(referenced_hashes) - set(known_hashes))
        if orphaned_hashes:
            applicability_collection.remove({'profile_hash': {'$in': orphaned_hashes}})
Beispiel #6
0
    def queue_regenerate_applicability_for_repos(repo_criteria):
        """
        Dispatch a group of tasks that regenerate and save the applicability data affected by
        the given updated repositories.

        :param repo_criteria: The repo selection criteria
        :type repo_criteria: dict
        :return: The ID shared by all tasks queued by this call
        :rtype: uuid.UUID
        """
        criteria = Criteria.from_dict(repo_criteria)
        criteria.fields = ['id']

        # Resolve the criteria to the IDs of the selected repositories
        repo_ids = []
        for repository in model.Repository.objects.find_by_criteria(criteria):
            repo_ids.append(repository.repo_id)

        group_id = uuid4()

        for repo_id in repo_ids:
            rpa_collection = RepoProfileApplicability.get_collection()
            hash_cursor = rpa_collection.find({'repo_id': repo_id}, {'profile_hash': 1})
            # One task per page of 10 profile hashes
            for page in paginate(hash_cursor, 10):
                batch_regenerate_applicability_task.apply_async(
                    (repo_id, page), group_id=group_id)
        return group_id
Beispiel #7
0
    def regenerate_applicability_for_repos(self, repo_criteria=None):
        """
        Regenerate and save applicability data affected by given updated repositories.

        Every RepoProfileApplicability record of each selected repository is recalculated.
        Records whose profile hash no longer matches any UnitProfile are skipped.

        NOTE: although repo_criteria defaults to None, a Criteria object is required in
        practice; its ``fields`` attribute is overwritten with ``['id']`` on the object passed
        in, so passing None raises AttributeError.

        :param repo_criteria: The repo selection criteria
        :type repo_criteria: pulp.server.db.model.criteria.Criteria
        """
        repo_query_manager = managers.repo_query_manager()

        # Process repo criteria; only the repository IDs are needed
        repo_criteria.fields = ['id']
        repo_ids = [r['id'] for r in repo_query_manager.find_by_criteria(repo_criteria)]

        for repo_id in repo_ids:
            # Find all existing applicabilities for given repo_id
            existing_applicabilities = RepoProfileApplicability.get_collection().find(
                {'repo_id': repo_id})
            for existing_applicability in existing_applicabilities:
                # Convert the raw MongoDB document to a RepoProfileApplicability object
                existing_applicability = RepoProfileApplicability(**dict(existing_applicability))
                profile_hash = existing_applicability['profile_hash']
                unit_profile = UnitProfile.get_collection().find_one(
                    {'profile_hash': profile_hash}, fields=['id', 'content_type'])
                if unit_profile is None:
                    # find_one() returns None when no UnitProfile carries this hash any more
                    # (e.g. the consumer's profile changed). Skip the dangling record instead
                    # of failing with a TypeError on the subscripts below.
                    continue

                # Regenerate applicability data for given unit_profile and repo id
                self.regenerate_applicability(profile_hash, unit_profile['content_type'],
                                              unit_profile['id'],
                                              repo_id,
                                              existing_applicability)
Beispiel #8
0
    def regenerate_applicability_for_repos(repo_criteria):
        """
        Recalculate and store the applicability data of every repository selected by the
        given criteria.

        :param repo_criteria: The repo selection criteria
        :type repo_criteria: dict
        """
        criteria = Criteria.from_dict(repo_criteria)
        query_manager = managers.repo_query_manager()

        # Only the repository IDs are needed from the criteria search
        criteria.fields = ['id']
        repo_ids = []
        for repo in query_manager.find_by_criteria(criteria):
            repo_ids.append(repo['id'])

        for repo_id in repo_ids:
            # All applicability records currently stored for this repository
            documents = RepoProfileApplicability.get_collection().find({'repo_id': repo_id})
            for document in documents:
                # Wrap the raw MongoDB document in a RepoProfileApplicability object
                rpa = RepoProfileApplicability(**dict(document))
                profile_hash = rpa['profile_hash']
                unit_profile = UnitProfile.get_collection().find_one(
                    {'profile_hash': profile_hash}, fields=['id', 'content_type'])
                # Unit profiles change whenever packages are installed or removed on
                # consumers, so the record may point at a UnitProfile that is gone. That is
                # harmless: Pulp's monthly cleanup task identifies such dangling references
                # and removes them.
                if unit_profile is not None:
                    # Recalculate applicability for this unit profile / repository pair
                    ApplicabilityRegenerationManager.regenerate_applicability(
                        profile_hash, unit_profile['content_type'], unit_profile['id'],
                        repo_id, rpa)
Beispiel #9
0
    def remove_orphans():
        """
        Delete orphaned RepoProfileApplicability objects.

        Such objects become orphaned over time as repositories are deleted or as consumer
        profiles change. Two passes are made: the first removes objects whose repo_id does not
        belong to any Repository, the second removes objects whose profile_hash is not present
        in the UnitProfile collection.
        """
        applicability_collection = RepoProfileApplicability.get_collection()

        # Pass 1: repo_ids referenced by applicability objects vs. repo_ids that exist in Pulp
        referenced_repo_ids = applicability_collection.distinct('repo_id')
        known_repo_ids = model.Repository.objects.distinct('repo_id')
        orphaned_repo_ids = list(set(referenced_repo_ids) - set(known_repo_ids))
        if orphaned_repo_ids:
            applicability_collection.remove({'repo_id': {'$in': orphaned_repo_ids}})

        # Pass 2: profile hashes referenced by the remaining applicability objects vs. the
        # profile hashes of the current UnitProfiles
        referenced_hashes = applicability_collection.distinct('profile_hash')
        known_hashes = UnitProfile.get_collection().distinct('profile_hash')
        orphaned_hashes = list(set(referenced_hashes) - set(known_hashes))
        if orphaned_hashes:
            applicability_collection.remove({'profile_hash': {'$in': orphaned_hashes}})
Beispiel #10
0
    def batch_regenerate_applicability(repo_id, profile_hashes):
        """
        Recalculate and store applicability data for one batch of existing applicability
        records.

        :param repo_id: ID of the repository applicability is calculated against
        :type repo_id: str
        :param profile_hashes: Consumer profile hashes of the applicability records to process.
                               Keep the batch small: all profile data associated with these
                               hashes is loaded into memory.
        :type profile_hashes: tuple of dicts in form of {'profile_hash': str}
        """
        wanted_hashes = [entry['profile_hash'] for entry in profile_hashes]
        cursor = RepoProfileApplicability.get_collection().find(
            {"repo_id": repo_id, "profile_hash": {"$in": wanted_hashes}})
        # Read the whole result set before doing any regeneration work
        documents = list(cursor)
        for document in documents:
            # Wrap the raw MongoDB document in a RepoProfileApplicability object
            rpa = RepoProfileApplicability(**dict(document))
            profile_hash = rpa['profile_hash']
            unit_profile = UnitProfile.get_collection().find_one(
                {'profile_hash': profile_hash}, projection=['id', 'content_type'])
            # Unit profiles change whenever packages are installed or removed on consumers, so
            # the record may point at a UnitProfile that is gone. That is harmless: Pulp's
            # monthly cleanup task identifies such dangling references and removes them.
            if unit_profile is not None:
                # Recalculate applicability for this unit profile / repository pair
                ApplicabilityRegenerationManager.regenerate_applicability(
                    profile_hash, unit_profile['content_type'], unit_profile['id'], repo_id,
                    rpa)
Beispiel #11
0
    def regenerate_applicability_for_repos(repo_criteria):
        """
        Recalculate and store the applicability data of every repository selected by the
        given criteria.

        :param repo_criteria: The repo selection criteria
        :type repo_criteria: dict
        """
        criteria = Criteria.from_dict(repo_criteria)

        # Resolve the criteria to the IDs of the selected repositories
        criteria.fields = ['id']
        repo_ids = []
        for repository in model.Repository.objects.find_by_criteria(criteria):
            repo_ids.append(repository.repo_id)

        for repo_id in repo_ids:
            # All applicability records currently stored for this repository. The batch size
            # of 5 ensures the MongoDB cursor does not time out; see
            # https://pulp.plan.io/issues/998#note-6 for more details.
            rpa_collection = RepoProfileApplicability.get_collection()
            cursor = rpa_collection.find({'repo_id': repo_id}).batch_size(5)
            for document in cursor:
                rpa = RepoProfileApplicability(**dict(document))
                profile_hash = rpa['profile_hash']
                unit_profile = UnitProfile.get_collection().find_one(
                    {'profile_hash': profile_hash}, projection=['id', 'content_type'])
                # Unit profiles change whenever packages are installed or removed on
                # consumers, so the record may point at a UnitProfile that is gone. That is
                # harmless: Pulp's monthly cleanup task identifies such dangling references
                # and removes them.
                if unit_profile is not None:
                    # Recalculate applicability for this unit profile / repository pair
                    ApplicabilityRegenerationManager.regenerate_applicability(
                        profile_hash, unit_profile['content_type'], unit_profile['id'],
                        repo_id, rpa)
Beispiel #12
0
    def regenerate_applicability_for_repos(repo_criteria):
        """
        Recalculate and store the applicability data of every repository selected by the
        given criteria.

        :param repo_criteria: The repo selection criteria
        :type repo_criteria: dict
        """
        criteria = Criteria.from_dict(repo_criteria)
        query_manager = managers.repo_query_manager()

        # Only the repository IDs are needed from the criteria search
        criteria.fields = ['id']
        repo_ids = []
        for repo in query_manager.find_by_criteria(criteria):
            repo_ids.append(repo['id'])

        for repo_id in repo_ids:
            # All applicability records currently stored for this repository
            documents = RepoProfileApplicability.get_collection().find({'repo_id': repo_id})
            for document in documents:
                # Wrap the raw MongoDB document in a RepoProfileApplicability object
                rpa = RepoProfileApplicability(**dict(document))
                profile_hash = rpa['profile_hash']
                unit_profile = UnitProfile.get_collection().find_one(
                    {'profile_hash': profile_hash}, fields=['id', 'content_type'])
                # Unit profiles change whenever packages are installed or removed on
                # consumers, so the record may point at a UnitProfile that is gone. That is
                # harmless: Pulp's monthly cleanup task identifies such dangling references
                # and removes them.
                if unit_profile is not None:
                    # Recalculate applicability for this unit profile / repository pair
                    ApplicabilityRegenerationManager.regenerate_applicability(
                        profile_hash, unit_profile['content_type'], unit_profile['id'],
                        repo_id, rpa)
Beispiel #13
0
    def create(self, profile_hash, repo_id, profile, applicability):
        """
        Instantiate a RepoProfileApplicability from the given values, call save() on it and
        hand it back to the caller.

        :param profile_hash:  Hash of the profile the applicability data was calculated for
        :type  profile_hash:  basestring
        :param repo_id:       ID of the repository the applicability data refers to
        :type  repo_id:       basestring
        :param profile:       The complete profile from which profile_hash was derived
        :type  profile:       object
        :param applicability: Dictionary mapping unit type IDs to lists of applicable unit IDs
        :type  applicability: dict
        :return:              The newly created RepoProfileApplicability object
        :rtype:               pulp.server.db.model.consumer.RepoProfileApplicability
        """
        # A distinct local name keeps the ``applicability`` argument from being rebound.
        new_rpa = RepoProfileApplicability(
            profile_hash=profile_hash,
            repo_id=repo_id,
            profile=profile,
            applicability=applicability)
        new_rpa.save()
        return new_rpa
Beispiel #14
0
    def batch_regenerate_applicability(repo_id, profile_hashes):
        """
        Recalculate and store applicability data for one batch of existing applicability
        records.

        :param repo_id: ID of the repository applicability is calculated against
        :type repo_id: str
        :param profile_hashes: Consumer profile hashes of the applicability records to process.
                               Keep the batch small: all profile data associated with these
                               hashes is loaded into memory.
        :type profile_hashes: tuple of dicts in form of {'profile_hash': str}
        """
        wanted_hashes = [entry['profile_hash'] for entry in profile_hashes]
        query = {"repo_id": repo_id, "profile_hash": {"$in": wanted_hashes}}
        cursor = RepoProfileApplicability.get_collection().find(query)
        # Read the whole result set before doing any regeneration work
        documents = list(cursor)
        for document in documents:
            # Wrap the raw MongoDB document in a RepoProfileApplicability object
            rpa = RepoProfileApplicability(**dict(document))
            profile_hash = rpa['profile_hash']
            unit_profile = UnitProfile.get_collection().find_one(
                {'profile_hash': profile_hash}, projection=['id', 'content_type'])
            # Unit profiles change whenever packages are installed or removed on consumers, so
            # the record may point at a UnitProfile that is gone. That is harmless: Pulp's
            # monthly cleanup task identifies such dangling references and removes them.
            if unit_profile is not None:
                # Recalculate applicability for this unit profile / repository pair
                ApplicabilityRegenerationManager.regenerate_applicability(
                    profile_hash, unit_profile['content_type'], unit_profile['id'], repo_id,
                    rpa)
Beispiel #15
0
    def filter(self, query_params):
        """
        Run the given MongoDB query against the RepoProfileApplicability collection and return
        the matching documents as RepoProfileApplicability objects.

        :param query_params: MongoDB query dictionary selecting RepoProfileApplicability
                             documents
        :type  query_params: dict
        :return:             RepoProfileApplicability objects matching the query
        :rtype:              list
        """
        cursor = RepoProfileApplicability.get_collection().find(query_params)
        results = []
        for document in cursor:
            results.append(RepoProfileApplicability(**dict(document)))
        return results
Beispiel #16
0
    def _is_existing_applicability(repo_id, profile_hash):
        """
        Tell whether an applicability record is already stored for the given repository and
        profile hash.

        :param repo_id:      repo id
        :type repo_id:       basestring
        :param profile_hash: unit profile hash
        :type profile_hash:  basestring
        :return:             True if a matching record exists, False otherwise
        :rtype:              bool
        """
        # Only the _id is fetched; the lookup is purely an existence check
        found = RepoProfileApplicability.get_collection().find_one(
            {'repo_id': repo_id, 'profile_hash': profile_hash}, projection=['_id'])
        return True if found else False
Beispiel #17
0
    def _is_existing_applicability(repo_id, profile_hash):
        """
        Tell whether an applicability record is already stored for the given repository and
        profile hash.

        :param repo_id:      repo id
        :type repo_id:       basestring
        :param profile_hash: unit profile hash
        :type profile_hash:  basestring
        :return:             True if a matching record exists, False otherwise
        :rtype:              bool
        """
        rpa_collection = RepoProfileApplicability.get_collection()
        # Only the _id is fetched; the lookup is purely an existence check
        return bool(rpa_collection.find_one(
            {'repo_id': repo_id, 'profile_hash': profile_hash}, fields=['_id']))
Beispiel #18
0
def _get_applicability_map(all_profiles_hashes, content_types):
    """
    Build an "applicability_map", which is a dictionary that maps tuples of
    (all_profiles_hash, repo_id) to a dictionary of applicability data and consumer_ids. The
    consumer_ids are just initialized to an empty list, so that a later method can add
    consumers to it. For example, it might look like:

    {('all_profiles_hash_1', 'repo_1'): {'applicability': {<applicability_data>}, 'consumers': []}}

    :param all_profiles_hashes: A list of all_profiles_hashes that the applicabilities should
                                be queried with. The applicability map is initialized with all
                                applicability data for all the given all_profiles_hashes.
    :type  all_profiles_hashes: list
    :param content_types:  If not None, content_types is a list of content_types to
                           be included in the applicability data within the
                           applicability_map. Applicabilities left without any data after
                           this filtering are omitted from the map.
    :type  content_types:  list or None
    :return:               The applicability map
    :rtype:                dict
    """

    applicabilities = RepoProfileApplicability.get_collection().find(
        {'all_profiles_hash': {
            '$in': all_profiles_hashes
        }},
        projection=['all_profiles_hash', 'repo_id', 'applicability'])
    return_value = {}
    for a in applicabilities:
        if content_types is not None:
            # The caller has requested us to filter by content_type, so drop the unwanted
            # content types from the applicability data. Iterate over a snapshot of the keys:
            # deleting entries while iterating the live key view raises RuntimeError on
            # Python 3.
            for key in list(a['applicability']):
                if key not in content_types:
                    del a['applicability'][key]
            # If a doesn't have anything worth reporting, move on to the next applicability
            if not a['applicability']:
                continue
        return_value[(a['all_profiles_hash'], a['repo_id'])] = {
            'applicability': a['applicability'],
            'consumers': []
        }
    return return_value
Beispiel #19
0
    def _is_existing_applicability(repo_id, all_profiles_hash):
        """
        Tell whether an applicability record is already stored for the given repository and
        all_profiles_hash.

        :param repo_id:           repo id
        :type repo_id:            basestring
        :param all_profiles_hash: consumer profiles' hash
        :type all_profiles_hash:  basestring
        :return:                  True if a matching record exists, False otherwise
        :rtype:                   bool
        """
        rpa_collection = RepoProfileApplicability.get_collection()
        # Only the _id is requested; the query is purely an existence check
        matching = rpa_collection.find(
            {'repo_id': repo_id, 'all_profiles_hash': all_profiles_hash},
            projection=['_id'])
        return bool(matching.count())
Beispiel #20
0
def _get_applicability_map(profile_hashes, content_types):
    """
    Build an "applicability_map", which is a dictionary that maps tuples of
    (profile_hash, repo_id) to a dictionary of applicability data and consumer_ids. The
    consumer_ids are just initialized to an empty list, so that a later method can add
    consumers to it. For example, it might look like:

    {('profile_hash_1', 'repo_1'): {'applicability': {<applicability_data>}, 'consumers': []}}

    :param profile_hashes: A list of profile hashes that the applicabilities should be queried
                           with. The applicability map is initialized with all applicability
                           data for all the given profile_hashes.
    :type  profile_hashes: list
    :param content_types:  If not None, content_types is a list of content_types to
                           be included in the applicability data within the
                           applicability_map. Applicabilities left with no data after this
                           filtering are omitted from the map.
    :type  content_types:  list or None
    :return:               The applicability map
    :rtype:                dict
    """
    applicabilities = RepoProfileApplicability.get_collection().find(
        {'profile_hash': {'$in': profile_hashes}},
        projection=['profile_hash', 'repo_id', 'applicability'])
    return_value = {}
    for a in applicabilities:
        applicability_data = a['applicability']
        if content_types is not None:
            # The caller has requested us to filter by content_type, so drop the unwanted
            # content types from the applicability data. Iterate over a snapshot of the keys:
            # deleting from a dict while iterating its live keys() view raises RuntimeError
            # on Python 3.
            for key in list(applicability_data):
                if key not in content_types:
                    del applicability_data[key]
            # If a doesn't have anything worth reporting, move on to the next applicability
            if not applicability_data:
                continue
        return_value[(a['profile_hash'], a['repo_id'])] = {'applicability': applicability_data,
                                                           'consumers': []}
    return return_value
Beispiel #21
0
    def queue_regenerate_applicability_for_repos(repo_criteria):
        """
        Dispatch batched applicability regeneration tasks for every repository matched by the
        given criteria.

        All tasks dispatched by one call are given the same, freshly generated group id.

        :param repo_criteria: The repo selection criteria
        :type repo_criteria: dict
        :return: the group id (as produced by uuid4()) passed to every queued task
        """
        criteria = Criteria.from_dict(repo_criteria)

        # Only the id field is requested from each matched repository
        criteria.fields = ['id']
        matched_repos = model.Repository.objects.find_by_criteria(criteria)
        repo_ids = [repo.repo_id for repo in matched_repos]

        task_group_id = uuid4()
        task_options = {'group_id': task_group_id}

        for repo_id in repo_ids:
            applicability_refs = RepoProfileApplicability.get_collection().find(
                {'repo_id': repo_id}, {'profile_hash': 1})
            # Queue one task per batch yielded by paginate (presumably at most 10 records each)
            for batch in paginate(applicability_refs, 10):
                batch_regenerate_applicability_task.apply_async((repo_id, batch), **task_options)
        return task_group_id
Beispiel #22
0
    def regenerate_applicability_for_repos(repo_criteria):
        """
        Recalculate and store applicability for every existing applicability record that
        belongs to a repository matched by the given criteria.

        Records whose unit profile can no longer be found are skipped.

        :param repo_criteria: The repo selection criteria
        :type repo_criteria: dict
        """
        criteria = Criteria.from_dict(repo_criteria)

        # Only the id field is requested from each matched repository
        criteria.fields = ['id']
        matched_repos = model.Repository.objects.find_by_criteria(criteria)
        repo_ids = [repo.repo_id for repo in matched_repos]

        for repo_id in repo_ids:
            # Walk all existing applicabilities for this repo_id. A batch size of 5 ensures the
            # MongoDB cursor does not time out. See https://pulp.plan.io/issues/998#note-6 for
            # more details.
            cursor = RepoProfileApplicability.get_collection().find({'repo_id': repo_id})
            for raw_applicability in cursor.batch_size(5):
                rpa = RepoProfileApplicability(**dict(raw_applicability))
                profile_hash = rpa['profile_hash']
                unit_profile = UnitProfile.get_collection().find_one(
                    {'profile_hash': profile_hash}, projection=['id', 'content_type'])
                if unit_profile is not None:
                    # Regenerate applicability data for the found unit_profile and repo id
                    ApplicabilityRegenerationManager.regenerate_applicability(
                        profile_hash, unit_profile['content_type'], unit_profile['id'], repo_id,
                        rpa)
                # Otherwise the record references a UnitProfile that no longer exists. Unit
                # profiles change whenever packages are installed or removed on consumers, so
                # this can happen; it is harmless, as Pulp has a monthly cleanup task that will
                # identify these dangling references and remove them.
Beispiel #23
0
    def regenerate_applicability(profile_hash,
                                 content_type,
                                 profile_id,
                                 bound_repo_id,
                                 existing_applicability=None):
        """
        Regenerate and save applicability data for given profile and bound repo id.
        If existing_applicability is not None, replace it with the new applicability data.

        Nothing is calculated or saved when the profiler for content_type does not provide its
        own calculate_applicable_units, when the bound repo contains none of the content types
        the profiler handles, or when the unit profile that has to be looked up by profile_id
        no longer exists.

        :param profile_hash: hash of the unit profile
        :type profile_hash: basestring

        :param content_type: profile (unit) type ID
        :type content_type: str

        :param profile_id: unique id of the unit profile
        :type profile_id: str

        :param bound_repo_id: repo id to be used to calculate applicability
                              against the given unit profile
        :type bound_repo_id: str

        :param existing_applicability: existing RepoProfileApplicability object to be replaced
        :type existing_applicability: pulp.server.db.model.consumer.RepoProfileApplicability
        """
        profiler_conduit = ProfilerConduit()
        # Get the profiler for content_type of given unit_profile
        profiler, profiler_cfg = ApplicabilityRegenerationManager._profiler(
            content_type)

        # Check if the profiler supports applicability, else return
        if profiler.calculate_applicable_units == Profiler.calculate_applicable_units:
            # If base class calculate_applicable_units method is called,
            # skip applicability regeneration
            return

        # Find out which content types have unit counts greater than zero in the bound repo
        repo_content_types = ApplicabilityRegenerationManager._get_existing_repo_content_types(
            bound_repo_id)
        # Get the intersection of existing types in the repo and the types that the profiler
        # handles. If the intersection is not empty, regenerate applicability
        if (set(repo_content_types) & set(profiler.metadata()['types'])):
            # Get the actual profile for existing_applicability or lookup using profile_id
            if existing_applicability:
                profile = existing_applicability.profile
            else:
                unit_profile = UnitProfile.get_collection().find_one(
                    {'id': profile_id}, projection=['profile'])
                if unit_profile is None:
                    # The unit profile can disappear while regeneration is in progress (e.g.
                    # its consumer was removed). There is nothing to calculate against, so
                    # skip instead of failing with a TypeError on the subscript below.
                    _logger.debug(
                        "Unit profile [%s] no longer exists, skipping applicability "
                        "regeneration", profile_id)
                    return
                profile = unit_profile['profile']
            call_config = PluginCallConfiguration(plugin_config=profiler_cfg,
                                                  repo_plugin_config=None)
            try:
                applicability = profiler.calculate_applicable_units(
                    profile, bound_repo_id, call_config, profiler_conduit)
            except NotImplementedError:
                msg = "Profiler for content type [%s] does not support applicability" % content_type
                _logger.debug(msg)
                return

            try:
                # Create a new RepoProfileApplicability object and save it in the db
                RepoProfileApplicability.objects.create(
                    profile_hash, bound_repo_id, profile, applicability)
            except DuplicateKeyError:
                # A record for this (repo, profile hash) already exists: update it in place.
                # When the caller did not hand us the existing record, load it first.
                if not existing_applicability:
                    applicability_dict = RepoProfileApplicability.get_collection(
                    ).find_one({
                        'repo_id': bound_repo_id,
                        'profile_hash': profile_hash
                    })
                    existing_applicability = RepoProfileApplicability(
                        **applicability_dict)
                existing_applicability.applicability = applicability
                existing_applicability.save()
Beispiel #24
0
    def regenerate_applicability(all_profiles_hash, profiles, bound_repo_id):
        """
        Recalculate applicability for a set of consumer profiles against one bound repo and
        store the result on an applicability record for each of the profiles.

        :param all_profiles_hash: hash of the consumer profiles
        :type  all_profiles_hash: basestring

        :param profiles: profiles data: (profile_hash, content_type, profile_id)
        :type  profiles: list of tuples

        :param bound_repo_id: repo id to be used to calculate applicability
                              against the given unit profile
        :type  bound_repo_id: str
        """
        conduit = ProfilerConduit()

        # One profiler is assumed to serve every content type in the given profiles (mixing
        # profilers is not supported at the moment), so the content type of the first profile
        # decides which profiler is used.
        content_type = profiles[0][1]
        profiler, profiler_cfg = ApplicabilityRegenerationManager._profiler(
            content_type)

        # A profiler that still exposes the base class calculate_applicable_units does not
        # support applicability, so there is nothing to regenerate.
        if profiler.calculate_applicable_units == Profiler.calculate_applicable_units:
            return

        # Content types that have unit counts greater than zero in the bound repo
        types_in_repo = ApplicabilityRegenerationManager._get_existing_repo_content_types(
            bound_repo_id)

        # Applicability is regenerated only if the repo holds at least one of the types that
        # the profiler handles.
        if not (set(types_in_repo) & set(profiler.metadata()['types'])):
            return

        wanted_ids = [profile_id for _hash, _ctype, profile_id in profiles]
        profile_cursor = UnitProfile.get_collection().find(
            {'id': {'$in': wanted_ids}},
            projection=['profile', 'content_type', 'profile_hash'])
        try:
            profile_records = [
                (doc['profile_hash'], doc['content_type'], doc['profile'])
                for doc in profile_cursor
            ]
        except TypeError:
            # A document was None. A consumer can be removed during applicability
            # regeneration, so its profile may no longer exist. It is harmless.
            return

        call_config = PluginCallConfiguration(plugin_config=profiler_cfg,
                                              repo_plugin_config=None)
        try:
            applicability = profiler.calculate_applicable_units(
                profile_records, bound_repo_id, call_config, conduit)
        except NotImplementedError:
            _logger.debug(
                "Profiler for content type [%s] does not support applicability" % content_type)
            return

        # The same applicability result is stored for every profile. The duplication is a
        # compromise that makes the data available from any applicability record in the DB.
        for record in profile_records:
            profile_hash = record[0]
            try:
                # profile is stored empty: profiles can be large, and the copy kept in the
                # repo_profile_applicability collection is no longer used (it only duplicates
                # data from the consumer_unit_profiles collection).
                RepoProfileApplicability.objects.create(
                    profile_hash=profile_hash,
                    repo_id=bound_repo_id,
                    profile=[],
                    applicability=applicability,
                    all_profiles_hash=all_profiles_hash)
            except DuplicateKeyError:
                # The record already exists: load it and overwrite its applicability data
                lookup = {
                    'repo_id': bound_repo_id,
                    'all_profiles_hash': all_profiles_hash,
                    'profile_hash': profile_hash
                }
                stored = RepoProfileApplicability.get_collection().find_one(lookup)
                current_rpa = RepoProfileApplicability(**stored)
                current_rpa.applicability = applicability
                current_rpa.save()
Beispiel #25
0
    def remove_orphans():
        """
        The RepoProfileApplicability objects can become orphaned over time, as repositories are
        deleted, or as consumer profiles change. This method searches for RepoProfileApplicability
        objects that reference either repositories or profile hashes that no longer exist in Pulp
        and removes them.

        Orphaned profile hashes are first written to the 'orphaned_profile_hash' collection by an
        aggregation pipeline and are then removed from the applicability collection in batches
        of 100000.

        There is a rare case when some orphaned applicability profiles are not removed:
         - a consumer can have multiple profiles and applicability is calculated for a
         combination of them
         - if only one of the profiles changed, then the applicability for the unchanged one is
         not removed.
         - there is no harm, no consequences to the applicability results when requested,
         just useless records in the DB
        """
        # Find all of the repo_ids that are referenced by RepoProfileApplicability objects
        rpa_collection = RepoProfileApplicability.get_collection()
        rpa_repo_ids = rpa_collection.distinct('repo_id')

        # Find all of the repo_ids that exist in Pulp
        repo_ids = model.Repository.objects.distinct('repo_id')

        # Find rpa_repo_ids that aren't part of repo_ids
        missing_repo_ids = list(set(rpa_repo_ids) - set(repo_ids))

        # Remove all RepoProfileApplicability objects that reference these repo_ids
        if missing_repo_ids:
            rpa_collection.remove({'repo_id': {'$in': missing_repo_ids}})

        # The code below has to be compatible with MongoDB 2.6+, it has to workaround
        # the 16MB BSON size limit, and no race conditions should be introduced.
        # For those reasons it may look complicated or unintuitive, but it does the following:
        #
        #     active_profile_hashes = set(consumer_unit_profile collection)
        #     profile_hashes_in_applicability = set(repo_profile_applicability collection)
        #     orphaned_profile_hashes = profile_hashes_in_applicability - active_profile_hashes
        #     for batch in paginate(orphaned_profile_hashes):
        #          remove_from_applicability_collection(where profile_hashes in batch)
        #

        # Find the profile hashes that exist in current UnitProfiles
        active_profile_hashes = UnitProfile.get_collection().distinct(
            'profile_hash')

        # Define a group stage for aggregation to find the profile hashes
        # that are present in RepoProfileApplicability collection.
        # Grouping on _id None folds every document into a single group.
        group_stage = {
            '$group': {
                '_id': None,
                'rpa_profiles': {
                    '$addToSet': '$profile_hash'
                }
            }
        }

        # Define a project stage to find orphaned profile hashes in the RepoProfileApplicability:
        # the hashes referenced by applicability records minus the active ones.
        project_stage1 = {
            "$project": {
                "orphaned_profiles": {
                    "$setDifference": ["$rpa_profiles", active_profile_hashes]
                }
            }
        }

        # Unwind the array of results so each element becomes a document itself.
        # It's important if results are huge (>16MB)
        unwind_stage = {"$unwind": "$orphaned_profiles"}

        # Reshape results in a way that no indices are violated: _id = profile_hash
        project_stage2 = {"$project": {"_id": "$orphaned_profiles"}}

        # Write results to a separate collection.
        # If a collection exists, old data is substituted with a new one.
        out_stage = {"$out": "orphaned_profile_hash"}

        # Trigger aggregation pipeline
        rpa_collection.aggregate([
            group_stage, project_stage1, unwind_stage, project_stage2,
            out_stage
        ],
                                 allowDiskUse=True)

        # Remove orphaned applicability profiles using profile hashes from the temporary collection.
        # Prepare a list of profiles to remove them in batches in case there are millions of them.
        orphaned_profiles_collection = connection.get_collection(
            'orphaned_profile_hash')
        profiles_batch_size = 100000
        profiles_total = orphaned_profiles_collection.count()

        _logger.info("Orphaned consumer profiles to process: %s" %
                     profiles_total)

        # Each iteration handles one window of at most profiles_batch_size orphaned hashes.
        # NOTE(review): $skip/$limit are used without a $sort, so the windows presumably rely on
        # the temporary collection being read in the same order every time - confirm.
        for skip_idx in range(0, profiles_total, profiles_batch_size):
            skip_stage = {"$skip": skip_idx}
            limit_stage = {"$limit": profiles_batch_size}
            # Note: this rebinds group_stage (the earlier pipeline has already run). The stage
            # collects the _id values of the current window into one 'profile_hash' list.
            group_stage = {
                "$group": {
                    "_id": None,
                    "profile_hash": {
                        "$push": "$_id"
                    }
                }
            }
            agg_result = orphaned_profiles_collection.aggregate(
                [skip_stage, limit_stage, group_stage])
            # The single grouped document carries the list of hashes for this window
            profiles_to_remove = agg_result.next()['profile_hash']
            rpa_collection.remove(
                {'profile_hash': {
                    '$in': profiles_to_remove
                }})

            # Statistics: running total of processed hashes, capped at profiles_total for the
            # last (possibly partial) window
            if profiles_total <= profiles_batch_size + skip_idx:
                profiles_removed = profiles_total
            else:
                profiles_removed = profiles_batch_size + skip_idx
            _logger.info("Orphaned consumer profiles processed: %s" %
                         profiles_removed)
Beispiel #26
0
    def regenerate_applicability(profile_hash, content_type, profile_id,
                                 bound_repo_id, existing_applicability=None):
        """
        Regenerate and save applicability data for given profile and bound repo id.
        If existing_applicability is not None, replace it with the new applicability data.

        The call is a no-op when the profiler for content_type does not provide its own
        calculate_applicable_units, when the bound repo contains none of the content types the
        profiler handles, or when the unit profile that has to be looked up by profile_id no
        longer exists.

        :param profile_hash: hash of the unit profile
        :type profile_hash: basestring

        :param content_type: profile (unit) type ID
        :type content_type: str

        :param profile_id: unique id of the unit profile
        :type profile_id: str

        :param bound_repo_id: repo id to be used to calculate applicability
                              against the given unit profile
        :type bound_repo_id: str

        :param existing_applicability: existing RepoProfileApplicability object to be replaced
        :type existing_applicability: pulp.server.db.model.consumer.RepoProfileApplicability
        """
        profiler_conduit = ProfilerConduit()
        # Get the profiler for content_type of given unit_profile
        profiler, profiler_cfg = ApplicabilityRegenerationManager._profiler(content_type)

        # Check if the profiler supports applicability, else return
        if profiler.calculate_applicable_units == Profiler.calculate_applicable_units:
            # If base class calculate_applicable_units method is called,
            # skip applicability regeneration
            return

        # Find out which content types have unit counts greater than zero in the bound repo
        repo_content_types = ApplicabilityRegenerationManager._get_existing_repo_content_types(
            bound_repo_id)
        # Get the intersection of existing types in the repo and the types that the profiler
        # handles. If the intersection is not empty, regenerate applicability
        if (set(repo_content_types) & set(profiler.metadata()['types'])):
            # Get the actual profile for existing_applicability or lookup using profile_id
            if existing_applicability:
                profile = existing_applicability.profile
            else:
                unit_profile = UnitProfile.get_collection().find_one({'id': profile_id},
                                                                     projection=['profile'])
                if unit_profile is None:
                    # The profile may have been removed while regeneration was running (for
                    # example together with its consumer). Without a profile there is nothing
                    # to calculate, so skip rather than raise TypeError on the lookup below.
                    _logger.debug(
                        "Unit profile [%s] no longer exists, skipping applicability "
                        "regeneration", profile_id)
                    return
                profile = unit_profile['profile']
            call_config = PluginCallConfiguration(plugin_config=profiler_cfg,
                                                  repo_plugin_config=None)
            try:
                applicability = profiler.calculate_applicable_units(profile,
                                                                    bound_repo_id,
                                                                    call_config,
                                                                    profiler_conduit)
            except NotImplementedError:
                msg = "Profiler for content type [%s] does not support applicability" % content_type
                _logger.debug(msg)
                return

            try:
                # Create a new RepoProfileApplicability object and save it in the db
                RepoProfileApplicability.objects.create(profile_hash,
                                                        bound_repo_id,
                                                        profile,
                                                        applicability)
            except DuplicateKeyError:
                # A record for this (repo, profile hash) already exists: update it in place,
                # loading it first if the caller did not pass it in.
                if not existing_applicability:
                    applicability_dict = RepoProfileApplicability.get_collection().find_one(
                        {'repo_id': bound_repo_id, 'profile_hash': profile_hash})
                    existing_applicability = RepoProfileApplicability(**applicability_dict)
                existing_applicability.applicability = applicability
                existing_applicability.save()