def create(self, profile_hash, repo_id, profile, applicability, all_profiles_hash):
    """
    Build a RepoProfileApplicability from the given values, save it, and return it.

    :param profile_hash:      The hash of the profile that is a part of the profile set of
                              a consumer
    :type  profile_hash:      basestring
    :param repo_id:           The repo ID that this applicability data is for
    :type  repo_id:           basestring
    :param profile:           The entire profile that resulted in the profile_hash
    :type  profile:           object
    :param applicability:     A dictionary structure mapping unit type IDs to lists of
                              applicable Unit IDs.
    :type  applicability:     dict
    :param all_profiles_hash: The hash of the set of the profiles that this applicability
                              data is for
    :type  all_profiles_hash: basestring
    :return:                  The newly saved RepoProfileApplicability object
    :rtype:                   pulp.server.db.model.consumer.RepoProfileApplicability
    """
    document_fields = {
        'profile_hash': profile_hash,
        'repo_id': repo_id,
        'profile': profile,
        'applicability': applicability,
        'all_profiles_hash': all_profiles_hash,
    }
    new_rpa = RepoProfileApplicability(**document_fields)
    # Persist before handing the object back to the caller
    new_rpa.save()
    return new_rpa
def batch_regenerate_applicability(repo_id, existing_applicability_ids):
    """
    Recalculate and store applicability for a batch of already existing applicabilities.

    :param repo_id:                    Repository id for which applicability is being
                                       calculated
    :type  repo_id:                    str
    :param existing_applicability_ids: Tuple of Object Ids for applicability profiles
    :type  existing_applicability_ids: tuple of dicts in form of
                                       {"_id": ObjectID('mongo-id')}
    """
    object_ids = [entry['_id'] for entry in existing_applicability_ids]
    rpa_documents = RepoProfileApplicability.get_collection().find(
        {"_id": {"$in": object_ids}})

    for rpa_document in rpa_documents:
        # Wrap the raw document in a RepoProfileApplicability object
        rpa = RepoProfileApplicability(**dict(rpa_document))
        profile_hash = rpa['profile_hash']
        unit_profile = UnitProfile.get_collection().find_one(
            {'profile_hash': profile_hash}, fields=['id', 'content_type'])

        # Unit profiles change whenever packages are installed or removed on consumers,
        # so the applicability may reference a UnitProfile that no longer exists. Such
        # dangling references are simply skipped here.
        if unit_profile is not None:
            ApplicabilityRegenerationManager.regenerate_applicability(
                profile_hash, unit_profile['content_type'], unit_profile['id'],
                repo_id, rpa)
def filter(self, query_params):
    """
    Return the RepoProfileApplicability objects selected by a MongoDB query dict.

    :param query_params: A MongoDB query dictionary that selects RepoProfileApplicability
                         documents
    :type  query_params: dict
    :return:             A list of RepoProfileApplicability objects that match the given
                         query
    :rtype:              list
    """
    matching_documents = RepoProfileApplicability.get_collection().find(query_params)
    results = []
    for document in matching_documents:
        # Each raw document becomes a model object
        results.append(RepoProfileApplicability(**dict(document)))
    return results
def regenerate_applicability_for_repos(repo_criteria):
    """
    Regenerate and save applicability data affected by given updated repositories.

    Every RepoProfileApplicability stored for each matching repository is recalculated.
    An applicability whose profile hash no longer matches any UnitProfile is skipped.

    :param repo_criteria: The repo selection criteria
    :type  repo_criteria: dict
    """
    repo_criteria = Criteria.from_dict(repo_criteria)
    repo_query_manager = managers.repo_query_manager()

    # Only the id of each matching repository is needed
    repo_criteria.fields = ["id"]
    repo_ids = [r["id"] for r in repo_query_manager.find_by_criteria(repo_criteria)]

    for repo_id in repo_ids:
        # Find all existing applicabilities for given repo_id
        existing_applicabilities = RepoProfileApplicability.get_collection().find(
            {"repo_id": repo_id})
        for existing_applicability in existing_applicabilities:
            # Convert the raw document to a RepoProfileApplicability object
            existing_applicability = RepoProfileApplicability(**dict(existing_applicability))
            profile_hash = existing_applicability["profile_hash"]
            unit_profile = UnitProfile.get_collection().find_one(
                {"profile_hash": profile_hash}, fields=["id", "content_type"])
            if unit_profile is None:
                # find_one() returns None when no UnitProfile carries this hash any more
                # (profiles change as packages are installed or removed on consumers).
                # Subscripting None would raise TypeError and abort the whole run, so
                # the dangling applicability is skipped instead.
                continue
            # Regenerate applicability data for given unit_profile and repo id
            ApplicabilityRegenerationManager.regenerate_applicability(
                profile_hash, unit_profile["content_type"], unit_profile["id"],
                repo_id, existing_applicability)
def remove_orphans():
    """
    Delete RepoProfileApplicability objects that have become orphaned.

    Applicability objects become orphaned over time, as repositories are deleted or as
    consumer profiles change. Two passes are made: first objects whose repo_id is not
    found among the existing repositories are removed, then objects whose profile_hash
    is not found among the current UnitProfiles are removed.
    """
    rpa_collection = RepoProfileApplicability.get_collection()

    # Pass 1: repo ids referenced by applicabilities versus repo ids that really exist
    referenced_repo_ids = set(rpa_collection.distinct('repo_id'))
    known_repo_ids = set(Repo.get_collection().distinct('id'))
    dangling_repo_ids = list(referenced_repo_ids - known_repo_ids)
    if dangling_repo_ids:
        rpa_collection.remove({'repo_id': {'$in': dangling_repo_ids}})

    # Pass 2: profile hashes referenced by the remaining applicabilities versus the
    # hashes of the UnitProfiles that currently exist
    referenced_hashes = set(rpa_collection.distinct('profile_hash'))
    known_hashes = set(UnitProfile.get_collection().distinct('profile_hash'))
    dangling_hashes = list(referenced_hashes - known_hashes)
    if dangling_hashes:
        rpa_collection.remove({'profile_hash': {'$in': dangling_hashes}})
def queue_regenerate_applicability_for_repos(repo_criteria):
    """
    Dispatch a group of tasks that regenerate and save applicability data affected by
    the given updated repositories.

    :param repo_criteria: The repo selection criteria
    :type  repo_criteria: dict
    :return:              The id shared by every task that was dispatched
    """
    criteria = Criteria.from_dict(repo_criteria)
    # Only the repo id is needed from each matching repository
    criteria.fields = ['id']
    matching_repos = model.Repository.objects.find_by_criteria(criteria)
    repo_ids = [repo.repo_id for repo in matching_repos]

    task_group_id = uuid4()

    for repo_id in repo_ids:
        hash_cursor = RepoProfileApplicability.get_collection().find(
            {'repo_id': repo_id}, {'profile_hash': 1})
        # One task is dispatched per page of (at most) 10 profile hashes
        for batch in paginate(hash_cursor, 10):
            batch_regenerate_applicability_task.apply_async(
                (repo_id, batch), group_id=task_group_id)

    return task_group_id
def regenerate_applicability_for_repos(self, repo_criteria=None):
    """
    Regenerate and save applicability data affected by given updated repositories.

    Every RepoProfileApplicability stored for each matching repository is recalculated.
    An applicability whose profile hash no longer matches any UnitProfile is skipped.

    :param repo_criteria: The repo selection criteria
    :type  repo_criteria: pulp.server.db.model.criteria.Criteria
    """
    repo_query_manager = managers.repo_query_manager()

    # NOTE(review): repo_criteria defaults to None but is dereferenced right away, so
    # callers must always pass a Criteria object - confirm whether the default is needed.
    repo_criteria.fields = ['id']
    repo_ids = [r['id'] for r in repo_query_manager.find_by_criteria(repo_criteria)]

    for repo_id in repo_ids:
        # Find all existing applicabilities for given repo_id
        existing_applicabilities = RepoProfileApplicability.get_collection().find(
            {'repo_id': repo_id})
        for existing_applicability in existing_applicabilities:
            # Convert the raw document to a RepoProfileApplicability object
            existing_applicability = RepoProfileApplicability(**dict(existing_applicability))
            profile_hash = existing_applicability['profile_hash']
            unit_profile = UnitProfile.get_collection().find_one(
                {'profile_hash': profile_hash}, fields=['id', 'content_type'])
            if unit_profile is None:
                # find_one() returns None when no UnitProfile carries this hash any more.
                # Subscripting None would raise TypeError and abort the whole run, so
                # the dangling applicability is skipped instead.
                continue
            # Regenerate applicability data for given unit_profile and repo id
            self.regenerate_applicability(
                profile_hash, unit_profile['content_type'], unit_profile['id'],
                repo_id, existing_applicability)
def regenerate_applicability_for_repos(repo_criteria):
    """
    Recalculate and store the applicability data touched by the given updated
    repositories.

    :param repo_criteria: The repo selection criteria
    :type  repo_criteria: dict
    """
    criteria = Criteria.from_dict(repo_criteria)
    repo_query_manager = managers.repo_query_manager()

    # Only the id of each matching repository is needed
    criteria.fields = ['id']
    repo_ids = [repo['id'] for repo in repo_query_manager.find_by_criteria(criteria)]

    for repo_id in repo_ids:
        # Every applicability already stored for this repo gets recalculated
        rpa_documents = RepoProfileApplicability.get_collection().find({'repo_id': repo_id})
        for rpa_document in rpa_documents:
            rpa = RepoProfileApplicability(**dict(rpa_document))
            profile_hash = rpa['profile_hash']
            unit_profile = UnitProfile.get_collection().find_one(
                {'profile_hash': profile_hash}, fields=['id', 'content_type'])

            # Unit profiles change whenever packages are installed or removed on
            # consumers, so the applicability may reference a UnitProfile that no longer
            # exists. That is harmless: the dangling reference is skipped here.
            if unit_profile is not None:
                ApplicabilityRegenerationManager.regenerate_applicability(
                    profile_hash, unit_profile['content_type'], unit_profile['id'],
                    repo_id, rpa)
def remove_orphans():
    """
    Delete RepoProfileApplicability objects that have become orphaned.

    Applicability objects become orphaned over time, as repositories are deleted or as
    consumer profiles change. Two passes are made: first objects whose repo_id is not
    found among the existing repositories are removed, then objects whose profile_hash
    is not found among the current UnitProfiles are removed.
    """
    rpa_collection = RepoProfileApplicability.get_collection()

    # Pass 1: repo ids referenced by applicabilities versus repo ids that really exist
    referenced_repo_ids = set(rpa_collection.distinct('repo_id'))
    known_repo_ids = set(model.Repository.objects.distinct('repo_id'))
    dangling_repo_ids = list(referenced_repo_ids - known_repo_ids)
    if dangling_repo_ids:
        rpa_collection.remove({'repo_id': {'$in': dangling_repo_ids}})

    # Pass 2: profile hashes referenced by the remaining applicabilities versus the
    # hashes of the UnitProfiles that currently exist
    referenced_hashes = set(rpa_collection.distinct('profile_hash'))
    known_hashes = set(UnitProfile.get_collection().distinct('profile_hash'))
    dangling_hashes = list(referenced_hashes - known_hashes)
    if dangling_hashes:
        rpa_collection.remove({'profile_hash': {'$in': dangling_hashes}})
def batch_regenerate_applicability(repo_id, profile_hashes):
    """
    Recalculate and store applicability for a batch of already existing applicabilities.

    :param repo_id:        Repository id for which applicability is being calculated
    :type  repo_id:        str
    :param profile_hashes: Tuple of consumer profile hashes for applicability profiles.
                           Keep the batch small: all the profile data associated with
                           these hashes is loaded into memory.
    :type  profile_hashes: tuple of dicts in form of {'profile_hash': str}
    """
    wanted_hashes = [entry['profile_hash'] for entry in profile_hashes]
    query = {"repo_id": repo_id, "profile_hash": {"$in": wanted_hashes}}
    cursor = RepoProfileApplicability.get_collection().find(query)

    # The cursor is drained into a list up front, before any regeneration happens
    for rpa_document in list(cursor):
        rpa = RepoProfileApplicability(**dict(rpa_document))
        profile_hash = rpa['profile_hash']
        unit_profile = UnitProfile.get_collection().find_one(
            {'profile_hash': profile_hash}, projection=['id', 'content_type'])

        # Unit profiles change whenever packages are installed or removed on consumers,
        # so the applicability may reference a UnitProfile that no longer exists. That
        # is harmless: the dangling reference is skipped here.
        if unit_profile is not None:
            ApplicabilityRegenerationManager.regenerate_applicability(
                profile_hash, unit_profile['content_type'], unit_profile['id'],
                repo_id, rpa)
def regenerate_applicability_for_repos(repo_criteria):
    """
    Recalculate and store the applicability data touched by the given updated
    repositories.

    :param repo_criteria: The repo selection criteria
    :type  repo_criteria: dict
    """
    criteria = Criteria.from_dict(repo_criteria)
    # Only the repo id is needed from each matching repository
    criteria.fields = ['id']
    matching_repos = model.Repository.objects.find_by_criteria(criteria)
    repo_ids = [repo.repo_id for repo in matching_repos]

    for repo_id in repo_ids:
        # A batch size of 5 keeps the MongoDB cursor from timing out while each
        # applicability is recalculated. See https://pulp.plan.io/issues/998#note-6 for
        # more details.
        cursor = RepoProfileApplicability.get_collection().find({'repo_id': repo_id})
        rpa_documents = cursor.batch_size(5)

        for rpa_document in rpa_documents:
            rpa = RepoProfileApplicability(**dict(rpa_document))
            profile_hash = rpa['profile_hash']
            unit_profile = UnitProfile.get_collection().find_one(
                {'profile_hash': profile_hash}, projection=['id', 'content_type'])

            # Unit profiles change whenever packages are installed or removed on
            # consumers, so the applicability may reference a UnitProfile that no longer
            # exists. That is harmless: the dangling reference is skipped here.
            if unit_profile is not None:
                ApplicabilityRegenerationManager.regenerate_applicability(
                    profile_hash, unit_profile['content_type'], unit_profile['id'],
                    repo_id, rpa)
def regenerate_applicability_for_repos(repo_criteria):
    """
    Recalculate and store the applicability data touched by the given updated
    repositories.

    :param repo_criteria: The repo selection criteria
    :type  repo_criteria: dict
    """
    criteria = Criteria.from_dict(repo_criteria)
    repo_query_manager = managers.repo_query_manager()

    # Only the id of each matching repository is needed
    criteria.fields = ['id']
    repo_ids = [repo['id'] for repo in repo_query_manager.find_by_criteria(criteria)]

    for repo_id in repo_ids:
        # Every applicability already stored for this repo gets recalculated
        rpa_documents = RepoProfileApplicability.get_collection().find({'repo_id': repo_id})
        for rpa_document in rpa_documents:
            rpa = RepoProfileApplicability(**dict(rpa_document))
            profile_hash = rpa['profile_hash']
            unit_profile = UnitProfile.get_collection().find_one(
                {'profile_hash': profile_hash}, fields=['id', 'content_type'])

            # Unit profiles change whenever packages are installed or removed on
            # consumers, so the applicability may reference a UnitProfile that no longer
            # exists. That is harmless: the dangling reference is skipped here.
            if unit_profile is not None:
                ApplicabilityRegenerationManager.regenerate_applicability(
                    profile_hash, unit_profile['content_type'], unit_profile['id'],
                    repo_id, rpa)
def create(self, profile_hash, repo_id, profile, applicability):
    """
    Build a RepoProfileApplicability from the given values, save it, and return it.

    :param profile_hash:  The hash of the profile that this object contains applicability
                          data for
    :type  profile_hash:  basestring
    :param repo_id:       The repo ID that this applicability data is for
    :type  repo_id:       basestring
    :param profile:       The entire profile that resulted in the profile_hash
    :type  profile:       object
    :param applicability: A dictionary structure mapping unit type IDs to lists of
                          applicable Unit IDs.
    :type  applicability: dict
    :return:              The newly saved RepoProfileApplicability object
    :rtype:               pulp.server.db.model.consumer.RepoProfileApplicability
    """
    document_fields = {
        'profile_hash': profile_hash,
        'repo_id': repo_id,
        'profile': profile,
        'applicability': applicability,
    }
    new_rpa = RepoProfileApplicability(**document_fields)
    # Persist before handing the object back to the caller
    new_rpa.save()
    return new_rpa
def batch_regenerate_applicability(repo_id, profile_hashes):
    """
    Recalculate and store applicability for a batch of already existing applicabilities.

    :param repo_id:        Repository id for which applicability is being calculated
    :type  repo_id:        str
    :param profile_hashes: Tuple of consumer profile hashes for applicability profiles.
                           Keep the batch small: all the profile data associated with
                           these hashes is loaded into memory.
    :type  profile_hashes: tuple of dicts in form of {'profile_hash': str}
    """
    wanted_hashes = [entry['profile_hash'] for entry in profile_hashes]
    query = {"repo_id": repo_id, "profile_hash": {"$in": wanted_hashes}}
    cursor = RepoProfileApplicability.get_collection().find(query)

    # The cursor is drained into a list up front, before any regeneration happens
    for rpa_document in list(cursor):
        rpa = RepoProfileApplicability(**dict(rpa_document))
        profile_hash = rpa['profile_hash']
        unit_profile = UnitProfile.get_collection().find_one(
            {'profile_hash': profile_hash}, projection=['id', 'content_type'])

        # Unit profiles change whenever packages are installed or removed on consumers,
        # so the applicability may reference a UnitProfile that no longer exists. That
        # is harmless: the dangling reference is skipped here.
        if unit_profile is not None:
            ApplicabilityRegenerationManager.regenerate_applicability(
                profile_hash, unit_profile['content_type'], unit_profile['id'],
                repo_id, rpa)
def filter(self, query_params):
    """
    Return the RepoProfileApplicability objects selected by a MongoDB query dict.

    :param query_params: A MongoDB query dictionary that selects RepoProfileApplicability
                         documents
    :type  query_params: dict
    :return:             A list of RepoProfileApplicability objects that match the given
                         query
    :rtype:              list
    """
    matching_documents = RepoProfileApplicability.get_collection().find(query_params)
    results = []
    for document in matching_documents:
        # Each raw document becomes a model object
        results.append(RepoProfileApplicability(**dict(document)))
    return results
def _is_existing_applicability(repo_id, profile_hash):
    """
    Tell whether applicability has already been calculated for a repo and profile hash.

    :param repo_id:      repo id
    :type  repo_id:      basestring
    :param profile_hash: unit profile hash
    :type  profile_hash: basestring
    :return:             True if applicability exists, False otherwise
    :rtype:              bool
    """
    lookup = {'repo_id': repo_id, 'profile_hash': profile_hash}
    # Only the _id is projected; the rest of the document is not needed for this check
    match = RepoProfileApplicability.get_collection().find_one(lookup, projection=['_id'])
    return bool(match)
def _is_existing_applicability(repo_id, profile_hash):
    """
    Tell whether applicability has already been calculated for a repo and profile hash.

    :param repo_id:      repo id
    :type  repo_id:      basestring
    :param profile_hash: unit profile hash
    :type  profile_hash: basestring
    :return:             True if applicability exists, False otherwise
    :rtype:              bool
    """
    lookup = {'repo_id': repo_id, 'profile_hash': profile_hash}
    # Only the _id is requested; the rest of the document is not needed for this check
    match = RepoProfileApplicability.get_collection().find_one(lookup, fields=['_id'])
    return bool(match)
def _get_applicability_map(all_profiles_hashes, content_types):
    """
    Build an "applicability_map", which is a dictionary that maps tuples of
    (all_profiles_hash, repo_id) to a dictionary of applicability data and consumer_ids.
    The consumer_ids are just initialized to an empty list, so that a later method can
    add consumers to it. For example, it might look like:

    {('all_profiles_hash_1', 'repo_1'): {'applicability': {<applicability_data>},
                                         'consumers': []}}

    :param all_profiles_hashes: A list of all_profiles_hashes that the applicabilities
                                should be queried with. The applicability map is
                                initialized with all applicability data for all the
                                given all_profiles_hashes.
    :type  all_profiles_hashes: list
    :param content_types:       If not None, content_types is a list of content_types to
                                be included in the applicability data within the
                                applicability_map
    :type  content_types:       list or None
    :return:                    The applicability map
    :rtype:                     dict
    """
    applicabilities = RepoProfileApplicability.get_collection().find(
        {'all_profiles_hash': {'$in': all_profiles_hashes}},
        projection=['all_profiles_hash', 'repo_id', 'applicability'])
    return_value = {}
    for a in applicabilities:
        if content_types is not None:
            # The caller asked for a content_type filter, so drop every unwanted type
            # from the applicability data. Iterate over a snapshot of the keys: deleting
            # from a dict while iterating its live key view raises RuntimeError on
            # Python 3.
            for key in list(a['applicability']):
                if key not in content_types:
                    del a['applicability'][key]
            # If a doesn't have anything worth reporting, move on to the next one
            if not a['applicability']:
                continue
        return_value[(a['all_profiles_hash'], a['repo_id'])] = {
            'applicability': a['applicability'],
            'consumers': [],
        }
    return return_value
def _is_existing_applicability(repo_id, all_profiles_hash):
    """
    Tell whether applicability has already been calculated for a repo and a consumer
    profiles' hash.

    :param repo_id:           repo id
    :type  repo_id:           basestring
    :param all_profiles_hash: consumer profiles' hash
    :type  all_profiles_hash: basestring
    :return:                  True if applicability exists, False otherwise
    :rtype:                   bool
    """
    lookup = {'repo_id': repo_id, 'all_profiles_hash': all_profiles_hash}
    cursor = RepoProfileApplicability.get_collection().find(lookup, projection=['_id'])
    # Any non-zero match count means the applicability is already there
    return bool(cursor.count())
def _get_applicability_map(profile_hashes, content_types):
    """
    Build an "applicability_map", which is a dictionary that maps tuples of
    (profile_hash, repo_id) to a dictionary of applicability data and consumer_ids. The
    consumer_ids are just initialized to an empty list, so that a later method can add
    consumers to it. For example, it might look like:

    {('profile_hash_1', 'repo_1'): {'applicability': {<applicability_data>},
                                    'consumers': []}}

    :param profile_hashes: A list of profile hashes that the applicabilities should be
                           queried with. The applicability map is initialized with all
                           applicability data for all the given profile_hashes.
    :type  profile_hashes: list
    :param content_types:  If not None, content_types is a list of content_types to be
                           included in the applicability data within the
                           applicability_map
    :type  content_types:  list or None
    :return:               The applicability map
    :rtype:                dict
    """
    applicabilities = RepoProfileApplicability.get_collection().find(
        {'profile_hash': {'$in': profile_hashes}},
        projection=['profile_hash', 'repo_id', 'applicability'])
    return_value = {}
    for a in applicabilities:
        if content_types is not None:
            # The caller asked for a content_type filter, so drop every unwanted type
            # from the applicability data. Iterate over a snapshot of the keys: deleting
            # from a dict while iterating its live key view raises RuntimeError on
            # Python 3.
            for key in list(a['applicability']):
                if key not in content_types:
                    del a['applicability'][key]
            # If a doesn't have anything worth reporting, move on to the next one
            if not a['applicability']:
                continue
        return_value[(a['profile_hash'], a['repo_id'])] = {
            'applicability': a['applicability'],
            'consumers': [],
        }
    return return_value
def queue_regenerate_applicability_for_repos(repo_criteria):
    """
    Dispatch a group of tasks that regenerate and save applicability data affected by
    the given updated repositories.

    :param repo_criteria: The repo selection criteria
    :type  repo_criteria: dict
    :return:              The id shared by every task that was dispatched
    """
    criteria = Criteria.from_dict(repo_criteria)
    # Only the repo id is needed from each matching repository
    criteria.fields = ['id']
    matching_repos = model.Repository.objects.find_by_criteria(criteria)
    repo_ids = [repo.repo_id for repo in matching_repos]

    task_group_id = uuid4()

    for repo_id in repo_ids:
        hash_cursor = RepoProfileApplicability.get_collection().find(
            {'repo_id': repo_id}, {'profile_hash': 1})
        # One task is dispatched per page of (at most) 10 profile hashes
        for batch in paginate(hash_cursor, 10):
            batch_regenerate_applicability_task.apply_async(
                (repo_id, batch), group_id=task_group_id)

    return task_group_id
def regenerate_applicability_for_repos(repo_criteria):
    """
    Recalculate and store the applicability data touched by the given updated
    repositories.

    :param repo_criteria: The repo selection criteria
    :type  repo_criteria: dict
    """
    criteria = Criteria.from_dict(repo_criteria)
    # Only the repo id is needed from each matching repository
    criteria.fields = ['id']
    matching_repos = model.Repository.objects.find_by_criteria(criteria)
    repo_ids = [repo.repo_id for repo in matching_repos]

    for repo_id in repo_ids:
        # A batch size of 5 keeps the MongoDB cursor from timing out while each
        # applicability is recalculated. See https://pulp.plan.io/issues/998#note-6 for
        # more details.
        cursor = RepoProfileApplicability.get_collection().find({'repo_id': repo_id})
        rpa_documents = cursor.batch_size(5)

        for rpa_document in rpa_documents:
            rpa = RepoProfileApplicability(**dict(rpa_document))
            profile_hash = rpa['profile_hash']
            unit_profile = UnitProfile.get_collection().find_one(
                {'profile_hash': profile_hash}, projection=['id', 'content_type'])

            # Unit profiles change whenever packages are installed or removed on
            # consumers, so the applicability may reference a UnitProfile that no longer
            # exists. That is harmless: the dangling reference is skipped here.
            if unit_profile is not None:
                ApplicabilityRegenerationManager.regenerate_applicability(
                    profile_hash, unit_profile['content_type'], unit_profile['id'],
                    repo_id, rpa)
def regenerate_applicability(profile_hash, content_type, profile_id, bound_repo_id,
                             existing_applicability=None):
    """
    Regenerate and save applicability data for given profile and bound repo id.
    If existing_applicability is not None, replace it with the new applicability data.

    Nothing is calculated when the profiler does not override
    calculate_applicable_units, when the bound repo holds none of the content types the
    profiler handles, or when the unit profile looked up by profile_id no longer exists.

    :param profile_hash:           hash of the unit profile
    :type  profile_hash:           basestring
    :param content_type:           profile (unit) type ID
    :type  content_type:           str
    :param profile_id:             unique id of the unit profile
    :type  profile_id:             str
    :param bound_repo_id:          repo id to be used to calculate applicability against
                                   the given unit profile
    :type  bound_repo_id:          str
    :param existing_applicability: existing RepoProfileApplicability object to be
                                   replaced
    :type  existing_applicability: pulp.server.db.model.consumer.RepoProfileApplicability
    """
    profiler_conduit = ProfilerConduit()
    # Get the profiler for content_type of given unit_profile
    profiler, profiler_cfg = ApplicabilityRegenerationManager._profiler(content_type)

    # Check if the profiler supports applicability, else return
    if profiler.calculate_applicable_units == Profiler.calculate_applicable_units:
        # The profiler still uses the base class calculate_applicable_units method,
        # so skip applicability regeneration
        return

    # Find out which content types have unit counts greater than zero in the bound repo
    repo_content_types = ApplicabilityRegenerationManager._get_existing_repo_content_types(
        bound_repo_id)

    # Regenerate only if the types existing in the repo and the types the profiler
    # handles have something in common
    if not (set(repo_content_types) & set(profiler.metadata()['types'])):
        return

    # Get the actual profile from existing_applicability or look it up using profile_id
    if existing_applicability:
        profile = existing_applicability.profile
    else:
        unit_profile = UnitProfile.get_collection().find_one(
            {'id': profile_id}, projection=['profile'])
        if unit_profile is None:
            # find_one() returns None when the unit profile has been removed in the
            # meantime; subscripting it would raise TypeError. There is nothing to
            # calculate applicability against, so skip.
            return
        profile = unit_profile['profile']

    call_config = PluginCallConfiguration(plugin_config=profiler_cfg,
                                          repo_plugin_config=None)
    try:
        applicability = profiler.calculate_applicable_units(
            profile, bound_repo_id, call_config, profiler_conduit)
    except NotImplementedError:
        msg = "Profiler for content type [%s] does not support applicability" % content_type
        _logger.debug(msg)
        return

    try:
        # Create a new RepoProfileApplicability object and save it in the db
        RepoProfileApplicability.objects.create(
            profile_hash, bound_repo_id, profile, applicability)
    except DuplicateKeyError:
        # A record for this repo and profile hash already exists: update it instead
        if not existing_applicability:
            applicability_dict = RepoProfileApplicability.get_collection().find_one(
                {'repo_id': bound_repo_id, 'profile_hash': profile_hash})
            existing_applicability = RepoProfileApplicability(**applicability_dict)
        existing_applicability.applicability = applicability
        existing_applicability.save()
def regenerate_applicability(all_profiles_hash, profiles, bound_repo_id):
    """
    Recalculate and store applicability data for a set of profiles against a bound repo.

    :param all_profiles_hash: hash of the consumer profiles
    :type  all_profiles_hash: basestring
    :param profiles:          profiles data: (profile_hash, content_type, profile_id)
    :type  profiles:          list of tuples
    :param bound_repo_id:     repo id to be used to calculate applicability against the
                              given unit profile
    :type  bound_repo_id:     str
    """
    profiler_conduit = ProfilerConduit()

    # The assumption is that the same profiler is used for all the content types, so
    # different profilers are not supported at the moment. The content type of the
    # first profile selects the profiler.
    content_type = profiles[0][1]
    profiler, profiler_cfg = ApplicabilityRegenerationManager._profiler(content_type)

    # A profiler that still uses the base class calculate_applicable_units method does
    # not support applicability, so there is nothing to regenerate
    if profiler.calculate_applicable_units == Profiler.calculate_applicable_units:
        return

    # Content types that have unit counts greater than zero in the bound repo
    repo_content_types = ApplicabilityRegenerationManager._get_existing_repo_content_types(
        bound_repo_id)

    # Regenerate only if the types existing in the repo and the types the profiler
    # handles have something in common
    if not (set(repo_content_types) & set(profiler.metadata()['types'])):
        return

    profile_ids = [p_id for _hash, _type, p_id in profiles]
    unit_profiles = UnitProfile.get_collection().find(
        {'id': {'$in': profile_ids}},
        projection=['profile', 'content_type', 'profile_hash'])

    loaded_profiles = []
    try:
        for p in unit_profiles:
            loaded_profiles.append((p['profile_hash'], p['content_type'], p['profile']))
    except TypeError:
        # It means that p = None.
        # Consumer can be removed during applicability regeneration,
        # so it is possible that its profile no longer exists. It is harmless.
        return

    call_config = PluginCallConfiguration(plugin_config=profiler_cfg,
                                          repo_plugin_config=None)
    try:
        applicability = profiler.calculate_applicable_units(
            loaded_profiles, bound_repo_id, call_config, profiler_conduit)
    except NotImplementedError:
        msg = "Profiler for content type [%s] does not support applicability" % content_type
        _logger.debug(msg)
        return

    # The same applicability result is saved on each of the profiles. The duplication is
    # a compromise that makes the data available from any applicability profile record
    # in the DB.
    for profile_hash, _content_type, _profile in loaded_profiles:
        try:
            # Profiles can be large and the copy in the repo_profile_applicability
            # collection is no longer used (it duplicates consumer_unit_profiles), so an
            # empty list is stored in its place.
            RepoProfileApplicability.objects.create(
                profile_hash=profile_hash,
                repo_id=bound_repo_id,
                profile=[],
                applicability=applicability,
                all_profiles_hash=all_profiles_hash)
        except DuplicateKeyError:
            # The record already exists: load it and overwrite its applicability
            applicability_dict = RepoProfileApplicability.get_collection().find_one({
                'repo_id': bound_repo_id,
                'all_profiles_hash': all_profiles_hash,
                'profile_hash': profile_hash,
            })
            existing_applicability = RepoProfileApplicability(**applicability_dict)
            existing_applicability.applicability = applicability
            existing_applicability.save()
def remove_orphans():
    """
    Delete RepoProfileApplicability records that have become orphaned.

    Records become orphaned over time, as repositories are deleted or as consumer
    profiles change. This method removes the RepoProfileApplicability objects that
    reference either repositories or profile hashes that no longer exist in Pulp.

    There is a rare case when some orphaned applicability profiles are not removed:
     - a consumer can have multiple profiles and applicability is calculated for
       a combination of them
     - if only one of the profiles changed, then the applicability for the unchanged
       one is not removed.
     - there is no harm, no consequences to the applicability results when requested,
       just useless records in the DB
    """
    applicability_coll = RepoProfileApplicability.get_collection()

    # Part 1: drop records that reference repositories which no longer exist.
    # Compare the repo ids referenced by applicability records with the repo ids
    # that exist in Pulp.
    referenced_repo_ids = set(applicability_coll.distinct('repo_id'))
    known_repo_ids = set(model.Repository.objects.distinct('repo_id'))
    stale_repo_ids = list(referenced_repo_ids - known_repo_ids)
    if stale_repo_ids:
        applicability_coll.remove({'repo_id': {'$in': stale_repo_ids}})

    # Part 2: drop records that reference profile hashes which no longer exist.
    #
    # The code below has to be compatible with MongoDB 2.6+, it has to work around
    # the 16MB BSON size limit, and no race conditions should be introduced.
    # For those reasons it may look complicated or unintuitive, but it does the following:
    #
    #   active_profile_hashes = set(consumer_unit_profile collection)
    #   profile_hashes_in_applicability = set(repo_profile_applicability collection)
    #   orphaned_profile_hashes = profile_hashes_in_applicability - active_profile_hashes
    #   for batch in paginate(orphaned_profile_hashes):
    #       remove_from_applicability_collection(where profile_hashes in batch)
    #
    # Profile hashes that exist in current UnitProfiles
    live_hashes = UnitProfile.get_collection().distinct('profile_hash')

    orphan_pipeline = [
        # Gather the profile hashes present in the RepoProfileApplicability collection
        {'$group': {'_id': None, 'rpa_profiles': {'$addToSet': '$profile_hash'}}},
        # Keep only the hashes that are not among the active ones
        {"$project": {"orphaned_profiles": {"$setDifference": ["$rpa_profiles",
                                                                live_hashes]}}},
        # Unwind the array of results so each element becomes a document itself.
        # It's important if results are huge (>16MB)
        {"$unwind": "$orphaned_profiles"},
        # Reshape results in a way that no indices are violated: _id = profile_hash
        {"$project": {"_id": "$orphaned_profiles"}},
        # Write results to a separate collection.
        # If a collection exists, old data is substituted with a new one.
        {"$out": "orphaned_profile_hash"},
    ]
    applicability_coll.aggregate(orphan_pipeline, allowDiskUse=True)

    # Remove orphaned applicability profiles using profile hashes from the temporary
    # collection, in batches in case there are millions of them.
    orphan_coll = connection.get_collection('orphaned_profile_hash')
    batch_size = 100000
    orphan_total = orphan_coll.count()
    _logger.info("Orphaned consumer profiles to process: %s" % orphan_total)

    # Folds one page of documents into a single list of profile hashes
    collect_hashes_stage = {"$group": {"_id": None, "profile_hash": {"$push": "$_id"}}}

    for offset in range(0, orphan_total, batch_size):
        batch_cursor = orphan_coll.aggregate(
            [{"$skip": offset}, {"$limit": batch_size}, collect_hashes_stage])
        batch_hashes = batch_cursor.next()['profile_hash']
        applicability_coll.remove({'profile_hash': {'$in': batch_hashes}})

        # Statistics: the last batch may be smaller than batch_size
        processed = min(orphan_total, offset + batch_size)
        _logger.info("Orphaned consumer profiles processed: %s" % processed)
def regenerate_applicability(profile_hash, content_type, profile_id, bound_repo_id,
                             existing_applicability=None):
    """
    Regenerate and save applicability data for given profile and bound repo id.
    If existing_applicability is not None, replace it with the new applicability data.

    Nothing is calculated or saved when the profiler does not override
    ``Profiler.calculate_applicable_units``, when the bound repo holds no content
    types handled by the profiler, or when the unit profile cannot be found anymore.

    :param profile_hash: hash of the unit profile
    :type  profile_hash: basestring
    :param content_type: profile (unit) type ID
    :type  content_type: str
    :param profile_id: unique id of the unit profile
    :type  profile_id: str
    :param bound_repo_id: repo id to be used to calculate applicability
                          against the given unit profile
    :type  bound_repo_id: str
    :param existing_applicability: existing RepoProfileApplicability object to be replaced
    :type  existing_applicability: pulp.server.db.model.consumer.RepoProfileApplicability
    """
    profiler_conduit = ProfilerConduit()

    # Get the profiler for content_type of given unit_profile
    profiler, profiler_cfg = ApplicabilityRegenerationManager._profiler(content_type)

    # Check if the profiler supports applicability, else return
    if profiler.calculate_applicable_units == Profiler.calculate_applicable_units:
        # If base class calculate_applicable_units method is called,
        # skip applicability regeneration
        return

    # Find out which content types have unit counts greater than zero in the bound repo
    repo_content_types = ApplicabilityRegenerationManager._get_existing_repo_content_types(
        bound_repo_id)

    # Get the intersection of existing types in the repo and the types that the profiler
    # handles. If the intersection is empty there is nothing to regenerate.
    if not (set(repo_content_types) & set(profiler.metadata()['types'])):
        return

    # Get the actual profile for existing_applicability or lookup using profile_id
    if existing_applicability:
        profile = existing_applicability.profile
    else:
        unit_profile = UnitProfile.get_collection().find_one({'id': profile_id},
                                                             projection=['profile'])
        if unit_profile is None:
            # The unit profile can disappear while applicability is being regenerated
            # (profiles change when packages are installed or removed, or when the
            # consumer is deleted). There is nothing to calculate in that case.
            return
        profile = unit_profile['profile']

    call_config = PluginCallConfiguration(plugin_config=profiler_cfg,
                                          repo_plugin_config=None)
    try:
        applicability = profiler.calculate_applicable_units(profile, bound_repo_id,
                                                            call_config, profiler_conduit)
    except NotImplementedError:
        _logger.debug("Profiler for content type [%s] does not support applicability",
                      content_type)
        return

    try:
        # Create a new RepoProfileApplicability object and save it in the db
        # NOTE(review): the create() visible at the top of this file also takes an
        # all_profiles_hash argument - confirm this four-argument call still matches
        # the manager's signature.
        RepoProfileApplicability.objects.create(profile_hash, bound_repo_id, profile,
                                                applicability)
    except DuplicateKeyError:
        # Update existing applicability
        if not existing_applicability:
            applicability_dict = RepoProfileApplicability.get_collection().find_one(
                {'repo_id': bound_repo_id, 'profile_hash': profile_hash})
            existing_applicability = RepoProfileApplicability(**applicability_dict)
        existing_applicability.applicability = applicability
        existing_applicability.save()