def deep_prefetch_related_objects(objects, lookups):
    """
    Populate prefetched-object caches for a list of QuerySet results.

    Helper function for prefetch_related functionality. Differs from
    :meth:`django.db.models.query.prefetch_related_objects` in that it can
    prefetch "non-strict" lookups through GFK: a lookup that does not
    apply to some of the objects is skipped for them instead of failing.

    :param objects: result cache of base queryset.
    :param lookups: list with lookups.
    :raises ValueError: if a single-segment lookup resolves to an attribute
        that exists on the object but does not support prefetching.
    """
    # How it works
    # ------------
    # - Since the data is not all of the same type, the "unit" of data is
    #   a model instance (object), not a QuerySet.
    # - Goals:
    #   - Prefetching 'non-strict' lookups.
    #   - No doubled queries to the DB, no other redundancy in processing
    #     and no infinite recursion.
    #   - Simpler design than the original
    #     `django.db.models.query.prefetch_related_objects`.
    # - Sets are used to avoid duplicates.
    # - Python's sets distinguish objects by value, but the task requires
    #   distinguishing objects by id: two objects that are equal by value
    #   might belong to two different querysets, and both of them must be
    #   populated with cache.
    # - BUT fetched cache data for an object is distinguished by value.
    #   The rationale is to recognise unique queries and prevent their
    #   repeated execution: the composite identifier of a query is the
    #   object (by value) and the lookup part it is built for, and only
    #   one query exists per such pair. ``seen`` uses a slightly richer
    #   structure than `obj -> lookup` to reduce redundancy, but its
    #   "primary key" is still `obj -> lookup`.
    # - Data flows through `buffer` during processing: objects discovered
    #   while traversing the DB structure are added, processed objects are
    #   removed.
    # todo beauty and refactoring
    if not objects:
        return  # nothing to do

    # lookup -> model -> {(id(obj), obj), ...}
    # id(obj) - because objects must be distinguished by id.
    # DefaultOrderedDict because the order of lookups matters,
    # see tests.LookupOrderingTest.test_order of Django test suite.
    buffer = DefaultOrderedDict(lambda: defaultdict(set))
    update_buffer(buffer, objects, reversed(lookups))

    # model -> attr ->
    #     single -> bool
    #     cache_name -> str
    #     cache ->
    #         obj -> [cache]
    seen = tree()

    while True:
        try:
            lookup = last(buffer.keys())
        except TypeError:
            # NOTE(review): presumably `last` raises TypeError once the
            # buffer is empty - confirm against its definition.
            break
        try:
            model, current = buffer[lookup].popitem()
        except KeyError:
            # No models left for this lookup: drop it and take the next one.
            del buffer[lookup]
            continue

        attr = lookup.split(LOOKUP_SEP)[0]

        # Peek at one (id, obj) pair without removing it from the set.
        sample = current.pop()
        current.add(sample)
        _, sample_obj = sample
        prefetcher, _, attr_found, _ = get_prefetcher(sample_obj, attr)

        # Lookup is not valid for that object, it must be skipped.
        # No exception is raised: the data is of different types, so such
        # a situation is normal.
        if not attr_found:
            continue

        if LOOKUP_SEP not in lookup and prefetcher is None:
            raise ValueError("'%s' does not resolve to a item that supports "
                             "prefetching - this is an invalid parameter to "
                             "prefetch_related()." % lookup)

        if prefetcher is None:
            # Plain attribute: follow it and keep processing the rest of
            # the lookup on the (non-None) attribute values.
            clipped = clip_lookup(lookup)
            if clipped:
                update_buffer(
                    buffer,
                    filter(is_not_none,
                           [getattr(o, attr) for _, o in current]),
                    [clipped])
            continue

        to_discard = set()
        for e in current:  # no need to query for already prefetched data
            obj = e[1]
            obj_model = obj.__class__
            p, d, _, is_fetched = get_prefetcher(obj, attr)

            cache = None
            if is_fetched:  # case of Django internal cache
                single, cache_name = get_info(p, d)
                cache = get_cache(obj, single, cache_name, attr)
                update_seen(seen, model, attr, single, cache_name, obj, cache)
                to_discard.add(e)
            elif (model in seen and  # case of `seen`
                    attr in seen[obj_model] and
                    obj in seen[obj_model][attr]['cache']):
                single = seen[obj_model][attr]['single']
                cache_name = seen[obj_model][attr]['cache_name']
                cache = seen[obj_model][attr]['cache'][obj]
                to_discard.add(e)
                set_cache(obj, single, cache, cache_name, attr)

            # If data was cached, it still must get into `buffer` so that
            # the remainder of the lookup is processed for it.
            if cache is not None and len(cache) != 0:
                clipped = clip_lookup(lookup)
                if clipped:
                    update_buffer(buffer, cache, [clipped])

        current -= to_discard

        if current:
            prefetch_qs, rel_attr_fn, cur_attr_fn, single, cache_name = \
                prefetcher.get_prefetch_queryset(
                    [obj for _, obj in current])

            # Prefetch lookups from prefetch queries are merged
            # with processed lookups.
            additional_lookups = getattr(prefetch_qs,
                                         '_prefetch_related_lookups', [])
            if additional_lookups:
                prefetch_qs._prefetch_related_lookups = []

            discovered = list(prefetch_qs)

            if discovered:
                lookups_for_discovered = additional_lookups
                clipped_lookup = LOOKUP_SEP.join(
                    lookup.split(LOOKUP_SEP)[1:])
                if clipped_lookup:
                    lookups_for_discovered = chain([clipped_lookup],
                                                   additional_lookups)
                if lookups_for_discovered:
                    reversed_lookups = reversed(list(lookups_for_discovered))
                    update_buffer(buffer, discovered, reversed_lookups)

            # Group the fetched objects by the value that links them back
            # to the objects they were fetched for.
            rel_to_cur = defaultdict(list)
            for obj in discovered:
                val = rel_attr_fn(obj)
                rel_to_cur[val].append(obj)

            for pair in current:  # queried data is set up to objects
                obj = pair[1]
                val = cur_attr_fn(obj)
                cache = rel_to_cur.get(val, [])
                update_seen(seen, model, attr, single, cache_name, obj, cache)
                set_cache(obj, single, cache, cache_name, attr)
def test_bug(self):
    """
    Check that the prefetch queryset for 'house' is built without a JOIN.

    Takes the prefetcher for the 'house' attribute of the first room,
    builds its prefetch queryset for all rooms and asserts that the text
    of the resulting query does not contain ' JOIN '.
    """
    first_room = self.rooms[0]
    house_prefetcher = get_prefetcher(first_room, 'house')[0]
    all_rooms = list(Room.objects.all())
    prefetch_result = house_prefetcher.get_prefetch_queryset(all_rooms)
    sql = force_text(prefetch_result[0].query)
    self.assertNotIn(' JOIN ', sql)
def deep_prefetch_related_objects(objects, lookups):
    """
    Populate prefetched-object caches for a list of QuerySet results.

    Helper function for prefetch_related functionality. Differs from
    :meth:`django.db.models.query.prefetch_related_objects` in that it can
    prefetch "non-strict" lookups through GFK: a lookup that does not
    apply to some of the objects is skipped for them instead of failing.

    :param objects: result cache of base queryset.
    :param lookups: list with lookups.
    :raises ValueError: if a single-segment lookup resolves to an attribute
        that exists on the object but does not support prefetching.
    """
    # How it works
    # ------------
    # - Since the data is not all of the same type, the "unit" of data is
    #   a model instance (object), not a QuerySet.
    # - Goals:
    #   - Prefetching 'non-strict' lookups.
    #   - No doubled queries to the DB, no other redundancy in processing
    #     and no infinite recursion.
    #   - Simpler design than the original
    #     `django.db.models.query.prefetch_related_objects`.
    # - Sets are used to avoid duplicates.
    # - Python's sets distinguish objects by value, but the task requires
    #   distinguishing objects by id: two objects that are equal by value
    #   might belong to two different querysets, and both of them must be
    #   populated with cache.
    # - BUT fetched cache data for an object is distinguished by value.
    #   The rationale is to recognise unique queries and prevent their
    #   repeated execution: the composite identifier of a query is the
    #   object (by value) and the lookup part it is built for, and only
    #   one query exists per such pair. ``seen`` uses a slightly richer
    #   structure than `obj -> lookup` to reduce redundancy, but its
    #   "primary key" is still `obj -> lookup`.
    # - Data flows through `buffer` during processing: objects discovered
    #   while traversing the DB structure are added, processed objects are
    #   removed.
    # todo beauty and refactoring
    if not objects:
        return  # nothing to do

    # lookup -> model -> {(id(obj), obj), ...}
    # id(obj) - because objects must be distinguished by id.
    # DefaultOrderedDict because the order of lookups matters,
    # see tests.LookupOrderingTest.test_order of Django test suite.
    buffer = DefaultOrderedDict(lambda: defaultdict(set))
    update_buffer(buffer, objects, reversed(lookups))

    # model -> attr ->
    #     single -> bool
    #     cache_name -> str
    #     cache ->
    #         obj -> [cache]
    seen = tree()

    while True:
        try:
            lookup = last(buffer.keys())
        except TypeError:
            # NOTE(review): presumably `last` raises TypeError once the
            # buffer is empty - confirm against its definition.
            break
        try:
            model, current = buffer[lookup].popitem()
        except KeyError:
            # No models left for this lookup: drop it and take the next one.
            del buffer[lookup]
            continue

        attr = lookup.split(LOOKUP_SEP)[0]

        # Peek at one (id, obj) pair without removing it from the set.
        sample = current.pop()
        current.add(sample)
        _, sample_obj = sample
        prefetcher, _, attr_found, _ = get_prefetcher(sample_obj, attr)

        # Lookup is not valid for that object, it must be skipped.
        # No exception is raised: the data is of different types, so such
        # a situation is normal.
        if not attr_found:
            continue

        if LOOKUP_SEP not in lookup and prefetcher is None:
            raise ValueError("'%s' does not resolve to a item that supports "
                             "prefetching - this is an invalid parameter to "
                             "prefetch_related()." % lookup)

        if prefetcher is None:
            # Plain attribute: follow it and keep processing the rest of
            # the lookup on the (non-None) attribute values.
            clipped = clip_lookup(lookup)
            if clipped:
                update_buffer(
                    buffer,
                    filter(is_not_none,
                           [getattr(o, attr) for _, o in current]),
                    [clipped])
            continue

        to_discard = set()
        for e in current:  # no need to query for already prefetched data
            obj = e[1]
            obj_model = obj.__class__
            p, d, _, is_fetched = get_prefetcher(obj, attr)

            cache = None
            if is_fetched:  # case of Django internal cache
                single, cache_name = get_info(p, d)
                cache = get_cache(obj, single, cache_name, attr)
                update_seen(seen, model, attr, single, cache_name, obj, cache)
                to_discard.add(e)
            elif (model in seen and  # case of `seen`
                    attr in seen[obj_model] and
                    obj in seen[obj_model][attr]['cache']):
                single = seen[obj_model][attr]['single']
                cache_name = seen[obj_model][attr]['cache_name']
                cache = seen[obj_model][attr]['cache'][obj]
                to_discard.add(e)
                set_cache(obj, single, cache, cache_name, attr)

            # If data was cached, it still must get into `buffer` so that
            # the remainder of the lookup is processed for it.
            if cache is not None and len(cache) != 0:
                clipped = clip_lookup(lookup)
                if clipped:
                    update_buffer(buffer, cache, [clipped])

        current -= to_discard

        if current:
            # A real list (not a one-shot `map` iterator) so the prefetcher
            # may index or iterate the instances more than once.
            instances = [obj for _, obj in current]

            # Prefer the current method name; fall back to the
            # pre-rename spelling for prefetchers that only provide that.
            build_prefetch_qs = getattr(prefetcher,
                                        'get_prefetch_queryset', None)
            if build_prefetch_qs is None:
                build_prefetch_qs = prefetcher.get_prefetch_query_set

            prefetch_qs, rel_attr_fn, cur_attr_fn, single, cache_name = \
                build_prefetch_qs(instances)

            # Prefetch lookups from prefetch queries are merged into
            # processing.
            additional_lookups = getattr(prefetch_qs,
                                         '_prefetch_related_lookups', [])
            if additional_lookups:
                prefetch_qs._prefetch_related_lookups = []

            discovered = list(prefetch_qs)

            lookups_for_discovered = additional_lookups
            clipped_lookup = LOOKUP_SEP.join(lookup.split(LOOKUP_SEP)[1:])
            if clipped_lookup:
                lookups_for_discovered = chain([clipped_lookup],
                                               additional_lookups)
            if lookups_for_discovered:
                update_buffer(buffer, discovered,
                              reversed(list(lookups_for_discovered)))

            # Group the fetched objects by the value that links them back
            # to the objects they were fetched for.
            rel_to_cur = defaultdict(list)
            for obj in discovered:
                val = rel_attr_fn(obj)
                rel_to_cur[val].append(obj)

            for pair in current:  # queried data is set up to objects
                obj = pair[1]
                val = cur_attr_fn(obj)
                cache = rel_to_cur.get(val, [])
                update_seen(seen, model, attr, single, cache_name, obj, cache)
                set_cache(obj, single, cache, cache_name, attr)