Ejemplo n.º 1
0
def deep_prefetch_related_objects(objects, lookups):
    """
    Populate prefetched-object caches for a list of model instances.

    Helper function for prefetch_related functionality.

    Differs from :meth:`django.db.models.query.prefetch_related_objects`
    in that it can prefetch "non-strict" lookups through GFK: when the
    first attribute of a lookup is not found on a group of objects, the
    lookup is skipped for that group instead of raising.

    :param objects: result cache of base queryset.
    :param lookups: sequence of lookups (it is passed to ``reversed()``).
    :raises ValueError: if a lookup without ``LOOKUP_SEP`` names an
        attribute that exists but has no prefetcher.
    """

    # How it works
    # ------------
    #   - Since the data is not all of the same type, the "unit" of data
    #     is a model instance (object), not a QuerySet.
    #   - Goals:
    #       - Prefetching "non-strict" lookups.
    #       - No doubled DB queries, no other redundant processing and
    #         no infinite recursion.
    #       - Simpler design than the original
    #         `django.db.models.query.prefetch_related_objects`.
    #   - Sets are used to avoid duplicates.
    #   - Python sets distinguish objects by value, but the task requires
    #     distinguishing them by id: two objects that are equal by value
    #     may belong to two different querysets, and both of them must
    #     have their cache populated.
    #   - BUT the fetched cache data for an object is distinguished by
    #     value. The rationale is to recognise unique queries and prevent
    #     their repeated execution: we do not care about the "id" of a
    #     query, only about its value. The composite identifier of a query
    #     is the object and the lookup it is constructed for, so only one
    #     query exists per object (by value) and lookup part.
    #     The structure used for traversed lookups (``seen``) is slightly
    #     more complex than `obj -> lookup` to reduce redundancy, but the
    #     "primary key" of the stored data is still `obj -> lookup`.
    #   - Data flows through `buffer` during processing: objects
    #     discovered while traversing the DB structure are added and
    #     processed objects are removed.
    # TODO: beauty and refactoring
    if not objects:
        return  # nothing to do

    # lookup -> model -> {(id(obj), obj), ...}
    # id(obj) - because objects must be distinguished by id.
    # DefaultOrderedDict because the order of lookups matters,
    # see tests.LookupOrderingTest.test_order of the Django test suite.
    buffer = DefaultOrderedDict(lambda: defaultdict(set))

    update_buffer(buffer, objects, reversed(lookups))
    seen = tree()  # model -> attr ->
    #              single     -> bool
    #              cache_name -> str
    #              cache      ->
    #                         obj -> [cache]
    while True:
        try:
            lookup = last(buffer.keys())
        except TypeError:
            # presumably `last` raises TypeError once `buffer` is empty
            break
        try:
            model, current = buffer[lookup].popitem()
        except KeyError:
            # No models left for this lookup: drop it and take the next one.
            del buffer[lookup]
            continue

        attr = lookup.split(LOOKUP_SEP)[0]

        # Peek at one (id, obj) pair without removing it from the set.
        sample = current.pop()
        current.add(sample)
        _, sample_obj = sample
        prefetcher, _, attr_found, _ = get_prefetcher(sample_obj, attr)

        # The lookup is not valid for this object, so it must be skipped.
        # No exception is raised: the data is of different types, so this
        # situation is normal.
        if not attr_found:
            continue

        if LOOKUP_SEP not in lookup and prefetcher is None:
            raise ValueError("'%s' does not resolve to a item that supports "
                             "prefetching - this is an invalid parameter to "
                             "prefetch_related()." % lookup)

        # Loop-invariant for this iteration, so it is computed only once.
        clipped = clip_lookup(lookup)

        if prefetcher is None:
            # Nothing to prefetch at this step: follow the plain attribute
            # and keep processing the rest of the lookup on its values.
            if clipped:
                update_buffer(
                    buffer,
                    filter(is_not_none,
                           [getattr(o, attr) for _, o in current]), [clipped])
            continue

        to_discard = set()
        for entry in current:  # no need to query for already prefetched data
            obj = entry[1]
            obj_model = obj.__class__
            p, d, _, is_fetched = get_prefetcher(obj, attr)
            cache = None
            if is_fetched:  # case of Django internal cache
                single, cache_name = get_info(p, d)
                cache = get_cache(obj, single, cache_name, attr)
                update_seen(seen, model, attr, single, cache_name, obj, cache)
                to_discard.add(entry)
            # NOTE(review): membership is tested with `model` but the data
            # is read through `obj_model`, while `update_seen` is called
            # with `model`. Presumably both are always the same class;
            # confirm against `update_buffer` before unifying them.
            elif (model in seen and  # case of `seen`
                  attr in seen[obj_model] and
                  obj in seen[obj_model][attr]['cache']):
                known = seen[obj_model][attr]
                single = known['single']
                cache_name = known['cache_name']
                cache = known['cache'][obj]
                to_discard.add(entry)
                set_cache(obj, single, cache, cache_name, attr)
            # Data that was already cached still must get into `buffer`
            # so the rest of the lookup is processed for it.
            if cache is not None and len(cache) != 0 and clipped:
                update_buffer(buffer, cache, [clipped])

        current -= to_discard

        if not current:
            continue  # everything was served from a cache

        prefetch_qs, rel_attr_fn, cur_attr_fn, single, cache_name = \
            prefetcher.get_prefetch_queryset([inst for _, inst in current])

        # Prefetch lookups from prefetch queries are merged
        # with processed lookups.
        additional_lookups = getattr(prefetch_qs,
                                     '_prefetch_related_lookups', [])

        if additional_lookups:
            # Cleared so that evaluating `prefetch_qs` below does not
            # process them on its own.
            prefetch_qs._prefetch_related_lookups = []
        discovered = list(prefetch_qs)
        if discovered:
            lookups_for_discovered = list(additional_lookups)
            # NOTE(review): presumably the same value as `clipped`;
            # kept as written because `clip_lookup` is not visible here.
            clipped_lookup = LOOKUP_SEP.join(lookup.split(LOOKUP_SEP)[1:])
            if clipped_lookup:
                lookups_for_discovered.insert(0, clipped_lookup)
            if lookups_for_discovered:
                update_buffer(buffer, discovered,
                              reversed(lookups_for_discovered))

        # Group the fetched objects by the value they are matched on.
        rel_to_cur = defaultdict(list)
        for rel_obj in discovered:
            rel_to_cur[rel_attr_fn(rel_obj)].append(rel_obj)

        for _, obj in current:  # queried data is set up to objects
            cache = rel_to_cur.get(cur_attr_fn(obj), [])
            update_seen(seen, model, attr, single, cache_name, obj, cache)
            set_cache(obj, single, cache, cache_name, attr)
Ejemplo n.º 2
0
 def test_bug(self):
     """
     Check that the text of the 'house' prefetch query contains no JOIN.

     The prefetcher is resolved from the first room and asked for a
     prefetch queryset covering every ``Room``.
     """
     first_room = self.rooms[0]
     house_prefetcher = get_prefetcher(first_room, 'house')[0]
     all_rooms = list(Room.objects.all())
     prefetch_result = house_prefetcher.get_prefetch_queryset(all_rooms)
     query_text = force_text(prefetch_result[0].query)
     self.assertNotIn(' JOIN ', query_text)
Ejemplo n.º 3
0
 def test_bug(self):
     """
     Assert that prefetching 'house' for all rooms builds a JOIN-free query.

     Only the first element returned by ``get_prefetch_queryset`` (the
     queryset) is inspected.
     """
     resolved = get_prefetcher(self.rooms[0], 'house')
     prefetcher = resolved[0]
     rooms = list(Room.objects.all())
     queryset = prefetcher.get_prefetch_queryset(rooms)[0]
     rendered = force_text(queryset.query)
     self.assertNotIn(' JOIN ', rendered)
Ejemplo n.º 4
0
def deep_prefetch_related_objects(objects, lookups):
    """
    Populate prefetched-object caches for a list of model instances.

    Helper function for prefetch_related functionality.

    Differs from :meth:`django.db.models.query.prefetch_related_objects`
    in that it can prefetch "non-strict" lookups through GFK: when the
    first attribute of a lookup is not found on a group of objects, the
    lookup is skipped for that group instead of raising.

    :param objects: result cache of base queryset.
    :param lookups: sequence of lookups (it is passed to ``reversed()``).
    :raises ValueError: if a lookup without ``LOOKUP_SEP`` names an
        attribute that exists but has no prefetcher.
    """

    # How it works
    # ------------
    #   - Since the data is not all of the same type, the "unit" of data
    #     is a model instance (object), not a QuerySet.
    #   - Goals:
    #       - Prefetching "non-strict" lookups.
    #       - No doubled DB queries, no other redundant processing and
    #         no infinite recursion.
    #       - Simpler design than the original
    #         `django.db.models.query.prefetch_related_objects`.
    #   - Sets are used to avoid duplicates.
    #   - Python sets distinguish objects by value, but the task requires
    #     distinguishing them by id: two objects that are equal by value
    #     may belong to two different querysets, and both of them must
    #     have their cache populated.
    #   - BUT the fetched cache data for an object is distinguished by
    #     value. The rationale is to recognise unique queries and prevent
    #     their repeated execution: we do not care about the "id" of a
    #     query, only about its value. The composite identifier of a query
    #     is the object and the lookup it is constructed for, so only one
    #     query exists per object (by value) and lookup part.
    #     The structure used for traversed lookups (``seen``) is slightly
    #     more complex than `obj -> lookup` to reduce redundancy, but the
    #     "primary key" of the stored data is still `obj -> lookup`.
    #   - Data flows through `buffer` during processing: objects
    #     discovered while traversing the DB structure are added and
    #     processed objects are removed.
    # TODO: beauty and refactoring
    if not objects:
        return  # nothing to do

    # lookup -> model -> {(id(obj), obj), ...}
    # id(obj) - because objects must be distinguished by id.
    # DefaultOrderedDict because the order of lookups matters,
    # see tests.LookupOrderingTest.test_order of the Django test suite.
    buffer = DefaultOrderedDict(lambda: defaultdict(set))

    update_buffer(buffer, objects, reversed(lookups))
    seen = tree()     # model -> attr ->
                      #              single     -> bool
                      #              cache_name -> str
                      #              cache      ->
                      #                         obj -> [cache]
    while True:
        try:
            lookup = last(buffer.keys())
        except TypeError:
            # presumably `last` raises TypeError once `buffer` is empty
            break
        try:
            model, current = buffer[lookup].popitem()
        except KeyError:
            # No models left for this lookup: drop it and take the next one.
            del buffer[lookup]
            continue

        attr = lookup.split(LOOKUP_SEP)[0]

        # Peek at one (id, obj) pair without removing it from the set.
        sample = current.pop()
        current.add(sample)
        _, sample_obj = sample
        prefetcher, _, attr_found, _ = get_prefetcher(sample_obj, attr)

        # The lookup is not valid for this object, so it must be skipped.
        # No exception is raised: the data is of different types, so this
        # situation is normal.
        if not attr_found:
            continue

        if LOOKUP_SEP not in lookup and prefetcher is None:
            raise ValueError("'%s' does not resolve to a item that supports "
                             "prefetching - this is an invalid parameter to "
                             "prefetch_related()." % lookup)

        # Loop-invariant for this iteration, so it is computed only once.
        clipped = clip_lookup(lookup)

        if prefetcher is None:
            # Nothing to prefetch at this step: follow the plain attribute
            # and keep processing the rest of the lookup on its values.
            if clipped:
                update_buffer(
                    buffer,
                    filter(is_not_none, [getattr(o, attr)
                                         for _, o in current]),
                    [clipped])
            continue

        to_discard = set()
        for entry in current:  # no need to query for already prefetched data
            obj = entry[1]
            obj_model = obj.__class__
            p, d, _, is_fetched = get_prefetcher(obj, attr)
            cache = None
            if is_fetched:  # case of Django internal cache
                single, cache_name = get_info(p, d)
                cache = get_cache(obj, single, cache_name, attr)
                update_seen(seen, model, attr, single, cache_name, obj, cache)
                to_discard.add(entry)
            # NOTE(review): membership is tested with `model` but the data
            # is read through `obj_model`, while `update_seen` is called
            # with `model`. Presumably both are always the same class;
            # confirm against `update_buffer` before unifying them.
            elif (model in seen and  # case of `seen`
                  attr in seen[obj_model] and
                  obj in seen[obj_model][attr]['cache']):
                known = seen[obj_model][attr]
                single = known['single']
                cache_name = known['cache_name']
                cache = known['cache'][obj]
                to_discard.add(entry)
                set_cache(obj, single, cache, cache_name, attr)
            # Data that was already cached still must get into `buffer`
            # so the rest of the lookup is processed for it.
            if cache is not None and len(cache) != 0 and clipped:
                update_buffer(buffer, cache, [clipped])

        current -= to_discard

        if not current:
            continue  # everything was served from a cache

        # Django 1.6 renamed `get_prefetch_query_set` to
        # `get_prefetch_queryset`; prefer the new name and fall back to
        # the old one so both kinds of prefetcher keep working.
        fetch_queryset = getattr(prefetcher, 'get_prefetch_queryset', None)
        if fetch_queryset is None:
            fetch_queryset = prefetcher.get_prefetch_query_set

        # A real list is passed: on Python 3 `map()` is a one-shot iterator
        # that supports neither `len()` nor indexing.
        prefetch_qs, rel_attr_fn, cur_attr_fn, single, cache_name = \
            fetch_queryset([inst for _, inst in current])

        # Prefetch lookups from prefetch queries are merged into processing.
        additional_lookups = getattr(prefetch_qs,
                                     '_prefetch_related_lookups', [])

        if additional_lookups:
            # Cleared so that evaluating `prefetch_qs` below does not
            # process them on its own.
            prefetch_qs._prefetch_related_lookups = []
        discovered = list(prefetch_qs)
        if discovered:  # nothing to enqueue when the query returned no rows
            lookups_for_discovered = list(additional_lookups)
            # NOTE(review): presumably the same value as `clipped`;
            # kept as written because `clip_lookup` is not visible here.
            clipped_lookup = LOOKUP_SEP.join(lookup.split(LOOKUP_SEP)[1:])
            if clipped_lookup:
                lookups_for_discovered.insert(0, clipped_lookup)
            if lookups_for_discovered:
                update_buffer(buffer, discovered,
                              reversed(lookups_for_discovered))

        # Group the fetched objects by the value they are matched on.
        rel_to_cur = defaultdict(list)
        for rel_obj in discovered:
            rel_to_cur[rel_attr_fn(rel_obj)].append(rel_obj)

        for _, obj in current:  # queried data is set up to objects
            cache = rel_to_cur.get(cur_attr_fn(obj), [])
            update_seen(seen, model, attr, single, cache_name, obj, cache)
            set_cache(obj, single, cache, cache_name, attr)