def Run(self, limit, offset):
    opts = self._gae_query._Query__query_options
    if opts.keys_only or opts.projection:
        return self._gae_query.Run(limit=limit, offset=offset)

    ret = caching.get_from_cache(self._identifier, self._namespace)
    if ret is not None and not utils.entity_matches_query(ret, self._gae_query):
        ret = None

    if ret is None:
        # We do a fast keys_only query to get the result
        keys_query = Query(self._gae_query._Query__kind, keys_only=True, namespace=self._namespace)
        keys_query.update(self._gae_query)
        keys = keys_query.Run(limit=limit, offset=offset)

        # Do a consistent get so we don't cache stale data, and recheck the result matches the query
        ret = [x for x in datastore.Get(keys) if x and utils.entity_matches_query(x, self._gae_query)]
        if len(ret) == 1:
            caching.add_entities_to_cache(
                self._model,
                [ret[0]],
                caching.CachingSituation.DATASTORE_GET,
                self._namespace,
            )
        return iter(ret)

    return iter([ret])
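
# A minimal standalone sketch (assuming the App Engine Python SDK) of the
# "fast keys_only query, then consistent Get" pattern used above: the
# keys_only query is cheap, and the follow-up datastore.Get returns strongly
# consistent entities, with None placeholders for keys deleted in between.
# `consistent_fetch` is a hypothetical helper, not part of the code above.
from google.appengine.api import datastore

def consistent_fetch(keys_only_query, limit=None, offset=None):
    keys = list(keys_only_query.Run(limit=limit, offset=offset))
    # Drop the None placeholders the datastore returns for missing keys
    return [x for x in datastore.Get(keys) if x is not None]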
def prep_value_for_query(self, value, model, column, connection):
    """
        Return the set of parent entity keys whose associated contains-index
        entities match the given value
    """
    if hasattr(value, "isoformat"):
        value = value.isoformat()
    else:
        value = unicode(value)
    value = self.unescape(value)
    if STRIP_PERCENTS:
        # SQL does __contains by doing LIKE %value%
        if value.startswith("%") and value.endswith("%"):
            value = value[1:-1]

    namespace = connection.settings_dict.get("NAMESPACE", "")
    qry = Query(self._generate_kind_name(model, column), keys_only=True, namespace=namespace)
    qry['{} >='.format(self.INDEXED_COLUMN_NAME)] = value
    qry['{} <='.format(self.INDEXED_COLUMN_NAME)] = value + u'\ufffd'

    # We can't filter on the 'name' as part of the query, because the name is the key and these
    # are child entities of the ancestor entities which they are indexing, and as we don't know
    # the keys of the ancestor entities we can't create the complete keys, hence the comparison
    # of `x.name() == self.OPERATOR` happens here in python
    resulting_keys = set([x.parent() for x in qry.Run() if x.name() == self.OPERATOR])
    return resulting_keys
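
# The pair of inequality filters above is the conventional App Engine
# prefix-match trick: everything from `value` up to `value + u'\ufffd'`
# (a sentinel that sorts after any character appearing in the data) falls
# inside the range. A minimal sketch of the same trick in isolation; the
# kind and column here are hypothetical:
from google.appengine.api import datastore

def prefix_query(kind, column, prefix):
    qry = datastore.Query(kind, keys_only=True)
    qry['%s >=' % column] = prefix
    qry['%s <=' % column] = prefix + u'\ufffd'
    return qry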
def Run(self, limit, offset):
    opts = self._gae_query._Query__query_options
    if opts.keys_only or opts.projection:
        return self._gae_query.Run(limit=limit, offset=offset)

    ret = caching.get_from_cache(self._identifier)
    if ret is not None and not utils.entity_matches_query(ret, self._gae_query):
        ret = None

    if ret is None:
        # We do a fast keys_only query to get the result
        keys_query = Query(self._gae_query._Query__kind, keys_only=True)
        keys_query.update(self._gae_query)
        keys = keys_query.Run(limit=limit, offset=offset)

        # Do a consistent get so we don't cache stale data, and recheck the result matches the query.
        # Guard against None entries: Get returns None placeholders for keys deleted in between.
        ret = [x for x in datastore.Get(keys) if x and utils.entity_matches_query(x, self._gae_query)]
        if len(ret) == 1:
            caching.add_entity_to_cache(self._model, ret[0], caching.CachingSituation.DATASTORE_GET)
        return iter(ret)

    return iter([ret])
def cleanup(cls, datastore_key):
    # Kindless query, we don't know the kinds because we don't know all the fields
    # that use contains. But, we do know that all the things we need to delete are:
    # a.) A descendant
    # b.) Have a key name of whatever OPERATOR is
    qry = Query(keys_only=True, namespace=datastore_key.namespace())
    qry = qry.Ancestor(datastore_key)

    # Delete all the entities matching the ancestor query
    Delete([x for x in qry.Run() if x.name() == cls.OPERATOR])
def table_names(self):
    """ Returns a list of names of all tables that exist in the database. """
    from google.appengine.api.datastore import Query
    return [kind.key().name() for kind in Query(kind='__kind__').Run()]
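
# `__kind__` is one of the datastore's built-in metadata kinds: each entity's
# key name is a kind name in the current namespace. A self-contained sketch
# of the same listing that skips the datastore's own internal kinds:
from google.appengine.api.datastore import Query

user_kinds = [
    k.key().name()
    for k in Query(kind='__kind__').Run()
    if not k.key().name().startswith('__')
]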
def tear_down_helper(self):
    query = Query('Greeting', _app=PROJECT_ID)
    results = yield self.datastore.run_query(query)
    batch = []
    for entity in results:
        batch.append(entity.key())
        if len(batch) == self.BATCH_SIZE:
            yield self.datastore.delete(batch)
            batch = []
    yield self.datastore.delete(batch)
def test_cassandra_page_size(self):
    entity_count = self.CASSANDRA_PAGE_SIZE + 1
    for _ in range(entity_count):
        entity = Entity('Greeting', _app=PROJECT_ID)
        yield self.datastore.put(entity)

    query = Query('Greeting', _app=PROJECT_ID)
    results = yield self.datastore.run_query(query)
    self.assertEqual(len(results), entity_count)
def _combine_filters(self, column, db_type, op_values):
    gae_query = self.gae_query
    combined = []
    for query in gae_query:
        for op, value in op_values:
            self.gae_query = [Query(self.db_table, keys_only=self.pks_only)]
            self.gae_query[0].update(query)
            self._add_filter(column, op, db_type, value)
            combined.append(self.gae_query[0])
    self.gae_query = combined
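
# _combine_filters above expands each (op, value) pair into its own Query
# because the datastore has no native OR: an IN filter becomes one equality
# query per value, merged client-side by MultiQuery. A minimal standalone
# sketch of that expansion (kind and column names are hypothetical):
from google.appengine.api import datastore

def queries_for_in(kind, column, values):
    subqueries = []
    for value in values:
        q = datastore.Query(kind)
        q['%s =' % column] = value
        subqueries.append(q)
    # The datastore rejects MultiQueries with more than 30 subqueries
    return datastore.MultiQuery(subqueries, [])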
def delete_all_entities():
    for namespace in get_namespaces():
        set_namespace(namespace)
        for kind in get_kinds():
            if kind.startswith('__'):
                continue
            while True:
                data = Query(kind=kind, keys_only=True).Get(200)
                if not data:
                    break
                Delete(data)
def test_merge_query_with_null(locations):
    datastore = Datastore(locations, PROJECT_ID)
    query = Query('Greeting', _app=PROJECT_ID)
    results = yield datastore.run_query(query)
    for entity in results:
        yield datastore.delete([entity.key()])

    entity = Entity('Greeting', _app=PROJECT_ID)
    create_time = datetime.datetime.now()
    entity['content'] = None
    entity['create_time'] = create_time
    yield datastore.put(entity)

    entity = Entity('Greeting', _app=PROJECT_ID)
    entity['content'] = 'hi'
    entity['create_time'] = create_time
    yield datastore.put(entity)

    entity = Entity('Greeting', _app=PROJECT_ID)
    entity['create_time'] = None
    yield datastore.put(entity)

    query = Query('Greeting', {'content =': None, 'create_time =': create_time},
                  _app=PROJECT_ID)
    response = yield datastore.run_query(query)
    if len(response) != 1:
        raise Exception('Expected 1 result. Received: {}'.format(response))

    entity = response[0]
    if entity['content'] is not None or entity['create_time'] != create_time:
        raise Exception('Unexpected entity: {}'.format(entity))

    query = Query('Greeting', _app=PROJECT_ID)
    results = yield datastore.run_query(query)
    for entity in results:
        yield datastore.delete([entity.key()])
def __init__(self, compiler, fields):
    super(GAEQuery, self).__init__(compiler, fields)
    self.inequality_field = None
    self.included_pks = None
    self.excluded_pks = ()
    self.has_negated_exact_filter = False
    self.ordering = []
    self.db_table = self.query.get_meta().db_table
    self.pks_only = (len(fields) == 1 and fields[0].primary_key)
    start_cursor = getattr(self.query, '_gae_start_cursor', None)
    end_cursor = getattr(self.query, '_gae_end_cursor', None)
    self.gae_query = [Query(self.db_table, keys_only=self.pks_only,
                            cursor=start_cursor, end_cursor=end_cursor)]
def _start_task(self):
    assert not mapper_library.is_mapper_running(self.identifier, self.namespace), \
        "Migration started by separate thread?"

    query = Query(self.map_kind, namespace=self.namespace)
    return mapper_library.start_mapping(
        self.identifier, query, self,
        operation_method="_wrapped_map_entity",
        shard_count=self.shard_count,
        entities_per_task=self.entities_per_task,
        queue=self.queue,
    )
def test_cassandra_page_size(self):
    entity_count = self.CASSANDRA_PAGE_SIZE + 1
    batch = []
    for _ in range(entity_count):
        entity = Entity('Greeting', _app=PROJECT_ID)
        batch.append(entity)
        if len(batch) == self.BATCH_SIZE:
            yield self.datastore.put_multi(batch)
            batch = []
    yield self.datastore.put_multi(batch)

    query = Query('Greeting', _app=PROJECT_ID)
    results = yield self.datastore.run_query(query)
    self.assertEqual(len(results), entity_count)
def __init__(self, compiler, fields):
    super(GAEQuery, self).__init__(compiler, fields)
    self.inequality_field = None
    self.pk_filters = None
    self.excluded_pks = ()
    self.has_negated_exact_filter = False
    self.ordering = ()
    self.gae_ordering = []
    pks_only = False
    if len(fields) == 1 and fields[0].primary_key:
        pks_only = True
    self.db_table = self.query.get_meta().db_table
    self.pks_only = pks_only
    self.gae_query = [Query(self.db_table, keys_only=self.pks_only)]
def delete_all_entities():
    from google.appengine.api.datastore import Delete, Query
    from google.appengine.ext.db.metadata import get_kinds, get_namespaces
    from google.appengine.api.namespace_manager import set_namespace

    for namespace in get_namespaces():
        set_namespace(namespace)
        for kind in get_kinds():
            if kind.startswith('__'):
                continue
            while True:
                data = Query(kind=kind, keys_only=True).Get(200)
                if not data:
                    break
                Delete(data)
def test_separator_in_name(self):
    entity = Entity('Greeting', name='Test:1', _app=PROJECT_ID)
    create_time = datetime.datetime.utcnow()
    entity['color'] = 'red'
    entity['create_time'] = create_time
    yield self.datastore.put(entity)

    query = Query('Greeting', {'color =': 'red', 'create_time =': create_time},
                  _app=PROJECT_ID)
    response = yield self.datastore.run_query(query)
    self.assertEqual(len(response), 1)

    entity = response[0]
    self.assertEqual(entity['color'], 'red')
    self.assertEqual(entity['create_time'], create_time)
def execute(self):
    self.select.execute()

    # This is a little bit more inefficient than just doing a keys_only query and
    # sending it to delete, but I think this is the sacrifice to make for the unique caching layer
    keys = []
    for entity in QueryByKeys(
        Query(self.select.model._meta.db_table),
        [x.key() for x in self.select.results],
        []
    ).Run():
        keys.append(entity.key())
        constraints.release(self.select.model, entity)
        caching.remove_entity_from_context_cache_by_key(entity.key())
    datastore.Delete(keys)
def test_merge_query_with_null(self):
    entity = Entity('Greeting', _app=PROJECT_ID)
    create_time = datetime.datetime.now()
    entity['content'] = None
    entity['create_time'] = create_time
    yield self.datastore.put(entity)

    entity = Entity('Greeting', _app=PROJECT_ID)
    entity['content'] = 'hi'
    entity['create_time'] = create_time
    yield self.datastore.put(entity)

    query = Query('Greeting', {'content =': None, 'create_time =': create_time},
                  _app=PROJECT_ID)
    response = yield self.datastore.run_query(query)
    self.assertEqual(len(response), 1)

    entity = response[0]
    self.assertEqual(entity['content'], None)
    self.assertEqual(entity['create_time'], create_time)
def test_batch_put_index_entries(self):
    entities = []
    entity = Entity('Greeting', name='duplicate', _app=PROJECT_ID)
    entity['content'] = 'first entry'
    entities.append(entity)
    entity = Entity('Greeting', name='duplicate', _app=PROJECT_ID)
    entity['content'] = 'second entry'
    entities.append(entity)
    yield self.datastore.put_multi(entities)

    # Ensure the last specified mutation is the one that matters.
    query = Query('Greeting', projection=['content'], _app=PROJECT_ID)
    response = yield self.datastore.run_query(query)
    self.assertEqual(len(response), 1)

    entity = response[0]
    self.assertEqual(entity['content'], 'second entry')
def test_separator_in_name(locations):
    datastore = Datastore(locations, PROJECT_ID)
    entity = Entity('Greeting', name='Test:1', _app=PROJECT_ID)
    create_time = datetime.datetime.utcnow()
    entity['color'] = 'red'
    entity['create_time'] = create_time
    yield datastore.put(entity)

    query = Query('Greeting', {'color =': 'red', 'create_time =': create_time},
                  _app=PROJECT_ID)
    response = yield datastore.run_query(query)
    if len(response) != 1:
        raise Exception('Expected 1 result. Received: {}'.format(response))

    entity = response[0]
    if entity['color'] != 'red' or entity['create_time'] != create_time:
        raise Exception('Unexpected entity: {}'.format(entity))
def table_names(self, cursor=None):
    """ Returns a list of names of all tables that exist in the database. """
    return [kind.key().name() for kind in Query(kind='__kind__').Run()]
def _fetch_results(self, query):
    # If we're manually excluding PKs, and we've specified a limit to the results
    # we need to make sure that we grab more than we were asked for otherwise we could filter
    # out too many! These are again limited back to the original request limit
    # while we're processing the results later

    # Apply the namespace before excluding
    excluded_pks = [
        rpc.Key.from_path(x.kind(), x.id_or_name(), namespace=self.namespace)
        for x in self.query.excluded_pks
    ]

    high_mark = self.query.high_mark
    low_mark = self.query.low_mark

    excluded_pk_count = 0
    if excluded_pks and high_mark:
        excluded_pk_count = len(excluded_pks)
        high_mark += excluded_pk_count

    limit = None if high_mark is None else (high_mark - (low_mark or 0))
    offset = low_mark or 0

    if self.query.kind == "COUNT":
        if excluded_pks:
            # If we're excluding pks, relying on a traditional count won't work
            # so we have two options:
            # 1. Do a keys_only query instead and count the results excluding keys
            # 2. Do a count, then a pk__in=excluded_pks to work out how many to subtract
            # Here I've favoured option one as it means a single RPC call. Testing locally
            # didn't seem to indicate much of a performance difference, even when doing the pk__in
            # with GetAsync while the count was running. That might not be true of prod though so
            # if anyone comes up with a faster idea let me know!
            if isinstance(query, meta_queries.QueryByKeys):
                # If this is a QueryByKeys, just do the datastore Get and count the results
                resultset = (x.key() for x in query.Run(limit=limit, offset=offset) if x)
            else:
                count_query = Query(
                    query._Query__kind, keys_only=True, namespace=self.namespace
                )
                count_query.update(query)
                resultset = count_query.Run(limit=limit, offset=offset)
            self.results = [len([y for y in resultset if y not in excluded_pks])]
            self.results_returned = 1
        else:
            self.results = [query.Count(limit=limit, offset=offset)]
            self.results_returned = 1
        return
    elif self.query.kind == "AVERAGE":
        raise ValueError("AVERAGE not yet supported")

    # Ensure that the results_returned counter is reset
    self.results_returned = 0
    self.results = []

    seen = set()

    def dedupe(result):
        # FIXME: This logic can't be right. I think we need to store the distinct fields
        # somewhere on the query
        if getattr(self.original_query, "annotation_select", None):
            columns = self.original_query.annotation_select.keys()
        else:
            columns = self.query.columns or []

        if not columns:
            return result

        key = tuple([result[x] for x in self._exclude_pk(columns) if x in result])
        if key in seen:
            return None
        seen.add(key)
        return result

    for entity in query.Run(limit=limit, offset=offset):
        # If this is a keys only query, we need to generate a fake entity
        # for each key in the result set
        if self.keys_only:
            entity = EntityTransforms.convert_key_to_entity(entity)

        entity = EntityTransforms.ignore_excluded_pks(excluded_pks, entity)
        entity = EntityTransforms.convert_datetime_fields(self.query, entity)
        entity = EntityTransforms.fix_projected_values_type(self.query, entity)
        entity = EntityTransforms.rename_pk_field(
            self.query.model, self.query.concrete_model, entity
        )
        entity = EntityTransforms.process_extra_selects(self.query, entity)

        if self.query.distinct and self.query.extra_selects:
            entity = dedupe(entity)

        if entity:
            self.results.append(entity)
            self.results_returned += 1

        if limit and self.results_returned >= (limit - excluded_pk_count):
            break
def key_exists(key):
    qry = Query(keys_only=True, namespace=key.namespace())
    qry.Ancestor(key)
    return qry.Count(limit=1) > 0
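
# Usage sketch for key_exists: an ancestor query includes the ancestor entity
# itself, so a keys-only Count(limit=1) is a cheap existence probe that only
# touches the key, never the entity's properties. The kind and key name here
# are hypothetical.
from google.appengine.api import datastore

key = datastore.Key.from_path('Greeting', 'Test:1')
if key_exists(key):
    print('entity (or a descendant of it) exists')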
def _fetch_results(self, query):
    # If we're manually excluding PKs, and we've specified a limit to the results
    # we need to make sure that we grab more than we were asked for otherwise we could filter
    # out too many! These are again limited back to the original request limit
    # while we're processing the results later
    high_mark = self.query.high_mark
    low_mark = self.query.low_mark

    excluded_pk_count = 0
    if self.excluded_pks and high_mark:
        excluded_pk_count = len(self.excluded_pks)
        high_mark += excluded_pk_count

    limit = None if high_mark is None else (high_mark - (low_mark or 0))
    offset = low_mark or 0

    if self.query.kind == "COUNT":
        if self.excluded_pks:
            # If we're excluding pks, relying on a traditional count won't work
            # so we have two options:
            # 1. Do a keys_only query instead and count the results excluding keys
            # 2. Do a count, then a pk__in=excluded_pks to work out how many to subtract
            # Here I've favoured option one as it means a single RPC call. Testing locally
            # didn't seem to indicate much of a performance difference, even when doing the pk__in
            # with GetAsync while the count was running. That might not be true of prod though so
            # if anyone comes up with a faster idea let me know!
            count_query = Query(query._Query__kind, keys_only=True)
            count_query.update(query)
            resultset = count_query.Run(limit=limit, offset=offset)
            self.results = (x for x in [len([y for y in resultset if y not in self.excluded_pks])])
        else:
            self.results = (x for x in [query.Count(limit=limit, offset=offset)])
        return
    elif self.query.kind == "AVERAGE":
        raise ValueError("AVERAGE not yet supported")
    else:
        self.results = query.Run(limit=limit, offset=offset)

    # Ensure that the results_returned counter is reset
    self.results_returned = 0

    def increment_returned_results(result):
        self.results_returned += 1
        return result

    def convert_key_to_entity(result):
        class FakeEntity(dict):
            def __init__(self, key):
                self._key = key

            def key(self):
                return self._key

        return FakeEntity(result)

    def rename_pk_field(result):
        if result is None:
            return result

        value = result.key().id_or_name()
        result[self.query.model._meta.pk.column] = value
        result[self.query.concrete_model._meta.pk.column] = value
        return result

    def process_extra_selects(result):
        """
            We handle extra selects by generating the new columns from each result.
            We can handle simple boolean logic and operators.
        """
        extra_selects = self.query.extra_selects
        model_fields = self.query.model._meta.fields

        DATE_FORMATS = ("%Y-%m-%d", "%Y-%m-%d %H:%M:%S")

        def process_arg(arg):
            if arg.startswith("'") and arg.endswith("'"):
                # String literal
                arg = arg.strip("'")
                # Check to see if this is a date
                for date in DATE_FORMATS:
                    try:
                        value = datetime.strptime(arg, date)
                        return value
                    except ValueError:
                        continue
                return arg
            elif arg in [x.column for x in model_fields]:
                # Column value
                return result.get(arg)

            # Handle NULL
            if arg.lower() == "null":
                return None
            elif arg.lower() == "true":
                return True
            elif arg.lower() == "false":
                return False

            # See if it's an integer
            try:
                arg = int(arg)
            except (TypeError, ValueError):
                pass

            # Just a plain old literal
            return arg

        for col, select in extra_selects:
            result[col] = select[0](*[process_arg(x) for x in select[1]])

        return result

    def convert_datetime_fields(result):
        fields = [
            x for x in self.query.model._meta.fields
            if x.get_internal_type() in ("DateTimeField", "DateField", "TimeField")
        ]

        for field in fields:
            column = field.column
            if isinstance(result, dict):  # sometimes it's a key!
                value = result.get(column)
            else:
                value = None

            if value is not None:
                result[column] = ensure_datetime(value)
        return result

    def ignore_excluded_pks(result):
        if result.key() in self.query.excluded_pks:
            return None
        return result

    self.results = wrap_result_with_functor(self.results, increment_returned_results)

    # If this is a keys only query, we need to generate a fake entity
    # for each key in the result set
    if self.keys_only:
        self.results = wrap_result_with_functor(self.results, convert_key_to_entity)

    self.results = wrap_result_with_functor(self.results, ignore_excluded_pks)
    self.results = wrap_result_with_functor(self.results, convert_datetime_fields)
    self.results = wrap_result_with_functor(self.results, rename_pk_field)
    self.results = wrap_result_with_functor(self.results, process_extra_selects)

    if self.query.distinct and self.query.extra_selects:
        # If we had extra selects, and we're distinct, we must deduplicate results
        def deduper_factory():
            seen = set()

            def dedupe(result):
                # FIXME: This logic can't be right. I think we need to store the distinct fields
                # somewhere on the query
                if getattr(self.original_query, "annotation_select", None):
                    columns = self.original_query.annotation_select.keys()
                else:
                    columns = self.query.columns or []

                if not columns:
                    return result

                key = tuple([result[x] for x in self._exclude_pk(columns) if x in result])
                if key in seen:
                    return None
                seen.add(key)
                return result

            return dedupe

        self.results = wrap_result_with_functor(self.results, deduper_factory())
def _fetch_results(self, query):
    # If we're manually excluding PKs, and we've specified a limit to the results
    # we need to make sure that we grab more than we were asked for otherwise we could filter
    # out too many! These are again limited back to the original request limit
    # while we're processing the results later

    # Apply the namespace before excluding
    excluded_pks = [
        datastore.Key.from_path(x.kind(), x.id_or_name(), namespace=self.namespace)
        for x in self.query.excluded_pks
    ]

    high_mark = self.query.high_mark
    low_mark = self.query.low_mark

    excluded_pk_count = 0
    if excluded_pks and high_mark:
        excluded_pk_count = len(excluded_pks)
        high_mark += excluded_pk_count

    limit = None if high_mark is None else (high_mark - (low_mark or 0))
    offset = low_mark or 0

    if self.query.kind == "COUNT":
        if excluded_pks:
            # If we're excluding pks, relying on a traditional count won't work
            # so we have two options:
            # 1. Do a keys_only query instead and count the results excluding keys
            # 2. Do a count, then a pk__in=excluded_pks to work out how many to subtract
            # Here I've favoured option one as it means a single RPC call. Testing locally
            # didn't seem to indicate much of a performance difference, even when doing the pk__in
            # with GetAsync while the count was running. That might not be true of prod though so
            # if anyone comes up with a faster idea let me know!
            if isinstance(query, QueryByKeys):
                # If this is a QueryByKeys, just do the datastore Get and count the results
                resultset = (x.key() for x in query.Run(limit=limit, offset=offset) if x)
            else:
                count_query = Query(query._Query__kind, keys_only=True, namespace=self.namespace)
                count_query.update(query)
                resultset = count_query.Run(limit=limit, offset=offset)
            self.results = [len([y for y in resultset if y not in excluded_pks])]
            self.results_returned = 1
        else:
            self.results = [query.Count(limit=limit, offset=offset)]
            self.results_returned = 1
        return
    elif self.query.kind == "AVERAGE":
        raise ValueError("AVERAGE not yet supported")

    # Ensure that the results_returned counter is reset
    self.results_returned = 0
    self.results = []

    seen = set()

    def dedupe(result):
        # FIXME: This logic can't be right. I think we need to store the distinct fields
        # somewhere on the query
        if getattr(self.original_query, "annotation_select", None):
            columns = self.original_query.annotation_select.keys()
        else:
            columns = self.query.columns or []

        if not columns:
            return result

        key = tuple([result[x] for x in self._exclude_pk(columns) if x in result])
        if key in seen:
            return None
        seen.add(key)
        return result

    for entity in query.Run(limit=limit, offset=offset):
        # If this is a keys only query, we need to generate a fake entity
        # for each key in the result set
        if self.keys_only:
            entity = EntityTransforms.convert_key_to_entity(entity)

        entity = EntityTransforms.ignore_excluded_pks(excluded_pks, entity)
        entity = EntityTransforms.convert_datetime_fields(self.query, entity)
        entity = EntityTransforms.rename_pk_field(
            self.query.model, self.query.concrete_model, entity
        )
        entity = EntityTransforms.process_extra_selects(self.query, entity)

        if self.query.distinct and self.query.extra_selects:
            entity = dedupe(entity)

        if entity:
            self.results.append(entity)
            self.results_returned += 1

        if limit and self.results_returned >= (limit - excluded_pk_count):
            break
def tear_down_helper(self):
    query = Query('Greeting', _app=PROJECT_ID)
    results = yield self.datastore.run_query(query)
    yield self.datastore.delete([entity.key() for entity in results])
def _build_query(self):
    self._sanity_check()

    queries = []

    projection = self._exclude_pk(self.query.columns) or None

    query_kwargs = {
        "kind": self.query.concrete_model._meta.db_table,
        "distinct": self.query.distinct or None,
        "keys_only": self.keys_only or None,
        "projection": projection,
    }

    ordering = convert_django_ordering_to_gae(self.query.order_by)

    if self.query.distinct and not ordering:
        # If we specified we wanted a distinct query, but we didn't specify
        # an ordering, we must set the ordering to the distinct columns, otherwise
        # App Engine shouts at us. Nastily. And without remorse.
        ordering = self.query.columns[:]

    # Deal with the no filters case
    if self.query.where is None:
        query = Query(**query_kwargs)
        try:
            query.Order(*ordering)
        except datastore_errors.BadArgumentError as e:
            raise NotSupportedError(e)
        return query

    assert self.query.where

    # Go through the normalized query tree
    for and_branch in self.query.where.children:
        query = Query(**query_kwargs)

        # This deals with the oddity that the root of the tree may well be a leaf
        filters = [and_branch] if and_branch.is_leaf else and_branch.children

        for filter_node in filters:
            lookup = "{} {}".format(filter_node.column, filter_node.operator)

            value = filter_node.value
            # This is a special case. Annoyingly Django's decimal field doesn't
            # ever call ops.get_prep_save or lookup or whatever when you are filtering
            # on a query. It *does* do it on a save, so we basically need to do a
            # conversion here, when really it should be handled elsewhere
            if isinstance(value, decimal.Decimal):
                field = get_field_from_column(self.query.model, filter_node.column)
                value = self.connection.ops.value_to_db_decimal(
                    value, field.max_digits, field.decimal_places)
            elif isinstance(value, basestring):
                value = unicode(value)

            # If there is already a value for this lookup, we need to make the
            # value a list and append the new entry
            if lookup in query and not isinstance(query[lookup], (list, tuple)) and query[lookup] != value:
                query[lookup] = [query[lookup]] + [value]
            else:
                # If the value is a list, we can't just assign it to the query
                # which will treat each element as its own value. So in this
                # case we nest it. This has the side effect of throwing a BadValueError
                # which we could throw ourselves, but the datastore might start supporting
                # list values in lookups.. you never know!
                if isinstance(value, (list, tuple)):
                    query[lookup] = [value]
                else:
                    # Common case: just add the raw where constraint
                    query[lookup] = value

        if ordering:
            try:
                query.Order(*ordering)
            except datastore_errors.BadArgumentError as e:
                # This is the easiest way to detect unsupported orderings
                # ideally we'd detect this at the query normalization stage
                # but it's a lot of hassle, this is much easier and seems to work OK
                raise NotSupportedError(e)

        queries.append(query)

    if can_perform_datastore_get(self.query):
        # Yay for optimizations!
        return QueryByKeys(self.query.model, queries, ordering)

    if len(queries) == 1:
        identifier = query_is_unique(self.query.model, queries[0])
        if identifier:
            # Yay for optimizations!
            return UniqueQuery(identifier, queries[0], self.query.model)

        return queries[0]
    else:
        return datastore.MultiQuery(queries, ordering)
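
# A small sketch of the datastore behaviour the ordering fallback above works
# around: distinct queries are only meaningful together with a projection,
# and the datastore expects them to be ordered on the projected columns,
# which is why the code defaults the ordering to the distinct columns. This
# is a hedged illustration; the kind and property names are hypothetical.
from google.appengine.api import datastore

query = datastore.Query('Greeting', projection=('color',), distinct=True)
query.Order(('color', datastore.Query.ASCENDING))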
def Run(self, limit=None, offset=None):
    """
        Here are the options:

        1. Single key, hit memcache
        2. Multikey projection, async MultiQueries with ancestors chained
        3. Full select, datastore get
    """
    opts = self.queries[0]._Query__query_options
    key_count = len(self.queries_by_key)

    is_projection = False

    results = None
    if key_count == 1:
        # FIXME: Potentially could use get_multi in memcache and then make a query
        # for whatever remains
        key = self.queries_by_key.keys()[0]
        result = caching.get_from_cache_by_key(key)
        if result is not None:
            results = [result]
            cache = False  # Don't update cache, we just got it from there

    if results is None:
        if opts.projection:
            is_projection = True  # Don't cache projection results!

            # Assumes projection ancestor queries are faster than a datastore Get
            # due to lower traffic over the RPC. This should be faster for queries with
            # < 30 keys (which is the most common case), and faster if the entities are
            # larger and there are many results, but there is probably a slower middle ground
            # because of the larger number of RPC calls. Still, if performance is an issue the
            # user can just do a normal get() rather than values/values_list/only/defer

            to_fetch = (offset or 0) + limit if limit else None
            additional_cols = set([x[0] for x in self.ordering if x[0] not in opts.projection])

            multi_query = []
            final_queries = []
            orderings = self.queries[0]._Query__orderings
            for key, queries in self.queries_by_key.iteritems():
                for query in queries:
                    if additional_cols:
                        # We need to include additional orderings in the projection so that we can
                        # sort them in memory. Annoyingly that means reinstantiating the queries
                        query = Query(
                            kind=query._Query__kind,
                            filters=query,
                            projection=list(opts.projection) + list(additional_cols),
                            namespace=self.namespace,
                        )

                    query.Ancestor(key)  # Make this an ancestor query
                    multi_query.append(query)
                    if len(multi_query) == 30:
                        final_queries.append(
                            datastore.MultiQuery(multi_query, orderings).Run(limit=to_fetch))
                        multi_query = []

            if len(multi_query) == 1:
                final_queries.append(multi_query[0].Run(limit=to_fetch))
            elif multi_query:
                final_queries.append(
                    datastore.MultiQuery(multi_query, orderings).Run(limit=to_fetch))

            results = chain(*final_queries)
        else:
            results = datastore.Get(self.queries_by_key.keys())

    def iter_results(results):
        returned = 0
        # This is safe, because Django is fetching all results any way :(
        sorted_results = sorted(
            results, cmp=partial(utils.django_ordering_comparison, self.ordering))
        sorted_results = [result for result in sorted_results if result is not None]
        if not is_projection and sorted_results:
            caching.add_entities_to_cache(
                self.model,
                sorted_results,
                caching.CachingSituation.DATASTORE_GET,
                self.namespace,
            )

        for result in sorted_results:
            if is_projection:
                entity_matches_query = True
            else:
                entity_matches_query = any(
                    utils.entity_matches_query(result, qry)
                    for qry in self.queries_by_key[result.key()]
                )

            if not entity_matches_query:
                continue

            if offset and returned < offset:
                # Skip entities based on offset
                returned += 1
                continue
            else:
                yield _convert_entity_based_on_query_options(result, opts)

                returned += 1

                # If there is a limit, we might be done!
                if limit is not None and returned == (offset or 0) + limit:
                    break

    return iter_results(results)
def key_exists(key):
    qry = Query(keys_only=True)
    qry.Ancestor(key)
    return qry.Count(limit=1) > 0
def _build_gae_query(self):
    """ Build and return the Datastore Query object. """
    query_kwargs = {
        "kind": str(self.db_table)
    }

    if self.distinct:
        query_kwargs["distinct"] = True

    if self.keys_only:
        query_kwargs["keys_only"] = self.keys_only
    elif self.projection:
        query_kwargs["projection"] = self.projection

    query = Query(
        **query_kwargs
    )

    if has_concrete_parents(self.model) and not self.model._meta.proxy:
        query["class ="] = self.model._meta.db_table

    ordering = []
    for order in self.ordering:
        if isinstance(order, int):
            direction = datastore.Query.ASCENDING if order == 1 else datastore.Query.DESCENDING
            order = self.queried_fields[0]
        else:
            direction = datastore.Query.DESCENDING if order.startswith("-") else datastore.Query.ASCENDING
            order = order.lstrip("-")

        if order == self.model._meta.pk.column or order == "pk":
            order = "__key__"
        ordering.append((order, direction))

    def process_and_branch(query, and_branch):
        for column, op, value in and_branch[-1]:
            if column == self.pk_col:
                column = "__key__"

                # FIXME: This EmptyResultSet check should happen during normalization so that Django doesn't count it as a query
                if op == "=" and "__key__ =" in query:
                    # We've already done an exact lookup on a key, this query can't return anything!
                    raise EmptyResultSet()

                if not isinstance(value, datastore.Key):
                    value = get_datastore_key(self.model, value)

            key = "%s %s" % (column, op)
            if key in query:
                query[key] = [query[key], value]
            else:
                query[key] = value

    if self.where:
        queries = []

        # If there is a single filter, we make out it's an OR with only one branch
        # just so that the code below is simpler
        if isinstance(self.where, tuple) and len(self.where) == 3:
            self.where = ('OR', [(u'AND', [self.where])])
        elif isinstance(self.where, tuple) and self.where[0] == 'AND':
            self.where = ('OR', [self.where])
        elif isinstance(self.where, tuple) and self.where[0] == 'OR' \
                and isinstance(self.where[1][0], tuple) and self.where[1][0][0] != 'AND':
            self.where = ('OR', [('AND', [x]) for x in self.where[-1]])

        operator = self.where[0]
        assert operator == 'OR'
        # print query._Query__kind, self.where

        for and_branch in self.where[1]:
            # Duplicate the query for all the "OR"s
            queries.append(Query(**query_kwargs))
            queries[-1].update(query)  # Make sure we copy across filters (e.g. class =)
            try:
                process_and_branch(queries[-1], and_branch)
            except EmptyResultSet:
                return NoOpQuery()

        def all_queries_same_except_key(_queries):
            """
                Returns True if all queries in the list of queries filter on the same thing
                except for "__key__ =". Determine if we can do a Get basically.
            """
            test = _queries[0]

            for qry in _queries:
                if "__key__ =" not in qry.keys():
                    return False

                if qry._Query__kind != test._Query__kind:
                    return False

                if qry.keys() != test.keys():
                    return False

                for k, v in qry.items():
                    if k.startswith("__key__"):
                        continue

                    if v != test[k]:
                        return False
            return True

        if all_queries_same_except_key(queries):
            included_pks = [qry["__key__ ="] for qry in queries]
            # Just use whatever query to determine the matches
            return QueryByKeys(queries[0], included_pks, ordering)
        else:
            if len(queries) > 1:
                # Disable keys only queries for MultiQuery
                new_queries = []
                for query in queries:
                    qry = Query(query._Query__kind, projection=query._Query__query_options.projection)
                    qry.update(query)
                    new_queries.append(qry)

                query = datastore.MultiQuery(new_queries, ordering)
            else:
                query = queries[0]
                query.Order(*ordering)
    else:
        query.Order(*ordering)

    # If the resulting query was unique, then wrap as a unique query which
    # will hit the cache first
    unique_identifier = query_is_unique(self.model, query)
    if unique_identifier:
        return UniqueQuery(unique_identifier, query, self.model)

    DJANGAE_LOG.debug("Select query: {0}, {1}".format(self.model.__name__, self.where))

    return query
def _build_gae_query(self):
    """ Build and return the Datastore Query object. """
    combined_filters = []

    query_kwargs = {}

    if self.keys_only:
        query_kwargs["keys_only"] = self.keys_only
    elif self.projection:
        query_kwargs["projection"] = self.projection

    query = Query(
        self.db_table,
        **query_kwargs
    )

    if has_concrete_parents(self.model) and not self.model._meta.proxy:
        query["class ="] = self.model._meta.db_table

    DJANGAE_LOG.debug("Select query: {0}, {1}".format(self.model.__name__, self.where))

    for column, op, value in self.where:
        if column == self.pk_col:
            column = "__key__"

        final_op = OPERATORS_MAP.get(op)
        if final_op is None:
            if op in REQUIRES_SPECIAL_INDEXES:
                add_special_index(self.model, column, op)  # Add the index if we can (e.g. on dev_appserver)

                if op not in special_indexes_for_column(self.model, column):
                    raise RuntimeError(
                        "There is a missing index in your djangaeidx.yaml - \n\n{0}:\n\t{1}: [{2}]".format(
                            self.model, column, op
                        )
                    )

                indexer = REQUIRES_SPECIAL_INDEXES[op]
                column = indexer.indexed_column_name(column)
                value = indexer.prep_value_for_query(value)
                query["%s =" % column] = value
            else:
                if op == "in":
                    combined_filters.append((column, op, value))
                elif op == "gt_and_lt":
                    combined_filters.append((column, op, value))
                elif op == "isnull":
                    query["%s =" % column] = None
                elif op == "startswith":
                    # You can emulate starts with by adding the last unicode char
                    # to the value, then doing <=. Genius.
                    query["%s >=" % column] = value
                    if isinstance(value, str):
                        value = value.decode("utf-8")
                    value += u'\ufffd'
                    query["%s <=" % column] = value
                else:
                    raise NotImplementedError("Unimplemented operator {0}".format(op))
        else:
            query["%s %s" % (column, final_op)] = value

    ordering = []
    for order in self.ordering:
        if isinstance(order, int):
            direction = datastore.Query.ASCENDING if order == 1 else datastore.Query.DESCENDING
            order = self.queried_fields[0]
        else:
            direction = datastore.Query.DESCENDING if order.startswith("-") else datastore.Query.ASCENDING
            order = order.lstrip("-")

        if order == self.model._meta.pk.column:
            order = "__key__"
        ordering.append((order, direction))

    if combined_filters:
        queries = [query]
        for column, op, value in combined_filters:
            new_queries = []
            for query in queries:
                if op == "in":
                    for val in value:
                        new_query = datastore.Query(self.model._meta.db_table)
                        new_query.update(query)
                        new_query["%s =" % column] = val
                        new_queries.append(new_query)
                elif op == "gt_and_lt":
                    for tmp_op in ("<", ">"):
                        new_query = datastore.Query(self.model._meta.db_table)
                        new_query.update(query)
                        new_query["%s %s" % (column, tmp_op)] = value
                        new_queries.append(new_query)
            queries = new_queries

        query = datastore.MultiQuery(queries, ordering)
    elif ordering:
        query.Order(*ordering)

    return query
def spawn_query(kind, key):
    qry = Query(kind)
    qry["__key__ ="] = key
    return qry
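
# Usage sketch: a `__key__ =` filter turns a kind query into a single-key
# lookup, useful e.g. for checking that an entity still matches its kind's
# other filters. The key below is hypothetical.
from google.appengine.api import datastore

key = datastore.Key.from_path('Greeting', 'Test:1')
entities = list(spawn_query('Greeting', key).Run())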
def _build_gae_query(self):
    """ Build and return the Datastore Query object. """
    query_kwargs = {
        "kind": str(self.db_table)
    }

    if self.distinct:
        if self.projection:
            query_kwargs["distinct"] = True
        else:
            logging.warning("Ignoring distinct on a query where a projection wasn't possible")

    if self.keys_only:
        query_kwargs["keys_only"] = self.keys_only
    elif self.projection:
        query_kwargs["projection"] = self.projection

    query = Query(
        **query_kwargs
    )

    if has_concrete_parents(self.model) and not self.model._meta.proxy:
        query["class ="] = self.model._meta.db_table

    ordering = []
    for order in self.ordering:
        if isinstance(order, (long, int)):
            direction = datastore.Query.ASCENDING if order == 1 else datastore.Query.DESCENDING
            order = self.queried_fields[0]
        else:
            direction = datastore.Query.DESCENDING if order.startswith("-") else datastore.Query.ASCENDING
            order = order.lstrip("-")

        if order == self.model._meta.pk.column or order == "pk":
            order = "__key__"

        # Flip the ordering if someone called reverse() on the queryset
        if not self.original_query.standard_ordering:
            direction = datastore.Query.DESCENDING \
                if direction == datastore.Query.ASCENDING else datastore.Query.ASCENDING

        ordering.append((order, direction))

    def process_and_branch(query, and_branch):
        for child in and_branch[-1]:
            column, op, value = child[1]

            # for column, op, value in and_branch[-1]:
            if column == self.pk_col:
                column = "__key__"

                # FIXME: This EmptyResultSet check should happen during normalization so that Django doesn't count it as a query
                if op == "=" and "__key__ =" in query and query["__key__ ="] != value:
                    # We've already done an exact lookup on a key, this query can't return anything!
                    raise EmptyResultSet()

                if not isinstance(value, datastore.Key):
                    value = get_datastore_key(self.model, value)

            key = "%s %s" % (column, op)
            try:
                if isinstance(value, basestring):
                    value = coerce_unicode(value)

                if key in query:
                    if type(query[key]) == list:
                        if value not in query[key]:
                            query[key].append(value)
                    else:
                        if query[key] != value:
                            query[key] = [query[key], value]
                else:
                    query[key] = value
            except datastore_errors.BadFilterError as e:
                raise NotSupportedError(str(e))

    if self.where:
        queries = []

        # print query._Query__kind, self.where
        for and_branch in self.where[1]:
            # Duplicate the query for all the "OR"s
            queries.append(Query(**query_kwargs))
            queries[-1].update(query)  # Make sure we copy across filters (e.g. class =)
            try:
                if and_branch[0] == "LIT":
                    and_branch = ("AND", [and_branch])
                process_and_branch(queries[-1], and_branch)
            except EmptyResultSet:
                # This is a little hacky but basically if there is only one branch in the or, and it raises
                # an EmptyResultSet, then we just bail, however if there is more than one branch the
                # query might still return something. This logic needs cleaning up and moving to the DNF phase
                if len(self.where[1]) == 1:
                    return NoOpQuery()
                else:
                    queries.pop()

        if not queries:
            return NoOpQuery()

        included_pks = [qry["__key__ ="] for qry in queries if "__key__ =" in qry]
        if len(included_pks) == len(queries):
            # If all queries have a key, we can perform a Get
            return QueryByKeys(self.model, queries, ordering)
        else:
            # Just use whatever query to determine the matches
            if len(queries) > 1:
                # Disable keys only queries for MultiQuery
                new_queries = []
                for i, query in enumerate(queries):
                    if i > 30:
                        raise NotSupportedError(
                            "Too many subqueries (max: 30, got {}). Probably caused by too many IN/!= filters".format(
                                len(queries)
                            )
                        )

                    qry = Query(query._Query__kind, projection=query._Query__query_options.projection)
                    qry.update(query)
                    try:
                        qry.Order(*ordering)
                    except datastore_errors.BadArgumentError as e:
                        raise NotSupportedError(e)

                    new_queries.append(qry)

                query = datastore.MultiQuery(new_queries, ordering)
            else:
                query = queries[0]
                try:
                    query.Order(*ordering)
                except datastore_errors.BadArgumentError as e:
                    raise NotSupportedError(e)
    else:
        try:
            query.Order(*ordering)
        except datastore_errors.BadArgumentError as e:
            raise NotSupportedError(e)

    # If the resulting query was unique, then wrap as a unique query which
    # will hit the cache first
    unique_identifier = query_is_unique(self.model, query)
    if unique_identifier:
        return UniqueQuery(unique_identifier, query, self.model)

    DJANGAE_LOG.debug("Select query: {0}, {1}".format(self.model.__name__, self.where))

    return query
def _fetch_results(self, query):
    # If we're manually excluding PKs, and we've specified a limit to the results
    # we need to make sure that we grab more than we were asked for otherwise we could filter
    # out too many! These are again limited back to the original request limit
    # while we're processing the results later
    high_mark = self.query.high_mark
    low_mark = self.query.low_mark

    excluded_pk_count = 0
    if self.excluded_pks and high_mark:
        excluded_pk_count = len(self.excluded_pks)
        high_mark += excluded_pk_count

    limit = None if high_mark is None else (high_mark - (low_mark or 0))
    offset = low_mark or 0

    if self.query.kind == "COUNT":
        if self.excluded_pks:
            # If we're excluding pks, relying on a traditional count won't work
            # so we have two options:
            # 1. Do a keys_only query instead and count the results excluding keys
            # 2. Do a count, then a pk__in=excluded_pks to work out how many to subtract
            # Here I've favoured option one as it means a single RPC call. Testing locally
            # didn't seem to indicate much of a performance difference, even when doing the pk__in
            # with GetAsync while the count was running. That might not be true of prod though so
            # if anyone comes up with a faster idea let me know!
            count_query = Query(query._Query__kind, keys_only=True)
            count_query.update(query)
            resultset = count_query.Run(limit=limit, offset=offset)
            self.results = (x for x in [len([y for y in resultset if y not in self.excluded_pks])])
        else:
            self.results = (x for x in [query.Count(limit=limit, offset=offset)])
        return
    elif self.query.kind == "AVERAGE":
        raise ValueError("AVERAGE not yet supported")
    else:
        self.results = query.Run(limit=limit, offset=offset)

    # Ensure that the results_returned counter is reset
    self.results_returned = 0

    def increment_returned_results(result):
        self.results_returned += 1
        return result

    def convert_key_to_entity(result):
        class FakeEntity(dict):
            def __init__(self, key):
                self._key = key

            def key(self):
                return self._key

        return FakeEntity(result)

    def rename_pk_field(result):
        if result is None:
            return result

        value = result.key().id_or_name()
        result[self.query.model._meta.pk.column] = value
        result[self.query.concrete_model._meta.pk.column] = value
        return result

    def process_extra_selects(result):
        """
            We handle extra selects by generating the new columns from each result.
            We can handle simple boolean logic and operators.
        """
        extra_selects = self.query.extra_selects
        model_fields = self.query.model._meta.fields

        DATE_FORMATS = ("%Y-%m-%d", "%Y-%m-%d %H:%M:%S")

        def process_arg(arg):
            if arg.startswith("'") and arg.endswith("'"):
                # String literal
                arg = arg.strip("'")
                # Check to see if this is a date
                for date in DATE_FORMATS:
                    try:
                        value = datetime.strptime(arg, date)
                        return value
                    except ValueError:
                        continue
                return arg
            elif arg in [x.column for x in model_fields]:
                # Column value
                return result.get(arg)

            # Handle NULL
            if arg.lower() == 'null':
                return None
            elif arg.lower() == 'true':
                return True
            elif arg.lower() == 'false':
                return False

            # See if it's an integer
            try:
                arg = int(arg)
            except (TypeError, ValueError):
                pass

            # Just a plain old literal
            return arg

        for col, select in extra_selects:
            result[col] = select[0](*[process_arg(x) for x in select[1]])

        return result

    def convert_datetime_fields(result):
        fields = [
            x for x in self.query.model._meta.fields
            if x.get_internal_type() in ("DateTimeField", "DateField", "TimeField")
        ]

        for field in fields:
            column = field.column
            if isinstance(result, dict):  # sometimes it's a key!
                value = result.get(column)
            else:
                value = None

            if value is not None:
                result[column] = ensure_datetime(value)
        return result

    def ignore_excluded_pks(result):
        if result.key() in self.query.excluded_pks:
            return None
        return result

    self.results = wrap_result_with_functor(self.results, increment_returned_results)

    # If this is a keys only query, we need to generate a fake entity
    # for each key in the result set
    if self.keys_only:
        self.results = wrap_result_with_functor(self.results, convert_key_to_entity)

    self.results = wrap_result_with_functor(self.results, ignore_excluded_pks)
    self.results = wrap_result_with_functor(self.results, convert_datetime_fields)
    self.results = wrap_result_with_functor(self.results, rename_pk_field)
    self.results = wrap_result_with_functor(self.results, process_extra_selects)

    if self.query.distinct and self.query.extra_selects:
        # If we had extra selects, and we're distinct, we must deduplicate results
        def deduper_factory():
            seen = set()

            def dedupe(result):
                # FIXME: This logic can't be right. I think we need to store the distinct fields
                # somewhere on the query
                if getattr(self.original_query, "annotation_select", None):
                    columns = self.original_query.annotation_select.keys()
                else:
                    columns = self.query.columns or []

                if not columns:
                    return result

                key = tuple([result[x] for x in self._exclude_pk(columns) if x in result])
                if key in seen:
                    return None
                seen.add(key)
                return result

            return dedupe

        self.results = wrap_result_with_functor(self.results, deduper_factory())