Example #1
    def Run(self, limit, offset):
        opts = self._gae_query._Query__query_options
        if opts.keys_only or opts.projection:
            return self._gae_query.Run(limit=limit, offset=offset)

        ret = caching.get_from_cache(self._identifier)
        if ret is not None and not utils.entity_matches_query(
                ret, self._gae_query):
            ret = None

        if ret is None:
            # We do a fast keys_only query to get the keys of the result
            keys_query = Query(self._gae_query._Query__kind, keys_only=True)
            keys_query.update(self._gae_query)
            keys = keys_query.Run(limit=limit, offset=offset)

            # Do a consistent get so we don't cache stale data, and recheck the result matches the query
            ret = [
                x for x in datastore.Get(keys)
                if utils.entity_matches_query(x, self._gae_query)
            ]
            if len(ret) == 1:
                caching.add_entity_to_cache(
                    self._model, ret[0],
                    caching.CachingSituation.DATASTORE_GET)
            return iter(ret)

        return iter([ret])
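
The fast path above pairs a cheap keys_only query with a strongly consistent datastore.Get. A minimal sketch of that pattern, assuming the old google.appengine.api.datastore API; the "Book" kind and "title =" filter are hypothetical:

    from google.appengine.api import datastore
    from google.appengine.api.datastore import Query

    # keys_only queries are cheap: they return keys without entity payloads
    keys_query = Query("Book", keys_only=True)  # hypothetical kind
    keys_query["title ="] = "Dune"
    keys = list(keys_query.Run(limit=10, offset=0))

    # Get-by-key is strongly consistent, so anything cached from it is fresh,
    # unlike the eventually consistent results of a normal query
    entities = [x for x in datastore.Get(keys) if x is not None]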
Example #2
    def prep_value_for_query(self, value, model, column, connection):
        """
            Return a list of IDs of the associated contains models, these should
            match up with the IDs from the parent entities
        """

        if hasattr(value, "isoformat"):
            value = value.isoformat()
        else:
            value = unicode(value)
        value = self.unescape(value)

        if STRIP_PERCENTS:
            # SQL does __contains by doing LIKE %value%
            if value.startswith("%") and value.endswith("%"):
                value = value[1:-1]

        namespace = connection.settings_dict.get("NAMESPACE", "")
        qry = Query(self._generate_kind_name(model, column),
                    keys_only=True,
                    namespace=namespace)
        qry['{} >='.format(self.INDEXED_COLUMN_NAME)] = value
        qry['{} <='.format(self.INDEXED_COLUMN_NAME)] = value + u'\ufffd'

        # We can't filter on the 'name' as part of the query: the name is part
        # of the key, and these are child entities of the ancestor entities
        # they index. As we don't know the keys of those ancestor entities we
        # can't construct the complete keys, hence the comparison of
        # `x.name() == self.OPERATOR` happens here in Python.
        return {x.parent() for x in qry.Run() if x.name() == self.OPERATOR}
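
The >=/<= pair above implements a prefix match: the datastore has no "starts with" operator, so the range is closed with u'\ufffd', the highest meaningfully ordered code point. A hedged sketch of the trick in isolation; the kind and property names are hypothetical:

    from google.appengine.api.datastore import Query

    prefix = u"dun"
    qry = Query("_idx_book_title", keys_only=True)  # hypothetical index kind
    qry["value >="] = prefix
    qry["value <="] = prefix + u"\ufffd"

    # every indexed value starting with `prefix` sorts inside this range
    matching_keys = list(qry.Run())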
Example #3
    def cleanup(cls, datastore_key):

        # Kindless query: we don't know the kinds because we don't know all
        # the fields that use contains. But we do know that everything we need
        # to delete:
        # a.) is a descendant of datastore_key, and
        # b.) has a key name equal to whatever OPERATOR is

        qry = Query(keys_only=True, namespace=datastore_key.namespace())
        qry = qry.Ancestor(datastore_key)

        # Delete all the entities matching the ancestor query
        Delete([x for x in qry.Run() if x.name() == cls.OPERATOR])
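
A minimal sketch of the kindless ancestor query used above, with a hypothetical "Book" parent key; Ancestor() restricts the query to the given key and all of its descendants, regardless of kind:

    from google.appengine.api import datastore
    from google.appengine.api.datastore import Query

    parent_key = datastore.Key.from_path("Book", 123)  # hypothetical parent

    qry = Query(keys_only=True, namespace=parent_key.namespace())
    qry = qry.Ancestor(parent_key)

    # cleanup() above additionally filters on the key name; this sketch just
    # deletes every descendant (the ancestor itself appears in the results)
    datastore.Delete([k for k in qry.Run() if k != parent_key])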
Example #4
    def _fetch_results(self, query):
        # If we're manually excluding PKs and we've specified a limit on the
        # results, we need to make sure that we grab more than we were asked
        # for; otherwise we could filter out too many! The results are trimmed
        # back to the original request limit while we're processing them later.

        high_mark = self.query.high_mark
        low_mark = self.query.low_mark

        excluded_pk_count = 0
        if self.excluded_pks and high_mark:
            excluded_pk_count = len(self.excluded_pks)
            high_mark += excluded_pk_count

        limit = None if high_mark is None else (high_mark - (low_mark or 0))
        offset = low_mark or 0

        if self.query.kind == "COUNT":
            if self.excluded_pks:
                # If we're excluding pks, relying on a traditional count won't work
                # so we have two options:
                # 1. Do a keys_only query instead and count the results excluding keys
                # 2. Do a count, then a pk__in=excluded_pks to work out how many to subtract
                # Here I've favoured option one as it means a single RPC call. Testing locally
                # didn't seem to indicate much of a performance difference, even when doing the pk__in
                # with GetAsync while the count was running. That might not be true of prod though so
                # if anyone comes up with a faster idea let me know!
                count_query = Query(query._Query__kind, keys_only=True)
                count_query.update(query)
                resultset = count_query.Run(limit=limit, offset=offset)
                self.results = (x for x in [
                    len([y for y in resultset if y not in self.excluded_pks])
                ])
            else:
                self.results = (
                    x for x in [query.Count(limit=limit, offset=offset)])
            return
        elif self.query.kind == "AVERAGE":
            raise ValueError("AVERAGE not yet supported")
        else:
            self.results = query.Run(limit=limit, offset=offset)

        # Ensure that the count of returned results is reset
        self.results_returned = 0

        def increment_returned_results(result):
            self.results_returned += 1
            return result

        def convert_key_to_entity(result):
            class FakeEntity(dict):
                def __init__(self, key):
                    self._key = key

                def key(self):
                    return self._key

            return FakeEntity(result)

        def rename_pk_field(result):
            if result is None:
                return result

            value = result.key().id_or_name()
            result[self.query.model._meta.pk.column] = value
            result[self.query.concrete_model._meta.pk.column] = value
            return result

        def process_extra_selects(result):
            """
                We handle extra selects by generating the new columns from
                each result. We can handle simple boolean logic and operators.
            """
            extra_selects = self.query.extra_selects
            model_fields = self.query.model._meta.fields

            DATE_FORMATS = ("%Y-%m-%d", "%Y-%m-%d %H:%M:%S")

            def process_arg(arg):
                if arg.startswith("'") and arg.endswith("'"):
                    # String literal
                    arg = arg.strip("'")
                    # Check to see if this is a date
                    for fmt in DATE_FORMATS:
                        try:
                            value = datetime.strptime(arg, fmt)
                            return value
                        except ValueError:
                            continue
                    return arg
                elif arg in [x.column for x in model_fields]:
                    # Column value
                    return result.get(arg)

                # Handle NULL
                if arg.lower() == 'null':
                    return None
                elif arg.lower() == 'true':
                    return True
                elif arg.lower() == 'false':
                    return False

                # See if it's an integer
                try:
                    arg = int(arg)
                except (TypeError, ValueError):
                    pass

                # Just a plain old literal
                return arg

            for col, select in extra_selects:
                result[col] = select[0](*[process_arg(x) for x in select[1]])

            return result

        def convert_datetime_fields(result):
            fields = [
                x for x in self.query.model._meta.fields
                if x.get_internal_type() in ("DateTimeField", "DateField",
                                             "TimeField")
            ]

            for field in fields:
                column = field.column
                if isinstance(result, dict):  # sometimes it's a key!
                    value = result.get(column)
                else:
                    value = None

                if value is not None:
                    result[column] = ensure_datetime(value)
            return result

        def ignore_excluded_pks(result):
            if result.key() in self.query.excluded_pks:
                return None
            return result

        self.results = wrap_result_with_functor(self.results,
                                                increment_returned_results)

        # If this is a keys only query, we need to generate a fake entity
        # for each key in the result set
        if self.keys_only:
            self.results = wrap_result_with_functor(self.results,
                                                    convert_key_to_entity)

        self.results = wrap_result_with_functor(self.results,
                                                ignore_excluded_pks)
        self.results = wrap_result_with_functor(self.results,
                                                convert_datetime_fields)
        self.results = wrap_result_with_functor(self.results, rename_pk_field)
        self.results = wrap_result_with_functor(self.results,
                                                process_extra_selects)

        if self.query.distinct and self.query.extra_selects:
            # If we had extra selects, and we're distinct, we must deduplicate results
            def deduper_factory():
                seen = set()

                def dedupe(result):
                    # FIXME: This logic can't be right. I think we need to store the distinct fields
                    # somewhere on the query
                    if getattr(self.original_query, "annotation_select", None):
                        columns = self.original_query.annotation_select.keys()
                    else:
                        columns = self.query.columns or []
                    if not columns:
                        return result

                    key = tuple([
                        result[x] for x in self._exclude_pk(columns)
                        if x in result
                    ])
                    if key in seen:
                        return None
                    seen.add(key)
                    return result

                return dedupe

            self.results = wrap_result_with_functor(self.results,
                                                    deduper_factory())
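
wrap_result_with_functor is not defined in this example; judging from how it is chained above, it presumably applies a transform lazily to each item of the result iterable (the transforms shown tolerate a None passing through). A minimal sketch under that assumption:

    def wrap_result_with_functor(results, functor):
        # lazily apply `functor` to each result as it is iterated
        for result in results:
            yield functor(result)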
Example #5
File: commands.py Project: vzts/djangae
    def _fetch_results(self, query):
        # If we're manually excluding PKs and we've specified a limit on the
        # results, we need to make sure that we grab more than we were asked
        # for; otherwise we could filter out too many! The results are trimmed
        # back to the original request limit while we're processing them later.
        # Apply the namespace before excluding
        excluded_pks = [
            rpc.Key.from_path(x.kind(), x.id_or_name(), namespace=self.namespace)
            for x in self.query.excluded_pks
        ]

        high_mark = self.query.high_mark
        low_mark = self.query.low_mark

        excluded_pk_count = 0
        if excluded_pks and high_mark:
            excluded_pk_count = len(excluded_pks)
            high_mark += excluded_pk_count

        limit = None if high_mark is None else (high_mark - (low_mark or 0))
        offset = low_mark or 0

        if self.query.kind == "COUNT":
            if excluded_pks:
                # If we're excluding pks, relying on a traditional count won't work
                # so we have two options:
                # 1. Do a keys_only query instead and count the results excluding keys
                # 2. Do a count, then a pk__in=excluded_pks to work out how many to subtract
                # Here I've favoured option one as it means a single RPC call. Testing locally
                # didn't seem to indicate much of a performance difference, even when doing the pk__in
                # with GetAsync while the count was running. That might not be true of prod though so
                # if anyone comes up with a faster idea let me know!
                if isinstance(query, meta_queries.QueryByKeys):
                    # If this is a QueryByKeys, just do the datastore Get and count the results
                    resultset = (x.key() for x in query.Run(limit=limit, offset=offset) if x)
                else:
                    count_query = Query(
                        query._Query__kind, keys_only=True, namespace=self.namespace
                    )
                    count_query.update(query)
                    resultset = count_query.Run(limit=limit, offset=offset)
                self.results = [len([y for y in resultset if y not in excluded_pks])]
                self.results_returned = 1
            else:
                self.results = [query.Count(limit=limit, offset=offset)]
                self.results_returned = 1
            return
        elif self.query.kind == "AVERAGE":
            raise ValueError("AVERAGE not yet supported")

        # Ensure that the count of returned results is reset
        self.results_returned = 0
        self.results = []

        seen = set()

        def dedupe(result):
            # FIXME: This logic can't be right. I think we need to store the distinct fields
            # somewhere on the query
            if getattr(self.original_query, "annotation_select", None):
                columns = self.original_query.annotation_select.keys()
            else:
                columns = self.query.columns or []
            if not columns:
                return result

            key = tuple([result[x] for x in self._exclude_pk(columns) if x in result])
            if key in seen:
                return None
            seen.add(key)
            return result

        for entity in query.Run(limit=limit, offset=offset):
            # If this is a keys only query, we need to generate a fake entity
            # for each key in the result set
            if self.keys_only:
                entity = EntityTransforms.convert_key_to_entity(entity)

            entity = EntityTransforms.ignore_excluded_pks(excluded_pks, entity)
            entity = EntityTransforms.convert_datetime_fields(self.query, entity)
            entity = EntityTransforms.fix_projected_values_type(self.query, entity)
            entity = EntityTransforms.rename_pk_field(
                self.query.model, self.query.concrete_model, entity
            )
            entity = EntityTransforms.process_extra_selects(self.query, entity)

            if self.query.distinct and self.query.extra_selects:
                entity = dedupe(entity)

            if entity:
                self.results.append(entity)
                self.results_returned += 1

            if limit and self.results_returned >= (limit - excluded_pk_count):
                break
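
A worked example of the over-fetch arithmetic at the top of both versions of _fetch_results: slicing rows 10..20 with 3 excluded PKs fetches 13 rows from offset 10, so the original 10 rows can still be returned after the exclusion filter drops up to 3 of them:

    low_mark, high_mark, excluded_pk_count = 10, 20, 3
    high_mark += excluded_pk_count        # over-fetch by the exclusion count
    limit = high_mark - (low_mark or 0)   # 13 rows fetched...
    offset = low_mark or 0                # ...starting at row 10
    assert (limit, offset) == (13, 10)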