def Run(self, limit, offset):
    opts = self._gae_query._Query__query_options
    if opts.keys_only or opts.projection:
        return self._gae_query.Run(limit=limit, offset=offset)

    ret = caching.get_from_cache(self._identifier)
    if ret is not None and not utils.entity_matches_query(ret, self._gae_query):
        ret = None

    if ret is None:
        # We do a fast keys_only query to get the result
        keys_query = Query(self._gae_query._Query__kind, keys_only=True)
        keys_query.update(self._gae_query)
        keys = keys_query.Run(limit=limit, offset=offset)

        # Do a consistent get so we don't cache stale data, and recheck the
        # result matches the query
        ret = [
            x for x in datastore.Get(keys)
            if utils.entity_matches_query(x, self._gae_query)
        ]
        if len(ret) == 1:
            caching.add_entity_to_cache(
                self._model, ret[0], caching.CachingSituation.DATASTORE_GET)
        return iter(ret)

    return iter([ret])
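
# Sketch of the keys-only-then-Get pattern used on the cache-miss path above.
# This is a hypothetical, standalone illustration (not part of the original
# source): a non-ancestor Query.Run() is only eventually consistent, but a
# datastore.Get() by key is strongly consistent, so fetching keys first and
# then Get()-ing them avoids caching stale entities.
from google.appengine.api import datastore


def fetch_consistently(kind, prop, value, limit=10):
    # Fast, eventually-consistent keys-only query
    keys_query = datastore.Query(kind, keys_only=True)
    keys_query["{} =".format(prop)] = value
    keys = list(keys_query.Run(limit=limit))

    # Strongly consistent get-by-key; entities returned here are current
    return datastore.Get(keys)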
def prep_value_for_query(self, value, model, column, connection):
    """
        Return the set of keys of the parent entities whose associated
        contains-index child entities match the given value.
    """
    if hasattr(value, "isoformat"):
        value = value.isoformat()
    else:
        value = unicode(value)

    value = self.unescape(value)
    if STRIP_PERCENTS:
        # SQL does __contains by doing LIKE %value%
        if value.startswith("%") and value.endswith("%"):
            value = value[1:-1]

    namespace = connection.settings_dict.get("NAMESPACE", "")
    qry = Query(self._generate_kind_name(model, column), keys_only=True, namespace=namespace)
    qry['{} >='.format(self.INDEXED_COLUMN_NAME)] = value
    qry['{} <='.format(self.INDEXED_COLUMN_NAME)] = value + u'\ufffd'

    # We can't filter on the 'name' as part of the query, because the name is the
    # key, and these are child entities of the ancestor entities which they are
    # indexing. As we don't know the keys of the ancestor entities we can't create
    # the complete keys, hence the comparison of `x.name() == self.OPERATOR`
    # happens here in Python.
    resulting_keys = set(
        [x.parent() for x in qry.Run() if x.name() == self.OPERATOR])
    return resulting_keys
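
# Sketch of the u'\ufffd' prefix-range idiom behind the two inequality filters
# above (hypothetical kind/property names, not from the original source): every
# string beginning with `prefix` sorts between prefix and prefix + u'\ufffd',
# so the pair of filters together acts as a "starts with" match.
from google.appengine.api.datastore import Query


def keys_with_prefix(kind, prop, prefix):
    qry = Query(kind, keys_only=True)
    qry['{} >='.format(prop)] = prefix
    # u'\ufffd' bounds the range just past every string starting with `prefix`
    qry['{} <='.format(prop)] = prefix + u'\ufffd'
    return list(qry.Run())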
def cleanup(cls, datastore_key):
    # Kindless query; we don't know the kinds because we don't know all the
    # fields that use contains. But we do know that all the things we need to
    # delete:
    # a.) are a descendant
    # b.) have a key name of whatever OPERATOR is
    qry = Query(keys_only=True, namespace=datastore_key.namespace())
    qry = qry.Ancestor(datastore_key)

    # Delete all the entities matching the ancestor query
    Delete([x for x in qry.Run() if x.name() == cls.OPERATOR])
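
# Minimal illustration of the cleanup strategy above (hypothetical key, not
# from the original source): a Query with no kind matches entities of *every*
# kind, and Ancestor() restricts it to descendants of the given key, so
# together they find all index children regardless of which field created them.
from google.appengine.api.datastore import Key, Query

parent_key = Key.from_path("SomeModel", 1)  # hypothetical indexed entity
qry = Query(keys_only=True)                 # no kind => kindless query
qry = qry.Ancestor(parent_key)              # only descendants of parent_key
descendant_keys = list(qry.Run())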
def _fetch_results(self, query):
    # If we're manually excluding PKs, and we've specified a limit to the results,
    # we need to make sure that we grab more than we were asked for, otherwise we
    # could filter out too many! These are trimmed back to the original request
    # limit while we process the results later.
    high_mark = self.query.high_mark
    low_mark = self.query.low_mark

    excluded_pk_count = 0
    if self.excluded_pks and high_mark:
        excluded_pk_count = len(self.excluded_pks)
        high_mark += excluded_pk_count

    limit = None if high_mark is None else (high_mark - (low_mark or 0))
    offset = low_mark or 0

    if self.query.kind == "COUNT":
        if self.excluded_pks:
            # If we're excluding pks, relying on a traditional count won't work,
            # so we have two options:
            # 1. Do a keys_only query instead and count the results excluding keys
            # 2. Do a count, then a pk__in=excluded_pks to work out how many to subtract
            # Here I've favoured option one as it means a single RPC call. Testing
            # locally didn't seem to indicate much of a performance difference, even
            # when doing the pk__in with GetAsync while the count was running. That
            # might not be true of prod though, so if anyone comes up with a faster
            # idea let me know!
            count_query = Query(query._Query__kind, keys_only=True)
            count_query.update(query)
            resultset = count_query.Run(limit=limit, offset=offset)
            self.results = (x for x in [
                len([y for y in resultset if y not in self.excluded_pks])
            ])
        else:
            self.results = (x for x in [query.Count(limit=limit, offset=offset)])
        return
    elif self.query.kind == "AVERAGE":
        raise ValueError("AVERAGE not yet supported")
    else:
        self.results = query.Run(limit=limit, offset=offset)

    # Ensure that the count of results returned is reset
    self.results_returned = 0

    def increment_returned_results(result):
        self.results_returned += 1
        return result

    def convert_key_to_entity(result):
        class FakeEntity(dict):
            def __init__(self, key):
                self._key = key

            def key(self):
                return self._key

        return FakeEntity(result)

    def rename_pk_field(result):
        if result is None:
            return result

        value = result.key().id_or_name()
        result[self.query.model._meta.pk.column] = value
        result[self.query.concrete_model._meta.pk.column] = value
        return result

    def process_extra_selects(result):
        """
            We handle extra selects by generating the new columns from
            each result. We can handle simple boolean logic and operators.
        """
        extra_selects = self.query.extra_selects
        model_fields = self.query.model._meta.fields

        DATE_FORMATS = ("%Y-%m-%d", "%Y-%m-%d %H:%M:%S")

        def process_arg(arg):
            if arg.startswith("'") and arg.endswith("'"):
                # String literal
                arg = arg.strip("'")
                # Check to see if this is a date
                for date in DATE_FORMATS:
                    try:
                        value = datetime.strptime(arg, date)
                        return value
                    except ValueError:
                        continue
                return arg
            elif arg in [x.column for x in model_fields]:
                # Column value
                return result.get(arg)

            # Handle NULL
            if arg.lower() == 'null':
                return None
            elif arg.lower() == 'true':
                return True
            elif arg.lower() == 'false':
                return False

            # See if it's an integer
            try:
                arg = int(arg)
            except (TypeError, ValueError):
                pass

            # Just a plain old literal
            return arg

        for col, select in extra_selects:
            result[col] = select[0](*[process_arg(x) for x in select[1]])

        return result

    def convert_datetime_fields(result):
        fields = [
            x for x in self.query.model._meta.fields
            if x.get_internal_type() in ("DateTimeField", "DateField", "TimeField")
        ]

        for field in fields:
            column = field.column
            if isinstance(result, dict):  # sometimes it's a key!
                value = result.get(column)
            else:
                value = None

            if value is not None:
                result[column] = ensure_datetime(value)
        return result

    def ignore_excluded_pks(result):
        if result.key() in self.query.excluded_pks:
            return None
        return result

    self.results = wrap_result_with_functor(self.results, increment_returned_results)

    # If this is a keys only query, we need to generate a fake entity
    # for each key in the result set
    if self.keys_only:
        self.results = wrap_result_with_functor(self.results, convert_key_to_entity)

    self.results = wrap_result_with_functor(self.results, ignore_excluded_pks)
    self.results = wrap_result_with_functor(self.results, convert_datetime_fields)
    self.results = wrap_result_with_functor(self.results, rename_pk_field)
    self.results = wrap_result_with_functor(self.results, process_extra_selects)

    if self.query.distinct and self.query.extra_selects:
        # If we had extra selects, and we're distinct, we must deduplicate results
        def deduper_factory():
            seen = set()

            def dedupe(result):
                # FIXME: This logic can't be right. I think we need to store the
                # distinct fields somewhere on the query
                if getattr(self.original_query, "annotation_select", None):
                    columns = self.original_query.annotation_select.keys()
                else:
                    columns = self.query.columns or []

                if not columns:
                    return result

                key = tuple([
                    result[x] for x in self._exclude_pk(columns) if x in result
                ])
                if key in seen:
                    return None
                seen.add(key)
                return result

            return dedupe

        self.results = wrap_result_with_functor(self.results, deduper_factory())
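
# The chain of wrap_result_with_functor() calls above builds a lazy pipeline:
# each raw datastore result is passed through every transform in turn as the
# results are iterated. A plausible implementation of the helper (the real one
# lives elsewhere in the codebase; this sketch assumes a transform returning
# None means "drop this row"):

def wrap_result_with_functor(results, func):
    for result in results:
        result = func(result)
        if result is not None:
            yield result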
def _fetch_results(self, query):
    # If we're manually excluding PKs, and we've specified a limit to the results,
    # we need to make sure that we grab more than we were asked for, otherwise we
    # could filter out too many! These are trimmed back to the original request
    # limit while we process the results later.

    # Apply the namespace before excluding
    excluded_pks = [
        rpc.Key.from_path(x.kind(), x.id_or_name(), namespace=self.namespace)
        for x in self.query.excluded_pks
    ]

    high_mark = self.query.high_mark
    low_mark = self.query.low_mark

    excluded_pk_count = 0
    if excluded_pks and high_mark:
        excluded_pk_count = len(excluded_pks)
        high_mark += excluded_pk_count

    limit = None if high_mark is None else (high_mark - (low_mark or 0))
    offset = low_mark or 0

    if self.query.kind == "COUNT":
        if excluded_pks:
            # If we're excluding pks, relying on a traditional count won't work,
            # so we have two options:
            # 1. Do a keys_only query instead and count the results excluding keys
            # 2. Do a count, then a pk__in=excluded_pks to work out how many to subtract
            # Here I've favoured option one as it means a single RPC call. Testing
            # locally didn't seem to indicate much of a performance difference, even
            # when doing the pk__in with GetAsync while the count was running. That
            # might not be true of prod though, so if anyone comes up with a faster
            # idea let me know!
            if isinstance(query, meta_queries.QueryByKeys):
                # If this is a QueryByKeys, just do the datastore Get and count the results
                resultset = (
                    x.key() for x in query.Run(limit=limit, offset=offset) if x
                )
            else:
                count_query = Query(
                    query._Query__kind, keys_only=True, namespace=self.namespace
                )
                count_query.update(query)
                resultset = count_query.Run(limit=limit, offset=offset)
            self.results = [len([y for y in resultset if y not in excluded_pks])]
            self.results_returned = 1
        else:
            self.results = [query.Count(limit=limit, offset=offset)]
            self.results_returned = 1
        return
    elif self.query.kind == "AVERAGE":
        raise ValueError("AVERAGE not yet supported")

    # Ensure that the count of results returned is reset
    self.results_returned = 0
    self.results = []

    seen = set()

    def dedupe(result):
        # FIXME: This logic can't be right. I think we need to store the distinct
        # fields somewhere on the query
        if getattr(self.original_query, "annotation_select", None):
            columns = self.original_query.annotation_select.keys()
        else:
            columns = self.query.columns or []

        if not columns:
            return result

        key = tuple([result[x] for x in self._exclude_pk(columns) if x in result])
        if key in seen:
            return None
        seen.add(key)
        return result

    for entity in query.Run(limit=limit, offset=offset):
        # If this is a keys only query, we need to generate a fake entity
        # for each key in the result set
        if self.keys_only:
            entity = EntityTransforms.convert_key_to_entity(entity)

        entity = EntityTransforms.ignore_excluded_pks(excluded_pks, entity)
        entity = EntityTransforms.convert_datetime_fields(self.query, entity)
        entity = EntityTransforms.fix_projected_values_type(self.query, entity)
        entity = EntityTransforms.rename_pk_field(
            self.query.model, self.query.concrete_model, entity
        )
        entity = EntityTransforms.process_extra_selects(self.query, entity)

        if self.query.distinct and self.query.extra_selects:
            entity = dedupe(entity)

        if entity:
            self.results.append(entity)
            self.results_returned += 1

            if limit and self.results_returned >= (limit - excluded_pk_count):
                break
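
# Worked example of the over-fetch arithmetic above, with hypothetical numbers:
# for a Django slice [5:15] (low_mark=5, high_mark=15) and 3 excluded PKs:
#
#   excluded_pk_count = 3
#   high_mark += 3            # 18: fetch extra rows to survive the PK filter
#   limit  = 18 - 5           # 13 candidate rows instead of 10
#   offset = 5
#
# The result loop then stops once results_returned >= limit - excluded_pk_count
# (13 - 3 = 10), trimming the over-fetch back to the ten rows actually requested.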