def Run(self, limit, offset):
    opts = self._gae_query._Query__query_options
    if opts.keys_only or opts.projection:
        return self._gae_query.Run(limit=limit, offset=offset)

    ret = caching.get_from_cache(self._identifier, self._namespace)
    if ret is not None and not utils.entity_matches_query(ret, self._gae_query):
        ret = None

    if ret is None:
        # We do a fast keys_only query to get the result
        keys_query = Query(self._gae_query._Query__kind, keys_only=True, namespace=self._namespace)
        keys_query.update(self._gae_query)
        keys = keys_query.Run(limit=limit, offset=offset)

        # Do a consistent get so we don't cache stale data, and recheck the result matches the query
        ret = [x for x in datastore.Get(keys) if x and utils.entity_matches_query(x, self._gae_query)]
        if len(ret) == 1:
            caching.add_entities_to_cache(
                self._model,
                [ret[0]],
                caching.CachingSituation.DATASTORE_GET,
                self._namespace,
            )
        return iter(ret)

    return iter([ret])
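
# A minimal standalone sketch (assuming the App Engine Python SDK) of the
# "fast keys_only query, then consistent Get" pattern used above: the
# keys_only query is cheap, and the follow-up datastore.Get returns strongly
# consistent entities, with None placeholders for keys deleted in between.
# `consistent_fetch` is a hypothetical helper, not part of the code above.
from google.appengine.api import datastore

def consistent_fetch(keys_only_query, limit=None, offset=None):
    keys = list(keys_only_query.Run(limit=limit, offset=offset))
    # Drop the None placeholders the datastore returns for missing keys
    return [x for x in datastore.Get(keys) if x is not None]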
def prep_value_for_query(self, value, model, column, connection):
    """
        Return the set of parent entity keys whose associated contains-index
        entities match the given value
    """
    if hasattr(value, "isoformat"):
        value = value.isoformat()
    else:
        value = unicode(value)
    value = self.unescape(value)
    if STRIP_PERCENTS:
        # SQL does __contains by doing LIKE %value%
        if value.startswith("%") and value.endswith("%"):
            value = value[1:-1]

    namespace = connection.settings_dict.get("NAMESPACE", "")
    qry = Query(self._generate_kind_name(model, column), keys_only=True, namespace=namespace)
    qry['{} >='.format(self.INDEXED_COLUMN_NAME)] = value
    qry['{} <='.format(self.INDEXED_COLUMN_NAME)] = value + u'\ufffd'

    # We can't filter on the 'name' as part of the query, because the name is the key and these
    # are child entities of the ancestor entities which they are indexing, and as we don't know
    # the keys of the ancestor entities we can't create the complete keys, hence the comparison
    # of `x.name() == self.OPERATOR` happens here in python
    resulting_keys = set([x.parent() for x in qry.Run() if x.name() == self.OPERATOR])
    return resulting_keys
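
# The pair of inequality filters above is the conventional App Engine
# prefix-match trick: everything from `value` up to `value + u'\ufffd'`
# (a sentinel that sorts after any character appearing in the data) falls
# inside the range. A minimal sketch of the same trick in isolation; the
# kind and column here are hypothetical:
from google.appengine.api import datastore

def prefix_query(kind, column, prefix):
    qry = datastore.Query(kind, keys_only=True)
    qry['%s >=' % column] = prefix
    qry['%s <=' % column] = prefix + u'\ufffd'
    return qry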
def Run(self, limit, offset):
    opts = self._gae_query._Query__query_options
    if opts.keys_only or opts.projection:
        return self._gae_query.Run(limit=limit, offset=offset)

    ret = caching.get_from_cache(self._identifier)
    if ret is not None and not utils.entity_matches_query(ret, self._gae_query):
        ret = None

    if ret is None:
        # We do a fast keys_only query to get the result
        keys_query = Query(self._gae_query._Query__kind, keys_only=True)
        keys_query.update(self._gae_query)
        keys = keys_query.Run(limit=limit, offset=offset)

        # Do a consistent get so we don't cache stale data, and recheck the result matches the query.
        # Guard against None entries: Get returns None placeholders for keys deleted in between.
        ret = [x for x in datastore.Get(keys) if x and utils.entity_matches_query(x, self._gae_query)]
        if len(ret) == 1:
            caching.add_entity_to_cache(self._model, ret[0], caching.CachingSituation.DATASTORE_GET)
        return iter(ret)

    return iter([ret])
def cleanup(cls, datastore_key):
    # Kindless query, we don't know the kinds because we don't know all the fields
    # that use contains. But, we do know that all the things we need to delete are:
    # a.) A descendant
    # b.) Have a key name of whatever OPERATOR is
    qry = Query(keys_only=True, namespace=datastore_key.namespace())
    qry = qry.Ancestor(datastore_key)

    # Delete all the entities matching the ancestor query
    Delete([x for x in qry.Run() if x.name() == cls.OPERATOR])
def table_names(self):
    """ Returns a list of names of all tables that exist in the database. """
    from google.appengine.api.datastore import Query
    return [kind.key().name() for kind in Query(kind='__kind__').Run()]
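
# `__kind__` is one of the datastore's built-in metadata kinds: each entity's
# key name is a kind name in the current namespace. A self-contained sketch
# of the same listing that skips the datastore's own internal kinds:
from google.appengine.api.datastore import Query

user_kinds = [
    k.key().name()
    for k in Query(kind='__kind__').Run()
    if not k.key().name().startswith('__')
]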
def tear_down_helper(self):
    query = Query('Greeting', _app=PROJECT_ID)
    results = yield self.datastore.run_query(query)
    batch = []
    for entity in results:
        batch.append(entity.key())
        if len(batch) == self.BATCH_SIZE:
            yield self.datastore.delete(batch)
            batch = []
    yield self.datastore.delete(batch)
def test_cassandra_page_size(self):
    entity_count = self.CASSANDRA_PAGE_SIZE + 1
    for _ in range(entity_count):
        entity = Entity('Greeting', _app=PROJECT_ID)
        yield self.datastore.put(entity)

    query = Query('Greeting', _app=PROJECT_ID)
    results = yield self.datastore.run_query(query)
    self.assertEqual(len(results), entity_count)
def _combine_filters(self, column, db_type, op_values):
    gae_query = self.gae_query
    combined = []
    for query in gae_query:
        for op, value in op_values:
            self.gae_query = [Query(self.db_table, keys_only=self.pks_only)]
            self.gae_query[0].update(query)
            self._add_filter(column, op, db_type, value)
            combined.append(self.gae_query[0])
    self.gae_query = combined
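
# _combine_filters above expands each (op, value) pair into its own Query
# because the datastore has no native OR: an IN filter becomes one equality
# query per value, merged client-side by MultiQuery. A minimal standalone
# sketch of that expansion (kind and column names are hypothetical):
from google.appengine.api import datastore

def queries_for_in(kind, column, values):
    subqueries = []
    for value in values:
        q = datastore.Query(kind)
        q['%s =' % column] = value
        subqueries.append(q)
    # The datastore rejects MultiQueries with more than 30 subqueries
    return datastore.MultiQuery(subqueries, [])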
def delete_all_entities():
    for namespace in get_namespaces():
        set_namespace(namespace)
        for kind in get_kinds():
            if kind.startswith('__'):
                continue
            while True:
                data = Query(kind=kind, keys_only=True).Get(200)
                if not data:
                    break
                Delete(data)
def test_merge_query_with_null(locations):
    datastore = Datastore(locations, PROJECT_ID)
    query = Query('Greeting', _app=PROJECT_ID)
    results = yield datastore.run_query(query)
    for entity in results:
        yield datastore.delete([entity.key()])

    entity = Entity('Greeting', _app=PROJECT_ID)
    create_time = datetime.datetime.now()
    entity['content'] = None
    entity['create_time'] = create_time
    yield datastore.put(entity)

    entity = Entity('Greeting', _app=PROJECT_ID)
    entity['content'] = 'hi'
    entity['create_time'] = create_time
    yield datastore.put(entity)

    entity = Entity('Greeting', _app=PROJECT_ID)
    entity['create_time'] = None
    yield datastore.put(entity)

    query = Query('Greeting', {'content =': None, 'create_time =': create_time},
                  _app=PROJECT_ID)
    response = yield datastore.run_query(query)
    if len(response) != 1:
        raise Exception('Expected 1 result. Received: {}'.format(response))

    entity = response[0]
    if entity['content'] is not None or entity['create_time'] != create_time:
        raise Exception('Unexpected entity: {}'.format(entity))

    query = Query('Greeting', _app=PROJECT_ID)
    results = yield datastore.run_query(query)
    for entity in results:
        yield datastore.delete([entity.key()])
def __init__(self, compiler, fields):
    super(GAEQuery, self).__init__(compiler, fields)
    self.inequality_field = None
    self.included_pks = None
    self.excluded_pks = ()
    self.has_negated_exact_filter = False
    self.ordering = []
    self.db_table = self.query.get_meta().db_table
    self.pks_only = (len(fields) == 1 and fields[0].primary_key)
    start_cursor = getattr(self.query, '_gae_start_cursor', None)
    end_cursor = getattr(self.query, '_gae_end_cursor', None)
    self.gae_query = [Query(self.db_table, keys_only=self.pks_only,
                            cursor=start_cursor, end_cursor=end_cursor)]
def _start_task(self):
    assert not mapper_library.is_mapper_running(self.identifier, self.namespace), \
        "Migration started by separate thread?"

    query = Query(self.map_kind, namespace=self.namespace)
    return mapper_library.start_mapping(
        self.identifier, query, self,
        operation_method="_wrapped_map_entity",
        shard_count=self.shard_count,
        entities_per_task=self.entities_per_task,
        queue=self.queue,
    )
def test_cassandra_page_size(self):
    entity_count = self.CASSANDRA_PAGE_SIZE + 1
    batch = []
    for _ in range(entity_count):
        entity = Entity('Greeting', _app=PROJECT_ID)
        batch.append(entity)
        if len(batch) == self.BATCH_SIZE:
            yield self.datastore.put_multi(batch)
            batch = []
    yield self.datastore.put_multi(batch)

    query = Query('Greeting', _app=PROJECT_ID)
    results = yield self.datastore.run_query(query)
    self.assertEqual(len(results), entity_count)
def __init__(self, compiler, fields):
    super(GAEQuery, self).__init__(compiler, fields)
    self.inequality_field = None
    self.pk_filters = None
    self.excluded_pks = ()
    self.has_negated_exact_filter = False
    self.ordering = ()
    self.gae_ordering = []
    pks_only = False
    if len(fields) == 1 and fields[0].primary_key:
        pks_only = True
    self.db_table = self.query.get_meta().db_table
    self.pks_only = pks_only
    self.gae_query = [Query(self.db_table, keys_only=self.pks_only)]
def delete_all_entities():
    from google.appengine.api.datastore import Delete, Query
    from google.appengine.ext.db.metadata import get_kinds, get_namespaces
    from google.appengine.api.namespace_manager import set_namespace

    for namespace in get_namespaces():
        set_namespace(namespace)
        for kind in get_kinds():
            if kind.startswith('__'):
                continue
            while True:
                data = Query(kind=kind, keys_only=True).Get(200)
                if not data:
                    break
                Delete(data)
def test_separator_in_name(self):
    entity = Entity('Greeting', name='Test:1', _app=PROJECT_ID)
    create_time = datetime.datetime.utcnow()
    entity['color'] = 'red'
    entity['create_time'] = create_time
    yield self.datastore.put(entity)

    query = Query('Greeting', {'color =': 'red', 'create_time =': create_time},
                  _app=PROJECT_ID)
    response = yield self.datastore.run_query(query)
    self.assertEqual(len(response), 1)

    entity = response[0]
    self.assertEqual(entity['color'], 'red')
    self.assertEqual(entity['create_time'], create_time)
def execute(self):
    self.select.execute()

    # This is a little bit more inefficient than just doing a keys_only query and
    # sending it to delete, but I think this is the sacrifice to make for the unique caching layer
    keys = []
    for entity in QueryByKeys(
        Query(self.select.model._meta.db_table),
        [x.key() for x in self.select.results],
        []
    ).Run():
        keys.append(entity.key())
        constraints.release(self.select.model, entity)
        caching.remove_entity_from_context_cache_by_key(entity.key())
    datastore.Delete(keys)
def test_merge_query_with_null(self):
    entity = Entity('Greeting', _app=PROJECT_ID)
    create_time = datetime.datetime.now()
    entity['content'] = None
    entity['create_time'] = create_time
    yield self.datastore.put(entity)

    entity = Entity('Greeting', _app=PROJECT_ID)
    entity['content'] = 'hi'
    entity['create_time'] = create_time
    yield self.datastore.put(entity)

    query = Query('Greeting', {'content =': None, 'create_time =': create_time},
                  _app=PROJECT_ID)
    response = yield self.datastore.run_query(query)
    self.assertEqual(len(response), 1)

    entity = response[0]
    self.assertEqual(entity['content'], None)
    self.assertEqual(entity['create_time'], create_time)
def test_batch_put_index_entries(self):
    entities = []
    entity = Entity('Greeting', name='duplicate', _app=PROJECT_ID)
    entity['content'] = 'first entry'
    entities.append(entity)
    entity = Entity('Greeting', name='duplicate', _app=PROJECT_ID)
    entity['content'] = 'second entry'
    entities.append(entity)
    yield self.datastore.put_multi(entities)

    # Ensure the last specified mutation is the one that matters.
    query = Query('Greeting', projection=['content'], _app=PROJECT_ID)
    response = yield self.datastore.run_query(query)
    self.assertEqual(len(response), 1)

    entity = response[0]
    self.assertEqual(entity['content'], 'second entry')
def test_separator_in_name(locations):
    datastore = Datastore(locations, PROJECT_ID)
    entity = Entity('Greeting', name='Test:1', _app=PROJECT_ID)
    create_time = datetime.datetime.utcnow()
    entity['color'] = 'red'
    entity['create_time'] = create_time
    yield datastore.put(entity)

    query = Query('Greeting', {'color =': 'red', 'create_time =': create_time},
                  _app=PROJECT_ID)
    response = yield datastore.run_query(query)
    if len(response) != 1:
        raise Exception('Expected 1 result. Received: {}'.format(response))

    entity = response[0]
    if entity['color'] != 'red' or entity['create_time'] != create_time:
        raise Exception('Unexpected entity: {}'.format(entity))
def table_names(self, cursor=None):
    """ Returns a list of names of all tables that exist in the database. """
    return [kind.key().name() for kind in Query(kind='__kind__').Run()]
def _fetch_results(self, query):
    # If we're manually excluding PKs, and we've specified a limit to the results
    # we need to make sure that we grab more than we were asked for otherwise we could filter
    # out too many! These are again limited back to the original request limit
    # while we're processing the results later

    # Apply the namespace before excluding
    excluded_pks = [
        rpc.Key.from_path(x.kind(), x.id_or_name(), namespace=self.namespace)
        for x in self.query.excluded_pks
    ]

    high_mark = self.query.high_mark
    low_mark = self.query.low_mark

    excluded_pk_count = 0
    if excluded_pks and high_mark:
        excluded_pk_count = len(excluded_pks)
        high_mark += excluded_pk_count

    limit = None if high_mark is None else (high_mark - (low_mark or 0))
    offset = low_mark or 0

    if self.query.kind == "COUNT":
        if excluded_pks:
            # If we're excluding pks, relying on a traditional count won't work
            # so we have two options:
            # 1. Do a keys_only query instead and count the results excluding keys
            # 2. Do a count, then a pk__in=excluded_pks to work out how many to subtract
            # Here I've favoured option one as it means a single RPC call. Testing locally
            # didn't seem to indicate much of a performance difference, even when doing the pk__in
            # with GetAsync while the count was running. That might not be true of prod though so
            # if anyone comes up with a faster idea let me know!
            if isinstance(query, meta_queries.QueryByKeys):
                # If this is a QueryByKeys, just do the datastore Get and count the results
                resultset = (x.key() for x in query.Run(limit=limit, offset=offset) if x)
            else:
                count_query = Query(
                    query._Query__kind, keys_only=True, namespace=self.namespace
                )
                count_query.update(query)
                resultset = count_query.Run(limit=limit, offset=offset)
            self.results = [len([y for y in resultset if y not in excluded_pks])]
            self.results_returned = 1
        else:
            self.results = [query.Count(limit=limit, offset=offset)]
            self.results_returned = 1
        return
    elif self.query.kind == "AVERAGE":
        raise ValueError("AVERAGE not yet supported")

    # Ensure that the results_returned counter is reset
    self.results_returned = 0
    self.results = []

    seen = set()

    def dedupe(result):
        # FIXME: This logic can't be right. I think we need to store the distinct fields
        # somewhere on the query
        if getattr(self.original_query, "annotation_select", None):
            columns = self.original_query.annotation_select.keys()
        else:
            columns = self.query.columns or []

        if not columns:
            return result

        key = tuple([result[x] for x in self._exclude_pk(columns) if x in result])
        if key in seen:
            return None
        seen.add(key)
        return result

    for entity in query.Run(limit=limit, offset=offset):
        # If this is a keys only query, we need to generate a fake entity
        # for each key in the result set
        if self.keys_only:
            entity = EntityTransforms.convert_key_to_entity(entity)

        entity = EntityTransforms.ignore_excluded_pks(excluded_pks, entity)
        entity = EntityTransforms.convert_datetime_fields(self.query, entity)
        entity = EntityTransforms.fix_projected_values_type(self.query, entity)
        entity = EntityTransforms.rename_pk_field(
            self.query.model, self.query.concrete_model, entity
        )
        entity = EntityTransforms.process_extra_selects(self.query, entity)

        if self.query.distinct and self.query.extra_selects:
            entity = dedupe(entity)

        if entity:
            self.results.append(entity)
            self.results_returned += 1

        if limit and self.results_returned >= (limit - excluded_pk_count):
            break
def key_exists(key):
    qry = Query(keys_only=True, namespace=key.namespace())
    qry.Ancestor(key)
    return qry.Count(limit=1) > 0
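
# Usage sketch for key_exists: an ancestor query includes the ancestor entity
# itself, so a keys-only Count(limit=1) is a cheap existence probe that only
# touches the key, never the entity's properties. The kind and key name here
# are hypothetical.
from google.appengine.api import datastore

key = datastore.Key.from_path('Greeting', 'Test:1')
if key_exists(key):
    print('entity (or a descendant of it) exists')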
def _fetch_results(self, query):
    # If we're manually excluding PKs, and we've specified a limit to the results
    # we need to make sure that we grab more than we were asked for otherwise we could filter
    # out too many! These are again limited back to the original request limit
    # while we're processing the results later
    high_mark = self.query.high_mark
    low_mark = self.query.low_mark

    excluded_pk_count = 0
    if self.excluded_pks and high_mark:
        excluded_pk_count = len(self.excluded_pks)
        high_mark += excluded_pk_count

    limit = None if high_mark is None else (high_mark - (low_mark or 0))
    offset = low_mark or 0

    if self.query.kind == "COUNT":
        if self.excluded_pks:
            # If we're excluding pks, relying on a traditional count won't work
            # so we have two options:
            # 1. Do a keys_only query instead and count the results excluding keys
            # 2. Do a count, then a pk__in=excluded_pks to work out how many to subtract
            # Here I've favoured option one as it means a single RPC call. Testing locally
            # didn't seem to indicate much of a performance difference, even when doing the pk__in
            # with GetAsync while the count was running. That might not be true of prod though so
            # if anyone comes up with a faster idea let me know!
            count_query = Query(query._Query__kind, keys_only=True)
            count_query.update(query)
            resultset = count_query.Run(limit=limit, offset=offset)
            self.results = (x for x in [len([y for y in resultset if y not in self.excluded_pks])])
        else:
            self.results = (x for x in [query.Count(limit=limit, offset=offset)])
        return
    elif self.query.kind == "AVERAGE":
        raise ValueError("AVERAGE not yet supported")
    else:
        self.results = query.Run(limit=limit, offset=offset)

    # Ensure that the results_returned counter is reset
    self.results_returned = 0

    def increment_returned_results(result):
        self.results_returned += 1
        return result

    def convert_key_to_entity(result):
        class FakeEntity(dict):
            def __init__(self, key):
                self._key = key

            def key(self):
                return self._key

        return FakeEntity(result)

    def rename_pk_field(result):
        if result is None:
            return result

        value = result.key().id_or_name()
        result[self.query.model._meta.pk.column] = value
        result[self.query.concrete_model._meta.pk.column] = value
        return result

    def process_extra_selects(result):
        """
            We handle extra selects by generating the new columns from each result.
            We can handle simple boolean logic and operators.
        """
        extra_selects = self.query.extra_selects
        model_fields = self.query.model._meta.fields

        DATE_FORMATS = ("%Y-%m-%d", "%Y-%m-%d %H:%M:%S")

        def process_arg(arg):
            if arg.startswith("'") and arg.endswith("'"):
                # String literal
                arg = arg.strip("'")
                # Check to see if this is a date
                for date in DATE_FORMATS:
                    try:
                        value = datetime.strptime(arg, date)
                        return value
                    except ValueError:
                        continue
                return arg
            elif arg in [x.column for x in model_fields]:
                # Column value
                return result.get(arg)

            # Handle NULL
            if arg.lower() == "null":
                return None
            elif arg.lower() == "true":
                return True
            elif arg.lower() == "false":
                return False

            # See if it's an integer
            try:
                arg = int(arg)
            except (TypeError, ValueError):
                pass

            # Just a plain old literal
            return arg

        for col, select in extra_selects:
            result[col] = select[0](*[process_arg(x) for x in select[1]])

        return result

    def convert_datetime_fields(result):
        fields = [
            x for x in self.query.model._meta.fields
            if x.get_internal_type() in ("DateTimeField", "DateField", "TimeField")
        ]

        for field in fields:
            column = field.column
            if isinstance(result, dict):  # sometimes it's a key!
                value = result.get(column)
            else:
                value = None

            if value is not None:
                result[column] = ensure_datetime(value)
        return result

    def ignore_excluded_pks(result):
        if result.key() in self.query.excluded_pks:
            return None
        return result

    self.results = wrap_result_with_functor(self.results, increment_returned_results)

    # If this is a keys only query, we need to generate a fake entity
    # for each key in the result set
    if self.keys_only:
        self.results = wrap_result_with_functor(self.results, convert_key_to_entity)

    self.results = wrap_result_with_functor(self.results, ignore_excluded_pks)
    self.results = wrap_result_with_functor(self.results, convert_datetime_fields)
    self.results = wrap_result_with_functor(self.results, rename_pk_field)
    self.results = wrap_result_with_functor(self.results, process_extra_selects)

    if self.query.distinct and self.query.extra_selects:
        # If we had extra selects, and we're distinct, we must deduplicate results
        def deduper_factory():
            seen = set()

            def dedupe(result):
                # FIXME: This logic can't be right. I think we need to store the distinct fields
                # somewhere on the query
                if getattr(self.original_query, "annotation_select", None):
                    columns = self.original_query.annotation_select.keys()
                else:
                    columns = self.query.columns or []

                if not columns:
                    return result

                key = tuple([result[x] for x in self._exclude_pk(columns) if x in result])
                if key in seen:
                    return None
                seen.add(key)
                return result

            return dedupe

        self.results = wrap_result_with_functor(self.results, deduper_factory())
def _fetch_results(self, query):
    # If we're manually excluding PKs, and we've specified a limit to the results
    # we need to make sure that we grab more than we were asked for otherwise we could filter
    # out too many! These are again limited back to the original request limit
    # while we're processing the results later

    # Apply the namespace before excluding
    excluded_pks = [
        datastore.Key.from_path(x.kind(), x.id_or_name(), namespace=self.namespace)
        for x in self.query.excluded_pks
    ]

    high_mark = self.query.high_mark
    low_mark = self.query.low_mark

    excluded_pk_count = 0
    if excluded_pks and high_mark:
        excluded_pk_count = len(excluded_pks)
        high_mark += excluded_pk_count

    limit = None if high_mark is None else (high_mark - (low_mark or 0))
    offset = low_mark or 0

    if self.query.kind == "COUNT":
        if excluded_pks:
            # If we're excluding pks, relying on a traditional count won't work
            # so we have two options:
            # 1. Do a keys_only query instead and count the results excluding keys
            # 2. Do a count, then a pk__in=excluded_pks to work out how many to subtract
            # Here I've favoured option one as it means a single RPC call. Testing locally
            # didn't seem to indicate much of a performance difference, even when doing the pk__in
            # with GetAsync while the count was running. That might not be true of prod though so
            # if anyone comes up with a faster idea let me know!
            if isinstance(query, QueryByKeys):
                # If this is a QueryByKeys, just do the datastore Get and count the results
                resultset = (x.key() for x in query.Run(limit=limit, offset=offset) if x)
            else:
                count_query = Query(query._Query__kind, keys_only=True, namespace=self.namespace)
                count_query.update(query)
                resultset = count_query.Run(limit=limit, offset=offset)
            self.results = [len([y for y in resultset if y not in excluded_pks])]
            self.results_returned = 1
        else:
            self.results = [query.Count(limit=limit, offset=offset)]
            self.results_returned = 1
        return
    elif self.query.kind == "AVERAGE":
        raise ValueError("AVERAGE not yet supported")

    # Ensure that the results_returned counter is reset
    self.results_returned = 0
    self.results = []

    seen = set()

    def dedupe(result):
        # FIXME: This logic can't be right. I think we need to store the distinct fields
        # somewhere on the query
        if getattr(self.original_query, "annotation_select", None):
            columns = self.original_query.annotation_select.keys()
        else:
            columns = self.query.columns or []

        if not columns:
            return result

        key = tuple([result[x] for x in self._exclude_pk(columns) if x in result])
        if key in seen:
            return None
        seen.add(key)
        return result

    for entity in query.Run(limit=limit, offset=offset):
        # If this is a keys only query, we need to generate a fake entity
        # for each key in the result set
        if self.keys_only:
            entity = EntityTransforms.convert_key_to_entity(entity)

        entity = EntityTransforms.ignore_excluded_pks(excluded_pks, entity)
        entity = EntityTransforms.convert_datetime_fields(self.query, entity)
        entity = EntityTransforms.rename_pk_field(
            self.query.model, self.query.concrete_model, entity
        )
        entity = EntityTransforms.process_extra_selects(self.query, entity)

        if self.query.distinct and self.query.extra_selects:
            entity = dedupe(entity)

        if entity:
            self.results.append(entity)
            self.results_returned += 1

        if limit and self.results_returned >= (limit - excluded_pk_count):
            break
def tear_down_helper(self):
    query = Query('Greeting', _app=PROJECT_ID)
    results = yield self.datastore.run_query(query)
    yield self.datastore.delete([entity.key() for entity in results])
def _build_query(self):
    self._sanity_check()

    queries = []

    projection = self._exclude_pk(self.query.columns) or None

    query_kwargs = {
        "kind": self.query.concrete_model._meta.db_table,
        "distinct": self.query.distinct or None,
        "keys_only": self.keys_only or None,
        "projection": projection,
    }

    ordering = convert_django_ordering_to_gae(self.query.order_by)

    if self.query.distinct and not ordering:
        # If we specified we wanted a distinct query, but we didn't specify
        # an ordering, we must set the ordering to the distinct columns, otherwise
        # App Engine shouts at us. Nastily. And without remorse.
        ordering = self.query.columns[:]

    # Deal with the no filters case
    if self.query.where is None:
        query = Query(**query_kwargs)
        try:
            query.Order(*ordering)
        except datastore_errors.BadArgumentError as e:
            raise NotSupportedError(e)
        return query

    assert self.query.where

    # Go through the normalized query tree
    for and_branch in self.query.where.children:
        query = Query(**query_kwargs)

        # This deals with the oddity that the root of the tree may well be a leaf
        filters = [and_branch] if and_branch.is_leaf else and_branch.children

        for filter_node in filters:
            lookup = "{} {}".format(filter_node.column, filter_node.operator)

            value = filter_node.value
            # This is a special case. Annoyingly Django's decimal field doesn't
            # ever call ops.get_prep_save or lookup or whatever when you are filtering
            # on a query. It *does* do it on a save, so we basically need to do a
            # conversion here, when really it should be handled elsewhere
            if isinstance(value, decimal.Decimal):
                field = get_field_from_column(self.query.model, filter_node.column)
                value = self.connection.ops.value_to_db_decimal(
                    value, field.max_digits, field.decimal_places)
            elif isinstance(value, basestring):
                value = unicode(value)

            # If there is already a value for this lookup, we need to make the
            # value a list and append the new entry
            if lookup in query and not isinstance(query[lookup], (list, tuple)) and query[lookup] != value:
                query[lookup] = [query[lookup]] + [value]
            else:
                # If the value is a list, we can't just assign it to the query
                # which will treat each element as its own value. So in this
                # case we nest it. This has the side effect of throwing a BadValueError
                # which we could throw ourselves, but the datastore might start supporting
                # list values in lookups.. you never know!
                if isinstance(value, (list, tuple)):
                    query[lookup] = [value]
                else:
                    # Common case: just add the raw where constraint
                    query[lookup] = value

        if ordering:
            try:
                query.Order(*ordering)
            except datastore_errors.BadArgumentError as e:
                # This is the easiest way to detect unsupported orderings
                # ideally we'd detect this at the query normalization stage
                # but it's a lot of hassle, this is much easier and seems to work OK
                raise NotSupportedError(e)

        queries.append(query)

    if can_perform_datastore_get(self.query):
        # Yay for optimizations!
        return QueryByKeys(self.query.model, queries, ordering)

    if len(queries) == 1:
        identifier = query_is_unique(self.query.model, queries[0])
        if identifier:
            # Yay for optimizations!
            return UniqueQuery(identifier, queries[0], self.query.model)

        return queries[0]
    else:
        return datastore.MultiQuery(queries, ordering)
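
# A small sketch of the datastore behaviour the ordering fallback above works
# around: distinct queries are only meaningful together with a projection,
# and the datastore expects them to be ordered on the projected columns,
# which is why the code defaults the ordering to the distinct columns. This
# is a hedged illustration; the kind and property names are hypothetical.
from google.appengine.api import datastore

query = datastore.Query('Greeting', projection=('color',), distinct=True)
query.Order(('color', datastore.Query.ASCENDING))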
def Run(self, limit=None, offset=None):
    """
        Here are the options:

        1. Single key, hit memcache
        2. Multikey projection, async MultiQueries with ancestors chained
        3. Full select, datastore get
    """
    opts = self.queries[0]._Query__query_options
    key_count = len(self.queries_by_key)

    is_projection = False

    results = None
    if key_count == 1:
        # FIXME: Potentially could use get_multi in memcache and then make a query
        # for whatever remains
        key = self.queries_by_key.keys()[0]
        result = caching.get_from_cache_by_key(key)
        if result is not None:
            results = [result]
            cache = False  # Don't update cache, we just got it from there

    if results is None:
        if opts.projection:
            is_projection = True  # Don't cache projection results!

            # Assumes projection ancestor queries are faster than a datastore Get
            # due to lower traffic over the RPC. This should be faster for queries with
            # < 30 keys (which is the most common case), and faster if the entities are
            # larger and there are many results, but there is probably a slower middle ground
            # because of the larger number of RPC calls. Still, if performance is an issue the
            # user can just do a normal get() rather than values/values_list/only/defer

            to_fetch = (offset or 0) + limit if limit else None
            additional_cols = set([x[0] for x in self.ordering if x[0] not in opts.projection])

            multi_query = []
            final_queries = []
            orderings = self.queries[0]._Query__orderings
            for key, queries in self.queries_by_key.iteritems():
                for query in queries:
                    if additional_cols:
                        # We need to include additional orderings in the projection so that we can
                        # sort them in memory. Annoyingly that means reinstantiating the queries
                        query = Query(
                            kind=query._Query__kind,
                            filters=query,
                            projection=list(opts.projection) + list(additional_cols),
                            namespace=self.namespace,
                        )

                    query.Ancestor(key)  # Make this an ancestor query
                    multi_query.append(query)
                    if len(multi_query) == 30:
                        final_queries.append(
                            datastore.MultiQuery(multi_query, orderings).Run(limit=to_fetch))
                        multi_query = []

            if len(multi_query) == 1:
                final_queries.append(multi_query[0].Run(limit=to_fetch))
            elif multi_query:
                final_queries.append(
                    datastore.MultiQuery(multi_query, orderings).Run(limit=to_fetch))

            results = chain(*final_queries)
        else:
            results = datastore.Get(self.queries_by_key.keys())

    def iter_results(results):
        returned = 0
        # This is safe, because Django is fetching all results any way :(
        sorted_results = sorted(
            results, cmp=partial(utils.django_ordering_comparison, self.ordering))
        sorted_results = [result for result in sorted_results if result is not None]
        if not is_projection and sorted_results:
            caching.add_entities_to_cache(
                self.model,
                sorted_results,
                caching.CachingSituation.DATASTORE_GET,
                self.namespace,
            )

        for result in sorted_results:
            if is_projection:
                entity_matches_query = True
            else:
                entity_matches_query = any(
                    utils.entity_matches_query(result, qry)
                    for qry in self.queries_by_key[result.key()]
                )

            if not entity_matches_query:
                continue

            if offset and returned < offset:
                # Skip entities based on offset
                returned += 1
                continue
            else:
                yield _convert_entity_based_on_query_options(result, opts)

                returned += 1

                # If there is a limit, we might be done!
                if limit is not None and returned == (offset or 0) + limit:
                    break

    return iter_results(results)
def key_exists(key):
    qry = Query(keys_only=True)
    qry.Ancestor(key)
    return qry.Count(limit=1) > 0
def _build_gae_query(self):
    """ Build and return the Datastore Query object. """
    query_kwargs = {
        "kind": str(self.db_table)
    }

    if self.distinct:
        query_kwargs["distinct"] = True

    if self.keys_only:
        query_kwargs["keys_only"] = self.keys_only
    elif self.projection:
        query_kwargs["projection"] = self.projection

    query = Query(
        **query_kwargs
    )

    if has_concrete_parents(self.model) and not self.model._meta.proxy:
        query["class ="] = self.model._meta.db_table

    ordering = []
    for order in self.ordering:
        if isinstance(order, int):
            direction = datastore.Query.ASCENDING if order == 1 else datastore.Query.DESCENDING
            order = self.queried_fields[0]
        else:
            direction = datastore.Query.DESCENDING if order.startswith("-") else datastore.Query.ASCENDING
            order = order.lstrip("-")

        if order == self.model._meta.pk.column or order == "pk":
            order = "__key__"
        ordering.append((order, direction))

    def process_and_branch(query, and_branch):
        for column, op, value in and_branch[-1]:
            if column == self.pk_col:
                column = "__key__"

                # FIXME: This EmptyResultSet check should happen during normalization so that Django doesn't count it as a query
                if op == "=" and "__key__ =" in query:
                    # We've already done an exact lookup on a key, this query can't return anything!
                    raise EmptyResultSet()

                if not isinstance(value, datastore.Key):
                    value = get_datastore_key(self.model, value)

            key = "%s %s" % (column, op)
            if key in query:
                query[key] = [query[key], value]
            else:
                query[key] = value

    if self.where:
        queries = []

        # If there is a single filter, we make out it's an OR with only one branch
        # just so that the code below is simpler
        if isinstance(self.where, tuple) and len(self.where) == 3:
            self.where = ('OR', [(u'AND', [self.where])])
        elif isinstance(self.where, tuple) and self.where[0] == 'AND':
            self.where = ('OR', [self.where])
        elif isinstance(self.where, tuple) and self.where[0] == 'OR' \
                and isinstance(self.where[1][0], tuple) and self.where[1][0][0] != 'AND':
            self.where = ('OR', [('AND', [x]) for x in self.where[-1]])

        operator = self.where[0]
        assert operator == 'OR'
        # print query._Query__kind, self.where

        for and_branch in self.where[1]:
            # Duplicate the query for all the "OR"s
            queries.append(Query(**query_kwargs))
            queries[-1].update(query)  # Make sure we copy across filters (e.g. class =)
            try:
                process_and_branch(queries[-1], and_branch)
            except EmptyResultSet:
                return NoOpQuery()

        def all_queries_same_except_key(_queries):
            """
                Returns True if all queries in the list of queries filter on the same thing
                except for "__key__ =". Determine if we can do a Get basically.
            """
            test = _queries[0]

            for qry in _queries:
                if "__key__ =" not in qry.keys():
                    return False

                if qry._Query__kind != test._Query__kind:
                    return False

                if qry.keys() != test.keys():
                    return False

                for k, v in qry.items():
                    if k.startswith("__key__"):
                        continue

                    if v != test[k]:
                        return False
            return True

        if all_queries_same_except_key(queries):
            included_pks = [qry["__key__ ="] for qry in queries]
            # Just use whatever query to determine the matches
            return QueryByKeys(queries[0], included_pks, ordering)
        else:
            if len(queries) > 1:
                # Disable keys only queries for MultiQuery
                new_queries = []
                for query in queries:
                    qry = Query(query._Query__kind, projection=query._Query__query_options.projection)
                    qry.update(query)
                    new_queries.append(qry)

                query = datastore.MultiQuery(new_queries, ordering)
            else:
                query = queries[0]
                query.Order(*ordering)
    else:
        query.Order(*ordering)

    # If the resulting query was unique, then wrap as a unique query which
    # will hit the cache first
    unique_identifier = query_is_unique(self.model, query)
    if unique_identifier:
        return UniqueQuery(unique_identifier, query, self.model)

    DJANGAE_LOG.debug("Select query: {0}, {1}".format(self.model.__name__, self.where))

    return query
def _build_gae_query(self):
    """ Build and return the Datastore Query object. """
    combined_filters = []

    query_kwargs = {}

    if self.keys_only:
        query_kwargs["keys_only"] = self.keys_only
    elif self.projection:
        query_kwargs["projection"] = self.projection

    query = Query(
        self.db_table,
        **query_kwargs
    )

    if has_concrete_parents(self.model) and not self.model._meta.proxy:
        query["class ="] = self.model._meta.db_table

    DJANGAE_LOG.debug("Select query: {0}, {1}".format(self.model.__name__, self.where))

    for column, op, value in self.where:
        if column == self.pk_col:
            column = "__key__"

        final_op = OPERATORS_MAP.get(op)
        if final_op is None:
            if op in REQUIRES_SPECIAL_INDEXES:
                add_special_index(self.model, column, op)  # Add the index if we can (e.g. on dev_appserver)

                if op not in special_indexes_for_column(self.model, column):
                    raise RuntimeError(
                        "There is a missing index in your djangaeidx.yaml - \n\n{0}:\n\t{1}: [{2}]".format(
                            self.model, column, op
                        )
                    )

                indexer = REQUIRES_SPECIAL_INDEXES[op]
                column = indexer.indexed_column_name(column)
                value = indexer.prep_value_for_query(value)
                query["%s =" % column] = value
            else:
                if op == "in":
                    combined_filters.append((column, op, value))
                elif op == "gt_and_lt":
                    combined_filters.append((column, op, value))
                elif op == "isnull":
                    query["%s =" % column] = None
                elif op == "startswith":
                    # You can emulate starts with by adding the last unicode char
                    # to the value, then doing <=. Genius.
                    query["%s >=" % column] = value
                    if isinstance(value, str):
                        value = value.decode("utf-8")
                    value += u'\ufffd'
                    query["%s <=" % column] = value
                else:
                    raise NotImplementedError("Unimplemented operator {0}".format(op))
        else:
            query["%s %s" % (column, final_op)] = value

    ordering = []
    for order in self.ordering:
        if isinstance(order, int):
            direction = datastore.Query.ASCENDING if order == 1 else datastore.Query.DESCENDING
            order = self.queried_fields[0]
        else:
            direction = datastore.Query.DESCENDING if order.startswith("-") else datastore.Query.ASCENDING
            order = order.lstrip("-")

        if order == self.model._meta.pk.column:
            order = "__key__"
        ordering.append((order, direction))

    if combined_filters:
        queries = [query]
        for column, op, value in combined_filters:
            new_queries = []
            for query in queries:
                if op == "in":
                    for val in value:
                        new_query = datastore.Query(self.model._meta.db_table)
                        new_query.update(query)
                        new_query["%s =" % column] = val
                        new_queries.append(new_query)
                elif op == "gt_and_lt":
                    for tmp_op in ("<", ">"):
                        new_query = datastore.Query(self.model._meta.db_table)
                        new_query.update(query)
                        new_query["%s %s" % (column, tmp_op)] = value
                        new_queries.append(new_query)
            queries = new_queries

        query = datastore.MultiQuery(queries, ordering)
    elif ordering:
        query.Order(*ordering)

    return query
def spawn_query(kind, key):
    qry = Query(kind)
    qry["__key__ ="] = key
    return qry
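
# Usage sketch: a `__key__ =` filter turns a kind query into a single-key
# lookup, useful e.g. for checking that an entity still matches its kind's
# other filters. The key below is hypothetical.
from google.appengine.api import datastore

key = datastore.Key.from_path('Greeting', 'Test:1')
entities = list(spawn_query('Greeting', key).Run())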
def _build_gae_query(self):
    """ Build and return the Datastore Query object. """
    query_kwargs = {
        "kind": str(self.db_table)
    }

    if self.distinct:
        if self.projection:
            query_kwargs["distinct"] = True
        else:
            logging.warning("Ignoring distinct on a query where a projection wasn't possible")

    if self.keys_only:
        query_kwargs["keys_only"] = self.keys_only
    elif self.projection:
        query_kwargs["projection"] = self.projection

    query = Query(
        **query_kwargs
    )

    if has_concrete_parents(self.model) and not self.model._meta.proxy:
        query["class ="] = self.model._meta.db_table

    ordering = []
    for order in self.ordering:
        if isinstance(order, (long, int)):
            direction = datastore.Query.ASCENDING if order == 1 else datastore.Query.DESCENDING
            order = self.queried_fields[0]
        else:
            direction = datastore.Query.DESCENDING if order.startswith("-") else datastore.Query.ASCENDING
            order = order.lstrip("-")

        if order == self.model._meta.pk.column or order == "pk":
            order = "__key__"

        # Flip the ordering if someone called reverse() on the queryset
        if not self.original_query.standard_ordering:
            direction = datastore.Query.DESCENDING \
                if direction == datastore.Query.ASCENDING else datastore.Query.ASCENDING

        ordering.append((order, direction))

    def process_and_branch(query, and_branch):
        for child in and_branch[-1]:
            column, op, value = child[1]

            # for column, op, value in and_branch[-1]:
            if column == self.pk_col:
                column = "__key__"

                # FIXME: This EmptyResultSet check should happen during normalization so that Django doesn't count it as a query
                if op == "=" and "__key__ =" in query and query["__key__ ="] != value:
                    # We've already done an exact lookup on a key, this query can't return anything!
                    raise EmptyResultSet()

                if not isinstance(value, datastore.Key):
                    value = get_datastore_key(self.model, value)

            key = "%s %s" % (column, op)
            try:
                if isinstance(value, basestring):
                    value = coerce_unicode(value)

                if key in query:
                    if type(query[key]) == list:
                        if value not in query[key]:
                            query[key].append(value)
                    else:
                        if query[key] != value:
                            query[key] = [query[key], value]
                else:
                    query[key] = value
            except datastore_errors.BadFilterError as e:
                raise NotSupportedError(str(e))

    if self.where:
        queries = []

        # print query._Query__kind, self.where
        for and_branch in self.where[1]:
            # Duplicate the query for all the "OR"s
            queries.append(Query(**query_kwargs))
            queries[-1].update(query)  # Make sure we copy across filters (e.g. class =)
            try:
                if and_branch[0] == "LIT":
                    and_branch = ("AND", [and_branch])
                process_and_branch(queries[-1], and_branch)
            except EmptyResultSet:
                # This is a little hacky but basically if there is only one branch in the or, and it raises
                # an EmptyResultSet, then we just bail, however if there is more than one branch the
                # query might still return something. This logic needs cleaning up and moving to the DNF phase
                if len(self.where[1]) == 1:
                    return NoOpQuery()
                else:
                    queries.pop()

        if not queries:
            return NoOpQuery()

        included_pks = [qry["__key__ ="] for qry in queries if "__key__ =" in qry]
        if len(included_pks) == len(queries):
            # If all queries have a key, we can perform a Get
            return QueryByKeys(self.model, queries, ordering)
        else:
            # Just use whatever query to determine the matches
            if len(queries) > 1:
                # Disable keys only queries for MultiQuery
                new_queries = []
                for i, query in enumerate(queries):
                    if i > 30:
                        raise NotSupportedError(
                            "Too many subqueries (max: 30, got {}). Probably caused by too many IN/!= filters".format(
                                len(queries)
                            )
                        )

                    qry = Query(query._Query__kind, projection=query._Query__query_options.projection)
                    qry.update(query)
                    try:
                        qry.Order(*ordering)
                    except datastore_errors.BadArgumentError as e:
                        raise NotSupportedError(e)

                    new_queries.append(qry)

                query = datastore.MultiQuery(new_queries, ordering)
            else:
                query = queries[0]
                try:
                    query.Order(*ordering)
                except datastore_errors.BadArgumentError as e:
                    raise NotSupportedError(e)
    else:
        try:
            query.Order(*ordering)
        except datastore_errors.BadArgumentError as e:
            raise NotSupportedError(e)

    # If the resulting query was unique, then wrap as a unique query which
    # will hit the cache first
    unique_identifier = query_is_unique(self.model, query)
    if unique_identifier:
        return UniqueQuery(unique_identifier, query, self.model)

    DJANGAE_LOG.debug("Select query: {0}, {1}".format(self.model.__name__, self.where))

    return query
def _fetch_results(self, query):
    # If we're manually excluding PKs, and we've specified a limit to the results
    # we need to make sure that we grab more than we were asked for otherwise we could filter
    # out too many! These are again limited back to the original request limit
    # while we're processing the results later
    high_mark = self.query.high_mark
    low_mark = self.query.low_mark

    excluded_pk_count = 0
    if self.excluded_pks and high_mark:
        excluded_pk_count = len(self.excluded_pks)
        high_mark += excluded_pk_count

    limit = None if high_mark is None else (high_mark - (low_mark or 0))
    offset = low_mark or 0

    if self.query.kind == "COUNT":
        if self.excluded_pks:
            # If we're excluding pks, relying on a traditional count won't work
            # so we have two options:
            # 1. Do a keys_only query instead and count the results excluding keys
            # 2. Do a count, then a pk__in=excluded_pks to work out how many to subtract
            # Here I've favoured option one as it means a single RPC call. Testing locally
            # didn't seem to indicate much of a performance difference, even when doing the pk__in
            # with GetAsync while the count was running. That might not be true of prod though so
            # if anyone comes up with a faster idea let me know!
            count_query = Query(query._Query__kind, keys_only=True)
            count_query.update(query)
            resultset = count_query.Run(limit=limit, offset=offset)
            self.results = (x for x in [len([y for y in resultset if y not in self.excluded_pks])])
        else:
            self.results = (x for x in [query.Count(limit=limit, offset=offset)])
        return
    elif self.query.kind == "AVERAGE":
        raise ValueError("AVERAGE not yet supported")
    else:
        self.results = query.Run(limit=limit, offset=offset)

    # Ensure that the results_returned counter is reset
    self.results_returned = 0

    def increment_returned_results(result):
        self.results_returned += 1
        return result

    def convert_key_to_entity(result):
        class FakeEntity(dict):
            def __init__(self, key):
                self._key = key

            def key(self):
                return self._key

        return FakeEntity(result)

    def rename_pk_field(result):
        if result is None:
            return result

        value = result.key().id_or_name()
        result[self.query.model._meta.pk.column] = value
        result[self.query.concrete_model._meta.pk.column] = value
        return result

    def process_extra_selects(result):
        """
            We handle extra selects by generating the new columns from each result.
            We can handle simple boolean logic and operators.
        """
        extra_selects = self.query.extra_selects
        model_fields = self.query.model._meta.fields

        DATE_FORMATS = ("%Y-%m-%d", "%Y-%m-%d %H:%M:%S")

        def process_arg(arg):
            if arg.startswith("'") and arg.endswith("'"):
                # String literal
                arg = arg.strip("'")
                # Check to see if this is a date
                for date in DATE_FORMATS:
                    try:
                        value = datetime.strptime(arg, date)
                        return value
                    except ValueError:
                        continue
                return arg
            elif arg in [x.column for x in model_fields]:
                # Column value
                return result.get(arg)

            # Handle NULL
            if arg.lower() == 'null':
                return None
            elif arg.lower() == 'true':
                return True
            elif arg.lower() == 'false':
                return False

            # See if it's an integer
            try:
                arg = int(arg)
            except (TypeError, ValueError):
                pass

            # Just a plain old literal
            return arg

        for col, select in extra_selects:
            result[col] = select[0](*[process_arg(x) for x in select[1]])

        return result

    def convert_datetime_fields(result):
        fields = [
            x for x in self.query.model._meta.fields
            if x.get_internal_type() in ("DateTimeField", "DateField", "TimeField")
        ]

        for field in fields:
            column = field.column
            if isinstance(result, dict):  # sometimes it's a key!
                value = result.get(column)
            else:
                value = None

            if value is not None:
                result[column] = ensure_datetime(value)
        return result

    def ignore_excluded_pks(result):
        if result.key() in self.query.excluded_pks:
            return None
        return result

    self.results = wrap_result_with_functor(self.results, increment_returned_results)

    # If this is a keys only query, we need to generate a fake entity
    # for each key in the result set
    if self.keys_only:
        self.results = wrap_result_with_functor(self.results, convert_key_to_entity)

    self.results = wrap_result_with_functor(self.results, ignore_excluded_pks)
    self.results = wrap_result_with_functor(self.results, convert_datetime_fields)
    self.results = wrap_result_with_functor(self.results, rename_pk_field)
    self.results = wrap_result_with_functor(self.results, process_extra_selects)

    if self.query.distinct and self.query.extra_selects:
        # If we had extra selects, and we're distinct, we must deduplicate results
        def deduper_factory():
            seen = set()

            def dedupe(result):
                # FIXME: This logic can't be right. I think we need to store the distinct fields
                # somewhere on the query
                if getattr(self.original_query, "annotation_select", None):
                    columns = self.original_query.annotation_select.keys()
                else:
                    columns = self.query.columns or []

                if not columns:
                    return result

                key = tuple([result[x] for x in self._exclude_pk(columns) if x in result])
                if key in seen:
                    return None
                seen.add(key)
                return result

            return dedupe

        self.results = wrap_result_with_functor(self.results, deduper_factory())