def _add_inheritence_filter(self):
    """
        We support inheritance with polymodels. Whenever we set the 'where' on this
        query, we manipulate the tree so that the lookups are ANDed with a filter on
        'class = db_table', and on inserts we add the 'class' column if the model is
        part of an inheritance tree.

        We only do any of this if the model has concrete parents and isn't a proxy model.
    """
    if has_concrete_parents(self.model) and not self.model._meta.proxy:
        if self.polymodel_filter_added:
            return

        new_filter = WhereNode()
        new_filter.column = POLYMODEL_CLASS_ATTRIBUTE
        new_filter.operator = '='
        new_filter.value = self.model._meta.db_table

        # We add this bare AND just to stay consistent with what Django does
        new_and = WhereNode()
        new_and.connector = 'AND'
        new_and.children = [new_filter]

        new_root = WhereNode()
        new_root.connector = 'AND'
        new_root.children = [new_and]
        if self._where:
            # Add the original where if there was one
            new_root.children.append(self._where)
        self._where = new_root

        self.polymodel_filter_added = True
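# For orientation, a sketch of the tree the function above produces. This is
# illustrative only: 'myapp_shape' is a hypothetical db_table for a child model
# with a concrete parent; WhereNode and POLYMODEL_CLASS_ATTRIBUTE are the same
# names used in the function itself.
#
#   new_root (connector='AND')
#   +-- new_and (connector='AND')
#   |   +-- new_filter: class = 'myapp_shape'
#   +-- <the original self._where, if there was one>
#
# So every query on a model with concrete parents is implicitly ANDed with a
# 'class = <db_table>' filter, which scopes polymodel reads to entities that
# belong to that subclass.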
def execute(self):
    if self.has_pk and not has_concrete_parents(self.model):
        results = []
        # We are inserting, but we specified an ID, we need to check for existence before we Put()
        # We do it in a loop so each check/put is transactional - because it's an ancestor query it shouldn't
        # cost any entity groups
        for key, ent in zip(self.included_keys, self.entities):
            @db.transactional
            def txn():
                if key is not None:
                    if utils.key_exists(key):
                        raise IntegrityError("Tried to INSERT with existing key")

                    id_or_name = key.id_or_name()
                    if isinstance(id_or_name, basestring) and id_or_name.startswith("__"):
                        raise NotSupportedError("Datastore ids cannot start with __. Id was %s" % id_or_name)

                markers = constraints.acquire(self.model, ent)
                try:
                    results.append(datastore.Put(ent))
                    caching.add_entity_to_context_cache(self.model, ent)
                except:
                    # Make sure we delete any created markers before we re-raise
                    constraints.release_markers(markers)
                    raise

            txn()

        return results
    else:
        # FIXME: We should rearrange this so that each entity is handled individually like above. We'll
        # lose insert performance, but gain consistency on errors which is more important
        markers = constraints.acquire_bulk(self.model, self.entities)
        try:
            results = datastore.Put(self.entities)
            for entity in self.entities:
                caching.add_entity_to_context_cache(self.model, entity)
        except:
            to_delete = chain(*markers)
            constraints.release_markers(to_delete)
            raise

        for ent, m in zip(self.entities, markers):
            constraints.update_instance_on_markers(ent, m)

        return results
def execute(self):
    if self.has_pk and not has_concrete_parents(self.model):
        results = []
        # We are inserting, but we specified an ID, we need to check for existence before we Put()
        # We do it in a loop so each check/put is transactional - because it's an ancestor query it shouldn't
        # cost any entity groups
        for key, ent in zip(self.included_keys, self.entities):
            @db.transactional
            def txn():
                if key is not None:
                    if utils.key_exists(key):
                        raise IntegrityError("Tried to INSERT with existing key")

                markers = constraints.acquire(self.model, ent)
                try:
                    results.append(datastore.Put(ent))
                except:
                    # Make sure we delete any created markers before we re-raise
                    constraints.release_markers(markers)
                    raise

                entity_post_insert.send(sender=self.model, entity=ent)

            txn()

        return results
    else:
        markers = constraints.acquire_bulk(self.model, self.entities)
        try:
            results = datastore.Put(self.entities)
        except:
            to_delete = chain(*markers)
            constraints.release_markers(to_delete)
            raise

        for ent, m in zip(self.entities, markers):
            constraints.update_instance_on_markers(ent, m)
            entity_post_insert.send(sender=self.model, entity=ent)

        return results
def execute(self):
    if self.has_pk and not has_concrete_parents(self.model):
        results = []
        # We are inserting, but we specified an ID, we need to check for existence before we Put()
        # FIXME/TODO: if we have many pks, then surely a multi datastore.Get would be faster than this loop, no?
        for key, ent in zip(self.included_keys, self.entities):
            @db.transactional
            def txn():
                if key is not None:
                    existing = datastore.Query(keys_only=True)
                    existing.Ancestor(key)
                    existing["__key__"] = key
                    res = existing.Count()
                    if res:
                        # FIXME: For now this raises (correctly) when using model inheritance
                        # We need to make model inheritance not insert the base, only the subclass
                        raise IntegrityError("Tried to INSERT with existing key")

                results.append(datastore.Put(ent))

            txn()

        return results
    else:
        return datastore.Put(self.entities)
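# A minimal standalone sketch of the check-then-Put idiom the execute()
# variants above share, assuming only the public App Engine APIs they already
# use. `key` and `entity` are hypothetical arguments; the keys-only ancestor
# query keeps the existence check inside a single entity group, which is what
# lets the whole check/put pair run in one transaction.
from google.appengine.api import datastore
from google.appengine.ext import db
from django.db import IntegrityError


def insert_if_absent(key, entity):
    @db.transactional
    def txn():
        existing = datastore.Query(keys_only=True)
        existing.Ancestor(key)
        existing["__key__ ="] = key
        if existing.Count(limit=1):
            raise IntegrityError("Tried to INSERT with existing key")
        return datastore.Put(entity)
    return txn()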
def _build_gae_query(self):
    """ Build and return the Datastore Query object. """
    query_kwargs = {
        "kind": str(self.db_table)
    }

    if self.distinct:
        if self.projection:
            query_kwargs["distinct"] = True
        else:
            logging.warning("Ignoring distinct on a query where a projection wasn't possible")

    if self.keys_only:
        query_kwargs["keys_only"] = self.keys_only
    elif self.projection:
        query_kwargs["projection"] = self.projection

    query = Query(
        **query_kwargs
    )

    if has_concrete_parents(self.model) and not self.model._meta.proxy:
        query["class ="] = self.model._meta.db_table

    ordering = []
    for order in self.ordering:
        if isinstance(order, (long, int)):
            direction = datastore.Query.ASCENDING if order == 1 else datastore.Query.DESCENDING
            order = self.queried_fields[0]
        else:
            direction = datastore.Query.DESCENDING if order.startswith("-") else datastore.Query.ASCENDING
            order = order.lstrip("-")

        if order == self.model._meta.pk.column or order == "pk":
            order = "__key__"

        # Flip the ordering if someone called reverse() on the queryset
        if not self.original_query.standard_ordering:
            direction = datastore.Query.DESCENDING if direction == datastore.Query.ASCENDING else datastore.Query.ASCENDING

        ordering.append((order, direction))

    def process_and_branch(query, and_branch):
        for child in and_branch[-1]:
            column, op, value = child[1]

            # for column, op, value in and_branch[-1]:
            if column == self.pk_col:
                column = "__key__"

                # FIXME: This EmptyResultSet check should happen during normalization so that Django doesn't count it as a query
                if op == "=" and "__key__ =" in query and query["__key__ ="] != value:
                    # We've already done an exact lookup on a key, this query can't return anything!
                    raise EmptyResultSet()

                if not isinstance(value, datastore.Key):
                    value = get_datastore_key(self.model, value)

            key = "%s %s" % (column, op)
            try:
                if isinstance(value, basestring):
                    value = coerce_unicode(value)

                if key in query:
                    if type(query[key]) == list:
                        if value not in query[key]:
                            query[key].append(value)
                    else:
                        if query[key] != value:
                            query[key] = [query[key], value]
                else:
                    query[key] = value
            except datastore_errors.BadFilterError as e:
                raise NotSupportedError(str(e))

    if self.where:
        queries = []

        # print query._Query__kind, self.where
        for and_branch in self.where[1]:
            # Duplicate the query for all the "OR"s
            queries.append(Query(**query_kwargs))
            queries[-1].update(query)  # Make sure we copy across filters (e.g. class =)
            try:
                if and_branch[0] == "LIT":
                    and_branch = ("AND", [and_branch])
                process_and_branch(queries[-1], and_branch)
            except EmptyResultSet:
                # This is a little hacky, but basically: if there is only one branch in the OR and it
                # raises an EmptyResultSet, then we just bail. However, if there is more than one branch,
                # the query might still return something. This logic needs cleaning up and moving to the DNF phase
                if len(self.where[1]) == 1:
                    return NoOpQuery()
                else:
                    queries.pop()

        if not queries:
            return NoOpQuery()

        included_pks = [qry["__key__ ="] for qry in queries if "__key__ =" in qry]
        if len(included_pks) == len(queries):  # If all queries have a key, we can perform a Get
            return QueryByKeys(self.model, queries, ordering)  # Just use whatever query to determine the matches
        else:
            if len(queries) > 1:
                # Disable keys only queries for MultiQuery
                new_queries = []
                for i, query in enumerate(queries):
                    if i > 30:
                        raise NotSupportedError(
                            "Too many subqueries (max: 30, got {}). Probably caused by too many IN/!= filters".format(
                                len(queries)
                            )
                        )

                    qry = Query(query._Query__kind, projection=query._Query__query_options.projection)
                    qry.update(query)
                    try:
                        qry.Order(*ordering)
                    except datastore_errors.BadArgumentError as e:
                        raise NotSupportedError(e)

                    new_queries.append(qry)

                query = datastore.MultiQuery(new_queries, ordering)
            else:
                query = queries[0]
                try:
                    query.Order(*ordering)
                except datastore_errors.BadArgumentError as e:
                    raise NotSupportedError(e)
    else:
        try:
            query.Order(*ordering)
        except datastore_errors.BadArgumentError as e:
            raise NotSupportedError(e)

    # If the resulting query was unique, then wrap as a unique query which
    # will hit the cache first
    unique_identifier = query_is_unique(self.model, query)
    if unique_identifier:
        return UniqueQuery(unique_identifier, query, self.model)

    DJANGAE_LOG.debug("Select query: {0}, {1}".format(self.model.__name__, self.where))

    return query
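# Illustrative only: the self.where shape the function above consumes, i.e. a
# disjunctive-normal-form tree of ('OR', [branches]) where each branch is an
# ('AND', [...]) of ('LIT', (column, op, value)) leaves. A queryset filter such
# as filter(a=1) | filter(b__gt=2, c=3) would arrive roughly as:
where = (
    'OR', [
        ('AND', [('LIT', ('a', '=', 1))]),
        ('AND', [('LIT', ('b', '>', 2)), ('LIT', ('c', '=', 3))]),
    ]
)
# A lone literal can also arrive bare, which is why the caller wraps it with
# and_branch = ("AND", [and_branch]) when and_branch[0] == "LIT".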
def execute(self):
    if self.has_pk and not has_concrete_parents(self.model):
        results = []
        # We are inserting, but we specified an ID, we need to check for existence before we Put()
        # We do it in a loop so each check/put is transactional - because it's an ancestor query it shouldn't
        # cost any entity groups

        was_in_transaction = datastore.IsInTransaction()
        for key, ent in zip(self.included_keys, self.entities):
            @db.transactional
            def txn():
                if key is not None:
                    if utils.key_exists(key):
                        raise IntegrityError("Tried to INSERT with existing key")

                    id_or_name = key.id_or_name()
                    if isinstance(id_or_name, basestring) and id_or_name.startswith("__"):
                        raise NotSupportedError("Datastore ids cannot start with __. Id was %s" % id_or_name)

                if not constraints.constraint_checks_enabled(self.model):
                    # Fast path, just insert
                    results.append(datastore.Put(ent))
                else:
                    markers = constraints.acquire(self.model, ent)
                    try:
                        results.append(datastore.Put(ent))
                        if not was_in_transaction:
                            # We can cache if we weren't in a transaction before this little nested one
                            caching.add_entity_to_cache(self.model, ent, caching.CachingSituation.DATASTORE_GET_PUT)
                    except:
                        # Make sure we delete any created markers before we re-raise
                        constraints.release_markers(markers)
                        raise

                # Make sure we notify app engine that we are using this ID
                # FIXME: Copy ancestor across to the template key
                reserve_id(key.kind(), key.id_or_name())

            txn()

        return results
    else:
        if not constraints.constraint_checks_enabled(self.model):
            # Fast path, just bulk insert
            results = datastore.Put(self.entities)
            for entity in self.entities:
                caching.add_entity_to_cache(self.model, entity, caching.CachingSituation.DATASTORE_PUT)
            return results
        else:
            markers = []
            try:
                # FIXME: We should rearrange this so that each entity is handled individually like above. We'll
                # lose insert performance, but gain consistency on errors which is more important
                markers = constraints.acquire_bulk(self.model, self.entities)
                results = datastore.Put(self.entities)
                for entity in self.entities:
                    caching.add_entity_to_cache(self.model, entity, caching.CachingSituation.DATASTORE_PUT)
            except:
                to_delete = chain(*markers)
                constraints.release_markers(to_delete)
                raise

            for ent, k, m in zip(self.entities, results, markers):
                ent.__key = k
                constraints.update_instance_on_markers(ent, m)

            return results
def _build_gae_query(self):
    """ Build and return the Datastore Query object. """
    combined_filters = []

    query_kwargs = {}

    if self.keys_only:
        query_kwargs["keys_only"] = self.keys_only
    elif self.projection:
        query_kwargs["projection"] = self.projection

    query = Query(
        self.db_table,
        **query_kwargs
    )

    if has_concrete_parents(self.model) and not self.model._meta.proxy:
        query["class ="] = self.model._meta.db_table

    DJANGAE_LOG.debug("Select query: {0}, {1}".format(self.model.__name__, self.where))

    for column, op, value in self.where:
        if column == self.pk_col:
            column = "__key__"

        final_op = OPERATORS_MAP.get(op)
        if final_op is None:
            if op in REQUIRES_SPECIAL_INDEXES:
                add_special_index(self.model, column, op)  # Add the index if we can (e.g. on dev_appserver)
                if op not in special_indexes_for_column(self.model, column):
                    raise RuntimeError("There is a missing index in your djangaeidx.yaml - \n\n{0}:\n\t{1}: [{2}]".format(
                        self.model, column, op)
                    )

                indexer = REQUIRES_SPECIAL_INDEXES[op]
                column = indexer.indexed_column_name(column)
                value = indexer.prep_value_for_query(value)
                query["%s =" % column] = value
            else:
                if op == "in":
                    combined_filters.append((column, op, value))
                elif op == "gt_and_lt":
                    combined_filters.append((column, op, value))
                elif op == "isnull":
                    query["%s =" % column] = None
                elif op == "startswith":
                    # You can emulate startswith by appending the last unicode char
                    # to the value, then doing <=. Genius.
                    query["%s >=" % column] = value
                    if isinstance(value, str):
                        value = value.decode("utf-8")
                    value += u'\ufffd'
                    query["%s <=" % column] = value
                else:
                    raise NotImplementedError("Unimplemented operator {0}".format(op))
        else:
            query["%s %s" % (column, final_op)] = value

    ordering = []
    for order in self.ordering:
        if isinstance(order, int):
            direction = datastore.Query.ASCENDING if order == 1 else datastore.Query.DESCENDING
            order = self.queried_fields[0]
        else:
            direction = datastore.Query.DESCENDING if order.startswith("-") else datastore.Query.ASCENDING
            order = order.lstrip("-")

        if order == self.model._meta.pk.column:
            order = "__key__"

        ordering.append((order, direction))

    if combined_filters:
        queries = [query]
        for column, op, value in combined_filters:
            new_queries = []
            for query in queries:
                if op == "in":
                    for val in value:
                        new_query = datastore.Query(self.model._meta.db_table)
                        new_query.update(query)
                        new_query["%s =" % column] = val
                        new_queries.append(new_query)
                elif op == "gt_and_lt":
                    for tmp_op in ("<", ">"):
                        new_query = datastore.Query(self.model._meta.db_table)
                        new_query.update(query)
                        new_query["%s %s" % (column, tmp_op)] = value
                        new_queries.append(new_query)
            queries = new_queries

        query = datastore.MultiQuery(queries, ordering)
    elif ordering:
        query.Order(*ordering)

    return query
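# A standalone sketch of the startswith emulation used above: a prefix filter
# maps onto a closed range, relying on u'\ufffd' sorting after the characters
# that appear in stored values. "myapp_person" and "name" are hypothetical.
from google.appengine.api import datastore

q = datastore.Query("myapp_person")
prefix = u"Jo"
q["name >="] = prefix
q["name <="] = prefix + u'\ufffd'
# q now matches u"Jo", u"John", u"Joanna", ... i.e. name.startswith(prefix)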
def execute(self):
    check_existence = self.has_pk and not has_concrete_parents(self.model)

    if not constraints.has_active_unique_constraints(self.model) and not check_existence:
        # Fast path, no constraint checks and no keys mean we can just do a normal datastore.Put
        # which isn't limited to 25
        results = datastore.Put(self.entities)  # This modifies self.entities and sets their keys
        caching.add_entities_to_cache(
            self.model,
            self.entities,
            caching.CachingSituation.DATASTORE_GET_PUT,
            self.namespace,
            skip_memcache=True
        )
        return results

    def insert_chunk(keys, entities):
        # Note that this is limited to a maximum of 25 entities.
        markers = []

        @db.transactional(xg=len(entities) > 1)
        def txn():
            for key in keys:
                if check_existence and key is not None:
                    if utils.key_exists(key):
                        raise IntegrityError("Tried to INSERT with existing key")

                    id_or_name = key.id_or_name()
                    if isinstance(id_or_name, basestring) and id_or_name.startswith("__"):
                        raise NotSupportedError("Datastore ids cannot start with __. Id was %s" % id_or_name)

                    # Notify App Engine of any keys we're specifying intentionally
                    reserve_id(key.kind(), key.id_or_name(), self.namespace)

            results = datastore.Put(entities)

            for entity in entities:
                markers.extend(constraints.acquire(self.model, entity))

            caching.add_entities_to_cache(
                self.model,
                entities,
                caching.CachingSituation.DATASTORE_GET_PUT,
                self.namespace,
                skip_memcache=True
            )

            return results

        try:
            return txn()
        except:
            # There are 3 possible reasons why we've ended up here:
            # 1. The datastore.Put() failed, but note that because it's a transaction, the
            #    exception isn't raised until the END of the transaction block.
            # 2. Some of the markers were acquired, but then we hit a unique constraint
            #    conflict and so the outer transaction was rolled back.
            # 3. Something else went wrong after we'd acquired markers, e.g. the
            #    caching.add_entities_to_cache call got hit by a metaphorical bus.
            # In any of these cases, we (may) have acquired markers via (a) nested, independent
            # transaction(s), and so we need to release them again.
            constraints.release_markers(markers)
            raise

    # We can't really support this and maintain expected behaviour. If we chunked the insert and one of the
    # chunks fails it will mean some of the data would be saved, and rather than trying to communicate that back
    # to the user it's better that they chunk the data themselves as they can deal with the failure better
    if len(self.entities) > datastore_stub_util._MAX_EG_PER_TXN:
        raise BulkInsertError(
            "Bulk inserts with unique constraints, or pre-defined keys are limited to {} instances on the datastore".format(
                datastore_stub_util._MAX_EG_PER_TXN
            )
        )

    return insert_chunk(self.included_keys, self.entities)
def execute(self):
    check_existence = self.has_pk and not has_concrete_parents(self.model)

    def perform_insert(entities):
        results = []
        for primary, descendents in entities:
            new_key = rpc.Put(primary)
            if descendents:
                for i, descendent in enumerate(descendents):
                    descendents[i] = Entity(
                        descendent.kind(),
                        parent=new_key,
                        namespace=new_key.namespace(),
                        id=descendent.key().id() or None,
                        name=descendent.key().name() or None
                    )
                    descendents[i].update(descendent)

                rpc.Put(descendents)
            results.append(new_key)
        return results

    if not constraints.has_active_unique_constraints(self.model) and not check_existence:
        # Fast path, no constraint checks and no keys mean we can just do a normal rpc.Put
        # which isn't limited to 25
        results = perform_insert(self.entities)  # This modifies self.entities and sets their keys
        caching.add_entities_to_cache(
            self.model,
            [x[0] for x in self.entities],
            caching.CachingSituation.DATASTORE_GET_PUT,
            self.namespace,
            skip_memcache=True
        )
        return results

    entity_group_count = len(self.entities)

    def insert_chunk(keys, entities):
        # Note that this is limited to a maximum of 25 entities.
        markers = []

        @db.transactional(xg=entity_group_count > 1)
        def txn():
            for key in keys:
                if check_existence and key is not None:
                    if utils.key_exists(key):
                        raise IntegrityError("Tried to INSERT with existing key")

                    id_or_name = key.id_or_name()
                    if isinstance(id_or_name, six.string_types) and id_or_name.startswith("__"):
                        raise NotSupportedError("Datastore ids cannot start with __. Id was %s" % id_or_name)

                    # Notify App Engine of any keys we're specifying intentionally
                    reserve_id(key.kind(), key.id_or_name(), self.namespace)

            results = perform_insert(entities)

            for entity, _ in entities:
                markers.extend(constraints.acquire(self.model, entity))

            caching.add_entities_to_cache(
                self.model,
                [x[0] for x in entities],
                caching.CachingSituation.DATASTORE_GET_PUT,
                self.namespace,
                skip_memcache=True
            )

            return results

        try:
            return txn()
        except:
            # There are 3 possible reasons why we've ended up here:
            # 1. The rpc.Put() failed, but note that because it's a transaction, the
            #    exception isn't raised until the END of the transaction block.
            # 2. Some of the markers were acquired, but then we hit a unique constraint
            #    conflict and so the outer transaction was rolled back.
            # 3. Something else went wrong after we'd acquired markers, e.g. the
            #    caching.add_entities_to_cache call got hit by a metaphorical bus.
            # In any of these cases, we (may) have acquired markers via (a) nested, independent
            # transaction(s), and so we need to release them again.
            constraints.release_markers(markers)
            raise

    # We can't really support this and maintain expected behaviour. If we chunked the insert and one of the
    # chunks fails it will mean some of the data would be saved, and rather than trying to communicate that back
    # to the user it's better that they chunk the data themselves as they can deal with the failure better
    if entity_group_count > datastore_stub_util._MAX_EG_PER_TXN:
        raise BulkInsertError(
            "Bulk inserts with unique constraints, or pre-defined keys are limited to {} instances on the datastore".format(
                datastore_stub_util._MAX_EG_PER_TXN
            )
        )

    return insert_chunk(self.included_keys, self.entities)
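# A minimal sketch of the caller-side chunking that the comment above
# recommends when more than datastore_stub_util._MAX_EG_PER_TXN (25) instances
# carry unique constraints or pre-defined keys. `MyModel` and `instances` are
# hypothetical; each bulk_create call then stays under the transaction limit,
# and the caller decides what to do if an individual chunk fails.
CHUNK_SIZE = 25  # datastore_stub_util._MAX_EG_PER_TXN at the time of writing

for start in range(0, len(instances), CHUNK_SIZE):
    MyModel.objects.bulk_create(instances[start:start + CHUNK_SIZE])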
def _build_gae_query(self):
    """ Build and return the Datastore Query object. """
    query_kwargs = {
        "kind": str(self.db_table)
    }

    if self.distinct:
        query_kwargs["distinct"] = True

    if self.keys_only:
        query_kwargs["keys_only"] = self.keys_only
    elif self.projection:
        query_kwargs["projection"] = self.projection

    query = Query(
        **query_kwargs
    )

    if has_concrete_parents(self.model) and not self.model._meta.proxy:
        query["class ="] = self.model._meta.db_table

    ordering = []
    for order in self.ordering:
        if isinstance(order, int):
            direction = datastore.Query.ASCENDING if order == 1 else datastore.Query.DESCENDING
            order = self.queried_fields[0]
        else:
            direction = datastore.Query.DESCENDING if order.startswith("-") else datastore.Query.ASCENDING
            order = order.lstrip("-")

        if order == self.model._meta.pk.column or order == "pk":
            order = "__key__"

        ordering.append((order, direction))

    def process_and_branch(query, and_branch):
        for column, op, value in and_branch[-1]:
            if column == self.pk_col:
                column = "__key__"

                # FIXME: This EmptyResultSet check should happen during normalization so that Django doesn't count it as a query
                if op == "=" and "__key__ =" in query:
                    # We've already done an exact lookup on a key, this query can't return anything!
                    raise EmptyResultSet()

                if not isinstance(value, datastore.Key):
                    value = get_datastore_key(self.model, value)

            key = "%s %s" % (column, op)
            if key in query:
                query[key] = [query[key], value]
            else:
                query[key] = value

    if self.where:
        queries = []

        # If there is a single filter, we pretend it's an OR with only one branch
        # just so that the code below is simpler
        if isinstance(self.where, tuple) and len(self.where) == 3:
            self.where = ('OR', [(u'AND', [self.where])])
        elif isinstance(self.where, tuple) and self.where[0] == 'AND':
            self.where = ('OR', [self.where])
        elif isinstance(self.where, tuple) and self.where[0] == 'OR' and isinstance(self.where[1][0], tuple) and self.where[1][0][0] != 'AND':
            self.where = ('OR', [('AND', [x]) for x in self.where[-1]])

        operator = self.where[0]
        assert operator == 'OR'

        # print query._Query__kind, self.where
        for and_branch in self.where[1]:
            # Duplicate the query for all the "OR"s
            queries.append(Query(**query_kwargs))
            queries[-1].update(query)  # Make sure we copy across filters (e.g. class =)
            try:
                process_and_branch(queries[-1], and_branch)
            except EmptyResultSet:
                return NoOpQuery()

        def all_queries_same_except_key(_queries):
            """
                Returns True if all queries in the list filter on the same thing
                except for "__key__ =". Determines whether we can do a Get, basically.
            """
            test = _queries[0]

            for qry in _queries:
                if "__key__ =" not in qry.keys():
                    return False

                if qry._Query__kind != test._Query__kind:
                    return False

                if qry.keys() != test.keys():
                    return False

                for k, v in qry.items():
                    if k.startswith("__key__"):
                        continue

                    if v != test[k]:
                        return False
            return True

        if all_queries_same_except_key(queries):
            included_pks = [qry["__key__ ="] for qry in queries]
            return QueryByKeys(queries[0], included_pks, ordering)  # Just use whatever query to determine the matches
        else:
            if len(queries) > 1:
                # Disable keys only queries for MultiQuery
                new_queries = []
                for query in queries:
                    qry = Query(query._Query__kind, projection=query._Query__query_options.projection)
                    qry.update(query)
                    new_queries.append(qry)

                query = datastore.MultiQuery(new_queries, ordering)
            else:
                query = queries[0]
                query.Order(*ordering)
    else:
        query.Order(*ordering)

    # If the resulting query was unique, then wrap as a unique query which
    # will hit the cache first
    unique_identifier = query_is_unique(self.model, query)
    if unique_identifier:
        return UniqueQuery(unique_identifier, query, self.model)

    DJANGAE_LOG.debug("Select query: {0}, {1}".format(self.model.__name__, self.where))

    return query
def execute(self):
    if self.has_pk and not has_concrete_parents(self.model):
        results = []
        # We are inserting, but we specified an ID, we need to check for existence before we Put()
        # We do it in a loop so each check/put is transactional - because it's an ancestor query it shouldn't
        # cost any entity groups
        for key, ent in zip(self.included_keys, self.entities):
            @db.transactional
            def txn():
                if key is not None:
                    if utils.key_exists(key):
                        raise IntegrityError("Tried to INSERT with existing key")

                    id_or_name = key.id_or_name()
                    if isinstance(id_or_name, basestring) and id_or_name.startswith("__"):
                        raise NotSupportedError("Datastore ids cannot start with __. Id was %s" % id_or_name)

                if not constraints.constraint_checks_enabled(self.model):
                    # Fast path, just insert
                    results.append(datastore.Put(ent))
                else:
                    markers = constraints.acquire(self.model, ent)
                    try:
                        results.append(datastore.Put(ent))
                        caching.add_entity_to_context_cache(self.model, ent)
                    except:
                        # Make sure we delete any created markers before we re-raise
                        constraints.release_markers(markers)
                        raise

                # Make sure we notify app engine that we are using this ID
                # FIXME: Copy ancestor across to the template key
                id_or_name = key.id_or_name()
                if isinstance(id_or_name, (int, long)):
                    try:
                        db.allocate_id_range(datastore.Key.from_path(key.kind(), 1), id_or_name, id_or_name)
                    except:
                        # We don't re-raise because it's not terminal, but if this happens we need to know why
                        logging.exception("An error occurred when notifying app engine that an ID has been used. Please report.")

            txn()

        return results
    else:
        if not constraints.constraint_checks_enabled(self.model):
            # Fast path, just bulk insert
            results = datastore.Put(self.entities)
            for entity in self.entities:
                caching.add_entity_to_context_cache(self.model, entity)
            return results
        else:
            markers = []
            try:
                # FIXME: We should rearrange this so that each entity is handled individually like above. We'll
                # lose insert performance, but gain consistency on errors which is more important
                markers = constraints.acquire_bulk(self.model, self.entities)
                results = datastore.Put(self.entities)
                for entity in self.entities:
                    caching.add_entity_to_context_cache(self.model, entity)
            except:
                to_delete = chain(*markers)
                constraints.release_markers(to_delete)
                raise

            for ent, m in zip(self.entities, markers):
                constraints.update_instance_on_markers(ent, m)

            return results