def execute(self):
    check_existence = self.has_pk and not has_concrete_parents(self.model)

    if not constraints.has_active_unique_constraints(self.model) and not check_existence:
        # Fast path, no constraint checks and no keys mean we can just do a normal
        # datastore.Put which isn't limited to 25
        results = datastore.Put(self.entities)  # This modifies self.entities and sets their keys

        caching.add_entities_to_cache(
            self.model,
            self.entities,
            caching.CachingSituation.DATASTORE_GET_PUT,
            self.namespace,
            skip_memcache=True
        )
        return results

    def insert_chunk(keys, entities):
        # Note that this is limited to a maximum of 25 entities.
        markers = []

        @db.transactional(xg=len(entities) > 1)
        def txn():
            for key in keys:
                if check_existence and key is not None:
                    if utils.key_exists(key):
                        raise IntegrityError("Tried to INSERT with existing key")

                    id_or_name = key.id_or_name()
                    if isinstance(id_or_name, basestring) and id_or_name.startswith("__"):
                        raise NotSupportedError("Datastore ids cannot start with __. Id was %s" % id_or_name)

                    # Notify App Engine of any keys we're specifying intentionally
                    reserve_id(key.kind(), key.id_or_name(), self.namespace)

            results = datastore.Put(entities)

            for entity in entities:
                markers.extend(constraints.acquire(self.model, entity))

            caching.add_entities_to_cache(
                self.model,
                entities,
                caching.CachingSituation.DATASTORE_GET_PUT,
                self.namespace,
                skip_memcache=True
            )
            return results

        try:
            return txn()
        except:
            # There are 3 possible reasons why we've ended up here:
            # 1. The datastore.Put() failed, but note that because it's a transaction, the
            #    exception isn't raised until the END of the transaction block.
            # 2. Some of the markers were acquired, but then we hit a unique constraint
            #    conflict and so the outer transaction was rolled back.
            # 3. Something else went wrong after we'd acquired markers, e.g. the
            #    caching.add_entities_to_cache call got hit by a metaphorical bus.
            # In any of these cases, we (may) have acquired markers via (a) nested,
            # independent transaction(s), and so we need to release them again.
            constraints.release_markers(markers)
            raise

    # We can't really support this and maintain expected behaviour. If we chunked the
    # insert and one of the chunks failed, it would mean some of the data was saved.
    # Rather than trying to communicate that back to the user, it's better that they
    # chunk the data themselves, as they can handle the failure better.
    if len(self.entities) > datastore_stub_util._MAX_EG_PER_TXN:
        raise BulkInsertError(
            "Bulk inserts with unique constraints, or pre-defined keys, are limited to "
            "{} instances on the datastore".format(datastore_stub_util._MAX_EG_PER_TXN)
        )

    return insert_chunk(self.included_keys, self.entities)

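
# The comment above BulkInsertError recommends caller-side chunking. A rough,
# hypothetical sketch of what that looks like from user code (the helper name is
# ours, not part of this module): plain Django bulk_create per chunk keeps each
# insert within the transaction's entity-group limit.
def chunked_bulk_create(model, instances, chunk_size=25):  # 25 mirrors _MAX_EG_PER_TXN
    """Create `instances` in chunks; on failure, earlier chunks stay committed and
    the caller can tell how far it got from the partial return value."""
    created = []
    for start in range(0, len(instances), chunk_size):
        created.extend(model.objects.bulk_create(instances[start:start + chunk_size]))
    return created
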
def execute(self): """ Ideally we'd just be able to tell appengine to delete all the entities which match the query, that would be nice wouldn't it? Except we can't. Firstly Delete() only accepts keys so we first have to execute a keys_only query to find the entities that match the query, then send those keys to Delete(), except it's not as easy as that either because the query might be eventually consistent and so we might delete entities which were updated in another request and no-longer match the query. Bugger. And then there might be constraints... in which case we need to grab the entity in its entirety, release any constraints and then delete the entity. And then there are polymodels (model inheritence) which means we might not even be deleting the entity after all, only deleting some of the fields from it. What we do then is do a keys_only query, then iterate the entities in batches of 25 (well _MAX_EG_PER_TXN), each entity in the batch has its polymodel fields wiped out (if necessary) and then we do either a PutAsync or DeleteAsync all inside a transaction. Oh, and we wipe out memcache and delete the constraints in an independent transaction. Things to improve: - Delete the constraints in a background thread. We don't need to wait for them, and really, we don't want the non-deletion of them to affect the deletion of the entity. Lingering markers are handled automatically they just case a small performance hit on write. - Check the entity matches the query still (there's a fixme there) """ self.select.execute() constraints_enabled = constraints.has_active_unique_constraints(self.model) keys = [x.key() for x in self.select.results] def wipe_polymodel_from_entity(entity, db_table): """ Wipes out the fields associated with the specified polymodel table """ polymodel_value = entity.get('class', []) if polymodel_value and self.table_to_delete in polymodel_value: # Remove any local fields from this model from the entity model = utils.get_model_from_db_table(self.table_to_delete) for field in model._meta.local_fields: col = field.column if col in entity: del entity[col] # Then remove this model from the polymodel heirarchy polymodel_value.remove(self.table_to_delete) if polymodel_value: entity['class'] = polymodel_value @db.transactional(xg=True) def delete_batch(key_slice): entities = datastore.Get(key_slice) #FIXME: We need to make sure the entity still matches the query! # entities = (x for x in entities if utils.entity_matches_query(x, self.select.gae_query)) to_delete = [] to_update = [] updated_keys = [] # Go through the entities for entity in entities: if entity is None: continue wipe_polymodel_from_entity(entity, self.table_to_delete) if not entity.get('class'): to_delete.append(entity) constraints.release(self.model, entity) else: to_update.append(entity) updated_keys.append(entity.key()) datastore.DeleteAsync([x.key() for x in to_delete]) datastore.PutAsync(to_update) caching.remove_entities_from_cache_by_key( updated_keys, self.namespace ) return len(updated_keys) deleted = 0 while keys: deleted += delete_batch(keys[:datastore_stub_util._MAX_EG_PER_TXN]) keys = keys[datastore_stub_util._MAX_EG_PER_TXN:] return deleted
def txn():
    caching.remove_entities_from_cache_by_key([key], self.namespace)

    try:
        result = datastore.Get(key)
    except datastore_errors.EntityNotFoundError:
        # Return false to indicate update failure
        return False

    if (
        isinstance(self.select.gae_query, (Query, UniqueQuery))  # ignore QueryByKeys and NoOpQuery
        and not utils.entity_matches_query(result, self.select.gae_query)
    ):
        # Due to eventual consistency the query may have returned an entity which
        # no longer matches the query
        return False

    original = copy.deepcopy(result)

    instance_kwargs = {field.attname: value for field, param, value in self.values}

    # Note: If you replace MockInstance with self.model, you'll find that some delete
    # tests fail in the test app. This is because any unspecified fields would then call
    # get_default (even though we aren't going to use them) which may run a query which
    # fails inside this transaction. Given that we are only using MockInstance so that
    # we can call django_instance_to_entity on it with the subset of fields we pass in,
    # what we have is fine.
    meta = self.model._meta
    instance = MockInstance(
        _original=MockInstance(_meta=meta, **result),
        _meta=meta,
        **instance_kwargs
    )

    # We need to add to the class attribute, rather than replace it!
    original_class = result.get(POLYMODEL_CLASS_ATTRIBUTE, [])

    # Update the entity we read above with the new values
    result.update(django_instance_to_entity(
        self.connection,
        self.model,
        [x[0] for x in self.values],  # Pass in the fields that were updated
        True,
        instance
    ))

    # Make sure we keep all classes in the inheritance tree!
    if original_class:
        if result[POLYMODEL_CLASS_ATTRIBUTE] is not None:
            result[POLYMODEL_CLASS_ATTRIBUTE].extend(original_class)
            # Make sure we don't add duplicates
        else:
            result[POLYMODEL_CLASS_ATTRIBUTE] = original_class

    if POLYMODEL_CLASS_ATTRIBUTE in result:
        result[POLYMODEL_CLASS_ATTRIBUTE] = list(set(result[POLYMODEL_CLASS_ATTRIBUTE]))

    if not constraints.has_active_unique_constraints(self.model):
        # The fast path, no constraint checking
        datastore.Put(result)
        caching.add_entities_to_cache(
            self.model,
            [result],
            caching.CachingSituation.DATASTORE_PUT,
            self.namespace,
            skip_memcache=True,
        )
    else:
        markers_to_acquire[:], markers_to_release[:] = constraints.get_markers_for_update(
            self.model, original, result
        )
        datastore.Put(result)

        constraints.update_identifiers(markers_to_acquire, markers_to_release, result.key())

        # If the datastore.Put() fails then the exception will only be raised when the
        # transaction applies, which means that we will still get to here and will still
        # have applied the marker changes (because they're in a nested, independent
        # transaction). Hence we set this flag to tell us that we got this far and that
        # we should roll them back.
        rollback_markers[0] = True

        # If something dies between here and the `return` statement then we'll have
        # stale unique markers

        try:
            # Update the cache before dealing with unique markers, as
            # CachingSituation.DATASTORE_PUT will only update the context cache
            caching.add_entities_to_cache(
                self.model,
                [result],
                caching.CachingSituation.DATASTORE_PUT,
                self.namespace,
                skip_memcache=True,
            )
        except:
            # We ignore the exception because raising would roll back the transaction,
            # causing an inconsistent state
            logger.exception("Unable to update the context cache")

    # Return true to indicate update success
    return True

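
# The class-attribute merging above amounts to a set union stored back as a list.
# A standalone sketch with made-up class names (note that set() does not preserve
# the order of the hierarchy, which the real code also accepts):
def _demo_polymodel_merge():
    original_class = ['app_a', 'app_b']  # what the stored entity had
    updated_class = ['app_b', 'app_c']   # what django_instance_to_entity wrote back
    updated_class.extend(original_class)  # add to the class attribute, don't replace it
    merged = list(set(updated_class))     # dedupe, at the cost of ordering
    assert set(merged) == {'app_a', 'app_b', 'app_c'}
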
def txn():
    caching.remove_entities_from_cache_by_key([key], self.namespace)

    try:
        result = rpc.Get(key)
    except datastore_errors.EntityNotFoundError:
        # Return false to indicate update failure
        return False

    if (
        isinstance(self.select.gae_query, (Query, meta_queries.UniqueQuery))  # ignore QueryByKeys and NoOpQuery
        and not utils.entity_matches_query(result, self.select.gae_query)
    ):
        # Due to eventual consistency the query may have returned an entity which
        # no longer matches the query
        return False

    original = copy.deepcopy(result)

    instance_kwargs = {field.attname: value for field, param, value in self.values}

    # Note: If you replace MockInstance with self.model, you'll find that some delete
    # tests fail in the test app. This is because any unspecified fields would then call
    # get_default (even though we aren't going to use them) which may run a query which
    # fails inside this transaction. Given that we are only using MockInstance so that
    # we can call django_instance_to_entities on it with the subset of fields we pass
    # in, what we have is fine.
    meta = self.model._meta
    instance = MockInstance(
        _original=MockInstance(_meta=meta, **result),
        _meta=meta,
        **instance_kwargs
    )

    # Convert the instance to an entity
    primary, descendents = django_instance_to_entities(
        self.connection,
        [x[0] for x in self.values],  # Pass in the fields that were updated
        True,
        instance,
        model=self.model
    )

    # Update the entity we read above with the new values
    result.update(primary)

    # Remove fields which have been marked to be unindexed
    for col in getattr(primary, "_properties_to_remove", []):
        if col in result:
            del result[col]

    # Make sure that any polymodel classes which were in the original entity are kept,
    # as django_instance_to_entities may have wiped them as well as added them.
    polymodel_classes = list(set(
        original.get(POLYMODEL_CLASS_ATTRIBUTE, []) +
        result.get(POLYMODEL_CLASS_ATTRIBUTE, [])
    ))
    if polymodel_classes:
        result[POLYMODEL_CLASS_ATTRIBUTE] = polymodel_classes

    def perform_insert():
        """
            Inserts result, and any descendents with their ancestor value set
        """
        inserted_key = rpc.Put(result)

        if descendents:
            for i, descendent in enumerate(descendents):
                descendents[i] = Entity(
                    descendent.kind(),
                    parent=inserted_key,
                    namespace=inserted_key.namespace(),
                    id=descendent.key().id() or None,
                    name=descendent.key().name() or None
                )
                descendents[i].update(descendent)
            rpc.Put(descendents)

    if not constraints.has_active_unique_constraints(self.model):
        # The fast path, no constraint checking
        perform_insert()
        caching.add_entities_to_cache(
            self.model,
            [result],
            caching.CachingSituation.DATASTORE_PUT,
            self.namespace,
            skip_memcache=True,
        )
    else:
        markers_to_acquire[:], markers_to_release[:] = constraints.get_markers_for_update(
            self.model, original, result
        )
        perform_insert()

        constraints.update_identifiers(markers_to_acquire, markers_to_release, result.key())

        # If the rpc.Put() fails then the exception will only be raised when the
        # transaction applies, which means that we will still get to here and will still
        # have applied the marker changes (because they're in a nested, independent
        # transaction). Hence we set this flag to tell us that we got this far and that
        # we should roll them back.
        rollback_markers[0] = True

        # If something dies between here and the `return` statement then we'll have
        # stale unique markers

        try:
            # Update the cache before dealing with unique markers, as
            # CachingSituation.DATASTORE_PUT will only update the context cache
            caching.add_entities_to_cache(
                self.model,
                [result],
                caching.CachingSituation.DATASTORE_PUT,
                self.namespace,
                skip_memcache=True,
            )
        except:
            # We ignore the exception because raising would roll back the transaction,
            # causing an inconsistent state
            logger.exception("Unable to update the context cache")

    # Return true to indicate update success
    return True

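
# The descendent handling in perform_insert() above rebuilds each entity rather than
# mutating it, because a datastore Entity's key (and therefore its ancestor) is fixed
# at construction time. A condensed sketch of that step, assuming the Python 2 App
# Engine SDK is on the path; the helper name is ours, not the module's:
from google.appengine.api.datastore import Entity

def _reparent(descendent, parent_key):
    """Return a copy of `descendent` keyed as a child of `parent_key`."""
    rebuilt = Entity(
        descendent.kind(),
        parent=parent_key,
        namespace=parent_key.namespace(),
        id=descendent.key().id() or None,      # keep a numeric id if one was assigned
        name=descendent.key().name() or None,  # or a string name, never both
    )
    rebuilt.update(descendent)  # Entity is dict-like, so this copies the properties
    return rebuilt
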
def execute(self): """ Ideally we'd just be able to tell appengine to delete all the entities which match the query, that would be nice wouldn't it? Except we can't. Firstly Delete() only accepts keys so we first have to execute a keys_only query to find the entities that match the query, then send those keys to Delete(), except it's not as easy as that either because the query might be eventually consistent and so we might delete entities which were updated in another request and no-longer match the query. Bugger. And then there might be constraints... in which case we need to grab the entity in its entirety, release any constraints and then delete the entity. And then there are polymodels (model inheritence) which means we might not even be deleting the entity after all, only deleting some of the fields from it. What we do then is do a keys_only query, then iterate the entities in batches of 25 (well _MAX_EG_PER_TXN), each entity in the batch has its polymodel fields wiped out (if necessary) and then we do either a PutAsync or DeleteAsync all inside a transaction. Oh, and we wipe out memcache and delete the constraints in an independent transaction. Things to improve: - Delete the constraints in a background thread. We don't need to wait for them, and really, we don't want the non-deletion of them to affect the deletion of the entity. Lingering markers are handled automatically they just case a small performance hit on write. - Check the entity matches the query still (there's a fixme there) """ from djangae.db.backends.appengine.indexing import indexers_for_model self.select.execute() constraints_enabled = constraints.has_active_unique_constraints(self.model) keys = [x.key() for x in self.select.results] def wipe_polymodel_from_entity(entity, db_table): """ Wipes out the fields associated with the specified polymodel table """ polymodel_value = entity.get('class', []) if polymodel_value and self.table_to_delete in polymodel_value: # Remove any local fields from this model from the entity model = utils.get_model_from_db_table(self.table_to_delete) for field in model._meta.local_fields: col = field.column if col in entity: del entity[col] # Then remove this model from the polymodel heirarchy polymodel_value.remove(self.table_to_delete) if polymodel_value: entity['class'] = polymodel_value @db.transactional(xg=True) def delete_batch(key_slice): entities = rpc.Get(key_slice) # FIXME: We need to make sure the entity still matches the query! # entities = (x for x in entities if utils.entity_matches_query(x, self.select.gae_query)) to_delete = [] to_update = [] updated_keys = [] # Go through the entities for entity in entities: if entity is None: continue wipe_polymodel_from_entity(entity, self.table_to_delete) if not entity.get('class'): to_delete.append(entity.key()) if constraints_enabled: constraints.release(self.model, entity) else: to_update.append(entity) updated_keys.append(entity.key()) rpc.DeleteAsync(to_delete) rpc.PutAsync(to_update) # Clean up any special index things that need to be cleaned for indexer in indexers_for_model(self.model): for key in to_delete: indexer.cleanup(key) caching.remove_entities_from_cache_by_key( updated_keys, self.namespace ) return len(updated_keys) deleted = 0 while keys: deleted += delete_batch(keys[:datastore_stub_util._MAX_EG_PER_TXN]) keys = keys[datastore_stub_util._MAX_EG_PER_TXN:] return deleted
def execute(self):
    check_existence = self.has_pk and not has_concrete_parents(self.model)

    def perform_insert(entities):
        results = []
        for primary, descendents in entities:
            new_key = rpc.Put(primary)
            if descendents:
                for i, descendent in enumerate(descendents):
                    descendents[i] = Entity(
                        descendent.kind(),
                        parent=new_key,
                        namespace=new_key.namespace(),
                        id=descendent.key().id() or None,
                        name=descendent.key().name() or None
                    )
                    descendents[i].update(descendent)

                rpc.Put(descendents)
            results.append(new_key)
        return results

    if not constraints.has_active_unique_constraints(self.model) and not check_existence:
        # Fast path, no constraint checks and no keys mean we can just do a normal
        # rpc.Put which isn't limited to 25
        results = perform_insert(self.entities)  # This modifies self.entities and sets their keys

        caching.add_entities_to_cache(
            self.model,
            [x[0] for x in self.entities],
            caching.CachingSituation.DATASTORE_GET_PUT,
            self.namespace,
            skip_memcache=True
        )
        return results

    entity_group_count = len(self.entities)

    def insert_chunk(keys, entities):
        # Note that this is limited to a maximum of 25 entities.
        markers = []

        @db.transactional(xg=entity_group_count > 1)
        def txn():
            for key in keys:
                if check_existence and key is not None:
                    if utils.key_exists(key):
                        raise IntegrityError("Tried to INSERT with existing key")

                    id_or_name = key.id_or_name()
                    if isinstance(id_or_name, six.string_types) and id_or_name.startswith("__"):
                        raise NotSupportedError("Datastore ids cannot start with __. Id was %s" % id_or_name)

                    # Notify App Engine of any keys we're specifying intentionally
                    reserve_id(key.kind(), key.id_or_name(), self.namespace)

            results = perform_insert(entities)

            for entity, _ in entities:
                markers.extend(constraints.acquire(self.model, entity))

            caching.add_entities_to_cache(
                self.model,
                [x[0] for x in entities],
                caching.CachingSituation.DATASTORE_GET_PUT,
                self.namespace,
                skip_memcache=True
            )
            return results

        try:
            return txn()
        except:
            # There are 3 possible reasons why we've ended up here:
            # 1. The rpc.Put() failed, but note that because it's a transaction, the
            #    exception isn't raised until the END of the transaction block.
            # 2. Some of the markers were acquired, but then we hit a unique constraint
            #    conflict and so the outer transaction was rolled back.
            # 3. Something else went wrong after we'd acquired markers, e.g. the
            #    caching.add_entities_to_cache call got hit by a metaphorical bus.
            # In any of these cases, we (may) have acquired markers via (a) nested,
            # independent transaction(s), and so we need to release them again.
            constraints.release_markers(markers)
            raise

    # We can't really support this and maintain expected behaviour. If we chunked the
    # insert and one of the chunks failed, it would mean some of the data was saved.
    # Rather than trying to communicate that back to the user, it's better that they
    # chunk the data themselves, as they can handle the failure better.
    if entity_group_count > datastore_stub_util._MAX_EG_PER_TXN:
        raise BulkInsertError(
            "Bulk inserts with unique constraints, or pre-defined keys, are limited to "
            "{} instances on the datastore".format(datastore_stub_util._MAX_EG_PER_TXN)
        )

    return insert_chunk(self.included_keys, self.entities)

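
# The try/except around txn() above is a compensating-action pattern: the unique
# markers are written in nested *independent* transactions, so rolling back the
# outer transaction doesn't undo them and they have to be released by hand. The
# shape of it, as a generic sketch (the names here are hypothetical):
def _run_with_compensation(action, acquired, release):
    """Run `action`; on any failure, release whatever `action` recorded in `acquired`."""
    try:
        return action()
    except Exception:
        release(acquired)  # undo side effects the transaction rollback can't reach
        raise
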
def txn():
    caching.remove_entities_from_cache_by_key([key], self.namespace)

    try:
        result = datastore.Get(key)
    except datastore_errors.EntityNotFoundError:
        # Return false to indicate update failure
        return False

    if (
        isinstance(self.select.gae_query, (Query, UniqueQuery))  # ignore QueryByKeys and NoOpQuery
        and not utils.entity_matches_query(result, self.select.gae_query)
    ):
        # Due to eventual consistency the query may have returned an entity which
        # no longer matches the query
        return False

    original = copy.deepcopy(result)

    instance_kwargs = {field.attname: value for field, param, value in self.values}

    # Note: If you replace MockInstance with self.model, you'll find that some delete
    # tests fail in the test app. This is because any unspecified fields would then call
    # get_default (even though we aren't going to use them) which may run a query which
    # fails inside this transaction. Given that we are only using MockInstance so that
    # we can call django_instance_to_entity on it with the subset of fields we pass in,
    # what we have is fine.
    meta = self.model._meta
    instance = MockInstance(
        _original=MockInstance(_meta=meta, **result),
        _meta=meta,
        **instance_kwargs
    )

    # We need to add to the class attribute, rather than replace it!
    original_class = result.get(POLYMODEL_CLASS_ATTRIBUTE, [])

    # Update the entity we read above with the new values
    result.update(django_instance_to_entity(
        self.connection,
        self.model,
        [x[0] for x in self.values],  # Pass in the fields that were updated
        True,
        instance
    ))

    # Make sure we keep all classes in the inheritance tree!
    if original_class:
        if result[POLYMODEL_CLASS_ATTRIBUTE] is not None:
            result[POLYMODEL_CLASS_ATTRIBUTE].extend(original_class)
            # Make sure we don't add duplicates
        else:
            result[POLYMODEL_CLASS_ATTRIBUTE] = original_class

    if POLYMODEL_CLASS_ATTRIBUTE in result:
        result[POLYMODEL_CLASS_ATTRIBUTE] = list(set(result[POLYMODEL_CLASS_ATTRIBUTE]))

    if not constraints.has_active_unique_constraints(self.model):
        # The fast path, no constraint checking
        datastore.Put(result)
        caching.add_entities_to_cache(
            self.model,
            [result],
            caching.CachingSituation.DATASTORE_PUT,
            self.namespace,
            skip_memcache=True,
        )
    else:
        markers_to_acquire[:], markers_to_release[:] = constraints.get_markers_for_update(
            self.model, original, result
        )
        datastore.Put(result)

        constraints.update_identifiers(markers_to_acquire, markers_to_release, result.key())

        # If the datastore.Put() fails then the exception will only be raised when the
        # transaction applies, which means that we will still get to here and will still
        # have applied the marker changes (because they're in a nested, independent
        # transaction). Hence we set this flag to tell us that we got this far and that
        # we should roll them back.
        rollback_markers[0] = True

        # If something dies between here and the `return` statement then we'll have
        # stale unique markers

        try:
            # Update the cache before dealing with unique markers, as
            # CachingSituation.DATASTORE_PUT will only update the context cache
            caching.add_entities_to_cache(
                self.model,
                [result],
                caching.CachingSituation.DATASTORE_PUT,
                self.namespace,
                skip_memcache=True,
            )
        except:
            # We ignore the exception because raising would roll back the transaction,
            # causing an inconsistent state
            logging.exception("Unable to update the context cache")

    # Return true to indicate update success
    return True