Example #1
    def execute(self):
        check_existence = self.has_pk and not has_concrete_parents(self.model)

        if not constraints.has_active_unique_constraints(self.model) and not check_existence:
            # Fast path, no constraint checks and no keys mean we can just do a normal datastore.Put
            # which isn't limited to 25
            results = datastore.Put(self.entities) # This modifies self.entities and sets their keys
            caching.add_entities_to_cache(
                self.model,
                self.entities,
                caching.CachingSituation.DATASTORE_GET_PUT,
                self.namespace,
                skip_memcache=True
            )
            return results

        def insert_chunk(keys, entities):
            # Note that this is limited to a maximum of 25 entities.
            markers = []
            @db.transactional(xg=len(entities) > 1)
            def txn():
                for key in keys:
                    if check_existence and key is not None:
                        if utils.key_exists(key):
                            raise IntegrityError("Tried to INSERT with existing key")

                        id_or_name = key.id_or_name()
                        if isinstance(id_or_name, basestring) and id_or_name.startswith("__"):
                            raise NotSupportedError("Datastore ids cannot start with __. Id was %s" % id_or_name)

                        # Notify App Engine of any keys we're specifying intentionally
                        reserve_id(key.kind(), key.id_or_name(), self.namespace)

                results = datastore.Put(entities)
                for entity in entities:
                    markers.extend(constraints.acquire(self.model, entity))

                caching.add_entities_to_cache(
                    self.model,
                    entities,
                    caching.CachingSituation.DATASTORE_GET_PUT,
                    self.namespace,
                    skip_memcache=True
                )

                return results

            try:
                return txn()
            except:
                # There are 3 possible reasons why we've ended up here:
                # 1. The datastore.Put() failed, but note that because it's a transaction, the
                #    exception isn't raised until the END of the transaction block.
                # 2. Some of the markers were acquired, but then we hit a unique constraint
                #    conflict and so the outer transaction was rolled back.
                # 3. Something else went wrong after we'd acquired markers, e.g. the
                #    caching.add_entities_to_cache call got hit by a metaphorical bus.
                # In any of these cases, we (may) have acquired markers via (a) nested, independent
                # transaction(s), and so we need to release them again.
                constraints.release_markers(markers)
                raise

        # We can't really support this and maintain the expected behaviour. If we chunked the insert
        # and one of the chunks failed, some of the data would already be saved; rather than trying to
        # communicate that back to the user, it's better that they chunk the data themselves, as they
        # can handle the failure more appropriately.
        if len(self.entities) > datastore_stub_util._MAX_EG_PER_TXN:
            raise BulkInsertError("Bulk inserts with unique constraints, or pre-defined keys are limited to {} instances on the datastore".format(
                datastore_stub_util._MAX_EG_PER_TXN
            ))

        return insert_chunk(self.included_keys, self.entities)
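The slow path above refuses to insert more than _MAX_EG_PER_TXN entities and asks the caller to chunk the data themselves. A minimal caller-side sketch in plain Python (the save_chunk callback and the hard-coded limit of 25 are our assumptions, mirroring datastore_stub_util._MAX_EG_PER_TXN; they are not part of the snippet above):

MAX_EG_PER_TXN = 25  # assumed to mirror datastore_stub_util._MAX_EG_PER_TXN

def chunked(items, size=MAX_EG_PER_TXN):
    """Yield successive slices of at most `size` items."""
    for start in range(0, len(items), size):
        yield items[start:start + size]

def insert_in_chunks(instances, save_chunk):
    """Call the (hypothetical) save_chunk per slice; a failure only loses that slice."""
    saved = 0
    for chunk in chunked(instances):
        save_chunk(chunk)  # each chunk succeeds or fails independently
        saved += len(chunk)
    return saved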
Example #2
    def execute(self):
        """
            Ideally we'd just be able to tell appengine to delete all the entities
            which match the query, that would be nice wouldn't it?

            Except we can't. Firstly Delete() only accepts keys so we first have to
            execute a keys_only query to find the entities that match the query, then send
            those keys to Delete(), except it's not as easy as that either because the
            query might be eventually consistent and so we might delete entities which
            were updated in another request and no longer match the query. Bugger.

            And then there might be constraints... in which case we need to grab the entity
            in its entirety, release any constraints and then delete the entity.

            And then there are polymodels (model inheritance) which means we might not even be
            deleting the entity after all, only deleting some of the fields from it.

            What we do then is do a keys_only query, then iterate the entities in batches of
            25 (well _MAX_EG_PER_TXN), each entity in the batch has its polymodel fields wiped out
            (if necessary) and then we do either a PutAsync or DeleteAsync all inside a transaction.

            Oh, and we wipe out memcache and delete the constraints in an independent transaction.

            Things to improve:

             - Delete the constraints in a background thread. We don't need to wait for them, and really,
             we don't want the non-deletion of them to affect the deletion of the entity. Lingering markers
             are handled automatically; they just cause a small performance hit on write.
             - Check the entity matches the query still (there's a fixme there)
        """

        self.select.execute()

        constraints_enabled = constraints.has_active_unique_constraints(self.model)
        keys = [x.key() for x in self.select.results]

        def wipe_polymodel_from_entity(entity, db_table):
            """
                Wipes out the fields associated with the specified polymodel table
            """
            polymodel_value = entity.get('class', [])
            if polymodel_value and db_table in polymodel_value:
                # Remove any local fields of this model from the entity
                model = utils.get_model_from_db_table(db_table)
                for field in model._meta.local_fields:
                    col = field.column
                    if col in entity:
                        del entity[col]

                # Then remove this model from the polymodel hierarchy
                polymodel_value.remove(db_table)
                if polymodel_value:
                    entity['class'] = polymodel_value

        @db.transactional(xg=True)
        def delete_batch(key_slice):
            entities = datastore.Get(key_slice)

            # FIXME: We need to make sure the entity still matches the query!
            # entities = (x for x in entities if utils.entity_matches_query(x, self.select.gae_query))

            to_delete = []
            to_update = []
            updated_keys = []

            # Go through the entities
            for entity in entities:
                if entity is None:
                    continue

                wipe_polymodel_from_entity(entity, self.table_to_delete)
                if not entity.get('class'):
                    to_delete.append(entity)
                    if constraints_enabled:
                        constraints.release(self.model, entity)
                else:
                    to_update.append(entity)
                updated_keys.append(entity.key())

            datastore.DeleteAsync([x.key() for x in to_delete])
            datastore.PutAsync(to_update)

            caching.remove_entities_from_cache_by_key(
                updated_keys, self.namespace
            )

            return len(updated_keys)

        deleted = 0
        while keys:
            deleted += delete_batch(keys[:datastore_stub_util._MAX_EG_PER_TXN])
            keys = keys[datastore_stub_util._MAX_EG_PER_TXN:]

        return deleted
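The polymodel wipe is easiest to see on a plain dict. A self-contained sketch (the model and column names are invented for illustration):

def wipe(entity, db_table, local_columns):
    """Remove one model's columns and its entry in the 'class' hierarchy list."""
    classes = entity.get('class', [])
    if db_table in classes:
        for col in local_columns:
            entity.pop(col, None)   # drop the model's own columns
        classes.remove(db_table)    # drop the model from the hierarchy
        entity['class'] = classes
    return entity

entity = {'class': ['app_parent', 'app_child'], 'parent_col': 1, 'child_col': 2}
wipe(entity, 'app_child', ['child_col'])
assert entity == {'class': ['app_parent'], 'parent_col': 1}
# An empty 'class' list would mean the whole entity gets deleted instead.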
Example #3
        def txn():
            caching.remove_entities_from_cache_by_key([key], self.namespace)

            try:
                result = datastore.Get(key)
            except datastore_errors.EntityNotFoundError:
                # Return false to indicate update failure
                return False

            if (
                isinstance(self.select.gae_query, (Query, UniqueQuery)) # ignore QueryByKeys and NoOpQuery
                and not utils.entity_matches_query(result, self.select.gae_query)
            ):
                # Due to eventual consistency the query may have returned an entity which no longer
                # matches the query
                return False

            original = copy.deepcopy(result)

            instance_kwargs = {field.attname: value for field, param, value in self.values}

            # Note: If you replace MockInstance with self.model, you'll find that some delete
            # tests fail in the test app. This is because any unspecified fields would then call
            # get_default (even though we aren't going to use them) which may run a query which
            # fails inside this transaction. Since we are just using MockInstance so that we can
            # call django_instance_to_entity on it with the subset of fields we pass in,
            # what we have is fine.
            meta = self.model._meta
            instance = MockInstance(
                _original=MockInstance(_meta=meta, **result),
                _meta=meta,
                **instance_kwargs
            )

            # We need to add to the class attribute, rather than replace it!
            original_class = result.get(POLYMODEL_CLASS_ATTRIBUTE, [])

            # Update the entity we read above with the new values
            result.update(django_instance_to_entity(
                self.connection, self.model,
                [x[0] for x in self.values],  # Pass in the fields that were updated
                True, instance)
            )

            # Make sure we keep all classes in the inheritance tree!
            if original_class:
                if result[POLYMODEL_CLASS_ATTRIBUTE] is not None:
                    result[POLYMODEL_CLASS_ATTRIBUTE].extend(original_class)
                    # Make sure we don't add duplicates
                else:
                    result[POLYMODEL_CLASS_ATTRIBUTE] = original_class

            if POLYMODEL_CLASS_ATTRIBUTE in result:
                result[POLYMODEL_CLASS_ATTRIBUTE] = list(set(result[POLYMODEL_CLASS_ATTRIBUTE]))

            if not constraints.has_active_unique_constraints(self.model):
                # The fast path, no constraint checking
                datastore.Put(result)
                caching.add_entities_to_cache(
                    self.model,
                    [result],
                    caching.CachingSituation.DATASTORE_PUT,
                    self.namespace,
                    skip_memcache=True,
                )
            else:
                markers_to_acquire[:], markers_to_release[:] = constraints.get_markers_for_update(
                    self.model, original, result
                )
                datastore.Put(result)

                constraints.update_identifiers(markers_to_acquire, markers_to_release, result.key())

                # If the datastore.Put() fails then the exception will only be raised when the
                # transaction applies, which means that we will still get to here and will still have
                # applied the marker changes (because they're in a nested, independent transaction).
                # Hence we set this flag to tell us that we got this far and that we should roll them back.
                rollback_markers[0] = True
                # If something dies between here and the `return` statement then we'll have stale unique markers

                try:
                    # Update the cache before dealing with unique markers, as CachingSituation.DATASTORE_PUT
                    # will only update the context cache
                    caching.add_entities_to_cache(
                        self.model,
                        [result],
                        caching.CachingSituation.DATASTORE_PUT,
                        self.namespace,
                        skip_memcache=True,
                    )
                except Exception:
                    # We ignore the exception because raising would roll back the transaction,
                    # causing an inconsistent state
                    logger.exception("Unable to update the context cache")

            # Return true to indicate update success
            return True
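The assignments to markers_to_acquire[:] and rollback_markers[0] (rather than to plain names) are there because this is Python 2, where a closure cannot rebind an enclosing variable (there is no nonlocal). Mutating a shared list is the standard way to hand state back to the caller; a minimal demonstration of the idiom:

flag = [False]
markers = []

def inner():
    flag[0] = True          # mutation is visible to the caller
    markers[:] = [1, 2, 3]  # slice-assignment replaces the contents in place
    # `flag = True` here would only create a new local name

inner()
assert flag == [True] and markers == [1, 2, 3]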
Example #4
        def txn():
            caching.remove_entities_from_cache_by_key([key], self.namespace)

            try:
                result = rpc.Get(key)
            except datastore_errors.EntityNotFoundError:
                # Return false to indicate update failure
                return False

            if (
                isinstance(self.select.gae_query, (Query, meta_queries.UniqueQuery)) # ignore QueryByKeys and NoOpQuery
                and not utils.entity_matches_query(result, self.select.gae_query)
            ):
                # Due to eventual consistency the query may have returned an entity which no longer
                # matches the query
                return False

            original = copy.deepcopy(result)

            instance_kwargs = {field.attname: value for field, param, value in self.values}

            # Note: If you replace MockInstance with self.model, you'll find that some delete
            # tests fail in the test app. This is because any unspecified fields would then call
            # get_default (even though we aren't going to use them) which may run a query which
            # fails inside this transaction. Since we are just using MockInstance so that we can
            # call django_instance_to_entities on it with the subset of fields we pass in,
            # what we have is fine.
            meta = self.model._meta
            instance = MockInstance(
                _original=MockInstance(_meta=meta, **result),
                _meta=meta,
                **instance_kwargs
            )

            # Convert the instance to an entity
            primary, descendents = django_instance_to_entities(
                self.connection,
                [x[0] for x in self.values],  # Pass in the fields that were updated
                True, instance,
                model=self.model
            )

            # Update the entity we read above with the new values
            result.update(primary)

            # Remove fields which have been marked to be unindexed
            for col in getattr(primary, "_properties_to_remove", []):
                if col in result:
                    del result[col]

            # Make sure that any polymodel classes which were in the original entity are kept,
            # as django_instance_to_entities may have wiped them as well as added them.
            polymodel_classes = list(set(
                original.get(POLYMODEL_CLASS_ATTRIBUTE, []) + result.get(POLYMODEL_CLASS_ATTRIBUTE, [])
            ))
            if polymodel_classes:
                result[POLYMODEL_CLASS_ATTRIBUTE] = polymodel_classes

            def perform_insert():
                """
                    Inserts result, and any descendents with their ancestor
                    value set
                """
                inserted_key = rpc.Put(result)
                if descendents:
                    for i, descendent in enumerate(descendents):
                        descendents[i] = Entity(
                            descendent.kind(),
                            parent=inserted_key,
                            namespace=inserted_key.namespace(),
                            id=descendent.key().id() or None,
                            name=descendent.key().name() or None
                        )
                        descendents[i].update(descendent)
                    rpc.Put(descendents)

            if not constraints.has_active_unique_constraints(self.model):
                # The fast path, no constraint checking
                perform_insert()

                caching.add_entities_to_cache(
                    self.model,
                    [result],
                    caching.CachingSituation.DATASTORE_PUT,
                    self.namespace,
                    skip_memcache=True,
                )
            else:
                markers_to_acquire[:], markers_to_release[:] = constraints.get_markers_for_update(
                    self.model, original, result
                )

                perform_insert()

                constraints.update_identifiers(markers_to_acquire, markers_to_release, result.key())

                # If the rpc.Put() fails then the exception will only be raised when the
                # transaction applies, which means that we will still get to here and will still have
                # applied the marker changes (because they're in a nested, independent transaction).
                # Hence we set this flag to tell us that we got this far and that we should roll them back.
                rollback_markers[0] = True
                # If something dies between here and the `return` statement then we'll have stale unique markers

                try:
                    # Update the cache before dealing with unique markers, as CachingSituation.DATASTORE_PUT
                    # will only update the context cache
                    caching.add_entities_to_cache(
                        self.model,
                        [result],
                        caching.CachingSituation.DATASTORE_PUT,
                        self.namespace,
                        skip_memcache=True,
                    )
                except Exception:
                    # We ignore the exception because raising would roll back the transaction,
                    # causing an inconsistent state
                    logger.exception("Unable to update the context cache")

            # Return true to indicate update success
            return True
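perform_insert rebuilds each descendant as a brand-new Entity because a datastore key's ancestor path is fixed at construction: re-parenting means creating a fresh entity under inserted_key and copying the properties across. The same idea on plain dicts (the key layout is simplified for illustration):

def reparent(descendents, parent_key):
    """Rebuild records under a new parent; the identity changes, the properties are copied."""
    rebuilt = []
    for rec in descendents:
        fresh = {'kind': rec['kind'], 'parent': parent_key}  # new key with a fixed ancestor
        fresh.update(rec.get('props', {}))                   # copy the data across
        rebuilt.append(fresh)
    return rebuilt

children = [{'kind': 'Detail', 'props': {'value': 1}}]
print(reparent(children, parent_key='Parent:42'))
# [{'kind': 'Detail', 'parent': 'Parent:42', 'value': 1}]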
Example #5
    def execute(self):
        """
            Ideally we'd just be able to tell appengine to delete all the entities
            which match the query, that would be nice wouldn't it?

            Except we can't. Firstly Delete() only accepts keys so we first have to
            execute a keys_only query to find the entities that match the query, then send
            those keys to Delete(), except it's not as easy as that either because the
            query might be eventually consistent and so we might delete entities which
            were updated in another request and no longer match the query. Bugger.

            And then there might be constraints... in which case we need to grab the entity
            in its entirety, release any constraints and then delete the entity.

            And then there are polymodels (model inheritance) which means we might not even be
            deleting the entity after all, only deleting some of the fields from it.

            What we do then is do a keys_only query, then iterate the entities in batches of
            25 (well _MAX_EG_PER_TXN), each entity in the batch has its polymodel fields wiped out
            (if necessary) and then we do either a PutAsync or DeleteAsync all inside a transaction.

            Oh, and we wipe out memcache and delete the constraints in an independent transaction.

            Things to improve:

             - Delete the constraints in a background thread. We don't need to wait for them, and
             really, we don't want the non-deletion of them to affect the deletion of the entity.
             Lingering markers are handled automatically; they just cause a small performance hit on
             write.
             - Check the entity matches the query still (there's a fixme there)
        """
        from djangae.db.backends.appengine.indexing import indexers_for_model

        self.select.execute()

        constraints_enabled = constraints.has_active_unique_constraints(self.model)
        keys = [x.key() for x in self.select.results]

        def wipe_polymodel_from_entity(entity, db_table):
            """
                Wipes out the fields associated with the specified polymodel table
            """
            polymodel_value = entity.get('class', [])
            if polymodel_value and db_table in polymodel_value:
                # Remove any local fields of this model from the entity
                model = utils.get_model_from_db_table(db_table)
                for field in model._meta.local_fields:
                    col = field.column
                    if col in entity:
                        del entity[col]

                # Then remove this model from the polymodel hierarchy
                polymodel_value.remove(db_table)
                if polymodel_value:
                    entity['class'] = polymodel_value

        @db.transactional(xg=True)
        def delete_batch(key_slice):
            entities = rpc.Get(key_slice)

            # FIXME: We need to make sure the entity still matches the query!
            # entities = (x for x in entities if utils.entity_matches_query(x, self.select.gae_query))

            to_delete = []
            to_update = []
            updated_keys = []

            # Go through the entities
            for entity in entities:
                if entity is None:
                    continue

                wipe_polymodel_from_entity(entity, self.table_to_delete)
                if not entity.get('class'):
                    to_delete.append(entity.key())
                    if constraints_enabled:
                        constraints.release(self.model, entity)
                else:
                    to_update.append(entity)
                updated_keys.append(entity.key())

            rpc.DeleteAsync(to_delete)
            rpc.PutAsync(to_update)

            # Clean up any special index things that need to be cleaned
            for indexer in indexers_for_model(self.model):
                for key in to_delete:
                    indexer.cleanup(key)

            caching.remove_entities_from_cache_by_key(
                updated_keys, self.namespace
            )

            return len(updated_keys)

        deleted = 0
        while keys:
            deleted += delete_batch(keys[:datastore_stub_util._MAX_EG_PER_TXN])
            keys = keys[datastore_stub_util._MAX_EG_PER_TXN:]

        return deleted
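The driver loop at the bottom is a generic fixed-size batching idiom. Shown standalone (the batch size of 25 is our assumption, matching _MAX_EG_PER_TXN on the SDK stub):

BATCH = 25  # assumed value of datastore_stub_util._MAX_EG_PER_TXN

def drain_in_batches(keys, handle_batch):
    """Consume `keys` in BATCH-sized slices, summing whatever each call reports."""
    total = 0
    while keys:
        total += handle_batch(keys[:BATCH])
        keys = keys[BATCH:]
    return total

assert drain_in_batches(list(range(60)), len) == 60  # 25 + 25 + 10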
Example #6
    def execute(self):
        check_existence = self.has_pk and not has_concrete_parents(self.model)

        def perform_insert(entities):
            results = []
            for primary, descendents in entities:
                new_key = rpc.Put(primary)
                if descendents:
                    for i, descendent in enumerate(descendents):
                        descendents[i] = Entity(
                            descendent.kind(),
                            parent=new_key,
                            namespace=new_key.namespace(),
                            id=descendent.key().id() or None,
                            name=descendent.key().name() or None
                        )
                        descendents[i].update(descendent)

                    rpc.Put(descendents)
                results.append(new_key)
            return results

        if not constraints.has_active_unique_constraints(self.model) and not check_existence:
            # Fast path, no constraint checks and no keys mean we can just do a normal rpc.Put
            # which isn't limited to 25
            results = perform_insert(self.entities)  # This modifies self.entities and sets their keys
            caching.add_entities_to_cache(
                self.model,
                [x[0] for x in self.entities],
                caching.CachingSituation.DATASTORE_GET_PUT,
                self.namespace,
                skip_memcache=True
            )
            return results

        entity_group_count = len(self.entities)

        def insert_chunk(keys, entities):
            # Note that this is limited to a maximum of 25 entities.
            markers = []

            @db.transactional(xg=entity_group_count > 1)
            def txn():
                for key in keys:
                    if check_existence and key is not None:
                        if utils.key_exists(key):
                            raise IntegrityError("Tried to INSERT with existing key")

                        id_or_name = key.id_or_name()
                        if isinstance(id_or_name, six.string_types) and id_or_name.startswith("__"):
                            raise NotSupportedError("Datastore ids cannot start with __. Id was %s" % id_or_name)

                        # Notify App Engine of any keys we're specifying intentionally
                        reserve_id(key.kind(), key.id_or_name(), self.namespace)

                results = perform_insert(entities)

                for entity, _ in entities:
                    markers.extend(constraints.acquire(self.model, entity))

                caching.add_entities_to_cache(
                    self.model,
                    [x[0] for x in entities],
                    caching.CachingSituation.DATASTORE_GET_PUT,
                    self.namespace,
                    skip_memcache=True
                )

                return results

            try:
                return txn()
            except:
                # There are 3 possible reasons why we've ended up here:
                # 1. The rpc.Put() failed, but note that because it's a transaction, the
                #    exception isn't raised until the END of the transaction block.
                # 2. Some of the markers were acquired, but then we hit a unique constraint
                #    conflict and so the outer transaction was rolled back.
                # 3. Something else went wrong after we'd acquired markers, e.g. the
                #    caching.add_entities_to_cache call got hit by a metaphorical bus.
                # In any of these cases, we (may) have acquired markers via (a) nested, independent
                # transaction(s), and so we need to release them again.
                constraints.release_markers(markers)
                raise

        # We can't really support this and maintain the expected behaviour. If we chunked the insert
        # and one of the chunks failed, some of the data would already be saved; rather than trying to
        # communicate that back to the user, it's better that they chunk the data themselves, as they
        # can handle the failure more appropriately.
        if entity_group_count > datastore_stub_util._MAX_EG_PER_TXN:
            raise BulkInsertError("Bulk inserts with unique constraints, or pre-defined keys are limited to {} instances on the datastore".format(
                datastore_stub_util._MAX_EG_PER_TXN
            ))

        return insert_chunk(self.included_keys, self.entities)
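The "__" check above exists because the datastore reserves key names beginning with a double underscore. The validation in isolation (the function name is ours, and plain str stands in for six.string_types):

def validate_key_name(id_or_name):
    """Reject user-supplied datastore names in the reserved __ namespace."""
    if isinstance(id_or_name, str) and id_or_name.startswith("__"):
        raise ValueError("Datastore ids cannot start with __. Id was %s" % id_or_name)

validate_key_name("order-1234")    # fine
try:
    validate_key_name("__reserved")
except ValueError as exc:
    print(exc)  # Datastore ids cannot start with __. Id was __reserved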
Example #7
        def txn():
            caching.remove_entities_from_cache_by_key([key], self.namespace)

            try:
                result = datastore.Get(key)
            except datastore_errors.EntityNotFoundError:
                # Return false to indicate update failure
                return False

            if (
                isinstance(self.select.gae_query, (Query, UniqueQuery))  # ignore QueryByKeys and NoOpQuery
                and not utils.entity_matches_query(result, self.select.gae_query)
            ):
                # Due to eventual consistency the query may have returned an entity which no longer
                # matches the query
                return False

            original = copy.deepcopy(result)

            instance_kwargs = {
                field.attname: value
                for field, param, value in self.values
            }

            # Note: If you replace MockInstance with self.model, you'll find that some delete
            # tests fail in the test app. This is because any unspecified fields would then call
            # get_default (even though we aren't going to use them) which may run a query which
            # fails inside this transaction. Since we are just using MockInstance so that we can
            # call django_instance_to_entity on it with the subset of fields we pass in,
            # what we have is fine.
            meta = self.model._meta
            instance = MockInstance(
                _original=MockInstance(_meta=meta, **result),
                _meta=meta,
                **instance_kwargs
            )

            # We need to add to the class attribute, rather than replace it!
            original_class = result.get(POLYMODEL_CLASS_ATTRIBUTE, [])

            # Update the entity we read above with the new values
            result.update(django_instance_to_entity(
                self.connection, self.model,
                [x[0] for x in self.values],  # Pass in the fields that were updated
                True, instance
            ))

            # Make sure we keep all classes in the inheritance tree!
            if original_class:
                if result[POLYMODEL_CLASS_ATTRIBUTE] is not None:
                    result[POLYMODEL_CLASS_ATTRIBUTE].extend(original_class)
                    # Make sure we don't add duplicates
                else:
                    result[POLYMODEL_CLASS_ATTRIBUTE] = original_class

            if POLYMODEL_CLASS_ATTRIBUTE in result:
                result[POLYMODEL_CLASS_ATTRIBUTE] = list(set(result[POLYMODEL_CLASS_ATTRIBUTE]))

            if not constraints.has_active_unique_constraints(self.model):
                # The fast path, no constraint checking
                datastore.Put(result)
                caching.add_entities_to_cache(
                    self.model,
                    [result],
                    caching.CachingSituation.DATASTORE_PUT,
                    self.namespace,
                    skip_memcache=True,
                )
            else:
                markers_to_acquire[:], markers_to_release[:] = constraints.get_markers_for_update(
                    self.model, original, result
                )
                datastore.Put(result)

                constraints.update_identifiers(markers_to_acquire, markers_to_release, result.key())

                # If the datastore.Put() fails then the exception will only be raised when the
                # transaction applies, which means that we will still get to here and will still have
                # applied the marker changes (because they're in a nested, independent transaction).
                # Hence we set this flag to tell us that we got this far and that we should roll them back.
                rollback_markers[0] = True
                # If something dies between here and the `return` statement then we'll have stale unique markers

                try:
                    # Update the cache before dealing with unique markers, as CachingSituation.DATASTORE_PUT
                    # will only update the context cache
                    caching.add_entities_to_cache(
                        self.model,
                        [result],
                        caching.CachingSituation.DATASTORE_PUT,
                        self.namespace,
                        skip_memcache=True,
                    )
                except Exception:
                    # We ignore the exception because raising would roll back the transaction,
                    # causing an inconsistent state
                    logging.exception("Unable to update the context cache")

            # Return true to indicate update success
            return True
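The swallow-and-log except around the cache refresh is deliberate: raising inside the transaction would roll back the Put that has already been issued. The pattern in isolation (the helper name is ours):

import logging

logger = logging.getLogger(__name__)

def run_non_critical(step, description):
    """Run a step whose failure must not abort the surrounding transaction."""
    try:
        step()
    except Exception:
        # Log and continue: raising here would roll the whole transaction back
        logger.exception("Non-critical step failed: %s", description)

run_non_critical(lambda: 1 / 0, "update the context cache")  # logs, does not raise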