class FDBDatastore(object):
  """ A datastore implementation that uses FoundationDB. """

  def __init__(self):
    self._data_manager = None
    self._db = None
    self._scattered_allocator = ScatteredAllocator()
    self._tornado_fdb = None
    self._tx_manager = None
    self._gc = None
    self._index_manager = None
    self._stats_buffer = None

  def start(self, fdb_clusterfile):
    self._db = fdb.open(fdb_clusterfile)
    self._tornado_fdb = TornadoFDB(IOLoop.current())
    ds_dir = fdb.directory.create_or_open(self._db, DS_ROOT)
    self._directory_cache = DirectoryCache(self._db, self._tornado_fdb,
                                           ds_dir)
    self._directory_cache.initialize()

    self._data_manager = DataManager(self._tornado_fdb,
                                     self._directory_cache)
    self._tx_manager = TransactionManager(
      self._db, self._tornado_fdb, self._directory_cache)
    self._index_manager = IndexManager(
      self._db, self._tornado_fdb, self._data_manager, self._directory_cache)
    self._index_manager.start()
    self._gc = GarbageCollector(
      self._db, self._tornado_fdb, self._data_manager, self._index_manager,
      self._tx_manager, self._directory_cache)
    self._gc.start()
    self._stats_buffer = StatsBuffer(self._db, self._tornado_fdb,
                                     self._directory_cache, self)
    self._stats_buffer.start()

  @gen.coroutine
  def dynamic_put(self, project_id, put_request, put_response, retries=5):
    # logger.debug(u'put_request:\n{}'.format(put_request))
    project_id = decode_str(project_id)
    # TODO: Enforce max key length (100 elements).
    # Enforce max element size (1500 bytes).
    # Enforce max kind size (1500 bytes).
    # Enforce key name regex (reserved names match __.*__).
    if put_request.auto_id_policy() != put_request.CURRENT:
      raise BadRequest(u'Sequential allocator is not implemented')

    tr = self._db.create_transaction()

    if put_request.has_transaction():
      yield self._tx_manager.log_puts(tr, project_id, put_request)
      writes = {self._collapsible_id(entity):
                (VersionEntry.from_key(entity.key()),
                 VersionEntry.from_key(entity.key()), None)
                for entity in put_request.entity_list()}
    else:
      # Eliminate multiple puts to the same key.
      puts_by_key = {self._collapsible_id(entity): entity
                     for entity in put_request.entity_list()}
      writes = yield {key: self._upsert(tr, entity)
                      for key, entity in six.iteritems(puts_by_key)}

    old_entries = [old_entry for old_entry, _, _ in six.itervalues(writes)
                   if old_entry.present]
    versionstamp_future = None
    if old_entries:
      versionstamp_future = tr.get_versionstamp()

    try:
      yield self._tornado_fdb.commit(tr, convert_exceptions=False)
    except fdb.FDBError as fdb_error:
      if fdb_error.code == FDBErrorCodes.NOT_COMMITTED:
        pass
      elif fdb_error.code == FDBErrorCodes.COMMIT_RESULT_UNKNOWN:
        logger.error('Unable to determine commit result. Retrying.')
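        # Descriptive note (added): an unknown commit result is treated like
        # NOT_COMMITTED, falling through to the shared retry logic below
        # instead of surfacing an error to the client.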
      else:
        raise InternalError(fdb_error.description)

      retries -= 1
      if retries < 0:
        raise InternalError(fdb_error.description)

      yield self.dynamic_put(project_id, put_request, put_response, retries)
      return

    if old_entries:
      self._gc.clear_later(old_entries, versionstamp_future.wait().value)

    mutations = [(old_entry, new_entry, index_stats)
                 for old_entry, new_entry, index_stats
                 in six.itervalues(writes)
                 if index_stats is not None]
    IOLoop.current().spawn_callback(self._stats_buffer.update, project_id,
                                    mutations)

    for entity in put_request.entity_list():
      write_entry = writes[self._collapsible_id(entity)][1]
      put_response.add_key().CopyFrom(write_entry.key)
      if write_entry.version != ABSENT_VERSION:
        put_response.add_version(write_entry.version)

    # logger.debug('put_response:\n{}'.format(put_response))

  @gen.coroutine
  def dynamic_get(self, project_id, get_request, get_response):
    logger.debug(u'get_request:\n{}'.format(get_request))
    project_id = decode_str(project_id)
    tr = self._db.create_transaction()

    read_versionstamp = None
    if get_request.has_transaction():
      yield self._tx_manager.log_lookups(tr, project_id, get_request)

      # Ensure the GC hasn't cleaned up an entity written after the tx start.
      safe_read_stamps = yield [self._gc.safe_read_versionstamp(tr, key)
                                for key in get_request.key_list()]
      safe_read_stamps = [vs for vs in safe_read_stamps if vs is not None]
      read_versionstamp = TransactionID.decode(
        get_request.transaction().handle())[1]
      if any(safe_versionstamp > read_versionstamp
             for safe_versionstamp in safe_read_stamps):
        raise BadRequest(u'The specified transaction has expired')

    futures = []
    for key in get_request.key_list():
      futures.append(
        self._data_manager.get_latest(tr, key, read_versionstamp,
                                      snapshot=True))

    version_entries = yield futures

    # If this read is in a transaction, logging the RPC is a mutation.
    yield self._tornado_fdb.commit(tr)

    for entry in version_entries:
      response_entity = get_response.add_entity()
      response_entity.set_version(entry.version)
      if entry.has_entity:
        response_entity.mutable_entity().MergeFrom(entry.decoded)
      else:
        response_entity.mutable_key().MergeFrom(entry.key)

    logger.debug(u'fetched paths: {}'.format(
      [entry.path for entry in version_entries if entry.has_entity]))

  @gen.coroutine
  def dynamic_delete(self, project_id, delete_request, retries=5):
    logger.debug(u'delete_request:\n{}'.format(delete_request))
    project_id = decode_str(project_id)
    tr = self._db.create_transaction()

    if delete_request.has_transaction():
      yield self._tx_manager.log_deletes(tr, project_id, delete_request)
      deletes = [(VersionEntry.from_key(key), None, None)
                 for key in delete_request.key_list()]
    else:
      # Eliminate multiple deletes to the same key.
      deletes_by_key = {key.Encode(): key
                        for key in delete_request.key_list()}
      deletes = yield [self._delete(tr, key)
                       for key in six.itervalues(deletes_by_key)]

    old_entries = [old_entry for old_entry, _, _ in deletes
                   if old_entry.present]
    versionstamp_future = None
    if old_entries:
      versionstamp_future = tr.get_versionstamp()

    try:
      yield self._tornado_fdb.commit(tr, convert_exceptions=False)
    except fdb.FDBError as fdb_error:
      if fdb_error.code == FDBErrorCodes.NOT_COMMITTED:
        pass
      elif fdb_error.code == FDBErrorCodes.COMMIT_RESULT_UNKNOWN:
        logger.error('Unable to determine commit result. Retrying.')
      else:
        raise InternalError(fdb_error.description)

      retries -= 1
      if retries < 0:
        raise InternalError(fdb_error.description)

      yield self.dynamic_delete(project_id, delete_request, retries)
      return

    if old_entries:
      self._gc.clear_later(old_entries, versionstamp_future.wait().value)

    mutations = [(old_entry, None, stats)
                 for old_entry, _, stats in deletes
                 if stats is not None]
    IOLoop.current().spawn_callback(self._stats_buffer.update, project_id,
                                    mutations)

    # TODO: Once the Cassandra backend is removed, populate a delete response.
    for old_entry, new_version, _ in deletes:
      logger.debug(u'new_version: {}'.format(new_version))

  @gen.coroutine
  def _dynamic_run_query(self, query, query_result):
    logger.debug(u'query: {}'.format(query))
    project_id = decode_str(query.app())
    tr = self._db.create_transaction()
    read_versionstamp = None
    if query.has_transaction():
      yield self._tx_manager.log_query(tr, project_id, query)

      # Ensure the GC hasn't cleaned up an entity written after the tx start.
      safe_versionstamp = yield self._gc.safe_read_versionstamp(
        tr, query.ancestor())
      read_versionstamp = TransactionID.decode(
        query.transaction().handle())[1]
      if (safe_versionstamp is not None and
          safe_versionstamp > read_versionstamp):
        raise BadRequest(u'The specified transaction has expired')

    fetch_data = self._index_manager.include_data(query)
    rpc_limit, check_more_results = self._index_manager.rpc_limit(query)

    iterator = yield self._index_manager.get_iterator(
      tr, query, read_versionstamp)
    for prop_name in query.property_name_list():
      prop_name = decode_str(prop_name)
      if prop_name not in iterator.prop_names:
        raise BadRequest(
          u'Projections on {} are not supported'.format(prop_name))

    data_futures = [] if fetch_data else None
    unique_keys = set()
    results = []
    entries_fetched = 0
    skipped_results = 0
    cursor = None
    while True:
      remainder = rpc_limit - entries_fetched
      iter_offset = max(query.offset() - entries_fetched, 0)
      entries, more_iterator_results = yield iterator.next_page()
      entries_fetched += len(entries)
      if not entries and more_iterator_results:
        continue

      if not entries and not more_iterator_results:
        break

      skipped_results += min(len(entries), iter_offset)
      suitable_entries = entries[iter_offset:remainder]
      if entries[:remainder]:
        cursor = entries[:remainder][-1]

      if not fetch_data and not query.keys_only():
        results.extend([entry.prop_result() for entry in suitable_entries])
        continue

      for entry in suitable_entries:
        if entry.path in unique_keys:
          continue

        unique_keys.add(entry.path)
        if fetch_data:
          data_futures.append(
            self._data_manager.get_entry(tr, entry, snapshot=True))
        else:
          results.append(entry.key_result())

      if not more_iterator_results:
        break

    if fetch_data:
      entity_results = yield data_futures
      results = [entity.encoded for entity in entity_results]
    else:
      results = [result.Encode() for result in results]

    yield self._tornado_fdb.commit(tr)

    query_result.result_list().extend(results)
    # TODO: Figure out how ndb multi queries use compiled cursors.
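    # Descriptive note (added): the compiled cursor below lets the client
    # resume the query. The last entry that fit within the RPC limit is
    # encoded along with the query's ordered properties (excluding the key
    # property).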
    if query.compile():
      ordered_props = tuple(prop_name for prop_name, _ in
                            get_order_info(query) if prop_name != KEY_PROP)
      mutable_cursor = query_result.mutable_compiled_cursor()
      if cursor is not None:
        mutable_cursor.MergeFrom(cursor.cursor_result(ordered_props))

    more_results = check_more_results and entries_fetched > rpc_limit
    query_result.set_more_results(more_results)

    if skipped_results:
      query_result.set_skipped_results(skipped_results)

    if query.keys_only():
      query_result.set_keys_only(True)

    logger.debug(u'{} results'.format(len(query_result.result_list())))

  @gen.coroutine
  def setup_transaction(self, project_id, is_xg):
    project_id = decode_str(project_id)
    txid = yield self._tx_manager.create(project_id)
    logger.debug(u'Started new transaction: {}:{}'.format(project_id, txid))
    raise gen.Return(txid)

  @gen.coroutine
  def apply_txn_changes(self, project_id, txid, retries=5):
    logger.debug(u'Applying {}:{}'.format(project_id, txid))
    project_id = decode_str(project_id)
    tr = self._db.create_transaction()
    read_versionstamp = TransactionID.decode(txid)[1]
    lookups, queried_groups, mutations = yield self._tx_manager.get_metadata(
      tr, project_id, txid)

    try:
      writes = yield self._apply_mutations(
        tr, project_id, queried_groups, mutations, lookups,
        read_versionstamp)
    finally:
      yield self._tx_manager.delete(tr, project_id, txid)

    versionstamp_future = None
    old_entries = [old_entry for old_entry, _, _ in writes
                   if old_entry.present]
    if old_entries:
      versionstamp_future = tr.get_versionstamp()

    try:
      yield self._tornado_fdb.commit(tr, convert_exceptions=False)
    except fdb.FDBError as fdb_error:
      if fdb_error.code != FDBErrorCodes.NOT_COMMITTED:
        raise InternalError(fdb_error.description)

      retries -= 1
      if retries < 0:
        raise InternalError(fdb_error.description)

      yield self.apply_txn_changes(project_id, txid, retries)
      return

    if old_entries:
      self._gc.clear_later(old_entries, versionstamp_future.wait().value)

    mutations = [(old_entry, FDBDatastore._filter_version(new_entry),
                  index_stats)
                 for old_entry, new_entry, index_stats in writes
                 if index_stats is not None]
    IOLoop.current().spawn_callback(self._stats_buffer.update, project_id,
                                    mutations)

    logger.debug(u'Finished applying {}:{}'.format(project_id, txid))

  @gen.coroutine
  def rollback_transaction(self, project_id, txid):
    project_id = decode_str(project_id)
    logger.debug(u'Rolling back {}:{}'.format(project_id, txid))

    tr = self._db.create_transaction()
    yield self._tx_manager.delete(tr, project_id, txid)
    yield self._tornado_fdb.commit(tr)

  @gen.coroutine
  def update_composite_index(self, project_id, index):
    project_id = decode_str(project_id)
    yield self._index_manager.update_composite_index(project_id, index)

  @gen.coroutine
  def get_indexes(self, project_id):
    """ Retrieves list of indexes for a project.

    Args:
      project_id: A string specifying a project ID.
    Returns:
      A list of entity_pb.CompositeIndex objects.
    Raises:
      BadRequest if project_id is not found.
    """
    tr = self._db.create_transaction()
    composite_index_manager = self._index_manager._composite_index_manager
    project_indexes = yield composite_index_manager.get_definitions(
      tr, project_id)
    raise gen.Return([index.to_pb() for index in project_indexes])

  @gen.coroutine
  def add_indexes(self, project_id, indexes):
    """ Adds composite index definitions to a project.

    Only indexes that do not already exist will be created.

    Args:
      project_id: A string specifying a project ID.
      indexes: An iterable containing index definitions.
""" tr = self._db.create_transaction() yield self._index_manager.merge(tr, project_id, indexes) yield self._tornado_fdb.commit(tr) @gen.coroutine def allocate_size(self, project_id, namespace, path_prefix, size, retries=5): tr = self._db.create_transaction() key = yield sequential_id_key(tr, project_id, namespace, path_prefix, self._directory_cache) old_max = yield old_max_id(tr, key, self._tornado_fdb) new_max = old_max + size # TODO: Check behavior on reaching max sequential ID. if new_max > _MAX_SEQUENTIAL_ID: raise BadRequest( u'There are not enough remaining IDs to satisfy request') tr[key] = SequentialIDsNamespace.encode_value(new_max) try: yield self._tornado_fdb.commit(tr) except fdb.FDBError as fdb_error: if fdb_error.code != FDBErrorCodes.NOT_COMMITTED: raise InternalError(fdb_error.description) retries -= 1 if retries < 0: raise InternalError(fdb_error.description) range_start, range_end = yield self.allocate_size( project_id, namespace, path_prefix, size, retries) raise gen.Return((range_start, range_end)) raise gen.Return((old_max + 1, new_max)) @gen.coroutine def allocate_max(self, project_id, namespace, path_prefix, new_max, retries=5): tr = self._db.create_transaction() key = yield sequential_id_key(tr, project_id, namespace, path_prefix, self._directory_cache) old_max = yield old_max_id(tr, key, self._tornado_fdb) if new_max > old_max: tr[key] = SequentialIDsNamespace.encode_value(new_max) try: yield self._tornado_fdb.commit(tr) except fdb.FDBError as fdb_error: if fdb_error.code != FDBErrorCodes.NOT_COMMITTED: raise InternalError(fdb_error.description) retries -= 1 if retries < 0: raise InternalError(fdb_error.description) range_start, range_end = yield self.allocate_max( project_id, namespace, path_prefix, new_max, retries) raise gen.Return((range_start, range_end)) raise gen.Return((old_max + 1, max(new_max, old_max))) @gen.coroutine def _upsert(self, tr, entity, old_entry_future=None): auto_id = self._auto_id(entity) if auto_id: # Avoid mutating the object given. new_entity = entity_pb.EntityProto() new_entity.CopyFrom(entity) entity = new_entity last_element = entity.key().path().element(-1) last_element.set_id(self._scattered_allocator.get_id()) if old_entry_future is None: old_entry = yield self._data_manager.get_latest(tr, entity.key()) else: old_entry = yield old_entry_future # If the datastore chose an ID, don't overwrite existing data. 
    if auto_id and old_entry.present:
      self._scattered_allocator.invalidate()
      raise InternalError(u'The datastore chose an existing ID')

    new_version = next_entity_version(old_entry.version)
    encoded_entity = entity.Encode()
    yield self._data_manager.put(tr, entity.key(), new_version,
                                 encoded_entity)
    index_stats = yield self._index_manager.put_entries(
      tr, old_entry, entity)
    if old_entry.present:
      yield self._gc.index_deleted_version(tr, old_entry)

    new_entry = VersionEntry.from_key(entity.key())
    new_entry._encoded_entity = encoded_entity
    new_entry._decoded_entity = entity
    new_entry.version = new_version
    raise gen.Return((old_entry, new_entry, index_stats))

  @gen.coroutine
  def _delete(self, tr, key, old_entry_future=None):
    if old_entry_future is None:
      old_entry = yield self._data_manager.get_latest(tr, key)
    else:
      old_entry = yield old_entry_future

    if not old_entry.present:
      raise gen.Return((old_entry, None, None))

    new_version = next_entity_version(old_entry.version)
    yield self._data_manager.put(tr, key, new_version, b'')
    index_stats = yield self._index_manager.put_entries(
      tr, old_entry, new_entity=None)
    if old_entry.present:
      yield self._gc.index_deleted_version(tr, old_entry)

    raise gen.Return((old_entry, new_version, index_stats))

  @gen.coroutine
  def _apply_mutations(self, tr, project_id, queried_groups, mutations,
                       lookups, read_versionstamp):
    # TODO: Check if transactional tasks count as a side effect.
    if not mutations:
      raise gen.Return([])

    group_update_futures = [
      self._data_manager.last_group_versionstamp(tr, project_id, namespace,
                                                 group_path)
      for namespace, group_path in queried_groups]

    # Index keys that require a full lookup rather than a versionstamp.
    require_data = set()
    for mutation in mutations:
      key = (mutation if isinstance(mutation, entity_pb.Reference)
             else mutation.key())
      require_data.add(key.Encode())

    # Start fetching versionstamps for lookups first to invalidate sooner.
    futures = {}
    for key in lookups:
      encoded_key = key.Encode()
      futures[encoded_key] = self._data_manager.get_latest(
        tr, key, include_data=encoded_key in require_data)

    group_updates = yield group_update_futures
    group_updates = [vs for vs in group_updates if vs is not None]
    if any(commit_vs > read_versionstamp for commit_vs in group_updates):
      raise ConcurrentModificationException(
        u'A queried group was modified after this transaction was started.')

    version_entries = yield [futures[key.Encode()] for key in lookups]
    if any(entry.present and entry.commit_versionstamp > read_versionstamp
           for entry in version_entries):
      raise ConcurrentModificationException(
        u'An entity was modified after this transaction was started.')

    # TODO: Check if this constraint is still needed.
    self._enforce_max_groups(mutations)

    # Apply mutations.
    mutation_futures = []
    for mutation in self._collapse_mutations(mutations):
      if isinstance(mutation, entity_pb.Reference):
        old_entry_future = futures.get(mutation.Encode())
        mutation_futures.append(self._delete(tr, mutation, old_entry_future))
      else:
        old_entry_future = futures.get(mutation.key().Encode())
        mutation_futures.append(self._upsert(tr, mutation, old_entry_future))

    responses = yield mutation_futures
    raise gen.Return(responses)

  @staticmethod
  def _collapse_mutations(mutations):
    """ Selects the last mutation for each key as the one to apply. """
    # TODO: For the v1 API, test if insert, update succeeds in mutation list.
    mutations_by_key = {}
    for mutation in mutations:
      if isinstance(mutation, entity_pb.Reference):
        key = mutation.Encode()
      else:
        key = FDBDatastore._collapsible_id(mutation)

      mutations_by_key[key] = mutation

    return tuple(mutation for mutation in six.itervalues(mutations_by_key))

  @staticmethod
  def _collapsible_id(entity):
    """ The "collapsible" ID is the encoded key, or the entity's object ID
        when a key has not been allocated yet.
    """
    if FDBDatastore._auto_id(entity):
      return id(entity)
    else:
      return entity.key().Encode()

  @staticmethod
  def _filter_version(entity):
    """ Filters out any entity that is actually a delete version. """
    if isinstance(entity, (int, long)):
      return None
    else:
      return entity

  @staticmethod
  def _enforce_max_groups(mutations):
    """ Raises an exception if too many entity groups were modified. """
    mutated_groups = set()
    for mutation in mutations:
      if isinstance(mutation, entity_pb.Reference):
        key = mutation
      else:
        key = mutation.key()

      namespace = decode_str(key.name_space())
      flat_group = (namespace,) + Path.flatten(key.path())[:2]
      mutated_groups.add(flat_group)

      if len(mutated_groups) > 25:
        raise BadRequest(u'Too many entity groups modified in transaction')

  @staticmethod
  def _auto_id(entity):
    """ Returns True if an ID should be allocated automatically for the
        entity.
    """
    last_element = entity.key().path().element(-1)
    auto_id = False
    if not last_element.has_name():
      auto_id = not (last_element.has_id() and last_element.id() != 0)

    return auto_id
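

# ---------------------------------------------------------------------------
# Illustrative usage sketch (added; not part of the upstream module). It shows
# one way an FDBDatastore instance might be started under a Tornado IOLoop.
# The cluster-file path below is an assumption for illustration; in practice
# the path is supplied by whatever process launches the datastore server.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
  datastore = FDBDatastore()
  # start() opens the FDB cluster, builds the directory cache, and launches
  # the index manager, garbage collector, and stats buffer background work.
  datastore.start('/etc/foundationdb/fdb.cluster')
  IOLoop.current().start()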