@classmethod
def from_filter(cls, db, project_id, namespace, kind, pb_filter):
  """ Creates a new RangeIterator from a filter.

  Args:
    db: A database interface object.
    project_id: A string specifying a project ID.
    namespace: A string specifying a namespace.
    kind: A string specifying an entity kind.
    pb_filter: A datastore_pb.Query_Filter object.
  Returns:
    A new RangeIterator object.
  Raises:
    BadRequest if the filter cannot be used to create the range.
  """
  # Make sure this filter can be used for a merge join.
  if pb_filter.op() != Query_Filter.EQUAL:
    raise BadRequest('Invalid filter for merge join '
                     '(op must be equal): {}'.format(pb_filter))

  if pb_filter.property_size() != 1:
    raise BadRequest('Invalid filter for merge join '
                     '(multiple properties): {}'.format(pb_filter))

  property_ = pb_filter.property(0)
  if property_.name() == '__key__':
    raise BadRequest('Invalid property for merge join '
                     '(must not be __key__): {}'.format(property_))

  return cls(db, project_id, namespace, kind, property_.name(),
             property_.value())
def _update_parts(self, parts, index, new_value):
  """ Updates a start or stop bound, ensuring the range can only narrow. """
  if index < 0:
    index = self._expected_parts + index

  # Ensure fields are set in order.
  if len(parts) < index:
    raise BadRequest(u'Invalid filter combination')

  if len(parts) == index:
    parts.append(new_value)
    return

  if new_value == parts[index]:
    return

  # If this field has already been set, ensure the new range is smaller.
  candidate = parts[:index] + [new_value]
  if parts is self._start_parts:
    if b''.join(candidate) < b''.join(parts):
      raise BadRequest(u'Invalid filter combination')

    self._start_parts = candidate
  elif parts is self._stop_parts:
    if b''.join(candidate) > b''.join(parts):
      raise BadRequest(u'Invalid filter combination')

    self._stop_parts = candidate
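# A minimal, self-contained sketch (names hypothetical, not part of the class
# above) of the invariant _update_parts enforces on the start bound: once a
# tuple position is set, a new value may only narrow the range, never widen it.
def narrow_start(parts, index, new_value):
  """ Returns an updated start bound, refusing to move it backwards. """
  if len(parts) < index:
    raise ValueError('Fields must be set in order')
  if len(parts) == index:
    return parts + [new_value]
  candidate = parts[:index] + [new_value]
  if b''.join(candidate) < b''.join(parts):
    raise ValueError('New start would widen the range')
  return candidate

assert narrow_start([b'a'], 0, b'b') == [b'b']  # moving the start up is fine
# narrow_start([b'b'], 0, b'a') would raise: it would widen the start bound.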
def apply_prop_filter(self, prop_name, op, value):
  """ Narrows the index range using a property filter. """
  index, direction = self._prop_details(prop_name)
  prop_reverse = direction == Query_Order.DESCENDING
  encoded_value = encode_value(value, prop_reverse)
  if op == Query_Filter.EQUAL:
    self._set_start(index, encoded_value)
    self._set_stop(index, encoded_value)
    self._set_stop(index + 1, b'\xFF')
    return

  if (op == Query_Filter.GREATER_THAN_OR_EQUAL and not prop_reverse or
      op == Query_Filter.LESS_THAN_OR_EQUAL and prop_reverse):
    self._set_start(index, encoded_value)
  elif (op == Query_Filter.GREATER_THAN and not prop_reverse or
        op == Query_Filter.LESS_THAN and prop_reverse):
    self._set_start(index, encoded_value + b'\xFF')
  elif (op == Query_Filter.LESS_THAN_OR_EQUAL and not prop_reverse or
        op == Query_Filter.GREATER_THAN_OR_EQUAL and prop_reverse):
    self._set_stop(index, encoded_value)
    self._set_stop(index + 1, b'\xFF')
  elif (op == Query_Filter.LESS_THAN and not prop_reverse or
        op == Query_Filter.GREATER_THAN and prop_reverse):
    self._set_stop(index, encoded_value)
  else:
    raise BadRequest(u'Unexpected filter operation')
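# Hedged illustration of the bound arithmetic above: appending b'\xFF' to an
# encoded value moves the start bound past every key that begins with that
# value, which is how GREATER_THAN (exclusive) differs from
# GREATER_THAN_OR_EQUAL (inclusive). Pure-Python demo with made-up keys:
keys = [b'cat\x00', b'cat\x00extra', b'dog\x00']

ge_start = b'cat'            # >=: keeps every key starting with b'cat'
gt_start = b'cat' + b'\xFF'  # >: skips them (no encoded byte sorts above 0xFF)

assert [k for k in keys if k >= ge_start] == [b'cat\x00', b'cat\x00extra',
                                              b'dog\x00']
assert [k for k in keys if k >= gt_start] == [b'dog\x00']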
def restrict_to_path(self, path):
  """ Narrows the range to a specific entity path.

  Args:
    path: An entity_pb.Path object.
  """
  start_key = self.prefix + str(encode_index_pb(path))
  end_key = ''.join([start_key, TERMINATING_STRING])
  if start_key < self._range[0] or end_key > self._range[-1]:
    raise BadRequest('Restriction must be within range')

  if self._cursor.key > end_key:
    raise BadRequest('Cursor already exceeds new range')

  self._range = (start_key, end_key)
  self._cursor.key = max(start_key, self._cursor.key)
@gen.coroutine
def allocate_size(self, project_id, namespace, path_prefix, size, retries=5):
  """ Allocates a contiguous range of sequential IDs.

  Returns:
    A tuple of (range_start, range_end), both inclusive.
  """
  tr = self._db.create_transaction()
  key = yield sequential_id_key(tr, project_id, namespace, path_prefix,
                                self._directory_cache)
  old_max = yield old_max_id(tr, key, self._tornado_fdb)
  new_max = old_max + size

  # TODO: Check behavior on reaching max sequential ID.
  if new_max > _MAX_SEQUENTIAL_ID:
    raise BadRequest(
      u'There are not enough remaining IDs to satisfy request')

  tr[key] = SequentialIDsNamespace.encode_value(new_max)
  try:
    yield self._tornado_fdb.commit(tr)
  except fdb.FDBError as fdb_error:
    if fdb_error.code != FDBErrorCodes.NOT_COMMITTED:
      raise InternalError(fdb_error.description)

    retries -= 1
    if retries < 0:
      raise InternalError(fdb_error.description)

    range_start, range_end = yield self.allocate_size(
      project_id, namespace, path_prefix, size, retries)
    raise gen.Return((range_start, range_end))

  raise gen.Return((old_max + 1, new_max))
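# The commit/retry flow above follows a common optimistic pattern: retry only
# on FoundationDB's "not committed" conflict error, and give up on anything
# else. A minimal generic sketch of that pattern (RetryableError is a
# stand-in for FDBErrorCodes.NOT_COMMITTED, not AppScale's actual helper):
class RetryableError(Exception):
  pass

def with_retries(operation, retries=5):
  """ Runs operation(), retrying a bounded number of times on conflicts. """
  while True:
    try:
      return operation()
    except RetryableError:
      retries -= 1
      if retries < 0:
        raise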
def apply_path_filter(self, op, path, ancestor_path=()):
  """ Narrows the index range using a filter on the entity path. """
  if not isinstance(path, tuple):
    path = Path.flatten(path)

  remaining_path = path[len(ancestor_path):] if self._ancestor else path
  if not remaining_path:
    raise InternalError(u'Path filter must be within ancestor')

  start = Path.pack(remaining_path, omit_terminator=True)
  # Since the commit versionstamp could potentially start with 0xFF, this
  # selection scans up to the next possible path value.
  stop = start + six.int2byte(Path.MIN_ID_MARKER)
  index = -2
  if op == Query_Filter.EQUAL:
    self._set_start(index, start)
    self._set_stop(index, stop)
    self._set_stop(index + 1, b'\xFF')
    return

  if op == Query_Filter.GREATER_THAN_OR_EQUAL:
    self._set_start(index, start)
  elif op == Query_Filter.GREATER_THAN:
    self._set_start(index, stop)
  elif op == Query_Filter.LESS_THAN_OR_EQUAL:
    self._set_stop(index, stop)
  elif op == Query_Filter.LESS_THAN:
    self._set_stop(index, start)
  else:
    raise BadRequest(u'Unexpected filter operation')
@classmethod
def encode_id_or_name(cls, id_or_name, reverse=False):
  """ Encodes a path element's ID or name for use in an index key. """
  if isinstance(id_or_name, six.text_type):
    name_marker = encode_marker(cls.NAME_MARKER, reverse)
    return Text.encode(id_or_name, name_marker, reverse)
  elif isinstance(id_or_name, int):
    return Int64.encode(id_or_name, reverse)
  else:
    raise BadRequest(u'Invalid path element type')
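# Sketch of why a marker byte precedes encoded names: with a distinct marker
# per element type, every numeric ID sorts on one side of every name, so a
# single byte-ordered index can hold both. The marker values and encoding
# below are invented for illustration and are not the codec's real
# NAME_MARKER (negative IDs would need extra care):
import struct

ID_MARKER, NAME_MARKER = b'\x01', b'\x02'

def encode_element_sketch(id_or_name):
  if isinstance(id_or_name, int):
    return ID_MARKER + struct.pack('>q', id_or_name)
  return NAME_MARKER + id_or_name.encode('utf-8')

encoded = sorted(map(encode_element_sketch, [u'a', 42, u'b', 7]))
# All integer IDs (7, 42) sort before all names (u'a', u'b').
assert encoded[0][0:1] == ID_MARKER and encoded[-1][0:1] == NAME_MARKER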
@gen.coroutine
def log_query(self, tr, project_id, query):
  """ Logs a query performed within a transaction. """
  txid = query.transaction().handle()
  namespace = decode_str(query.name_space())
  if not query.has_ancestor():
    raise BadRequest(u'Queries in a transaction must specify an ancestor')

  tx_dir = yield self._tx_metadata(tr, project_id)
  tr[tx_dir.encode_query_key(txid, namespace, query.ancestor().path())] = b''
def group_filters(query):
  """ Groups a query's filters by property and orders them for index scans. """
  filter_props = []
  for query_filter in query.filter_list():
    if query_filter.property_size() != 1:
      raise BadRequest(u'Each filter must have exactly one property')

    prop = query_filter.property(0)
    prop_name = decode_str(prop.name())
    filter_info = (query_filter.op(), prop.value())
    if filter_props and filter_props[-1].name == prop_name:
      filter_props[-1].filters.append(filter_info)
    else:
      filter_props.append(FilterProperty(prop_name, [filter_info]))

  # Since the filter list can come in any order, put inequality filters last.
  inequality_index = None
  for index, filter_prop in enumerate(filter_props):
    if not filter_prop.equality:
      inequality_index = index
      break

  if inequality_index is not None:
    inequality_prop = filter_props.pop(inequality_index)
    filter_props.append(inequality_prop)

  # Put key filters last.
  key_index = None
  for index, filter_prop in enumerate(filter_props):
    if filter_prop.name == KEY_PROP:
      key_index = index
      break

  if key_index is not None:
    key_prop = filter_props.pop(key_index)
    filter_props.append(key_prop)

  for filter_prop in filter_props[:-1]:
    if filter_prop.name == KEY_PROP:
      raise BadRequest(
        u'Only the last filter property can be on {}'.format(KEY_PROP))

    if not filter_prop.equality:
      raise BadRequest(u'All but the last property must be equality filters')

  return tuple(filter_props)
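# Self-contained sketch of the ordering contract group_filters enforces:
# equality properties first, then at most one inequality property, with any
# __key__ filter last. Plain (name, is_equality) tuples stand in for the
# protobuf objects, and the helper name is hypothetical:
def order_filter_props(props, key_prop='__key__'):
  equalities = [p for p in props if p[1] and p[0] != key_prop]
  inequalities = [p for p in props if not p[1] and p[0] != key_prop]
  keys = [p for p in props if p[0] == key_prop]
  if len(inequalities) > 1:
    raise ValueError('Only one inequality property is supported')
  return equalities + inequalities + keys

assert order_filter_props(
  [('__key__', True), ('age', False), ('name', True)]) == \
  [('name', True), ('age', False), ('__key__', True)]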
def _encode_path(pb):
  """ Takes a protocol buffer and returns the encoded path. """
  path = []
  for e in pb.element_list():
    if e.has_name():
      key_id = e.name()
    elif e.has_id():
      key_id = str(e.id()).zfill(ID_KEY_LENGTH)
    else:
      raise BadRequest('Entity path must contain name or ID')

    if ID_SEPARATOR in e.type():
      raise BadRequest('Kind names must not include ":"')

    path.append(ID_SEPARATOR.join([e.type(), key_id]))

  val = dbconstants.KIND_SEPARATOR.join(path)
  val += dbconstants.KIND_SEPARATOR
  return val
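# Hedged example of the key layout _encode_path produces, assuming
# ID_SEPARATOR is ':', dbconstants.KIND_SEPARATOR is a low byte such as
# '\x01', and ID_KEY_LENGTH is 10 (values as they appear in typical AppScale
# dbconstants; verify against your tree). Zero-padding numeric IDs keeps
# lexicographic order consistent with numeric order:
assert str(42).zfill(10) < str(1000).zfill(10)  # '0000000042' < '0000001000'
assert not (str(42) < str(1000))                # unpadded: '42' > '1000'

# A two-element path Guestbook:default / Greeting:42 would therefore encode
# roughly as:
#   'Guestbook:default\x01Greeting:0000000042\x01'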
@gen.coroutine
def _get_perfect_index(self, tr, query):
  """ Finds an index that can satisfy the query directly, if one exists. """
  project_id = decode_str(query.app())
  namespace = decode_str(query.name_space())
  filter_props = group_filters(query)
  order_info = get_order_info(query)

  prop_names = [filter_prop.name for filter_prop in filter_props]
  prop_names.extend([prop_name for prop_name, _ in order_info
                     if prop_name not in prop_names])
  prop_names.extend([decode_str(prop_name)
                     for prop_name in query.property_name_list()
                     if prop_name not in prop_names])

  if not query.has_kind():
    if not all(prop_name == KEY_PROP for prop_name in prop_names):
      raise BadRequest(u'kind must be specified when filtering or ordering '
                       u'properties other than __key__')

    kindless_index = yield self._kindless_index(tr, project_id, namespace)
    raise gen.Return(kindless_index)

  kind = decode_str(query.kind())
  if all(prop_name == KEY_PROP for prop_name in prop_names):
    kind_index = yield self._kind_index(tr, project_id, namespace, kind)
    raise gen.Return(kind_index)

  if sum(prop_name != KEY_PROP for prop_name in prop_names) == 1:
    prop_name = next(prop_name for prop_name in prop_names
                     if prop_name != KEY_PROP)
    ordered_prop = prop_name in [order_name for order_name, _ in order_info]
    if not query.has_ancestor() or not ordered_prop:
      single_prop_index = yield self._single_prop_index(
        tr, project_id, namespace, decode_str(query.kind()), prop_name)
      raise gen.Return(single_prop_index)

  queryable = [
    index.to_pb() for index in (
      yield self._composite_index_manager.get_definitions(tr, project_id))
    if index.ready]
  index_pb = FindIndexToUse(query, queryable)
  if index_pb is not None:
    composite_index = yield self._composite_index(
      tr, project_id, index_pb.id(), namespace)
    raise gen.Return(composite_index)
def encode_element(element):
  """ Converts a path element protobuf object to a tuple. """
  if element.has_id():
    id_or_name = int(element.id())
  elif element.has_name():
    id_or_name = decode_str(element.name())
  else:
    raise BadRequest(u'All path elements must either have a name or ID')

  return decode_str(element.type()), id_or_name
@gen.coroutine
def get_metadata(self, tr, project_id, txid):
  """ Fetches the metadata logged for a given transaction. """
  tx_dir = yield self._tx_metadata(tr, project_id)
  results = yield ResultIterator(
    tr, self._tornado_fdb, tx_dir.get_txid_slice(txid)).list()

  scatter_val, tx_start_versionstamp = TransactionID.decode(txid)
  if (not results or
      results[0].key != tx_dir.encode_start_key(scatter_val,
                                                tx_start_versionstamp)):
    raise BadRequest(u'Transaction not found')

  raise gen.Return(tx_dir.decode_metadata(txid, results[1:]))
def include_data(self, query):
  """ Determines whether query results should include full entity data. """
  if query.keys_only() and query.property_name_list():
    raise BadRequest(
      u'A keys-only query cannot include a property name list')

  if query.keys_only():
    return False

  if not query.property_name_list():
    return True

  return False
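# The decision above, as a table (keys_only flag vs. projected properties):
#
#   keys_only | property_name_list | result
#   ----------+--------------------+---------------------------
#   True      | non-empty          | BadRequest
#   True      | empty              | False (keys only)
#   False     | empty              | True  (full entities)
#   False     | non-empty          | False (projection query)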
def set_cursor(self, path, inclusive):
  """ Changes the range's cursor position.

  Args:
    path: An entity_pb.Path object.
    inclusive: A boolean specifying that the next result can include the
      given path.
  Raises:
    BadRequest if unable to set the cursor to the given path.
  """
  range_start, range_end = self._range
  cursor = Cursor(self.prefix + str(encode_index_pb(path)), inclusive)
  if cursor.key < self._cursor.key:
    raise BadRequest('Cursor cannot be moved backwards '
                     '({} < {})'.format(repr(cursor.key),
                                        repr(self._cursor.key)))

  if cursor.key < range_start or cursor.key > range_end:
    raise BadRequest('Cursor outside range: {}'.format(self._range))

  self._cursor = cursor
@staticmethod
def _enforce_max_groups(mutations):
  """ Raises an exception if too many groups were modified. """
  mutated_groups = set()
  for mutation in mutations:
    if isinstance(mutation, entity_pb.Reference):
      key = mutation
    else:
      key = mutation.key()

    namespace = decode_str(key.name_space())
    flat_group = (namespace,) + Path.flatten(key.path())[:2]
    mutated_groups.add(flat_group)

    if len(mutated_groups) > 25:
      raise BadRequest(u'Too many entity groups modified in transaction')
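# Sketch of the group computation above: an entity group is identified by the
# namespace plus the first (kind, id) pair of the flattened path, so an
# ancestor and all of its descendants land in the same group. Plain-tuple
# demo (the helper name is hypothetical):
def group_of(namespace, flat_path):
  return (namespace,) + tuple(flat_path[:2])

assert group_of('', ('Guestbook', 'default', 'Greeting', 42)) == \
  ('', 'Guestbook', 'default')
assert group_of('', ('Guestbook', 'default')) == ('', 'Guestbook', 'default')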
@gen.coroutine
def dynamic_get(self, project_id, get_request, get_response):
  """ Fetches the latest versions of the requested entities. """
  logger.debug(u'get_request:\n{}'.format(get_request))
  project_id = decode_str(project_id)
  tr = self._db.create_transaction()

  read_versionstamp = None
  if get_request.has_transaction():
    yield self._tx_manager.log_lookups(tr, project_id, get_request)

    # Ensure the GC hasn't cleaned up an entity written after the tx start.
    safe_read_stamps = yield [self._gc.safe_read_versionstamp(tr, key)
                              for key in get_request.key_list()]
    safe_read_stamps = [vs for vs in safe_read_stamps if vs is not None]
    read_versionstamp = TransactionID.decode(
      get_request.transaction().handle())[1]
    if any(safe_versionstamp > read_versionstamp
           for safe_versionstamp in safe_read_stamps):
      raise BadRequest(u'The specified transaction has expired')

  futures = []
  for key in get_request.key_list():
    futures.append(self._data_manager.get_latest(tr, key, read_versionstamp,
                                                 snapshot=True))

  version_entries = yield futures

  # If this read is in a transaction, logging the RPC is a mutation.
  yield self._tornado_fdb.commit(tr)

  for entry in version_entries:
    response_entity = get_response.add_entity()
    response_entity.set_version(entry.version)
    if entry.has_entity:
      response_entity.mutable_entity().MergeFrom(entry.decoded)
    else:
      response_entity.mutable_key().MergeFrom(entry.key)

  logger.debug(u'fetched paths: {}'.format(
    [entry.path for entry in version_entries if entry.has_entity]))
def _encode_entity_len(self, encoded_entity):
  """ Encodes an entity's length as a fixed-width value. """
  if len(encoded_entity) > MAX_ENTITY_SIZE:
    raise BadRequest(u'Entity exceeds maximum size')

  return Int64.encode_bare(len(encoded_entity), self._ENTITY_LEN_SIZE)
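# Hedged sketch of a fixed-width length prefix like the one Int64.encode_bare
# is used for above (the real codec and the _ENTITY_LEN_SIZE value are not
# shown here): big-endian and zero-padded to a constant width, so encoded
# entities can be framed without a delimiter.
import struct

def encode_len_sketch(blob):
  """ 4-byte big-endian length prefix (width chosen for illustration). """
  return struct.pack('>I', len(blob))

assert encode_len_sketch(b'abc') == b'\x00\x00\x00\x03'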
@gen.coroutine
def get_iterator(self, tr, query, read_versionstamp=None):
  """ Returns an iterator over the index entries matching the query. """
  project_id = decode_str(query.app())
  namespace = decode_str(query.name_space())
  filter_props = group_filters(query)
  ancestor_path = tuple()
  if query.has_ancestor():
    ancestor_path = Path.flatten(query.ancestor().path())

  start_cursor = None
  if query.has_compiled_cursor():
    start_cursor = ListCursor(query)._GetLastResult()

  end_cursor = None
  if query.has_end_compiled_cursor():
    end_compiled = query.end_compiled_cursor()
    end_cursor = ListCursor(query)._DecodeCompiledCursor(end_compiled)[0]

  rpc_limit, check_more_results = self.rpc_limit(query)
  fetch_limit = rpc_limit
  if check_more_results:
    fetch_limit += 1

  if query.has_kind() and query.kind() == u'__namespace__':
    project_dir = yield self._directory_cache.get(tr, (project_id,))
    raise gen.Return(NamespaceIterator(tr, self._tornado_fdb, project_dir))
  elif query.has_kind() and query.kind() == u'__kind__':
    project_dir = yield self._directory_cache.get(tr, (project_id,))
    raise gen.Return(
      KindIterator(tr, self._tornado_fdb, project_dir, namespace))
  elif query.has_kind() and query.kind() == u'__property__':
    project_dir = yield self._directory_cache.get(tr, (project_id,))
    raise gen.Return(
      PropertyIterator(tr, self._tornado_fdb, project_dir, namespace))

  index = yield self._get_perfect_index(tr, query)
  reverse = get_scan_direction(query, index) == Query_Order.DESCENDING

  if index is None:
    if not all(prop.equality for prop in filter_props):
      raise BadRequest(u'Query not supported')

    indexes = []
    equality_props = [filter_prop for filter_prop in filter_props
                      if filter_prop.name == KEY_PROP]
    if len(equality_props) > 1:
      raise BadRequest(u'Only one equality key filter is supported')

    equality_prop = next(iter(equality_props), None)
    other_props = [filter_prop for filter_prop in filter_props
                   if filter_prop.name != KEY_PROP]
    for filter_prop in other_props:
      index = yield self._single_prop_index(
        tr, project_id, namespace, decode_str(query.kind()),
        filter_prop.name)
      for op, value in filter_prop.filters:
        tmp_filter_prop = FilterProperty(filter_prop.name, [(op, value)])
        if equality_prop is not None:
          tmp_filter_props = (tmp_filter_prop, equality_prop)
        else:
          tmp_filter_props = (tmp_filter_prop,)

        index_slice = index.get_slice(tmp_filter_props, ancestor_path,
                                      start_cursor, end_cursor)
        indexes.append([index, index_slice, filter_prop.name, value])

    raise gen.Return(
      MergeJoinIterator(tr, self._tornado_fdb, filter_props, indexes,
                        fetch_limit, read_versionstamp, ancestor_path,
                        snapshot=True))

  equality_prop = next(
    (filter_prop for filter_prop in filter_props if filter_prop.equality),
    None)
  if equality_prop is not None and len(equality_prop.filters) > 1:
    indexes = []
    for op, value in equality_prop.filters:
      tmp_filter_props = []
      for filter_prop in filter_props:
        if filter_prop.name == equality_prop.name:
          tmp_filter_props.append(
            FilterProperty(filter_prop.name, [(op, value)]))
        else:
          tmp_filter_props.append(filter_prop)

      desired_slice = index.get_slice(
        tmp_filter_props, ancestor_path, start_cursor, end_cursor, reverse)
      indexes.append([index, desired_slice, equality_prop.name, value])

    raise gen.Return(
      MergeJoinIterator(tr, self._tornado_fdb, filter_props, indexes,
                        fetch_limit, read_versionstamp, ancestor_path,
                        snapshot=True))

  desired_slice = index.get_slice(filter_props, ancestor_path, start_cursor,
                                  end_cursor, reverse)
  iterator = IndexIterator(tr, self._tornado_fdb, index, desired_slice,
                           fetch_limit, reverse, read_versionstamp,
                           snapshot=True)

  raise gen.Return(iterator)
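# The MergeJoinIterator above intersects several single-property index
# scans. A minimal in-memory sketch of a merge join over sorted key lists
# (the real iterator walks FDB ranges and compares encoded entity paths):
def merge_join(sorted_lists):
  """ Yields keys present in every sorted, de-duplicated input list. """
  iters = [iter(lst) for lst in sorted_lists]
  current = [next(it, None) for it in iters]
  while all(v is not None for v in current):
    high = max(current)
    if all(v == high for v in current):
      yield high
      current = [next(it, None) for it in iters]
    else:
      # Advance every iterator that is behind the highest candidate.
      current = [next(it, None) if v < high else v
                 for v, it in zip(current, iters)]

assert list(merge_join([[1, 3, 5, 7], [3, 4, 5], [2, 3, 5, 9]])) == [3, 5]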
@gen.coroutine
def _dynamic_run_query(self, query, query_result):
  """ Runs a query and populates the given query result. """
  logger.debug(u'query: {}'.format(query))
  project_id = decode_str(query.app())
  tr = self._db.create_transaction()
  read_versionstamp = None
  if query.has_transaction():
    yield self._tx_manager.log_query(tr, project_id, query)

    # Ensure the GC hasn't cleaned up an entity written after the tx start.
    safe_versionstamp = yield self._gc.safe_read_versionstamp(
      tr, query.ancestor())
    read_versionstamp = TransactionID.decode(
      query.transaction().handle())[1]
    if (safe_versionstamp is not None and
        safe_versionstamp > read_versionstamp):
      raise BadRequest(u'The specified transaction has expired')

  fetch_data = self._index_manager.include_data(query)
  rpc_limit, check_more_results = self._index_manager.rpc_limit(query)

  iterator = yield self._index_manager.get_iterator(
    tr, query, read_versionstamp)
  for prop_name in query.property_name_list():
    prop_name = decode_str(prop_name)
    if prop_name not in iterator.prop_names:
      raise BadRequest(
        u'Projections on {} are not supported'.format(prop_name))

  data_futures = [] if fetch_data else None
  unique_keys = set()
  results = []
  entries_fetched = 0
  skipped_results = 0
  cursor = None
  while True:
    remainder = rpc_limit - entries_fetched
    iter_offset = max(query.offset() - entries_fetched, 0)
    entries, more_iterator_results = yield iterator.next_page()
    entries_fetched += len(entries)
    if not entries and more_iterator_results:
      continue

    if not entries and not more_iterator_results:
      break

    skipped_results += min(len(entries), iter_offset)
    suitable_entries = entries[iter_offset:remainder]
    if entries[:remainder]:
      cursor = entries[:remainder][-1]

    if not fetch_data and not query.keys_only():
      results.extend([entry.prop_result() for entry in suitable_entries])
      continue

    for entry in suitable_entries:
      if entry.path in unique_keys:
        continue

      unique_keys.add(entry.path)
      if fetch_data:
        data_futures.append(
          self._data_manager.get_entry(tr, entry, snapshot=True))
      else:
        results.append(entry.key_result())

    if not more_iterator_results:
      break

  if fetch_data:
    entity_results = yield data_futures
    results = [entity.encoded for entity in entity_results]
  else:
    results = [result.Encode() for result in results]

  yield self._tornado_fdb.commit(tr)

  query_result.result_list().extend(results)
  # TODO: Figure out how ndb multi queries use compiled cursors.
  if query.compile():
    ordered_props = tuple(prop_name for prop_name, _ in get_order_info(query)
                          if prop_name != KEY_PROP)
    mutable_cursor = query_result.mutable_compiled_cursor()
    if cursor is not None:
      mutable_cursor.MergeFrom(cursor.cursor_result(ordered_props))

  more_results = check_more_results and entries_fetched > rpc_limit
  query_result.set_more_results(more_results)

  if skipped_results:
    query_result.set_skipped_results(skipped_results)

  if query.keys_only():
    query_result.set_keys_only(True)

  logger.debug(u'{} results'.format(len(query_result.result_list())))
@gen.coroutine
def dynamic_put(self, project_id, put_request, put_response, retries=5):
  """ Upserts the given entities and populates the put response. """
  # logger.debug(u'put_request:\n{}'.format(put_request))
  project_id = decode_str(project_id)
  # TODO: Enforce max key length (100 elements).
  # Enforce max element size (1500 bytes).
  # Enforce max kind size (1500 bytes).
  # Enforce key name regex (reserved names match __.*__).
  if put_request.auto_id_policy() != put_request.CURRENT:
    raise BadRequest(u'Sequential allocator is not implemented')

  tr = self._db.create_transaction()

  if put_request.has_transaction():
    yield self._tx_manager.log_puts(tr, project_id, put_request)
    writes = {self._collapsible_id(entity):
              (VersionEntry.from_key(entity.key()),
               VersionEntry.from_key(entity.key()), None)
              for entity in put_request.entity_list()}
  else:
    # Eliminate multiple puts to the same key.
    puts_by_key = {self._collapsible_id(entity): entity
                   for entity in put_request.entity_list()}
    writes = yield {key: self._upsert(tr, entity)
                    for key, entity in six.iteritems(puts_by_key)}

  old_entries = [old_entry for old_entry, _, _ in six.itervalues(writes)
                 if old_entry.present]
  versionstamp_future = None
  if old_entries:
    versionstamp_future = tr.get_versionstamp()

  try:
    yield self._tornado_fdb.commit(tr, convert_exceptions=False)
  except fdb.FDBError as fdb_error:
    if fdb_error.code == FDBErrorCodes.NOT_COMMITTED:
      pass
    elif fdb_error.code == FDBErrorCodes.COMMIT_RESULT_UNKNOWN:
      logger.error('Unable to determine commit result. Retrying.')
    else:
      raise InternalError(fdb_error.description)

    retries -= 1
    if retries < 0:
      raise InternalError(fdb_error.description)

    yield self.dynamic_put(project_id, put_request, put_response, retries)
    return

  if old_entries:
    self._gc.clear_later(old_entries, versionstamp_future.wait().value)

  mutations = [(old_entry, new_entry, index_stats)
               for old_entry, new_entry, index_stats
               in six.itervalues(writes) if index_stats is not None]
  IOLoop.current().spawn_callback(self._stats_buffer.update, project_id,
                                  mutations)

  for entity in put_request.entity_list():
    write_entry = writes[self._collapsible_id(entity)][1]
    put_response.add_key().CopyFrom(write_entry.key)
    if write_entry.version != ABSENT_VERSION:
      put_response.add_version(write_entry.version)
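# Sketch of the "eliminate multiple puts to the same key" step above: keying
# a dict by a stable key identifier keeps only the last put for each key,
# since later assignments in a dict comprehension overwrite earlier ones.
# Stand-in (key, value) pairs instead of entity protobufs:
puts = [('key1', 'v1'), ('key2', 'v2'), ('key1', 'v3')]
puts_by_key = {key: value for key, value in puts}
assert puts_by_key == {'key1': 'v3', 'key2': 'v2'}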