def set_x_timestamp_header_tween(request):
    # `handler` is the downstream request handler, closed over by the
    # enclosing Pyramid tween factory.
    ts1 = get_timestamp()
    response = handler(request)
    # The storage might have created a new timestamp when processing
    # a write.  Report that one if it's newer than the current one.
    ts2 = get_timestamp(response.headers.get("X-Last-Modified"))
    response.headers["X-Weave-Timestamp"] = str(max(ts1, ts2))
    return response
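The `handler` above is not a free variable in practice: Pyramid tweens are produced by a factory that receives the downstream handler and the registry. A minimal sketch (names illustrative, not from the source) of how such a tween is wrapped and registered:

def timestamp_tween_factory(handler, registry):
    # Standard Pyramid tween-factory protocol: close over the downstream
    # handler and return the per-request tween callable.
    def set_x_timestamp_header_tween(request):
        ts1 = get_timestamp()
        response = handler(request)
        ts2 = get_timestamp(response.headers.get("X-Last-Modified"))
        response.headers["X-Weave-Timestamp"] = str(max(ts1, ts2))
        return response
    return set_x_timestamp_header_tween

# Registered at configuration time, e.g.:
#   config.add_tween("myapp.tweens.timestamp_tween_factory")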
def del_item(self, user, item):
    modified = get_timestamp()
    data, casid = self.get_cached_data(user)
    num_deleted = self._del_items(user, [item], modified, data, casid)
    if num_deleted == 0:
        raise ItemNotFoundError
    return modified
def valid_batch(self, user, batch):
    ts = get_timestamp()
    batchid = str(batch)
    bdata, bcasid = self.get_cached_batches(user, ts)
    if not bdata:
        return False
    return batchid in bdata
def get_cached_data(self, user, refresh_if_missing=True):
    """Get the cached collection data, pulling into cache if missing.

    This method returns the cached collection data, populating it from
    the underlying store if it is not cached.
    """
    key = self.get_key(user)
    data, casid = self.cache.gets(key)
    if data is None and refresh_if_missing:
        data = {}
        try:
            storage = self.storage
            collection = self.collection
            ttl_base = int(get_timestamp())
            with self.owner.lock_for_read(user, collection):
                ts = storage.get_collection_timestamp(user, collection)
                data["modified"] = ts
                data["items"] = {}
                for bso in storage.get_items(user, collection)["items"]:
                    if bso.get("ttl") is not None:
                        bso["ttl"] = ttl_base + bso["ttl"]
                    data["items"][bso["id"]] = bso
            self.cache.add(key, data)
            data, casid = self.cache.gets(key)
        except CollectionNotFoundError:
            data = None
    return data, casid
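The `casid` returned here feeds memcache's check-and-set protocol, which the mutating methods in this section rely on for optimistic concurrency. A minimal sketch (hypothetical helper, not from the source) of that gets/cas pattern, assuming a memcached-style client:

def update_with_cas(cache, key, mutate, max_retries=3):
    # Hypothetical illustration of the read-modify-write cycle used by
    # set_items/del_items: gets() returns (value, cas_token) and cas()
    # only writes if nobody else has written since, so we retry on
    # contention and give up with ConflictError.
    for _ in range(max_retries):
        data, casid = cache.gets(key)
        mutate(data)
        if cache.cas(key, data, casid):
            return data
    raise ConflictError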
def __init__(self, storage, timestamp=None):
    self.storage = storage
    self.connection = storage.dbconnector.connect()
    self.timestamp = get_timestamp(timestamp)
    self.cache = defaultdict(SQLCachedCollectionData)
    self.locked_collections = {}
    self._nesting_level = 0
def del_item(self, userid, item):
    modified = get_timestamp()
    data, casid = self.get_cached_data(userid)
    num_deleted = self._del_items(userid, [item], modified, data, casid)
    if num_deleted == 0:
        raise ItemNotFoundError
    return modified
def get_cached_data(self, userid, refresh_if_missing=True):
    """Get the cached collection data, pulling into cache if missing.

    This method returns the cached collection data, populating it from
    the underlying store if it is not cached.
    """
    key = self.get_key(userid)
    data, casid = self.cache.gets(key)
    if data is None and refresh_if_missing:
        data = {}
        try:
            storage = self.storage
            collection = self.collection
            ttl_base = int(get_timestamp())
            with self.owner.lock_for_read(userid, collection):
                ts = storage.get_collection_timestamp(userid, collection)
                data["modified"] = ts
                data["items"] = {}
                for bso in storage.get_items(userid, collection)["items"]:
                    if bso.get("ttl") is not None:
                        bso["ttl"] = ttl_base + bso["ttl"]
                    data["items"][bso["id"]] = bso
            self.cache.add(key, data)
            data, casid = self.cache.gets(key)
        except CollectionNotFoundError:
            data = None
    return data, casid
def valid_batch(self, userid, batch):
    ts = get_timestamp()
    batchid = str(batch)
    bdata, bcasid = self.get_cached_batches(userid, ts)
    if not bdata:
        return False
    return batchid in bdata
def get_item(self, session, user, collection, item):
    """Returns one item from a collection."""
    userid = user_key(user)
    collectionid = self._get_collection_id(collection)
    row = session.transaction.execute_sql(
        getq(queries.ITEM_DETAILS),
        params={
            "userid": userid,
            "collectionid": collectionid,
            "item": item,
            "ttl": ts2dt(session.timestamp or get_timestamp()),
        },
        param_types={
            "userid": param_types.STRING,
            "collectionid": param_types.INT64,
            "item": param_types.STRING,
            "ttl": param_types.TIMESTAMP,
        },
    ).one_or_none()
    if row is None:
        raise ItemNotFoundError
    # Mix in the column names, since _row_to_bso expects (name, value) pairs.
    row = zip(["id", "sortindex", "modified", "payload"], row)
    result = session.transaction.execute_sql(
        "SELECT CURRENT_TIMESTAMP()").one()
    current_ts = dt2ts(result[0])
    return self._row_to_bso(row, int(current_ts))
def set_item(self, userid, item, bso):
    bso["id"] = item
    modified = get_timestamp()
    data, casid = self.get_cached_data(userid)
    num_created = self._set_items(userid, [bso], modified, data, casid)
    return {
        "created": num_created == 1,
        "modified": modified,
    }
def set_item(self, user, item, bso):
    bso["id"] = item
    modified = get_timestamp()
    data, casid = self.get_cached_data(user)
    num_created = self._set_items(user, [bso], modified, data, casid)
    return {
        "created": num_created == 1,
        "modified": modified,
    }
def get_collection_with_internal_pagination(request):
    """Get the contents of a collection, in a respectful manner.

    We provide a client-driven pagination API, but some clients don't use
    it.  Instead they make humongous queries such as "give me all 100,000
    history items as a single batch" and unfortunately, we have to comply.

    This wrapper view breaks up such requests so that they use the
    pagination API internally, which is more respectful of server
    resources and avoids bogging down queries from other users.
    """
    try:
        settings = request.registry.settings
        batch_size = settings.get("storage.pagination_batch_size")
        # If we're not doing internal pagination, fulfill it directly.
        if batch_size is None:
            return get_collection(request)
        # If the request is already limited, fulfill it directly.
        limit = request.validated.get("limit", None)
        if limit is not None and limit < batch_size:
            return get_collection(request)
        # Otherwise, we'll have to paginate internally to reduce db load.
        items = []
        request.validated["limit"] = batch_size
        while True:
            # Do the actual fetch, knowing it won't be too big.
            res = get_collection(request)
            items.extend(res)
            if limit is not None:
                max_left = limit - len(items)
                # If we've fetched up to the requested limit then stop,
                # leaving the X-Weave-Next-Offset header intact.
                if max_left <= 0:
                    break
                request.validated["limit"] = min(max_left, batch_size)
            # Check Next-Offset to see if we've fetched all available items.
            try:
                offset = request.response.headers.pop("X-Weave-Next-Offset")
            except KeyError:
                break
            # Fetch again, using the given offset token and sanity-checking
            # that the collection has not been concurrently modified.
            # Taking a collection lock here would defeat the point of this
            # pagination, which is to free up db resources.
            request.validated["offset"] = offset
            if "if_unmodified_since" not in request.validated:
                last_modified = request.response.headers["X-Last-Modified"]
                last_modified = get_timestamp(last_modified)
                request.validated["if_unmodified_since"] = last_modified
        return items
    except NotFoundError:
        # For b/w compat, non-existent collections must return an empty list.
        return []
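A worked example (illustrative values, not from the source) of the limit bookkeeping above: a client requesting limit=2500 against a pagination batch size of 1000 is served in three internal fetches.

# Assumes limit=2500 and storage.pagination_batch_size=1000.
limit, batch_size, fetched, passes = 2500, 1000, 0, []
while fetched < limit:
    step = min(limit - fetched, batch_size)  # mirrors min(max_left, batch_size)
    passes.append(step)
    fetched += step
assert passes == [1000, 1000, 500]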
def apply_batch(self, user, batch):
    modified = get_timestamp()
    batchid = str(batch)
    bdata, bcasid = self.get_cached_batches(user, modified)
    # Invalid, closed, or expired batch
    if not bdata or batchid not in bdata:
        raise InvalidBatch(batch)
    data, casid = self.get_cached_data(user)
    self._set_items(user, bdata[batchid]["items"], modified, data, casid)
    return modified
def apply_batch(self, userid, batch):
    modified = get_timestamp()
    batchid = str(batch)
    bdata, bcasid = self.get_cached_batches(userid, modified)
    # Invalid, closed, or expired batch
    if not bdata or batchid not in bdata:
        raise InvalidBatch(batch)
    data, casid = self.get_cached_data(userid)
    self._set_items(userid, bdata[batchid]["items"], modified, data, casid)
    return modified
def get_cached_batches(self, userid, ts=None):
    if ts is None:
        ts = get_timestamp()
    ts = int(ts)
    bdata, bcasid = self.cache.gets(self.get_batches_key(userid))
    # Remove any expired batches, but let the
    # calling code write it back out to memcache.
    if bdata:
        # Iterate over a copy, since we delete entries as we go.
        for batchid, batch in list(bdata.items()):
            if batch["created"] + BATCH_LIFETIME < ts:
                del bdata[batchid]
    return bdata, bcasid
def get_cached_batches(self, user, ts=None):
    if ts is None:
        ts = get_timestamp()
    ts = int(ts)
    bdata, bcasid = self.cache.gets(self.get_batches_key(user))
    # Remove any expired batches, but let the
    # calling code write it back out to memcache.
    if bdata:
        # Iterate over a copy, since we delete entries as we go.
        for batchid, batch in list(bdata.items()):
            if batch["created"] + BATCH_LIFETIME < ts:
                del bdata[batchid]
    return bdata, bcasid
def extract_precondition_headers(request):
    """Validator to extract the X-If-[Unm|M]odified-Since headers.

    This validator extracts the X-If-Modified-Since header or the
    X-If-Unmodified-Since header, validates it and parses it into a
    float.  The result is stored under the key "if_modified_since" or
    "if_unmodified_since" as appropriate.

    It is an error to specify both headers in a single request.
    """
    if_modified_since = request.headers.get("X-If-Modified-Since")
    if if_modified_since is not None:
        try:
            if_modified_since = get_timestamp(if_modified_since)
            if if_modified_since < 0:
                raise ValueError
        except ValueError:
            msg = "Bad value for X-If-Modified-Since: %r"
            request.errors.add("header", "X-If-Modified-Since",
                               msg % (if_modified_since,))
        else:
            request.validated["if_modified_since"] = if_modified_since

    if_unmodified_since = request.headers.get("X-If-Unmodified-Since")
    if if_unmodified_since is not None:
        try:
            if_unmodified_since = get_timestamp(if_unmodified_since)
            if if_unmodified_since < 0:
                raise ValueError
        except ValueError:
            msg = 'Invalid value for "X-If-Unmodified-Since": %r'
            request.errors.add("header", "X-If-Unmodified-Since",
                               msg % (if_unmodified_since,))
        else:
            if if_modified_since is not None:
                msg = "Cannot specify both X-If-Modified-Since and " \
                      "X-If-Unmodified-Since on a single request"
                request.errors.add("header", "X-If-Unmodified-Since", msg)
            else:
                request.validated["if_unmodified_since"] = if_unmodified_since
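A runnable sketch of the mutual-exclusion rule above. The request stub is hypothetical (a stand-in for what cornice attaches to the request), and it assumes get_timestamp accepts numeric strings, as the validator's own use of it on header values implies:

class FakeErrors(list):
    # Stand-in for the errors collection the validator writes to.
    def add(self, location, name, description):
        self.append((location, name, description))

class FakeRequest(object):
    def __init__(self, headers):
        self.headers = headers
        self.errors = FakeErrors()
        self.validated = {}

# Supplying both precondition headers flags an error; only the
# X-If-Modified-Since value is recorded.
request = FakeRequest({"X-If-Modified-Since": "1425.50",
                       "X-If-Unmodified-Since": "1425.50"})
extract_precondition_headers(request)
assert "if_modified_since" in request.validated
assert "if_unmodified_since" not in request.validated
assert len(request.errors) == 1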
def create_batch(self, user):
    ts = get_timestamp()
    bdata, bcasid = self.get_cached_batches(user, ts)
    batchid = int(ts * 1000)
    if not bdata:
        bdata = {}
    if batchid in bdata:
        raise ConflictError
    bdata[batchid] = {"created": int(ts), "items": []}
    key = self.get_batches_key(user)
    if not self.cache.cas(key, bdata, bcasid):
        raise ConflictError
    return batchid
def append_items_to_batch(self, userid, batch, items):
    modified = get_timestamp()
    batchid = str(batch)
    bdata, bcasid = self.get_cached_batches(userid, modified)
    # Invalid, closed, or expired batch
    if not bdata or batchid not in bdata:
        raise InvalidBatch(batch)
    bdata[batchid]["items"].extend(items)
    key = self.get_batches_key(userid)
    if not self.cache.cas(key, bdata, bcasid):
        raise ConflictError
    return modified
def append_items_to_batch(self, user, batch, items):
    modified = get_timestamp()
    batchid = str(batch)
    bdata, bcasid = self.get_cached_batches(user, modified)
    # Invalid, closed, or expired batch
    if not bdata or batchid not in bdata:
        raise InvalidBatch(batch)
    bdata[batchid]["items"].extend(items)
    key = self.get_batches_key(user)
    if not self.cache.cas(key, bdata, bcasid):
        raise ConflictError
    return modified
def create_batch(self, userid):
    ts = get_timestamp()
    bdata, bcasid = self.get_cached_batches(userid, ts)
    batchid = int(ts * 1000)
    if not bdata:
        bdata = {}
    if batchid in bdata:
        raise ConflictError
    bdata[batchid] = {
        "created": int(ts),
        "items": []
    }
    key = self.get_batches_key(userid)
    if not self.cache.cas(key, bdata, bcasid):
        raise ConflictError
    return batchid
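An illustrative end-to-end use of the batch API defined in this section. `backend` is a hypothetical instance of the class these methods belong to; the item payload is made up:

batch = backend.create_batch(userid)
backend.append_items_to_batch(userid, batch, [{"id": "bso1", "payload": "x"}])
assert backend.valid_batch(userid, batch)
modified = backend.apply_batch(userid, batch)  # writes the queued items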
def get_item_timestamp(self, session, user, collection, item):
    userid = user_key(user)
    collectionid = self._get_collection_id(collection)
    result = session.transaction.execute_sql(
        getq(queries.ITEM_TIMESTAMP),
        params={
            "userid": userid,
            "collectionid": collectionid,
            "item": item,
            "ttl": ts2dt(session.timestamp or get_timestamp()),
        },
        param_types={
            "userid": param_types.STRING,
            "collectionid": param_types.INT64,
            "item": param_types.STRING,
            "ttl": param_types.TIMESTAMP,
        },
    ).one_or_none()
    if not result:
        raise ItemNotFoundError
    return dt2ts(result[0])
def dt2ts(dt):
    """Convert a Python datetime to float seconds since the epoch,
    truncated to two decimal places."""
    val = (dt.replace(tzinfo=None) - EPOCH).total_seconds()
    return get_timestamp(math.floor(val * 100) / 100)
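dt2ts is used alongside a ts2dt helper throughout this section. Below is a sketch of that inverse, reconstructed from how it is called here rather than taken from the source, plus a worked example of the centisecond truncation:

import math
from datetime import datetime, timedelta

EPOCH = datetime(1970, 1, 1)

def ts2dt(ts):
    # Assumed inverse of dt2ts: float seconds since the epoch back to
    # a naive UTC datetime.
    return EPOCH + timedelta(seconds=float(ts))

# 123.456789 seconds floors to 123.45 (truncated, not rounded to 123.46).
val = (ts2dt(123.456789) - EPOCH).total_seconds()
assert math.floor(val * 100) / 100 == 123.45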
def _find_items(self, session, user, collection, **params):
    """Find items matching the given search parameters."""
    userid = user_key(user)
    collectionid = self._get_collection_id(collection)
    bind = {"userid": userid, "collectionid": collectionid}
    bind_types = {
        "userid": param_types.STRING,
        "collectionid": param_types.INT64,
    }
    ts = session.timestamp or get_timestamp()
    if "ttl" not in params:
        params["ttl"] = ts
    if "ids" in params:
        for i, id in enumerate(params["ids"], 1):
            bind["id_%d" % (i,)] = id
            bind_types["id_%d" % (i,)] = param_types.STRING
    if "sort" in params:
        bind["sort"] = params["sort"]
        bind_types["sort"] = param_types.STRING
    # We always fetch one more item than necessary, so we can tell whether
    # there are additional items to be fetched with next_offset.
    limit = params.get("limit")
    if limit is not None:
        params["limit"] = bind["limit"] = limit + 1
        bind_types["limit"] = param_types.INT64
    offset = params.pop("offset", None)
    if offset is not None:
        self.decode_offset(params, offset)
        bind["offset"] = params["offset"]
        bind_types["offset"] = param_types.INT64
        if limit is None:
            # Avoid sqlalchemy defaulting to LIMIT -1 when none is provided.
            # Subtract the offset to avoid overflow errors (which only occur
            # with a FORCE_INDEX= directive):
            #   OutOfRange: 400 int64 overflow: <INT64_MAX> + offset
            params["limit"] = bind["limit"] = INT64_MAX - params["offset"]
            bind_types["limit"] = param_types.INT64
    # Convert timestamp types
    for ts_var in ["newer", "newer_eq", "older", "older_eq", "ttl"]:
        if ts_var in params:
            bind[ts_var] = ts2dt(params[ts_var])
            bind_types[ts_var] = param_types.TIMESTAMP
    # Generate the query
    query = FIND_ITEMS(bso, params)
    query = str(query.compile())
    if self._force_bsolm_index:
        query = query.replace(
            "FROM bso",
            "FROM bso@{FORCE_INDEX=BsoLastModified}",
        )
    query = query.replace(":", "@")
    result = session.transaction.execute_sql(
        query,
        params=bind,
        param_types=bind_types,
    )
    rows = list(result)
    items = []
    if rows:
        # Get the column names so each row can be zipped into the
        # (name, value) pairs that _row_to_bso expects.
        fields = [x.name for x in result.fields]
        items = [
            self._row_to_bso(zip(fields, row), int(ts))
            for row in rows
        ]
    # If the query returned no results, we don't know whether that's
    # because it's empty or because it doesn't exist.  Read the collection
    # timestamp and let it raise CollectionNotFoundError if necessary.
    if not items:
        self.get_collection_timestamp(user, collection)
    # Check if we read past the original limit, and set next_offset if so.
    next_offset = None
    if limit is not None and len(items) > limit:
        items = items[:-1]
        next_offset = self.encode_next_offset(params, items)
    return {
        "items": items,
        "next_offset": next_offset,
    }
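The "fetch one extra row" trick above is worth isolating. A minimal, self-contained sketch (not from the source) of how reading limit + 1 rows distinguishes a full page from the last page:

def fetch_page(rows, offset, limit):
    # Read one row beyond the limit; if we got it, there is another page.
    window = rows[offset:offset + limit + 1]
    next_offset = None
    if len(window) > limit:
        window = window[:limit]
        next_offset = offset + limit
    return window, next_offset

page, nxt = fetch_page(list(range(10)), offset=0, limit=4)
assert page == [0, 1, 2, 3] and nxt == 4
page, nxt = fetch_page(list(range(10)), offset=8, limit=4)
assert page == [8, 9] and nxt is None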
def bigint2ts(bigint):
    # Convert a millisecond-resolution bigint to a float seconds timestamp.
    return get_timestamp(bigint / 1000.0)
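A worked example, assuming get_timestamp rounds to the same two-decimal precision that dt2ts above targets:

# Millisecond bigints from the database become second-resolution floats:
# 1446753000123 ms -> 1446753000.123 s -> 1446753000.12 after rounding.
assert bigint2ts(1446753000123) == 1446753000.12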
def extract_query_params(request):
    """Validator to extract BSO search parameters from the query string.

    This validator will extract and validate the following search params:

        * newer: lower-bound on last-modified time (float timestamp)
        * sort: order in which to return results (string)
        * limit: maximum number of items to return (integer)
        * offset: position at which to restart search (string)
        * ids: a comma-separated list of BSO ids (list of strings)
        * full: flag, whether to include full bodies (bool)

    """
    newer = request.GET.get("newer")
    if newer is not None:
        try:
            newer = get_timestamp(newer)
            if newer < 0:
                raise ValueError
        except ValueError:
            msg = "Invalid value for newer: %r" % (newer,)
            request.errors.add("querystring", "newer", msg)
        else:
            request.validated["newer"] = newer

    limit = request.GET.get("limit")
    if limit is not None:
        try:
            limit = int(limit)
            if limit < 0:
                raise ValueError
        except ValueError:
            msg = "Invalid value for limit: %r" % (limit,)
            request.errors.add("querystring", "limit", msg)
        else:
            request.validated["limit"] = limit

    # The offset token is an opaque string, with semantics determined by
    # the storage backend, so we can't parse or validate it here.  Rather,
    # we must catch InvalidOffsetError if something goes wrong.
    offset = request.GET.get("offset")
    if offset is not None:
        request.validated["offset"] = offset

    sort = request.GET.get("sort")
    if sort is not None:
        if sort not in ("newest", "index"):
            msg = "Invalid value for sort: %r" % (sort,)
            request.errors.add("querystring", "sort", msg)
        else:
            request.validated["sort"] = sort

    ids = request.GET.get("ids")
    if ids is not None:
        ids = [id.strip() for id in ids.split(",")]
        if len(ids) > BATCH_MAX_COUNT:
            msg = "Cannot process more than %s BSOs at a time"
            msg = msg % (BATCH_MAX_COUNT,)
            request.errors.add("querystring", "items", msg)
        else:
            for id in ids:
                if not VALID_ID_REGEX.match(id):
                    msg = "Invalid BSO id: %r" % (id,)
                    request.errors.add("querystring", "ids", msg)
            request.validated["ids"] = ids

    if "full" in request.GET:
        request.validated["full"] = True
def del_items(self, userid, items):
    modified = get_timestamp()
    data, casid = self.get_cached_data(userid)
    self._del_items(userid, items, modified, data, casid)
    return data["modified"]
def del_collection(self, userid):
    if not self.cache.delete(self.get_key(userid)):
        raise CollectionNotFoundError
    return get_timestamp()
def set_items(self, userid, items):
    modified = get_timestamp()
    data, casid = self.get_cached_data(userid)
    self._set_items(userid, items, modified, data, casid)
    return modified
def set_items(self, user, items):
    modified = get_timestamp()
    data, casid = self.get_cached_data(user)
    self._set_items(user, items, modified, data, casid)
    return modified
def del_items(self, user, items):
    modified = get_timestamp()
    data, casid = self.get_cached_data(user)
    self._del_items(user, items, modified, data, casid)
    return data["modified"]
def del_collection(self, user):
    if not self.cache.delete(self.get_key(user)):
        raise CollectionNotFoundError
    return get_timestamp()
def extract_query_params(request):
    """Validator to extract BSO search parameters from the query string.

    This validator will extract and validate the following search params:

        * newer: lower-bound on last-modified time (float timestamp)
        * older: upper-bound on last-modified time (float timestamp)
        * sort: order in which to return results (string)
        * limit: maximum number of items to return (integer)
        * offset: position at which to restart search (string)
        * ids: a comma-separated list of BSO ids (list of strings)
        * full: flag, whether to include full bodies (bool)

    """
    newer = request.GET.get("newer")
    if newer is not None:
        try:
            newer = get_timestamp(newer)
            if newer < 0:
                raise ValueError
        except ValueError:
            msg = "Invalid value for newer: %r" % (newer,)
            request.errors.add("querystring", "newer", msg)
        else:
            request.validated["newer"] = newer

    older = request.GET.get("older")
    if older is not None:
        try:
            older = get_timestamp(older)
            if older < 0:
                raise ValueError
        except ValueError:
            msg = "Invalid value for older: %r" % (older,)
            request.errors.add("querystring", "older", msg)
        else:
            request.validated["older"] = older

    limit = request.GET.get("limit")
    if limit is not None:
        try:
            limit = int(limit)
            if limit < 0:
                raise ValueError
        except ValueError:
            msg = "Invalid value for limit: %r" % (limit,)
            request.errors.add("querystring", "limit", msg)
        else:
            request.validated["limit"] = limit

    # The offset token is an opaque string, with semantics determined by
    # the storage backend, so we can't parse or validate it here.  Rather,
    # we must catch InvalidOffsetError if something goes wrong.
    offset = request.GET.get("offset")
    if offset is not None:
        request.validated["offset"] = offset

    sort = request.GET.get("sort")
    if sort is not None:
        if sort not in ("newest", "oldest", "index"):
            msg = "Invalid value for sort: %r" % (sort,)
            request.errors.add("querystring", "sort", msg)
        else:
            request.validated["sort"] = sort

    ids = request.GET.get("ids")
    if ids is not None:
        ids = [id.strip() for id in ids.split(",")]
        if len(ids) > BATCH_MAX_IDS:
            msg = "Cannot process more than %s BSOs at a time"
            msg = msg % (BATCH_MAX_IDS,)
            request.errors.add("querystring", "items", msg)
        else:
            for id in ids:
                if not VALID_ID_REGEX.match(id):
                    msg = "Invalid BSO id: %r" % (id,)
                    request.errors.add("querystring", "ids", msg)
            request.validated["ids"] = ids

    if "full" in request.GET:
        request.validated["full"] = True
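A runnable sketch of this validator on a typical query string. The request stub is hypothetical (mirroring the one shown earlier for the precondition headers), and it assumes get_timestamp parses numeric strings:

class FakeErrors(list):
    def add(self, location, name, description):
        self.append((location, name, description))

class FakeRequest(object):
    def __init__(self, params):
        self.GET = params
        self.errors = FakeErrors()
        self.validated = {}

request = FakeRequest({"newer": "1262304000.00", "limit": "100",
                       "sort": "newest", "full": "1"})
extract_query_params(request)
assert request.validated["newer"] == 1262304000.0
assert request.validated["limit"] == 100
assert request.validated["sort"] == "newest"
assert request.validated["full"] is True
assert not request.errors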