def allowed_status_keys(self):
    """Return the status ids the current user is allowed to see.

    Every status id indexed under a blacklisted microblog context UUID
    (as reported by ``self._blacklist_microblogcontext_uuids()``) is
    removed from the full set of status ids. This is the key security
    protection the getters rely on.

    NOTE(review): the previous docstring said results are cached per
    user request; no caching is visible in this method -- confirm where
    (or whether) that happens.

    :return: ``self._status_mapping.keys()`` when nothing is
        blacklisted, otherwise the difference between all status ids
        and the blacklisted status ids.
    """
    uuid_blacklist = self._blacklist_microblogcontext_uuids()
    if not uuid_blacklist:
        return self._status_mapping.keys()

    # For each blacklisted uuid that is actually indexed, fetch its set
    # of status ids. Ask the mapping itself for membership rather than
    # scanning its keys.
    blacklisted_treesets = (
        self._uuid_mapping.get(uuid)
        for uuid in uuid_blacklist
        if uuid in self._uuid_mapping
    )
    # Merge the per-uuid sets into one blacklist of status ids.
    blacklisted_statusids = reduce(
        LLBTree.union, blacklisted_treesets, LLBTree.TreeSet())
    # Subtract the blacklisted status ids from all status ids.
    all_statusids = LLBTree.LLSet(self._status_mapping.keys())
    return LLBTree.difference(all_statusids, blacklisted_statusids)
def __init__(self, context=None):
    """Create the empty storages and indexes of the container.

    :param context: accepted but not used by this initializer.
    """
    # -- timestamps ---------------------------------------------------
    # time of the last add, in milliseconds (async + cache key)
    self._mtime = 0
    # cache buster, in MICROseconds (cache key only)
    self._ctime = 0

    # -- storages: (long statusid) -> (IStatusUpdate object) ----------
    # primary storage
    self._status_mapping = LOBTree.LOBTree()
    # archive of deleted updates
    self._status_archive = LOBTree.LOBTree()

    # -- keyed indexes: (string key) -> TreeSet(long statusid) --------
    # by userid
    self._user_mapping = OOBTree.OOBTree()
    # by tag
    self._tag_mapping = OOBTree.OOBTree()
    # by microblog_context UUID (old attribute name kept for backcompat)
    self._uuid_mapping = OOBTree.OOBTree()
    # by content_context UUID
    self._content_uuid_mapping = OOBTree.OOBTree()
    # by thread UUID
    self._threadid_mapping = OOBTree.OOBTree()
    # by mention UUID
    self._mentions_mapping = OOBTree.OOBTree()

    # -- flat indexes: TreeSet(long statusid) -------------------------
    # all content updates
    self._is_content_mapping = LLBTree.LLTreeSet()
    # all human updates
    self._is_human_mapping = LLBTree.LLTreeSet()
def secure(self, keyset):
    """Reduce ``keyset`` to the status ids the current user may see.

    The result is the intersection of ``keyset`` with
    ``self.allowed_status_keys()``.

    NB: a returned statusupdate may live in an accessible
    microblog_context (workspace) yet reference a content_context
    (document) the user cannot access yet because of content workflow.
    That check is costly and is not performed here; per the original
    note, activitystream post-filters just before rendering.
    """
    candidate_ids = LLBTree.LLTreeSet(keyset)
    visible_ids = LLBTree.LLTreeSet(self.allowed_status_keys())
    return LLBTree.intersection(candidate_ids, visible_ids)
def _allowed_status_keys(self, uuid_blacklist=[]): if not uuid_blacklist: return self._status_mapping.keys() else: # for each uid, expand uid into set of statusids blacklisted_treesets = (self._uuid_mapping.get(uuid) for uuid in uuid_blacklist if uuid in self._uuid_mapping.keys()) # merge sets of blacklisted statusids into single blacklist blacklisted_statusids = reduce(LLBTree.union, blacklisted_treesets, LLBTree.TreeSet()) # subtract blacklisted statusids from all statusids all_statusids = LLBTree.LLSet(self._status_mapping.keys()) return LLBTree.difference(all_statusids, blacklisted_statusids)
def setup_human_and_content_mappings(context):
    """Upgrade step 0012: add the is_content and is_human indexes.

    Creates either index on the microblog tool when it is missing,
    then feeds every status update through both indexers and commits.

    :param context: not used by this step.
    """
    tool = queryUtility(IMicroblogTool)

    missing_index_messages = (
        ('_is_content_mapping', "Adding missing is_content mapping to %s"),
        ('_is_human_mapping', "Adding missing is_human mapping to %s"),
    )
    for attribute, message in missing_index_messages:
        if hasattr(tool, attribute):
            continue
        logger.info(message % repr(tool))
        setattr(tool, attribute, LLBTree.LLTreeSet())

    logger.info("Indexing statusupdates into is_content and is_human indexes")
    for update in tool.values(limit=None):
        tool._idx_is_content(update)
        tool._idx_is_human(update)
    commit()
def _query_mapping(self, mapping, keys):
    """Return the union of all status ids indexed in ``mapping``.

    :param mapping: index mapping a key to a set of status ids.
    :param keys: a single key (``str``/``unicode``) or an iterable of
        keys; a falsy value yields an empty result.
    :return: an empty ``LLTreeSet`` when ``keys`` is falsy, otherwise
        the ``LLBTree.multiunion`` of the sets found under the keys
        that exist in ``mapping``.
    """
    if not keys:
        return LLBTree.LLTreeSet()
    if isinstance(keys, (str, unicode)):
        # convert a single key into a list
        keys = [keys]
    # Keys that are not indexed are silently discarded. Membership is
    # asked of the mapping itself instead of scanning ``mapping.keys()``,
    # and the loop variable no longer shadows the ``id`` builtin.
    treesets = [mapping.get(key) for key in keys if key in mapping]
    return LLBTree.multiunion(treesets)
def user_keys(self, users, min=None, max=None, limit=100, tag=None):
    """Return status ids posted by ``users``, newest first.

    :param users: a single userid string or an iterable of userids.
    :param min: lower bound passed on to ``longkeysortreverse``.
    :param max: upper bound passed on to ``longkeysortreverse``.
    :param limit: maximum number of ids passed on to
        ``longkeysortreverse``.
    :param tag: optional tag; when given only ids carrying it are kept.
    :return: ``()`` when there is nothing to return, otherwise the
        result of ``longkeysortreverse``.
    """
    if not users:
        return ()

    # Detect a single userid with isinstance. Comparing against
    # str(users) raises UnicodeEncodeError on Python 2 for unicode
    # ids containing non-ASCII characters.
    if isinstance(users, (str, unicode)):
        # single user optimization
        mapping = self._user_mapping.get(users)
        if not mapping:
            return ()
    else:
        # collection of users: merge the LLTreeSet of each known user
        treesets = (
            self._user_mapping.get(userid)
            for userid in users
            if userid in self._user_mapping
        )
        mapping = reduce(LLBTree.union, treesets, LLBTree.TreeSet())

    if tag:
        if tag not in self._tag_mapping:
            return ()
        mapping = LLBTree.intersection(mapping, self._tag_mapping[tag])

    return longkeysortreverse(mapping, min, max, limit)
def mention_keys(self, mentions, min=None, max=None, limit=100, tag=None):
    """Return status ids mentioning ``mentions``, newest first.

    :param mentions: a single mention key or an iterable of them.
    :param min: lower bound passed on to ``longkeysortreverse``.
    :param max: upper bound passed on to ``longkeysortreverse``.
    :param limit: maximum number of ids passed on to
        ``longkeysortreverse``.
    :param tag: optional tag filter; an unknown tag yields ``()``.
    :return: ``()`` when there is nothing to return, otherwise the
        security-filtered ids from ``longkeysortreverse``.
    """
    if not mentions:
        return ()
    if tag and tag not in self._tag_mapping:
        return ()

    # Detect a single mention with isinstance. Comparing against
    # str(mentions) raises UnicodeEncodeError on Python 2 for unicode
    # keys containing non-ASCII characters.
    if isinstance(mentions, (str, unicode)):
        # single mention optimization
        mapping = self._mentions_mapping.get(mentions)
        if not mapping:
            return ()
    else:
        # collection of mentions: merge the LLTreeSet of each known one
        treesets = (
            self._mentions_mapping.get(mention)
            for mention in mentions
            if mention in self._mentions_mapping
        )
        mapping = reduce(LLBTree.union, treesets, LLBTree.TreeSet())

    # returns unchanged mapping if tag is None
    mapping = self._keys_tag(tag, mapping)
    mapping = self.secure(mapping)
    return longkeysortreverse(mapping, min, max, limit)
def context_keys(self, context, min=None, max=None, limit=100,
                 tag=None, nested=True, mention=None):
    """Return status ids posted in ``context``, newest first.

    :param context: the context whose updates are wanted.
    :param min: lower bound passed on to ``longkeysortreverse``.
    :param max: upper bound passed on to ``longkeysortreverse``.
    :param limit: maximum number of ids passed on to
        ``longkeysortreverse``.
    :param tag: optional tag filter; an unknown tag yields ``()``.
    :param nested: when true, use ``self.nested_uuids(context)``;
        otherwise only the uuid of ``context`` itself.
    :param mention: optional mention filter; an unknown mention
        yields ``()``.
    :return: ``()`` when nothing can match, otherwise the
        security-filtered ids from ``longkeysortreverse``.
    """
    if tag and tag not in self._tag_mapping:
        return ()
    # Same early exit as for tags: without it the mention lookup below
    # would raise KeyError for a mention that was never indexed.
    if mention and mention not in self._mentions_mapping:
        return ()

    if nested:
        # hits portal_catalog
        nested_uuids = [uuid for uuid in self.nested_uuids(context)
                        if uuid in self._uuid_mapping]
        if not nested_uuids:
            return ()
    else:
        # used in test_statuscontainer_context for non-integration tests
        uuid = self._context2uuid(context)
        if uuid not in self._uuid_mapping:
            return ()
        nested_uuids = [uuid]

    # the tag filter handles a None input gracefully
    keyset_tag = self._keys_tag(tag, self.allowed_status_keys())
    # The mention must be resolved against the mentions index. This
    # previously called _keys_tag, which looks in the tag index.
    keyset_mention = self._keys_mention(mention, keyset_tag)
    # calculate the tag+mention+uuid intersection for each uuid context
    keyset_uuids = [self._keys_uuid(_uuid, keyset_mention)
                    for _uuid in nested_uuids]
    # merge the intersections
    merged_set = LLBTree.multiunion(keyset_uuids)
    merged_set = self.secure(merged_set)
    return longkeysortreverse(merged_set, min, max, limit)
def user_keys(self, users, min=None, max=None, limit=100, tag=None):
    """Return visible status ids posted by ``users``, newest first.

    :param users: a single userid string or an iterable of userids.
    :param min: lower bound passed on to ``longkeysortreverse``.
    :param max: upper bound passed on to ``longkeysortreverse``.
    :param limit: maximum number of ids passed on to
        ``longkeysortreverse``.
    :param tag: optional tag filter; an unknown tag yields ``()``.
    :return: ``()`` when there is nothing to return, otherwise the
        security-filtered ids from ``longkeysortreverse``.
    """
    if not users:
        return ()
    if tag and tag not in self._tag_mapping:
        return ()

    # Detect a single userid with isinstance. Comparing against
    # str(users) raises UnicodeEncodeError on Python 2 for unicode
    # ids containing non-ASCII characters.
    if isinstance(users, (str, unicode)):
        # single user optimization
        mapping = self._user_mapping.get(users)
        if not mapping:
            return ()
    else:
        # collection of users: merge the LLTreeSet of each known user
        treesets = (
            self._user_mapping.get(userid)
            for userid in users
            if userid in self._user_mapping
        )
        mapping = reduce(LLBTree.union, treesets, LLBTree.TreeSet())

    # returns unchanged mapping if tag is None
    mapping = self._keys_tag(tag, mapping)
    mapping = self.secure(mapping)
    return longkeysortreverse(mapping, min, max, limit)
def context_keys(self, context, min=None, max=None, limit=100,
                 tag=None, nested=True):
    """Return status ids posted in ``context``, newest first.

    Checks the "read" permission first. Returns ``()`` when the tag is
    unknown or no relevant context uuid is indexed.
    """
    self._check_permission("read")

    if tag and tag not in self._tag_mapping:
        return ()

    if not nested:
        # used in test_statuscontainer_context for non-integration tests
        uuid = self._context2uuid(context)
        if uuid not in self._uuid_mapping:
            return ()
        nested_uuids = [uuid]
    else:
        # hits portal_catalog
        nested_uuids = [uuid for uuid in self.nested_uuids(context)
                        if uuid in self._uuid_mapping]
        if not nested_uuids:
            return ()

    # the tag filter passes the keyset through untouched for tag=None
    tagged_keys = self._keys_tag(tag, self.allowed_status_keys())
    # one tag+uuid intersection per context uuid ...
    per_uuid_keys = [self._keys_uuid(uuid, tagged_keys)
                     for uuid in nested_uuids]
    # ... merged into a single set
    merged_keys = LLBTree.multiunion(per_uuid_keys)
    return longkeysortreverse(merged_keys, min, max, limit)
def _idx_context(self, status): uuid = status.context_uuid if uuid: # If the key was already in the collection, there is no change # create tag treeset if not already present self._uuid_mapping.insert(uuid, LLBTree.LLTreeSet()) self._uuid_mapping[uuid].insert(status.id)
def _idx_tag(self, status): for tag in [unicode(tag) for tag in status.tags]: # If the key was already in the collection, there is no change # create tag treeset if not already present self._tag_mapping.insert(tag, LLBTree.LLTreeSet()) # add status id to tag treeset self._tag_mapping[tag].insert(status.id)
def _idx_user(self, status):
    """Index ``status.id`` under the unicode form of ``status.userid``."""
    user_key = unicode(status.userid)
    index = self._user_mapping
    # insert() is a no-op for an existing key, so the user's treeset
    # is only created on first use
    index.insert(user_key, LLBTree.LLTreeSet())
    # register the status id with that user
    index[user_key].insert(status.id)
def longkeysortreverse(btreeish, minv=None, maxv=None, limit=None):
    """Performance optimized keyspace accessor.

    Yields the keys of ``btreeish`` in descending order. Expects a
    btreeish with long (microsecond timestamp) keys.

    When ``minv`` or ``maxv`` is truthy, that single range is fetched
    and sorted. Otherwise the keyspace is walked in three windows --
    last hour, the 23 hours before that, everything older -- so that a
    small ``limit`` is usually satisfied without sorting all keys.

    :param btreeish: object with a ``keys(min=..., max=...)`` method,
        or anything ``LLBTree.TreeSet`` can be built from.
    :param minv: lowest key to return, or None.
    :param maxv: highest key to return, or None.
    :param limit: stop after this many keys; None means no limit.
    """
    try:
        accessor = btreeish.keys
    except AttributeError:
        accessor = LLBTree.TreeSet(btreeish).keys

    if minv or maxv:
        # explicit range requested: no chunking
        windows = [dict(min=minv, max=maxv)]
    else:
        now = int(time.time() * 1e6)
        hour_ago = int(now - 3600 * 1e6)
        day_ago = int(hour_ago - 23 * 3600 * 1e6)
        # BTrees range searches include both end points by default, so
        # each older window ends one microsecond before the newer one
        # starts. Otherwise a key on a boundary would be yielded twice.
        windows = [
            dict(min=hour_ago, max=now),
            dict(min=day_ago, max=hour_ago - 1),
            dict(max=day_ago - 1),
        ]

    yielded = 0
    for window in windows:
        # each window is only fetched once the previous one is used up
        for key in sorted(accessor(**window), reverse=True):
            yield key
            yielded += 1
            if yielded == limit:
                return
def _idx_mentions(self, status): if not getattr(status, 'mentions', False): return mentions = status.mentions.keys() for mention in mentions: # If the key was already in the collection, there is no change # create tag treeset if not already present self._mentions_mapping.insert(mention, LLBTree.LLTreeSet()) self._mentions_mapping[mention].insert(status.id)
def _idx_threadid(self, status): if not getattr(status, 'thread_id', False): return tread_id = status.thread_id if tread_id: # If the key was already in the collection, there is no change # create tag treeset if not already present self._threadid_mapping.insert(tread_id, LLBTree.LLTreeSet()) self._threadid_mapping[tread_id].insert(status.id)
def keys(self, min=None, max=None, limit=100, tags=None, users=None):
    """Return visible status ids, newest first.

    Without ``tags`` and ``users`` all allowed status ids are used.
    Otherwise the union of the ids matching any of the tags and any of
    the users is security-filtered through ``self.secure``.
    """
    unfiltered = tags is None and users is None
    if unfiltered:
        # allowed_status_keys() is already secure
        matches = self.allowed_status_keys()
    else:
        by_tag = self._query_mapping(self._tag_mapping, tags)
        by_user = self._query_mapping(self._user_mapping, users)
        matches = self.secure(LLBTree.union(by_tag, by_user))
    return longkeysortreverse(matches, min, max, limit)
def keys(self, min=None, max=None, limit=100, tag=None):
    """Return status ids, newest first, optionally limited to ``tag``.

    Checks the "read" permission first. An unknown tag yields ``()``.
    """
    self._check_permission("read")

    if not tag:
        # no tag filter: the whole primary storage is the keyspace
        return longkeysortreverse(self._status_mapping, min, max, limit)

    if tag not in self._tag_mapping:
        return ()

    all_ids = LLBTree.LLTreeSet(self._status_mapping.keys())
    tagged_ids = LLBTree.intersection(all_ids, self._tag_mapping[tag])
    return longkeysortreverse(tagged_ids, min, max, limit)
def _allowed_status_keys(self, uuid_blacklist=[]): if not uuid_blacklist: return self._status_mapping.keys() else: # for each uid, expand uid into set of statusids blacklisted_treesets = ( self._uuid_mapping.get(uuid) for uuid in uuid_blacklist if uuid in self._uuid_mapping.keys() ) # merge sets of blacklisted statusids into single blacklist blacklisted_statusids = reduce(LLBTree.union, blacklisted_treesets, LLBTree.TreeSet()) # subtract blacklisted statusids from all statusids all_statusids = LLBTree.LLSet(self._status_mapping.keys()) return LLBTree.difference(all_statusids, blacklisted_statusids)
def _idx_tag(self, status): """ Update the `StatusContainer` tag index with any new tags :param status: a `StatusUpdate` object """ if status.tags is None: return for tag in [unicode(tag) for tag in status.tags]: # If the key was already in the collection, there is no change # create tag treeset if not already present self._tag_mapping.insert(tag, LLBTree.LLTreeSet()) # add status id to tag treeset self._tag_mapping[tag].insert(status.id)
def longkeysortreverse(btreeish, minv=None, maxv=None, limit=None):
    """Performance optimized keyspace accessor.

    Returns an iterable of btreeish keys, reverse sorted by key.
    Expects a btreeish with long(microsec) keys.

    When a limit is given without a minv, the work is handed to the
    chunked strategy: rather than sorting the whole keyspace it
    heuristically sorts chunk after chunk until the limit is reached.
    The most recent slice sits last in the accessor, so iteration
    cannot simply start at the front -- in effect we want to iterate
    backwards. In every other case the direct strategy is used.
    """
    try:
        accessor = btreeish.keys
    except AttributeError:
        accessor = LLBTree.TreeSet(btreeish).keys

    use_chunking = not (minv or limit is None)
    if use_chunking:
        return _longkeysortreverse_optimized(accessor, maxv, limit)
    return _longkeysortreverse_direct(accessor, minv, maxv, limit)
def _keys_uuid(self, uuid, keyset): if uuid is None: return keyset return LLBTree.intersection( LLBTree.LLTreeSet(keyset), self._uuid_mapping[uuid])
def _keys_tag(self, tag, keyset): if tag is None: return keyset return LLBTree.intersection( LLBTree.LLTreeSet(keyset), self._tag_mapping[tag])
def secure(self, keyset):
    """Keep only those keys of ``keyset`` the current user may see.

    Computed as the intersection of ``keyset`` with
    ``self.allowed_status_keys()``.
    """
    requested = LLBTree.LLTreeSet(keyset)
    allowed = LLBTree.LLTreeSet(self.allowed_status_keys())
    return LLBTree.intersection(requested, allowed)
def _keys_mention(self, mention, keyset): if mention is None: return keyset return LLBTree.intersection( LLBTree.LLTreeSet(keyset), self._mentions_mapping[mention])
def _keys_mention(self, mention, keyset): if mention is None: return keyset return LLBTree.intersection(LLBTree.LLTreeSet(keyset), self._mentions_mapping[mention])
def _keys_tag(self, tag, keyset): if tag is None: return keyset return LLBTree.intersection(LLBTree.LLTreeSet(keyset), self._tag_mapping[tag])
def _keys_uuid(self, uuid, keyset): if uuid is None: return keyset return LLBTree.intersection(LLBTree.LLTreeSet(keyset), self._uuid_mapping[uuid])