def _apply_index(self, request):
    record = parseIndexRequest(request, self.id)
    try:
        qstart, qend = record.keys
    except TypeError:
        return None

    minint = BTrees.family64.minint
    maxint = BTrees.family64.maxint

    qstart = min(maxint, max(minint, qstart))
    qend = max(minint, min(maxint, qend))

    # start is inside the range
    start = multiunion(self._since_index.values(max=qstart))
    end = multiunion(self._until_index.values(min=qstart))
    start_into = intersection(start, end)

    # end is inside the range
    start = multiunion(self._since_index.values(max=qend))
    end = multiunion(self._until_index.values(min=qend))
    end_into = intersection(start, end)

    # start before the range and end after the range
    start = multiunion(self._since_index.values(min=qstart))
    end = multiunion(self._until_index.values(max=qend))
    start_before_end_after = intersection(start, end)

    result = union(start_into, end_into)
    result = union(result, start_before_end_after)

    return multiunion(map(self._index.__getitem__, result)), (self.id,)
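# A minimal, standalone sketch of the BTrees set operations the index code in
# this collection leans on (union, intersection, multiunion); the sample data
# below is made up for illustration and is not part of the index above.
from BTrees.IIBTree import IISet, intersection, multiunion, union

since = IISet([1, 2, 3])
until = IISet([2, 3, 4])

# union/intersection treat None as "no restriction": union(None, s) yields s.
assert list(union(None, since)) == [1, 2, 3]
assert list(intersection(since, until)) == [2, 3]

# multiunion folds many small sets in a single call, which is why the code
# above prefers it over repeated pairwise unions of bucket values.
assert list(multiunion([IISet([4]), IISet([1, 2]), IISet([2])])) == [1, 2, 4]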
def _apply_index(self, request, cid=''):
    """ Apply the index to query parameters given in 'request', which
    should be a mapping object.

    If the request does not contain the needed parameters, then return
    None.

    If the request contains a parameter with the name of the column
    + "_usage", sniff for information on how to handle applying the index.

    Otherwise return two objects.  The first object is a ResultSet
    containing the record numbers of the matching records.  The second
    object is a tuple containing the names of all data fields used.
    """
    record = parseIndexRequest(request, self.getId())
    if record.keys is None:
        return None

    term = self._convertDateTime(record.keys[0])

    #
    # Aggregate sets for each bucket separately, to avoid
    # large-small union penalties.
    #
    #until_only = IISet()
    #map( until_only.update, self._until_only.values( term ) )
    # XXX use multi-union
    until_only = multiunion(self._until_only.values(term))

    #since_only = IISet()
    #map( since_only.update, self._since_only.values( None, term ) )
    # XXX use multi-union
    since_only = multiunion(self._since_only.values(None, term))

    #until = IISet()
    #map( until.update, self._until.values( term ) )
    # XXX use multi-union
    until = multiunion(self._until.values(term))

    #since = IISet()
    #map( since.update, self._since.values( None, term ) )
    # XXX use multi-union
    since = multiunion(self._since.values(None, term))

    bounded = intersection(until, since)

    # Merge from smallest to largest.
    #result = union( self._always, until_only )
    result = union(bounded, until_only)
    result = union(result, since_only)
    #result = union( result, bounded )
    result = union(result, self._always)

    return result, (self._since_field, self._until_field)
def search(self, path, default_level=0):
    """
    path is either a string representing a
    relative URL or a part of a relative URL or
    a tuple (path,level).

    level >= 0  starts searching at the given level
    level <  0  not implemented yet
    """
    if isinstance(path, StringType):
        level = default_level
    else:
        level = int(path[1])
        path = path[0]

    comps = self.splitPath(path)

    if len(comps) == 0:
        return IISet(self._unindex.keys())

    if level >= 0:
        results = []
        for i in range(len(comps)):
            comp = comps[i]
            if not self._index.has_key(comp):
                return IISet()
            if not self._index[comp].has_key(level + i):
                return IISet()
            results.append(self._index[comp][level + i])

        res = results[0]
        for i in range(1, len(results)):
            res = intersection(res, results[i])
        return res
    else:
        results = IISet()
        for level in range(0, self._depth + 1):
            ids = None
            error = 0
            for cn in range(0, len(comps)):
                comp = comps[cn]
                try:
                    ids = intersection(ids, self._index[comp][level + cn])
                except KeyError:
                    error = 1
            if error == 0:
                results = union(results, ids)
        return results
def unionResultSets(sets):
    """ perform union of ResultSets """
    docids = DocidList()
    words = WordList()
    for set in sets:
        docids = union(docids, set.docids)
        words.extend(set.words)
    return ResultSet(docids, words)
def get(self, pattern):
    """ Query the lexicon for words matching a pattern."""

    # A single-character pattern produces a slicing problem below.
    # Because the splitter throws away single characters we can
    # return an empty tuple here.
    if len(pattern) == 1:
        return ()

    wc_set = [self.multi_wc, self.single_wc]

    digrams = []
    globbing = 0
    for i in range(len(pattern)):
        if pattern[i] in wc_set:
            globbing = 1
            continue

        if i == 0:
            digrams.insert(i, (self.eow + pattern[i]))
            digrams.append((pattern[i] + pattern[i + 1]))
        else:
            try:
                if pattern[i + 1] not in wc_set:
                    digrams.append(pattern[i] + pattern[i + 1])
            except IndexError:
                digrams.append((pattern[i] + self.eow))

    if not globbing:
        result = self._lexicon.get(pattern, None)
        if result is None:
            return ()
        return (result, )

    ## now get all of the intsets that contain the result digrams
    result = None
    for digram in digrams:
        result = union(result, self._digrams.get(digram, None))

    if not result:
        return ()
    else:
        ## now we have narrowed the list of possible candidates
        ## down to those words which contain digrams.  However,
        ## some words may have been returned that match digrams,
        ## but do not match 'pattern'.  This is because some words
        ## may contain all matching digrams, but in the wrong
        ## order.
        expr = re.compile(self.createRegex(pattern))
        words = []
        hits = IISet()
        for x in result:
            if expr.match(self._inverseLex[x]):
                hits.insert(x)
        return hits
def _eval(self, context):
    csq = self._classifySubqueries()
    if csq['empty']:
        return context._getObjectIds()
    sqs = csq['lookup'] + csq['complex'] + csq['indexed'] + csq['notQ']
    if not sqs:
        return IISet()
    if len(sqs) >= 4:
        return multiunion([q._eval(context) for q in sqs])
    r = None
    for q in sqs:
        r = union(r, q._eval(context))
    return r
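# Rough, standalone illustration of the trade-off behind the len(sqs) >= 4
# switch above: folding many sets with one multiunion call versus repeated
# pairwise unions.  The set sizes and contents here are arbitrary.
import time
from BTrees.IIBTree import IISet, multiunion, union

sets = [IISet(list(range(i, 50000, 7))) for i in range(20)]

t0 = time.time()
paired = None
for s in sets:
    paired = union(paired, s)
t_pairwise = time.time() - t0

t0 = time.time()
folded = multiunion(sets)
t_multi = time.time() - t0

assert list(paired) == list(folded)
print('pairwise union: %.6fs  multiunion: %.6fs' % (t_pairwise, t_multi))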
def getDocumentsForWordIds(self, wordidlist):
    r = DocidList()
    for wordid in wordidlist:
        try:
            docids = self._wid2doc[wordid]
        except (TypeError, KeyError):
            continue
        r = union(r, docids)
    return r
def testFunkyKeyIteration(self):
    # The internal set iteration protocol allows "iterating over" a
    # single key as if it were a set.
    N = 100
    union, mkset = self.union, self.mkset
    slow = mkset()
    for i in range(N):
        slow = union(slow, mkset([i]))
    fast = self.multiunion(range(N))  # acts like N distinct singleton sets
    self.assertEqual(len(slow), N)
    self.assertEqual(len(fast), N)
    self.assertEqual(list(slow), list(fast))
    self.assertEqual(list(fast), range(N))
def testFunkyKeyIteration(self):
    # The internal set iteration protocol allows "iterating over" a
    # single key as if it were a set.
    N = 100
    union, mkset = self.union, self.mkset
    slow = mkset()
    for i in range(N):
        slow = union(slow, mkset([i]))
    fast = self.multiunion(range(N))  # acts like N distinct singleton sets
    self.assertEqual(len(slow), N)
    self.assertEqual(len(fast), N)
    self.assertEqual(list(slow.keys()), list(fast.keys()))
    self.assertEqual(list(fast.keys()), range(N))
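# Standalone check of the "funky" iteration protocol the two tests above rely
# on: multiunion accepts bare integers and treats each one as a singleton set.
from BTrees.IIBTree import multiunion

combined = multiunion([3, 1, 2, 2])
assert list(combined) == [1, 2, 3]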
def query_index(self, record, resultset=None):
    index = self._index
    indexed = self._index_value

    for key in record.keys:
        if bool(key) is bool(indexed):
            # If we match the indexed value, check index
            return intersection(index, resultset)
        else:
            # Otherwise, remove from resultset or _unindex
            if resultset is None:
                return union(difference(self._unindex, index), IISet([]))
            else:
                return difference(resultset, index)
    return IISet()
def _apply_index(self, request, resultset=None):
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    index = self._index
    indexed = self._index_value

    for key in record.keys:
        if bool(key) is bool(indexed):
            # If we match the indexed value, check index
            return (intersection(index, resultset), (self.id,))
        else:
            # Otherwise, remove from resultset or _unindex
            if resultset is None:
                return (union(difference(self._unindex, index), IISet([])),
                        (self.id,))
            else:
                return (difference(resultset, index), (self.id,))
    return (IISet(), (self.id,))
def _apply_index(self, request, resultset=None):
    record = parseIndexRequest(request, self.id, self.query_options)
    if record.keys is None:
        return None

    index = self._index

    for key in record.keys:
        if key:
            # If True, check index
            return (intersection(index, resultset), (self.id, ))
        else:
            # Otherwise, remove from resultset or _unindex
            if resultset is None:
                return (union(difference(self._unindex, index), IISet([])),
                        (self.id, ))
            else:
                return (difference(resultset, index), (self.id, ))
    return (IISet(), (self.id, ))
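# Hypothetical data illustrating the three branches of the boolean-style
# queries above; the names _index/_unindex/resultset mirror the attributes
# used there but the values are invented.  (In the real index _unindex is a
# rid -> value mapping; a plain IISet of all rids is used here to keep the
# sketch self-contained.)
from BTrees.IIBTree import IISet, difference, intersection, union

_index = IISet([2, 4])             # rids stored under the indexed value
_unindex = IISet([1, 2, 3, 4, 5])  # every indexed rid
resultset = IISet([1, 2, 3])       # prior result set from the catalog

# Query matches the indexed value: narrow the existing result set.
assert list(intersection(_index, resultset)) == [2]
# Query matches the other value and there is no prior result set.
assert list(union(difference(_unindex, _index), IISet([]))) == [1, 3, 5]
# Query matches the other value and a prior result set exists.
assert list(difference(resultset, _index)) == [1, 3]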
def _apply_index_with_range_dict_results(index, low_value=None,
                                          high_value=None):
    """ return an IISet of the rids matching the range in the index
        also return a dict matching each rid to the value of the index
        for that rid
    """
    index_items = index.items(low_value, high_value)
    r_set = None
    r_dict = dict()
    for k, kset in index_items:
        if isinstance(kset, int):
            r_dict[kset] = k
            kset = IISet((kset,))
        else:
            for kitem in kset:
                r_dict[kitem] = k
        r_set = union(r_set, kset)
    if isinstance(r_set, int):
        r_set = IISet((r_set, ))
    if r_set is None:
        r_set = IISet()
    return r_set, r_dict
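# Hypothetical usage of the helper above.  The forward index is assumed to map
# each indexed value either to a single rid (a bare int) or to an IISet of
# rids, as UnIndex-style indexes do; the OOBTree and the sample values are
# stand-ins, not part of the original code.
from BTrees.IIBTree import IISet
from BTrees.OOBTree import OOBTree

index = OOBTree()
index[10] = 7               # one rid, stored unwrapped
index[20] = IISet([1, 3])   # several rids for the same value
index[30] = IISet([5])      # outside the queried range below

rids, rid_to_value = _apply_index_with_range_dict_results(index, 10, 20)
assert list(rids) == [1, 3, 7]
assert rid_to_value == {7: 10, 1: 20, 3: 20}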
def _apply_index(self, request, cid=''):
    record = parseIndexRequest(request, self.getId(), self.query_options)
    if record.keys is None:
        return None

    catalog = getToolByName(self, 'portal_catalog')
    geoIndex = catalog._catalog.getIndex(self.geoindex_id)

    geoRequest = {}
    geoRequest[self.geoindex_id] = {
        'query': record.keys, 'range': record.range}
    geo_response = geoIndex._apply_index(geoRequest, raw=True)

    paths = {}
    for item in geo_response:
        paths[int(item['id'])] = item['properties']['path']

    rolesIndex = catalog._catalog.getIndex('allowedRolesAndUsers')
    user = _getAuthenticatedUser(self)
    perms_set = rolesIndex._apply_index(
        {'allowedRolesAndUsers': catalog._listAllowedRolesAndUsers(user)}
    )[0]

    r = intersection(perms_set, IISet(paths.keys()))

    if isinstance(r, int):
        r = IISet((r,))
    if r is None:
        return IISet(), (self.getId(),)
    else:
        url_tool = getToolByName(self, 'portal_url')
        portal_path = url_tool.getPortalObject().getPhysicalPath()
        root = list(portal_path)

        def up(path):
            return '/'.join(root + path.strip('/').split('/')[:-1])

        return union(
            r, IISet([catalog.getrid(up(paths[lid])) for lid in r])
        ), (self.getId(),)
def search(self, path, default_level=0):
    """
    path is either a string representing a
    relative URL or a part of a relative URL or
    a tuple (path,level).

    level >= 0  starts searching at the given level
    level <  0  not implemented yet
    """
    if isinstance(path, StringType):
        level = default_level
    else:
        level = int(path[1])
        path = path[0]

    comps = filter(None, path.split('/'))

    if len(comps) == 0:
        return IISet(self._unindex.keys())

    results = None
    if level >= 0:
        for i, comp in enumerate(comps):
            if not self._index.has_key(comp):
                return IISet()
            if not self._index[comp].has_key(level + i):
                return IISet()
            results = intersection(results, self._index[comp][level + i])
    else:
        for level in range(self._depth + 1):
            ids = None
            for i, comp in enumerate(comps):
                try:
                    ids = intersection(ids, self._index[comp][level + i])
                except KeyError:
                    break
            else:
                results = union(results, ids)
    return results
import time
from random import choice

from BTrees.IIBTree import IISet, union, intersection, difference


def make_choice(data, per):
    data_len = len(data)
    return [choice(data) for i in range(0, int(data_len * float(per) / 100.0))]


for max in (500, 2500, 5000, 10000, 25000, 50000, 100000):
    data = range(max)
    for p1, p2 in ((25, 25), (25, 50), (25, 75), (25, 100), (50, 50),
                   (50, 75), (50, 100), (75, 75), (75, 100), (100, 100)):
        d1 = IISet(make_choice(data, p1))
        d2 = IISet(make_choice(data, p2))

        ts = time.time()
        union(d1, d2)
        tu = time.time() - ts

        ts = time.time()
        intersection(d1, d2)
        ti = time.time() - ts

        ts = time.time()
        difference(d1, d2)
        td = time.time() - ts

        print '%6d %3d:%3d %6.6f %6.6f %6.6f' % (max, p1, p2, tu, ti, td)
        opr, opr_args = opr[0], opr[1:]

    if opr == "range":   # range search
        if 'min' in opr_args:
            lo = min(keys)
        else:
            lo = None
        if 'max' in opr_args:
            hi = max(keys)
        else:
            hi = None
        if hi:
            setlist = index.items(lo, hi)
        else:
            setlist = index.items(lo)

        for k, set in setlist:
            if type(set) is IntType:
                set = IISet((set,))
            r = union(r, set)
    else:  # not a range search
        for key in keys:
            set = index.get(key, None)
            if set is not None:
                if type(set) is IntType:
                    set = IISet((set,))
                r = union(r, set)

    if type(r) is IntType:
        r = IISet((r,))
    if r is None:
        return IISet(), (id,)
    else:
        return r, (id,)

def hasUniqueValuesFor(self, name):
def extendedpathindex_search(self, path, default_level=0, depth=-1, navtree=0,
                             navtree_start=0, tmpres=None):
    """
    path is either a string representing a
    relative URL or a part of a relative URL or
    a tuple (path,level).

    default_level specifies the level to use when no more specific
    level has been passed in with the path.

    level >= 0  starts searching at the given level
    level <  0  finds matches at *any* level

    depth lets you limit the results to items at most depth levels deeper
    than the matched path.  depth == 0 means no subitems are included at
    all, with depth == 1 only direct children are included, etc.
    depth == -1, the default, returns all children at any depth.

    navtree is treated as a boolean; if it evaluates to True, not only the
    query match is returned, but also each container in the path.  If depth
    is greater than 0, also all siblings of those containers, as well as
    the siblings of the match are included as well, plus *all* documents at
    the starting level.

    navtree_start limits what containers are included in a navtree search.
    If greater than 0, only containers (and possibly their siblings) at
    that level and up will be included in the resultset.
    """
    if isinstance(path, basestring):
        level = default_level
    else:
        level = int(path[1])
        path = path[0]

    if level < 0:
        # Search at every level, return the union of all results
        return multiunion(
            [self.search(path, level, depth, navtree, navtree_start)
             for level in xrange(self._depth + 1)])

    comps = filter(None, path.split('/'))

    if navtree and depth == -1:  # Navtrees don't do recursive
        depth = 1

    #
    # Optimisations
    #

    pathlength = level + len(comps) - 1
    if navtree and navtree_start > min(pathlength + depth, self._depth):
        # This navtree_start excludes all items that match the depth
        return IISet()
    if pathlength > self._depth:
        # Our search is for a path longer than anything in the index
        return IISet()

    if level == 0 and depth in (0, 1):
        # We have easy indexes for absolute paths where
        # we are looking for depth 0 or 1 result sets
        if navtree:
            # Optimized absolute path navtree and breadcrumbs cases
            result = []
            add = lambda x: x is not None and result.append(x)
            if depth == 1:
                # Navtree case, all sibling elements along the path
                convert = multiunion
                index = self._index_parents
            else:
                # Breadcrumbs case, all direct elements along the path
                convert = IISet
                index = self._index_items
            # Collect all results along the path
            for i in range(len(comps), navtree_start - 1, -1):
                parent_path = '/' + '/'.join(comps[:i])
                add(index.get(parent_path))
            return convert(result)

        if not path.startswith('/'):
            path = '/' + path
        if depth == 0:
            # Specific object search
            res = self._index_items.get(path)
            return res and IISet([res]) or IISet()
        else:
            # Single depth search
            return self._index_parents.get(path, IISet())

    # Avoid using the root set
    # as it is common for all objects anyway and add overhead
    # There is an assumption about all indexed values having the
    # same common base path
    if level == 0:
        indexpath = list(filter(None, self.getPhysicalPath()))
        minlength = min(len(indexpath), len(comps))
        # Truncate path to first different element
        for i in xrange(minlength):
            if indexpath[i] != comps[i]:
                break
            level += 1
        comps = comps[level:]

    if not comps and depth == -1:
        # Recursive search for everything
        return IISet(self._unindex)

    #
    # Core application of the indexes
    #

    pathset = None
    depthset = None  # For limiting depth

    if navtree and depth > 0:
        # Include the elements up to the matching path
        depthset = multiunion([
            self._index.get(None, {}).get(i, IISet())
            for i in range(min(navtree_start, level),
                           max(navtree_start, level) + 1)])

    indexedcomps = enumerate(comps)
    if not navtree:
        # Optimize relative-path searches by starting with the
        # presumed smaller sets at the end of the path first
        # We can't do this for the navtree case because it needs
        # the bigger rootset to include siblings along the way.
        indexedcomps = list(indexedcomps)
        indexedcomps.reverse()

    for i, comp in indexedcomps:
        # Find all paths that have comp at the given level
        res = self._index.get(comp, {}).get(i + level)
        if res is None:
            # Non-existing path; navtree is inverse, keep going
            pathset = IISet()
            if not navtree:
                return pathset
        pathset = intersection(pathset, res)

        if navtree and i + level >= navtree_start:
            depthset = union(depthset, intersection(
                pathset, self._index.get(None, {}).get(i + level)))

    if depth >= 0:
        # Limit results to those that terminate within depth levels
        start = len(comps) - 1
        if navtree:
            start = max(start, (navtree_start - level))
        depthset = multiunion(filter(None, [depthset] + [
            intersection(pathset, self._index.get(None, {}).get(i + level))
            for i in xrange(start, start + depth + 1)]))

    if navtree or depth >= 0:
        return depthset
    return pathset
def _apply_index(self, request, resultset=None):
    """Apply the index to query parameters given in 'request'.

    The argument should be a mapping object.

    If the request does not contain the needed parameters, then
    None is returned.

    If the request contains a parameter with the name of the column
    and this parameter is either a Record or a class instance then
    it is assumed that the parameters of this index are passed as
    attribute (Note: this is the recommended way to pass parameters
    since Zope 2.4)

    Otherwise two objects are returned.  The first object is a ResultSet
    containing the record numbers of the matching records.  The second
    object is a tuple containing the names of all data fields used.

    The resultset argument contains the resultset, as already calculated
    by ZCatalog's search method.
    """
    if not request.has_key(self._id):  # 'in' doesn't work with this object
        return IITreeSet(self._uid2end.keys()), ()

    start = self._get_position(request, 'start')
    end = self._get_position(request, 'end')
    used_fields = ()

    # We don't want the events that end before the start.  In other
    # words we want to find those events whose end >= the start query,
    # or None, as None means they have infinite recurrence.
    try:
        maxkey = self._end2uid.maxKey()
    except ValueError:
        # No events at all
        return IITreeSet(), used_fields

    if start is None or maxkey is None:
        # start is None, so we need to search right from the start; or
        # (amazingly) all events have infinite recurrence.
        # This means we must return *all* uids.
        start_uids = IITreeSet(self._uid2end.keys())
    else:
        used_fields += (self.start_attr,)
        #start_uids = IITreeSet()
        start = start.utctimetuple()
        try:
            minkey = self._end2uid.minKey(start)
            # Events that end on exactly the same time as the
            # search period start should not be included:
            if minkey == start:
                excludemin = True
            else:
                excludemin = False
            start_uids = multiunion(
                self._end2uid.values(minkey, maxkey, excludemin=excludemin))
        except ValueError:
            # No ending events
            start_uids = IITreeSet()

        # Include open ended events, if any
        if self._end2uid.has_key(None):
            start_uids = union(start_uids, self._end2uid[None])

    # XXX At this point an intersection with the resultset might be
    # beneficial.  It would stop us from calculating the recurrence
    # of ids that won't be returned.  It could be done after the
    # intersection with end_uids below as well, performance tests will tell.

    # We also do not want the events whose start comes after the end query.
    # In other words, we find all events where start <= end.
    if end is not None:
        end = end.utctimetuple()
        try:
            minkey = self._start2uid.minKey()
            end_uids = multiunion(self._start2uid.values(minkey, end))
            used_fields += (self.end_attr,)
        except ValueError:
            # No events
            return IITreeSet(), used_fields
        result = intersection(start_uids, end_uids)
    else:
        # No end specified, take all:
        result = start_uids

    return self._finalize_index(result, start, end, used_fields)
def search(self, path, default_level=0, depth=-1, navtree=0,
           navtree_start=0):
    """
    path is either a string representing a
    relative URL or a part of a relative URL or
    a tuple (path,level).

    level >= 0  starts searching at the given level
    level <  0  not implemented yet
    """
    if isinstance(path, basestring):
        startlevel = default_level
    else:
        startlevel = int(path[1])
        path = path[0]

    absolute_path = isinstance(path, basestring) and path.startswith('/')
    comps = filter(None, path.split('/'))

    orig_comps = [''] + comps[:]

    if depth > 0:
        raise ValueError("Can't do depth searches anymore")
    if not comps:
        comps = ['dmd']
        startlevel = 1
    else:
        if comps[0] == getCSEConf().get('virtualroot', '').replace('/', ''):
            comps = comps[1:]
        if comps[0] == 'zport':
            comps = comps[1:]
        if comps[0] != 'dmd':
            raise ValueError("Depth searches must start with 'dmd'")
        startlevel = len(comps)

    if len(comps) == 0:
        if depth == -1 and not navtree:
            return IISet(self._unindex.keys())

    # Make sure that we get depth = 1 if in navtree mode
    # unless specified otherwise
    orig_depth = depth
    if depth == -1:
        depth = 0 or navtree

    # Optimized navtree starting with absolute path
    if absolute_path and navtree and depth == 1 and default_level == 0:
        set_list = []
        # Insert root element
        if navtree_start >= len(orig_comps):
            navtree_start = 0
        # create a set of parent paths to search
        for i in range(len(orig_comps), navtree_start, -1):
            parent_path = '/'.join(orig_comps[:i])
            parent_path = parent_path and parent_path or '/'
            try:
                set_list.append(self._index_parents[parent_path])
            except KeyError:
                pass
        return multiunion(set_list)
    # Optimized breadcrumbs
    elif absolute_path and navtree and depth == 0 and default_level == 0:
        item_list = IISet()
        # Insert root element
        if navtree_start >= len(orig_comps):
            navtree_start = 0
        # create a set of parent paths to search
        for i in range(len(orig_comps), navtree_start, -1):
            parent_path = '/'.join(orig_comps[:i])
            parent_path = parent_path and parent_path or '/'
            try:
                item_list.insert(self._index_items[parent_path])
            except KeyError:
                pass
        return item_list
    # Specific object search
    elif absolute_path and orig_depth == 0 and default_level == 0:
        try:
            return IISet([self._index_items[path]])
        except KeyError:
            return IISet()
    # Single depth search
    elif absolute_path and orig_depth == 1 and default_level == 0:
        # only get objects contained in requested folder
        try:
            return self._index_parents[path]
        except KeyError:
            return IISet()
    # Sitemaps, relative paths, and depth queries
    elif startlevel >= 0:
        pathset = None  # Same as pathindex
        navset = None  # For collecting siblings along the way
        depthset = None  # For limiting depth

        if navtree and depth and \
           self._index.has_key(None) and \
           self._index[None].has_key(startlevel):
            navset = self._index[None][startlevel]

        for level in range(startlevel, startlevel + len(comps)):
            if level <= len(comps):
                comp = "/".join(comps[:level])
                if (not self._index.has_key(comp)
                        or not self._index[comp].has_key(level)):
                    # Navtree is inverse, keep going even for
                    # nonexisting paths
                    if navtree:
                        pathset = IISet()
                    else:
                        return IISet()
                else:
                    return self._index[comp][level]
                if navtree and depth and \
                   self._index.has_key(None) and \
                   self._index[None].has_key(level + depth):
                    navset = union(
                        navset,
                        intersection(pathset,
                                     self._index[None][level + depth]))
            if level - startlevel >= len(comps) or navtree:
                if (self._index.has_key(None)
                        and self._index[None].has_key(level)):
                    depthset = union(
                        depthset,
                        intersection(pathset, self._index[None][level]))

        if navtree:
            return union(depthset, navset) or IISet()
        elif depth:
            return depthset or IISet()
        else:
            return pathset or IISet()
    else:
        results = IISet()
        for level in range(0, self._depth + 1):
            ids = None
            error = 0
            for cn in range(0, len(comps)):
                comp = comps[cn]
                try:
                    ids = intersection(ids, self._index[comp][level + cn])
                except KeyError:
                    error = 1
            if error == 0:
                results = union(results, ids)
        return results
def union(self, *args):
    from BTrees.IIBTree import union
    return union(*args)
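# Standalone illustration of the union semantics the test helper above
# forwards to: None acts as an identity value and the result contains each
# key exactly once.  The sets below are made up.
from BTrees.IIBTree import IISet, union

a = IISet([1, 2])
b = IISet([2, 3])
assert list(union(None, a)) == [1, 2]
assert list(union(a, b)) == [1, 2, 3]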
def search(self, path, default_level=0, depth=-1, navtree=0,
           navtree_start=0):
    """
    path is either a string representing a
    relative URL or a part of a relative URL or
    a tuple (path,level).

    level >= 0  starts searching at the given level
    level <  0  not implemented yet
    """
    if isinstance(path, basestring):
        startlevel = default_level
    else:
        startlevel = int(path[1])
        path = path[0]

    absolute_path = isinstance(path, basestring) and path.startswith('/')
    comps = filter(None, path.split('/'))

    orig_comps = [''] + comps[:]

    # Optimization - avoid using the root set
    # as it is common for all objects anyway and add overhead
    # There is an assumption about catalog/index having
    # the same container as content
    if default_level == 0:
        indexpath = list(filter(None, self.getPhysicalPath()))
        while min(len(indexpath), len(comps)):
            if indexpath[0] == comps[0]:
                del indexpath[0]
                del comps[0]
                startlevel += 1
            else:
                break

    if len(comps) == 0:
        if depth == -1 and not navtree:
            return IISet(self._unindex.keys())

    # Make sure that we get depth = 1 if in navtree mode
    # unless specified otherwise
    orig_depth = depth
    if depth == -1:
        depth = 0 or navtree

    # Optimized navtree starting with absolute path
    if absolute_path and navtree and depth == 1 and default_level == 0:
        set_list = []
        # Insert root element
        if navtree_start >= len(orig_comps):
            navtree_start = 0
        # create a set of parent paths to search
        for i in range(len(orig_comps), navtree_start, -1):
            parent_path = '/'.join(orig_comps[:i])
            parent_path = parent_path and parent_path or '/'
            try:
                set_list.append(self._index_parents[parent_path])
            except KeyError:
                pass
        return multiunion(set_list)
    # Optimized breadcrumbs
    elif absolute_path and navtree and depth == 0 and default_level == 0:
        item_list = IISet()
        # Insert root element
        if navtree_start >= len(orig_comps):
            navtree_start = 0
        # create a set of parent paths to search
        for i in range(len(orig_comps), navtree_start, -1):
            parent_path = '/'.join(orig_comps[:i])
            parent_path = parent_path and parent_path or '/'
            try:
                item_list.insert(self._index_items[parent_path])
            except KeyError:
                pass
        return item_list
    # Specific object search
    elif absolute_path and orig_depth == 0 and default_level == 0:
        try:
            return IISet([self._index_items[path]])
        except KeyError:
            return IISet()
    # Single depth search
    elif absolute_path and orig_depth == 1 and default_level == 0:
        # only get objects contained in requested folder
        try:
            return self._index_parents[path]
        except KeyError:
            return IISet()
    # Sitemaps, relative paths, and depth queries
    elif startlevel >= 0:
        pathset = None  # Same as pathindex
        navset = None  # For collecting siblings along the way
        depthset = None  # For limiting depth

        if navtree and depth and \
           self._index.has_key(None) and \
           self._index[None].has_key(startlevel):
            navset = self._index[None][startlevel]

        for level in range(startlevel, startlevel + len(comps) + depth):
            if level - startlevel < len(comps):
                comp = comps[level - startlevel]
                if not self._index.has_key(comp) or \
                   not self._index[comp].has_key(level):
                    # Navtree is inverse, keep going even for
                    # nonexisting paths
                    if navtree:
                        pathset = IISet()
                    else:
                        return IISet()
                else:
                    pathset = intersection(pathset,
                                           self._index[comp][level])
                if navtree and depth and \
                   self._index.has_key(None) and \
                   self._index[None].has_key(level + depth):
                    navset = union(navset, intersection(
                        pathset, self._index[None][level + depth]))
            if level - startlevel >= len(comps) or navtree:
                if self._index.has_key(None) and \
                   self._index[None].has_key(level):
                    depthset = union(depthset, intersection(
                        pathset, self._index[None][level]))

        if navtree:
            return union(depthset, navset) or IISet()
        elif depth:
            return depthset or IISet()
        else:
            return pathset or IISet()
    else:
        results = IISet()
        for level in range(0, self._depth + 1):
            ids = None
            error = 0
            for cn in range(0, len(comps)):
                comp = comps[cn]
                try:
                    ids = intersection(ids, self._index[comp][level + cn])
                except KeyError:
                    error = 1
            if error == 0:
                results = union(results, ids)
        return results