Example #1
0
def searchIndices(indices, query, period, lastId=None, reverse=False, fields=None, limit=100):
    """
    Search the specified indices for events matching the specified query.

    For every index the query is deep-copied and optimized against that
    index; an index whose query optimizes to None is skipped.  A searcher and
    a posting list are opened for each remaining index, and all of them are
    handed to a ResultSet which is scheduled with cooperate().

    :param indices: A list of indices to search.
    :type indices: A list of objects implementing :class:`terane.bier.index.IIndex`
    :param query: The programmatic query to use for searching the indices.
    :type query: An object implementing :class:`terane.bier.searching.IQuery`
    :param period: The period within which the search is constrained.
    :type period: :class:`terane.bier.searching.Period`
    :param lastId: The real key to start iterating from.  If not None, it
        must be within period and it replaces the start key of the period.
    :type lastId: :class:`terane.bier.evid.EVID`
    :param reverse: If True, then reverse the order of events.
    :type reverse: bool
    :param fields: If not None, then only return the specified fields of each event.
    :type fields: list or None
    :param limit: Return at most the specified number of events.
    :type limit: int
    :returns: A CooperativeTask which contains a Deferred and manages the search task.
    :rtype: :class:`twisted.internet.task.CooperativeTask`
    :raises SearcherError: if lastId is specified but is not within period.
    """
    # remember when the search began; the value is passed on to the ResultSet
    start = time.time()
    # determine the evids to use as start and end keys
    if reverse == False:
        startId, endId = period.getRange()
    else:
        # a reversed search iterates from the other end of the period, so
        # the two ends of the range are swapped
        endId, startId = period.getRange()
    if lastId != None:
        if not lastId in period:
            raise SearcherError("lastId %s is not within period" % lastId)
        # resume from the supplied key instead of the edge of the period
        startId = lastId
    # search each index separately, then merge the results
    # NOTE(review): the except/finally clause belonging to this try block is
    # not visible in this excerpt, so how errors raised below (TypeError,
    # SearcherError) are ultimately handled cannot be confirmed from here.
    try:
        searchers = []
        postingLists = []
        for index in indices:
            if not IIndex.providedBy(index):
                raise TypeError("index does not implement IIndex")
            # we create a copy of the original query, which can possibly be optimized
            # with index-specific knowledge.
            _query = copy.deepcopy(query)
            try:
                _query = _query.optimizeMatcher(index)
            except NotImplementedError, e:
                # report an unsupported query as a search error
                raise SearcherError(str(e))
            logger.debug("optimized query for index '%s': %s" % (index.name, str(_query)))
            # if the query optimized out entirely, then skip to the next index
            if _query == None:
                continue
            # get the posting list to iterate through
            searcher = index.newSearcher()
            if not ISearcher.providedBy(searcher):
                raise TypeError("searcher does not implement ISearcher")
            postingList = _query.iterMatches(searcher, startId, endId)
            if not IPostingList.providedBy(postingList):
                raise TypeError("posting list does not implement IPostingList")
            # searchers and postingLists are filled in lockstep, one entry
            # of each per index which is actually searched
            searchers.append(searcher)
            postingLists.append(postingList)
        # return a cooperative task
        return cooperate(ResultSet(searchers, postingLists, start, reverse, fields, limit))
Example #2
0
 def next(self):
     """
     Merge the postings of every index into self.events, in evid order.

     This is a generator used as a coroutine: each ``yield`` hands the
     result of a searcher, posting list or event store call to whatever
     drives the generator, and the value sent back is used as the outcome
     of that call.
     NOTE(review): the driver is not visible here -- presumably it resolves
     the yielded object before resuming the generator; confirm against the
     caller.

     Nothing is returned.  Each merged event is appended to ``self.events``
     as ``((ts, offset), defaultfield, defaultvalue, fields)``, every field
     name encountered is recorded once in ``self.fields``, and
     ``self.runtime`` is set to the elapsed seconds when the limit is
     reached or all posting lists are exhausted.  When ``self._reverse`` is
     true the largest evid is returned first.  An evid equal to the one
     returned immediately before it is treated as a duplicate and skipped.

     Every posting list and searcher which is still open is closed before
     the generator finishes, whether it ends normally or with an exception.

     :raises TypeError: if a searcher, posting list or event store does not
         provide the expected interface.
     """
     start = time.time()
     searchers = []
     postingLists = []
     try:
         # get a searcher and posting list for each index
         for index in self._indices:
             # we create a copy of the original query, which can possibly be
             # optimized with index-specific knowledge.
             query = copy.deepcopy(self._query)
             searcher = yield index.newSearcher()
             if not ISearcher.providedBy(searcher):
                 raise TypeError("searcher does not implement ISearcher")
             # track the searcher immediately, so the finally clause closes
             # it if optimizing the query or opening the posting list fails
             searchers.append(searcher)
             query = yield query.optimizeMatcher(searcher)
             logger.debug("optimized query for index '%s': %s" %
                          (index.name, str(query)))
             # if the query optimized out entirely, then skip to the next
             # index
             if query is None:
                 # untrack first, so the searcher is never closed twice
                 searchers.pop()
                 yield searcher.close()
                 continue
             # get the posting list to iterate through
             postingList = yield query.iterMatches(searcher, self._startId,
                                                   self._endId)
             if not IPostingList.providedBy(postingList):
                 raise TypeError(
                     "posting list does not implement IPostingList")
             postingLists.append(postingList)
         # a plain return ends the generator; raising StopIteration inside
         # a generator is an error on interpreters implementing PEP 479
         if not postingLists:
             return
         # the value nextPosting() produces once a posting list is
         # exhausted; it also marks a list with no posting waiting
         noPosting = (None, None, None)
         # pending[i] is the posting fetched from postingLists[i] which has
         # not been merged into the results yet
         pending = [noPosting] * len(postingLists)
         lastId = None
         # precedes(a, b) is True when evid a must be returned before evid b
         if self._reverse:
             precedes = lambda a, b: a > b
         else:
             precedes = lambda a, b: a < b
         # loop until we reach the search limit, we exhaust all of our
         # posting lists, or we encounter an exception
         while True:
             # if we have reached our limit
             if len(self.events) == self._limit:
                 self.runtime = time.time() - start
                 return
             # make sure every open posting list has a posting waiting
             for i in range(len(postingLists)):
                 # a waiting posting carrying the evid we just returned is
                 # a duplicate, so discard it
                 if (lastId is not None and pending[i] != noPosting
                         and pending[i][0] == lastId):
                     pending[i] = noPosting
                 # fetch until the list offers a new evid or is exhausted
                 while postingLists[i] is not None and pending[i] == noPosting:
                     posting = yield postingLists[i].nextPosting()
                     if posting == noPosting:
                         # we are done with this posting list; None marks
                         # it as already closed for the finally clause
                         postingList = postingLists[i]
                         yield postingList.close()
                         postingLists[i] = None
                     elif lastId is None or not (posting[0] == lastId):
                         pending[i] = posting
             # compare the waiting posting of every list, not only the
             # one fetched most recently, to find the evid to return next;
             # on a tie the earlier posting list wins
             smallest = None
             for i, posting in enumerate(pending):
                 if posting == noPosting:
                     continue
                 if (smallest is None
                         or precedes(posting[0], pending[smallest][0])):
                     smallest = i
             # stop iterating if there are no more results
             if smallest is None:
                 self.runtime = time.time() - start
                 return
             evid, _, store = pending[smallest]
             # remember the last evid, so duplicates of it can be skipped
             lastId = evid
             # forget the posting so we don't return it again
             pending[smallest] = noPosting
             # retrieve the event
             if not IEventStore.providedBy(store):
                 raise TypeError("store does not implement IEventStore")
             event = yield store.getEvent(evid)
             defaultfield, defaultvalue, fields = event
             if defaultfield not in self.fields:
                 self.fields.append(defaultfield)
             # keep a record of all field names found in the results
             for fieldname in fields.keys():
                 if fieldname not in self.fields:
                     self.fields.append(fieldname)
             # filter out unwanted fields
             if self._fields is not None:
                 fields = dict((k, v) for k, v in fields.items()
                               if k in self._fields)
             self.events.append(((evid.ts, evid.offset), defaultfield,
                                 defaultvalue, fields))
             logger.trace("added event %s to results" % evid)
     finally:
         # exhausted posting lists were replaced by None when they were
         # closed, so only the ones still open are closed here
         for postingList in postingLists:
             if postingList is not None:
                 yield postingList.close()
         for searcher in searchers:
             yield searcher.close()
Example #3
0
 def next(self):
     """
     Merge the postings of every index into self.events, in evid order.

     This is a generator used as a coroutine: each ``yield`` hands the
     result of a searcher, posting list or event store call to whatever
     drives the generator, and the value sent back is used as the outcome
     of that call.
     NOTE(review): the driver is not visible here -- presumably it resolves
     the yielded object before resuming the generator; confirm against the
     caller.

     Nothing is returned.  Each merged event is appended to ``self.events``
     as ``((ts, offset), defaultfield, defaultvalue, fields)``, every field
     name encountered is recorded once in ``self.fields``, and
     ``self.runtime`` is set to the elapsed seconds when the limit is
     reached or all posting lists are exhausted.  When ``self._reverse`` is
     true the largest evid is returned first.  An evid equal to the one
     returned immediately before it is treated as a duplicate and skipped.

     Every posting list and searcher which is still open is closed before
     the generator finishes, whether it ends normally or with an exception.

     :raises TypeError: if a searcher, posting list or event store does not
         provide the expected interface.
     """
     start = time.time()
     searchers = []
     postingLists = []
     try:
         # get a searcher and posting list for each index
         for index in self._indices:
             # we create a copy of the original query, which can possibly be
             # optimized with index-specific knowledge.
             query = copy.deepcopy(self._query)
             searcher = yield index.newSearcher()
             if not ISearcher.providedBy(searcher):
                 raise TypeError("searcher does not implement ISearcher")
             # track the searcher immediately, so the finally clause closes
             # it if optimizing the query or opening the posting list fails
             searchers.append(searcher)
             query = yield query.optimizeMatcher(searcher)
             logger.debug("optimized query for index '%s': %s" %
                          (index.name, str(query)))
             # if the query optimized out entirely, then skip to the next
             # index
             if query is None:
                 # untrack first, so the searcher is never closed twice
                 searchers.pop()
                 yield searcher.close()
                 continue
             # get the posting list to iterate through
             postingList = yield query.iterMatches(searcher, self._startId,
                                                   self._endId)
             if not IPostingList.providedBy(postingList):
                 raise TypeError(
                     "posting list does not implement IPostingList")
             postingLists.append(postingList)
         # a plain return ends the generator; raising StopIteration inside
         # a generator is an error on interpreters implementing PEP 479
         if not postingLists:
             return
         # the value nextPosting() produces once a posting list is
         # exhausted; it also marks a list with no posting waiting
         noPosting = (None, None, None)
         # pending[i] is the posting fetched from postingLists[i] which has
         # not been merged into the results yet
         pending = [noPosting] * len(postingLists)
         lastId = None
         # precedes(a, b) is True when evid a must be returned before evid b
         if self._reverse:
             precedes = lambda a, b: a > b
         else:
             precedes = lambda a, b: a < b
         # loop until we reach the search limit, we exhaust all of our
         # posting lists, or we encounter an exception
         while True:
             # if we have reached our limit
             if len(self.events) == self._limit:
                 self.runtime = time.time() - start
                 return
             # make sure every open posting list has a posting waiting
             for i in range(len(postingLists)):
                 # a waiting posting carrying the evid we just returned is
                 # a duplicate, so discard it
                 if (lastId is not None and pending[i] != noPosting
                         and pending[i][0] == lastId):
                     pending[i] = noPosting
                 # fetch until the list offers a new evid or is exhausted
                 while postingLists[i] is not None and pending[i] == noPosting:
                     posting = yield postingLists[i].nextPosting()
                     if posting == noPosting:
                         # we are done with this posting list; None marks
                         # it as already closed for the finally clause
                         postingList = postingLists[i]
                         yield postingList.close()
                         postingLists[i] = None
                     elif lastId is None or not (posting[0] == lastId):
                         pending[i] = posting
             # compare the waiting posting of every list, not only the
             # one fetched most recently, to find the evid to return next;
             # on a tie the earlier posting list wins
             smallest = None
             for i, posting in enumerate(pending):
                 if posting == noPosting:
                     continue
                 if (smallest is None
                         or precedes(posting[0], pending[smallest][0])):
                     smallest = i
             # stop iterating if there are no more results
             if smallest is None:
                 self.runtime = time.time() - start
                 return
             evid, _, store = pending[smallest]
             # remember the last evid, so duplicates of it can be skipped
             lastId = evid
             # forget the posting so we don't return it again
             pending[smallest] = noPosting
             # retrieve the event
             if not IEventStore.providedBy(store):
                 raise TypeError("store does not implement IEventStore")
             event = yield store.getEvent(evid)
             defaultfield, defaultvalue, fields = event
             if defaultfield not in self.fields:
                 self.fields.append(defaultfield)
             # keep a record of all field names found in the results
             for fieldname in fields.keys():
                 if fieldname not in self.fields:
                     self.fields.append(fieldname)
             # filter out unwanted fields
             if self._fields is not None:
                 fields = dict((k, v) for k, v in fields.items()
                               if k in self._fields)
             self.events.append(((evid.ts, evid.offset), defaultfield,
                                 defaultvalue, fields))
             logger.trace("added event %s to results" % evid)
     finally:
         # exhausted posting lists were replaced by None when they were
         # closed, so only the ones still open are closed here
         for postingList in postingLists:
             if postingList is not None:
                 yield postingList.close()
         for searcher in searchers:
             yield searcher.close()