def search(self, query, limit=5000, sortedby=None, reverse=False):
    """Runs the query represented by the query object and returns a Results
    object.

    See the help for :meth:`~Searcher.find` for information on the
    parameters.

    :param query: a :class:`whoosh.query.Query` object.
    :param limit: the maximum number of documents to keep in the result
        list.
    :param sortedby: if not None, the results are sorted instead of scored.
        May be a field name (string), a list or tuple of field names, or a
        Sorter object.
    :param reverse: passed to the sorter's ``order()`` method; only used
        when ``sortedby`` is not None.
    :raises ValueError: if ``sortedby`` is not a string, list, tuple or
        Sorter.
    :rtype: :class:`Results`
    """

    ixreader = self.ixreader

    t = now()
    if sortedby is not None:
        # Normalize the sortedby argument into a sorter object
        if isinstance(sortedby, basestring):
            sorter = scoring.FieldSorter(sortedby)
        elif isinstance(sortedby, (list, tuple)):
            sorter = scoring.MultiFieldSorter(
                [scoring.FieldSorter(fn) for fn in sortedby])
        elif isinstance(sortedby, Sorter):
            sorter = sortedby
        else:
            # Wrap the value in a tuple so the formatting can never
            # misinterpret it as a sequence of format arguments
            raise ValueError(
                "sortedby argument must be a string, list, or Sorter (%r)"
                % (sortedby,))

        scored_list = sorter.order(self, query.docs(self), reverse=reverse)
        scores = None
        # The vector is built before truncation, so it covers every
        # document the sorter returned, not just the first ``limit``
        docvector = BitVector(ixreader.doc_count_all(), source=scored_list)
        if len(scored_list) > limit:
            scored_list = list(scored_list)[:limit]
    else:
        # Sort by scores
        topdocs = TopDocs(limit, ixreader.doc_count_all())
        final = self.weighting.final
        # Run each raw score through the weighting's final() hook before
        # handing it to the collector
        topdocs.add_all((docnum, final(self, docnum, score))
                        for docnum, score in query.doc_scores(self))

        best = topdocs.best()
        if best:
            # best is a list like
            # [(docnum, score), (docnum, score), ... ]
            # This unpacks that into two lists: docnums and scores
            # (re-using the list computed above instead of calling
            # topdocs.best() a second time)
            scored_list, scores = zip(*best)
        else:
            scored_list = []
            scores = []

        docvector = topdocs.docs
    t = now() - t

    return Results(self, query, scored_list, docvector, runtime=t,
                   scores=scores)
def search(self, query, limit = 5000, sortedby = None, reverse = False):
    """Runs the query represented by the query object and returns a Results
    object.

    See the help for :meth:`~Searcher.find` for information on the
    parameters.

    :param query: a :class:`whoosh.query.Query` object.
    :param limit: the maximum number of documents to keep in the result
        list.
    :param sortedby: if not None, sort the results instead of scoring them.
        Accepts a field name (string), a list or tuple of field names, or a
        Sorter object.
    :param reverse: forwarded to the sorter's ``order()`` method; ignored
        when ``sortedby`` is None.
    :raises ValueError: if ``sortedby`` is not a string, list, tuple or
        Sorter.
    :rtype: :class:`Results`
    """

    ixreader = self.ixreader

    t = now()
    if sortedby is not None:
        # Turn whatever the caller passed into a sorter object
        if isinstance(sortedby, basestring):
            sorter = scoring.FieldSorter(sortedby)
        elif isinstance(sortedby, (list, tuple)):
            sorter = scoring.MultiFieldSorter([scoring.FieldSorter(fn)
                                               for fn in sortedby])
        elif isinstance(sortedby, Sorter):
            sorter = sortedby
        else:
            # The explicit 1-tuple keeps "%r" formatting safe for any value
            raise ValueError("sortedby argument must be a string, list, or Sorter (%r)"
                             % (sortedby,))

        scored_list = sorter.order(self, query.docs(self), reverse = reverse)
        scores = None
        # Built from the untruncated list, so the vector records every
        # document returned by the sorter
        docvector = BitVector(ixreader.doc_count_all(), source = scored_list)
        if len(scored_list) > limit:
            scored_list = list(scored_list)[:limit]
    else:
        # Sort by scores
        topdocs = TopDocs(limit, ixreader.doc_count_all())
        final = self.weighting.final
        # Each raw score is passed through the weighting's final() hook
        topdocs.add_all((docnum, final(self, docnum, score))
                        for docnum, score in query.doc_scores(self))

        best = topdocs.best()
        if best:
            # best is a list like
            # [(docnum, score), (docnum, score), ... ]
            # This unpacks that into two lists: docnums and scores.
            # The already-computed list is reused rather than calling
            # topdocs.best() again.
            scored_list, scores = zip(*best)
        else:
            scored_list = []
            scores = []

        docvector = topdocs.docs
    t = now() - t

    return Results(self, query, scored_list, docvector, runtime = t,
                   scores = scores)
def sort_query(self, query, sortedby, reverse=False):
    """Runs the query and returns a Results object whose documents are
    ordered by a sorter instead of by score.

    :param query: the query object; its ``docs()`` method supplies the
        matching documents.
    :param sortedby: a field name (string), a list or tuple of field names,
        or a Sorter object.
    :param reverse: passed through to the sorter's ``order()`` method.
    :raises ValueError: if ``sortedby`` is not a string, list, tuple or
        Sorter.
    :rtype: :class:`Results`
    """

    if isinstance(sortedby, basestring):
        sorter = self._field_sorter(sortedby)
    elif isinstance(sortedby, (list, tuple)):
        sorter = scoring.MultiFieldSorter([self._field_sorter(fname)
                                           for fname in sortedby])
    elif isinstance(sortedby, Sorter):
        sorter = sortedby
    else:
        # "%R" is not a valid conversion and made the formatting itself
        # fail with "unsupported format character"; use "%r", and wrap the
        # value in a tuple so any object formats safely.
        raise ValueError("sortedby argument (%r) must be a string, list,"
                         " or Sorter" % (sortedby,))

    # Only the ordering step is timed; building the sorter is not included
    t = now()
    sorted_docs = list(sorter.order(self, query.docs(self), reverse=reverse))
    runtime = now() - t

    # No document vector is computed for sorted results, so None is passed
    return Results(self, query, sorted_docs, None, runtime)
def search(self, query, limit = 5000, weighting = None, sortedby = None, reverse = False):
    """Executes ``query`` against this searcher and wraps the outcome in a
    Results object.

    :query: a query.Query object describing what to search for. A query
        string can be turned into such an object with e.g.
        qparser.QueryParser.
    :limit: the maximum number of documents to score. Pass limit=N when
        only the top N documents are of interest.
    :weighting: when not None, this weighting object is used for scoring
        in place of the searcher's default one.
    :sortedby: when not None, the results are sorted rather than scored.
        A string sorts by the field of that name; a list or tuple is taken
        to be a sequence of field names to sort by; a callable is called
        with no arguments to produce the sorter; anything else is used as
        a scoring.Sorter object directly. The fields you want to sort by
        must be indexed.

        Sorting by the 'path' field::

            searcher.search(q, sortedby = "path")

        Sorting by 'path' and then by 'category'::

            searcher.search(q, sortedby = ("path", "category"))

        Sorting with a sorting object::

            searcher.search(q, sortedby = scoring.NullSorter)

    :reverse: when 'sortedby' is not None, reverses the sort direction.
    """

    reader = self.doc_reader
    started = time.time()

    if sortedby is None:
        # Scored search: gather the best-scoring documents
        collector = TopDocs(limit, reader.doc_count_all())
        scorer = weighting or self.weighting
        collector.add_all(query.doc_scores(self, weighting = scorer))
        scored_list = collector.best()
        docvector = collector.docs
    else:
        # Sorted search: resolve the argument to a sorter first
        if isinstance(sortedby, basestring):
            sortedby = scoring.FieldSorter(sortedby)
        elif isinstance(sortedby, (list, tuple)):
            sortedby = scoring.MultiFieldSorter(sortedby)
        elif callable(sortedby):
            sortedby = sortedby()

        matching = query.docs(self)
        scored_list = sortedby.order(self, matching, reverse = reverse)
        # The vector is filled from the full ordering, before trimming
        docvector = BitVector(reader.doc_count_all(), source = scored_list)
        if len(scored_list) > limit:
            scored_list = list(scored_list)[:limit]

    elapsed = time.time() - started
    return Results(self, query, scored_list, docvector, runtime = elapsed)
def docnums(self, query):
    """Collects the document numbers matching ``query`` into a set-like
    object and returns it.
    """

    matching = query.docs(self)
    return set(matching)
def search(self, query, limit=5000, sortedby=None, reverse=False,
           minscore=0.0001):
    """Runs the query represented by the ``query`` object and returns a
    Results object.

    :param query: a :class:`whoosh.query.Query` object.
    :param limit: the maximum number of documents to score. If you're only
        interested in the top N documents, you can set limit=N to limit the
        scoring for a faster search.
    :param sortedby: if this parameter is not None, the results are sorted
        instead of scored. If this value is a string, the results are
        sorted by the field named in the string. If this value is a list or
        tuple, it is assumed to be a sequence of strings and the results
        are sorted by the fieldnames in the sequence. Otherwise 'sortedby'
        should be a scoring.Sorter object.

        The fields you want to sort by must be indexed.

        For example, to sort the results by the 'path' field::

            searcher.search(q, sortedby = "path")

        To sort the results by the 'path' field and then the 'category'
        field::

            searcher.search(q, sortedby = ("path", "category"))

        To use a sorting object::

            searcher.search(q, sortedby = scoring.FieldSorter("path", key=mykeyfn))

        Using a string or tuple simply instantiates a
        :class:`whoosh.scoring.FieldSorter` or
        :class:`whoosh.scoring.MultiFieldSorter` object for you. To get a
        custom sort order, instantiate your own ``FieldSorter`` with a
        ``key`` argument, or write a custom :class:`whoosh.scoring.Sorter`
        class.

        FieldSorter and MultiFieldSorter cache the document order, using 4
        bytes times the number of documents in the index, and taking time
        to cache. To increase performance, instantiate your own sorter and
        re-use it (but remember you need to recreate it if the index
        changes).

    :param reverse: if ``sortedby`` is not None, this reverses the
        direction of the sort.
    :param minscore: the minimum score to include in the results. Only
        used when ``sortedby`` is None.
    :raises ValueError: if ``sortedby`` is not a string, list, tuple or
        Sorter.
    :rtype: :class:`Results`
    """

    ixreader = self.ixreader

    t = now()
    if sortedby is not None:
        # Normalize the sortedby argument into a sorter object
        if isinstance(sortedby, basestring):
            sorter = scoring.FieldSorter(sortedby)
        elif isinstance(sortedby, (list, tuple)):
            sorter = scoring.MultiFieldSorter([scoring.FieldSorter(fn)
                                               for fn in sortedby])
        elif isinstance(sortedby, Sorter):
            sorter = sortedby
        else:
            # The explicit 1-tuple keeps "%r" formatting safe for any value
            raise ValueError("sortedby argument must be a string, list, or Sorter (%r)"
                             % (sortedby,))

        scored_list = sorter.order(self, query.docs(self), reverse=reverse)
        scores = None
        # Built before truncation, so the vector covers every document the
        # sorter returned, not only the first ``limit``
        docvector = BitVector(ixreader.doc_count_all(), source=scored_list)
        if len(scored_list) > limit:
            scored_list = list(scored_list)[:limit]
    else:
        # Sort by scores
        topdocs = TopDocs(limit, ixreader.doc_count_all())
        final = self.weighting.final
        # Each raw score goes through the weighting's final() hook;
        # minscore is handed to the collector along with the scored pairs
        topdocs.add_all(((docnum, final(self, docnum, score))
                         for docnum, score in query.doc_scores(self)),
                        minscore)

        best = topdocs.best()
        if best:
            # best is a list like
            # [(docnum, score), (docnum, score), ... ]
            # This unpacks that into two lists: docnums and scores
            # (re-using the list computed above instead of calling
            # topdocs.best() a second time)
            scored_list, scores = zip(*best)
        else:
            scored_list = []
            scores = []

        docvector = topdocs.docs
    t = now() - t

    return Results(self, query, scored_list, docvector, runtime=t,
                   scores=scores)
def search(self, query, limit=5000, sortedby=None, reverse=False,
           minscore=0.0001):
    """Runs the query represented by the ``query`` object and returns a
    Results object.

    :param query: a :class:`whoosh.query.Query` object.
    :param limit: the maximum number of documents to score. If you're only
        interested in the top N documents, you can set limit=N to limit the
        scoring for a faster search.
    :param sortedby: if this parameter is not None, the results are sorted
        instead of scored. If this value is a string, the results are
        sorted by the field named in the string. If this value is a list or
        tuple, it is assumed to be a sequence of strings and the results
        are sorted by the fieldnames in the sequence. Otherwise 'sortedby'
        should be a scoring.Sorter object.

        The fields you want to sort by must be indexed.

        For example, to sort the results by the 'path' field::

            searcher.search(q, sortedby = "path")

        To sort the results by the 'path' field and then the 'category'
        field::

            searcher.search(q, sortedby = ("path", "category"))

        To use a sorting object::

            searcher.search(q, sortedby = scoring.FieldSorter("path", key=mykeyfn))

        Using a string or tuple simply instantiates a
        :class:`whoosh.scoring.FieldSorter` or
        :class:`whoosh.scoring.MultiFieldSorter` object for you. To get a
        custom sort order, instantiate your own ``FieldSorter`` with a
        ``key`` argument, or write a custom :class:`whoosh.scoring.Sorter`
        class.

        FieldSorter and MultiFieldSorter cache the document order, using 4
        bytes times the number of documents in the index, and taking time
        to cache. To increase performance, instantiate your own sorter and
        re-use it (but remember you need to recreate it if the index
        changes).

    :param reverse: if ``sortedby`` is not None, this reverses the
        direction of the sort.
    :param minscore: the minimum score to include in the results. It is
        only consulted on the scoring path (``sortedby`` is None).
    :raises ValueError: if ``sortedby`` is not a string, list, tuple or
        Sorter.
    :rtype: :class:`Results`
    """

    ixreader = self.ixreader

    t = now()
    if sortedby is not None:
        # Turn whatever the caller passed into a sorter object
        if isinstance(sortedby, basestring):
            sorter = scoring.FieldSorter(sortedby)
        elif isinstance(sortedby, (list, tuple)):
            sorter = scoring.MultiFieldSorter(
                [scoring.FieldSorter(fn) for fn in sortedby])
        elif isinstance(sortedby, Sorter):
            sorter = sortedby
        else:
            # Wrap the value in a tuple so the formatting can never
            # misinterpret it as a sequence of format arguments
            raise ValueError(
                "sortedby argument must be a string, list, or Sorter (%r)"
                % (sortedby,))

        scored_list = sorter.order(self, query.docs(self), reverse=reverse)
        scores = None
        # The vector is filled from the untruncated ordering
        docvector = BitVector(ixreader.doc_count_all(), source=scored_list)
        if len(scored_list) > limit:
            scored_list = list(scored_list)[:limit]
    else:
        # Sort by scores
        topdocs = TopDocs(limit, ixreader.doc_count_all())
        final = self.weighting.final
        # Raw scores are passed through the weighting's final() hook, and
        # minscore is forwarded to the collector
        topdocs.add_all(((docnum, final(self, docnum, score))
                         for docnum, score in query.doc_scores(self)),
                        minscore)

        best = topdocs.best()
        if best:
            # best is a list like
            # [(docnum, score), (docnum, score), ... ]
            # This unpacks that into two lists: docnums and scores.
            # The already-computed list is reused rather than calling
            # topdocs.best() again.
            scored_list, scores = zip(*best)
        else:
            scored_list = []
            scores = []

        docvector = topdocs.docs
    t = now() - t

    return Results(self, query, scored_list, docvector, runtime=t,
                   scores=scores)
def search(self, query, limit=5000, weighting=None, sortedby=None, reverse=False):
    """Performs the search described by ``query`` and returns the outcome
    as a Results object.

    :query: a query.Query object representing the search query. A query
        string can be translated into a query object with e.g.
        qparser.QueryParser.
    :limit: the maximum number of documents to score. Set limit=N if you
        only care about the top N documents.
    :weighting: if not None, score the results with this weighting object
        rather than the searcher's default.
    :sortedby: if not None, sort the results instead of scoring them.
        A string names the single field to sort by. A list or tuple is
        treated as a sequence of field names to sort by in turn. A
        callable is invoked with no arguments to obtain the sorter. Any
        other value is used as a scoring.Sorter object as-is. The fields
        you want to sort by must be indexed.

        To sort the results by the 'path' field::

            searcher.search(q, sortedby = "path")

        To sort by 'path' first and 'category' second::

            searcher.search(q, sortedby = ("path", "category"))

        To sort with a sorting object::

            searcher.search(q, sortedby = scoring.NullSorter)

    :reverse: if 'sortedby' is not None, reverses the direction of the
        sort.
    """

    doc_reader = self.doc_reader
    start_time = time.time()

    if sortedby is None:
        # No sorter requested: rank the documents by score
        top = TopDocs(limit, doc_reader.doc_count_all())
        active_weighting = weighting or self.weighting
        top.add_all(query.doc_scores(self, weighting=active_weighting))
        scored_list = top.best()
        docvector = top.docs
    else:
        # Resolve the sortedby argument into a sorter object
        if isinstance(sortedby, basestring):
            sortedby = scoring.FieldSorter(sortedby)
        elif isinstance(sortedby, (list, tuple)):
            sortedby = scoring.MultiFieldSorter(sortedby)
        elif callable(sortedby):
            sortedby = sortedby()

        candidates = query.docs(self)
        scored_list = sortedby.order(self, candidates, reverse=reverse)
        # Record the full ordering in the vector before applying the limit
        total_docs = doc_reader.doc_count_all()
        docvector = BitVector(total_docs, source=scored_list)
        if len(scored_list) > limit:
            scored_list = list(scored_list)[:limit]

    runtime = time.time() - start_time
    return Results(self, query, scored_list, docvector, runtime=runtime)