Example #1
0
    def search(self, query, limit=5000, sortedby=None, reverse=False):
        """Runs the query represented by the query object and returns a
        Results object.

        See the help for :meth:`~Searcher.find` for information on the
        parameters.

        :param query: a :class:`whoosh.query.Query` object.
        :param limit: upper bound on the ranked list handed to the Results
            object. When sorting, the ordered list is truncated to this
            length; when scoring, the value is passed to ``TopDocs``.
        :param sortedby: None to rank by score. Otherwise a field name
            (string), a list/tuple of field names, or a ``Sorter`` instance.
        :param reverse: forwarded to the sorter's ``order`` method; unused
            when ``sortedby`` is None.
        :raises ValueError: if ``sortedby`` is not None and is not a string,
            list, tuple or ``Sorter``.
        :rtype: :class:`Results`
        """

        ixreader = self.ixreader

        t = now()
        if sortedby is not None:
            if isinstance(sortedby, basestring):
                sorter = scoring.FieldSorter(sortedby)
            elif isinstance(sortedby, (list, tuple)):
                sorter = scoring.MultiFieldSorter(
                    [scoring.FieldSorter(fn) for fn in sortedby])
            elif isinstance(sortedby, Sorter):
                sorter = sortedby
            else:
                # Wrap the operand in a tuple so it is always formatted as a
                # single value, whatever its type.
                raise ValueError(
                    "sortedby argument must be a string, list, or Sorter (%r)"
                    % (sortedby,))

            # The ordering is used three times below (BitVector source, len()
            # and slicing), so copy it into a list once; a lazy iterable
            # would otherwise be exhausted by its first use.
            scored_list = list(
                sorter.order(self, query.docs(self), reverse=reverse))
            scores = None
            # Built from the complete ordering, before it is cut to `limit`.
            docvector = BitVector(ixreader.doc_count_all(), source=scored_list)
            if len(scored_list) > limit:
                scored_list = scored_list[:limit]
        else:
            # Sort by scores
            topdocs = TopDocs(limit, ixreader.doc_count_all())
            final = self.weighting.final
            topdocs.add_all((docnum, final(self, docnum, score))
                            for docnum, score in query.doc_scores(self))

            best = topdocs.best()
            if best:
                # topdocs.best() returns a list like
                # [(docnum, score), (docnum, score), ... ]
                # This unpacks that into two sequences: docnums and scores.
                # Reuse `best` instead of calling topdocs.best() again.
                scored_list, scores = zip(*best)
            else:
                scored_list = []
                scores = []

            docvector = topdocs.docs
        t = now() - t

        return Results(self,
                       query,
                       scored_list,
                       docvector,
                       runtime=t,
                       scores=scores)
Example #2
0
 def search(self, query, limit = 5000, sortedby = None, reverse = False):
     """Runs the query represented by the query object and returns a Results object.
     
     See the help for :meth:`~Searcher.find` for information on the parameters.
     
     :param query: a :class:`whoosh.query.Query` object.
     :param limit: upper bound on the ranked list handed to the Results
         object. When sorting, the ordered list is truncated to this length;
         when scoring, the value is passed to ``TopDocs``.
     :param sortedby: None to rank by score. Otherwise a field name (string),
         a list/tuple of field names, or a ``Sorter`` instance.
     :param reverse: forwarded to the sorter's ``order`` method; unused when
         ``sortedby`` is None.
     :raises ValueError: if ``sortedby`` is not None and is not a string,
         list, tuple or ``Sorter``.
     :rtype: :class:`Results`
     """
     
     ixreader = self.ixreader
     
     t = now()
     if sortedby is not None:
         if isinstance(sortedby, basestring):
             sorter = scoring.FieldSorter(sortedby)
         elif isinstance(sortedby, (list, tuple)):
             sorter = scoring.MultiFieldSorter([scoring.FieldSorter(fn) for fn in sortedby])
         elif isinstance(sortedby, Sorter):
             sorter = sortedby
         else:
             # The tuple wrapper guarantees the operand is formatted as one value.
             raise ValueError("sortedby argument must be a string, list, or Sorter (%r)" % (sortedby,))
         
         # Copy the ordering into a list once: it is iterated by BitVector,
         # measured with len() and sliced, which a lazy iterable could not
         # survive.
         scored_list = list(sorter.order(self, query.docs(self), reverse = reverse))
         scores = None
         # Built from the complete ordering, before it is cut to `limit`.
         docvector = BitVector(ixreader.doc_count_all(), source = scored_list)
         if len(scored_list) > limit:
             scored_list = scored_list[:limit]
     else:
         # Sort by scores
         topdocs = TopDocs(limit, ixreader.doc_count_all())
         final = self.weighting.final
         topdocs.add_all((docnum, final(self, docnum, score))
                         for docnum, score in query.doc_scores(self))
         
         best = topdocs.best()
         if best:
             # topdocs.best() returns a list like
             # [(docnum, score), (docnum, score), ... ]
             # This unpacks that into two sequences: docnums and scores.
             # `best` is reused rather than calling topdocs.best() twice.
             scored_list, scores = zip(*best)
         else:
             scored_list = []
             scores = []
         
         docvector = topdocs.docs
     t = now() - t
     
     return Results(self, query, scored_list, docvector, runtime = t, scores = scores)
Example #3
0
    def sort_query(self, query, sortedby, reverse=False):
        """Returns a Results object listing the documents that match
        ``query`` in the order produced by a sorter.

        :param query: object whose ``docs(searcher)`` method supplies the
            matching document numbers.
        :param sortedby: a field name (string), a list/tuple of field names,
            or a ``Sorter`` instance.
        :param reverse: forwarded to the sorter's ``order`` method.
        :raises ValueError: if ``sortedby`` is not a string, list, tuple or
            ``Sorter``.
        """
        if isinstance(sortedby, basestring):
            sorter = self._field_sorter(sortedby)
        elif isinstance(sortedby, (list, tuple)):
            sorter = scoring.MultiFieldSorter([self._field_sorter(fname)
                                               for fname in sortedby])
        elif isinstance(sortedby, Sorter):
            sorter = sortedby
        else:
            # "%R" is not a valid conversion and made the formatting itself
            # fail; "%r" with a one-element tuple yields the intended message.
            raise ValueError("sortedby argument (%r) must be a string, list,"
                             " or Sorter" % (sortedby,))

        # Only the ordering step is timed; sorter construction is excluded.
        t = now()
        sorted_docs = list(sorter.order(self, query.docs(self), reverse=reverse))
        runtime = now() - t
        
        return Results(self, query, sorted_docs, None, runtime)
Example #4
0
    def sort_query(self, query, sortedby, reverse=False):
        """Sorts the documents matching ``query`` and returns them wrapped in
        a Results object.

        :param query: object whose ``docs(searcher)`` method supplies the
            matching document numbers.
        :param sortedby: a field name (string), a list/tuple of field names,
            or a ``Sorter`` instance.
        :param reverse: forwarded to the sorter's ``order`` method.
        :raises ValueError: if ``sortedby`` is not a string, list, tuple or
            ``Sorter``.
        """
        if isinstance(sortedby, basestring):
            sorter = self._field_sorter(sortedby)
        elif isinstance(sortedby, (list, tuple)):
            sorter = scoring.MultiFieldSorter([self._field_sorter(fname)
                                               for fname in sortedby])
        elif isinstance(sortedby, Sorter):
            sorter = sortedby
        else:
            # The original "%R" conversion is invalid, so the message could
            # never be built; use "%r" and a tuple operand instead.
            raise ValueError("sortedby argument (%r) must be a string, list,"
                             " or Sorter" % (sortedby,))

        # The timer starts after the sorter has been chosen, so it measures
        # the ordering step only.
        t = now()
        sorted_docs = list(sorter.order(self, query.docs(self), reverse=reverse))
        runtime = now() - t
        
        return Results(self, query, sorted_docs, None, runtime)
Example #5
0
 def search(self, query, limit = 5000,
            weighting = None,
            sortedby = None, reverse = False):
     """Executes ``query`` against this searcher and wraps the outcome in a
     Results object.
     
     :query: a query.Query object describing what to search for. A query
         string can be turned into such an object with e.g.
         qparser.QueryParser.
     :limit: the maximum number of documents to score. Pass limit=N when only
         the top N documents matter, which keeps the scoring work down.
     :weighting: weighting object used for scoring; when None the searcher's
         own weighting is used.
     :sortedby: when not None, results are sorted rather than scored.
         A string names the single field to sort by. A list or tuple is taken
         as a sequence of field names. A callable is called with no arguments
         and its result is used as the sorter. Anything else is used directly
         as a scoring.Sorter object.
         
         The fields you want to sort by must be indexed.
         
         Sorting by the 'path' field::
         
             searcher.search(q, sortedby = "path")
             
         Sorting by 'path' and then by 'category'::
             
             searcher.search(q, sortedby = ("path", "category"))
             
         Using a sorting object::
         
             searcher.search(q, sortedby = scoring.NullSorter)
     
     :reverse: flips the sort direction; only meaningful when 'sortedby' is
         not None.
     """
     
     doc_reader = self.doc_reader
     
     started = time.time()
     if sortedby is None:
         # No sort requested: rank the matches by score.
         collector = TopDocs(limit, doc_reader.doc_count_all())
         collector.add_all(query.doc_scores(self, weighting = weighting or self.weighting))
         scored_list = collector.best()
         docvector = collector.docs
     else:
         # Turn whatever the caller supplied into a sorter object.
         if isinstance(sortedby, basestring):
             sorter = scoring.FieldSorter(sortedby)
         elif isinstance(sortedby, (list, tuple)):
             sorter = scoring.MultiFieldSorter(sortedby)
         elif callable(sortedby):
             sorter = sortedby()
         else:
             sorter = sortedby
         
         scored_list = sorter.order(self, query.docs(self), reverse = reverse)
         # The bit vector is built from the full ordering, before truncation.
         docvector = BitVector(doc_reader.doc_count_all(), source = scored_list)
         if len(scored_list) > limit:
             scored_list = list(scored_list)[:limit]
     elapsed = time.time() - started
     
     return Results(self, query, scored_list, docvector, runtime = elapsed)
Example #6
0
 def docnums(self, query):
     """Collects the document numbers matched by ``query`` into a set and
     returns it.
     """
     
     matching = set()
     # query.docs() is handed this searcher and supplies the document numbers.
     matching.update(query.docs(self))
     return matching
Example #7
0
    def search(self, query, limit=5000, sortedby=None, reverse=False, minscore=0.0001):
        """Runs the query represented by the ``query`` object and returns a
        Results object.
        
        :param query: a :class:`whoosh.query.Query` object.
        :param limit: the maximum number of documents to score. If you're only
            interested in the top N documents, you can set limit=N to limit the
            scoring for a faster search. When sorting, the ordered list is
            truncated to this length.
        :param sortedby: if this parameter is not None, the results are sorted
            instead of scored. If this value is a string, the results are
            sorted by the field named in the string. If this value is a list or
            tuple, it is assumed to be a sequence of strings and the results
            are sorted by the fieldnames in the sequence. Otherwise 'sortedby'
            should be a scoring.Sorter object.
            
            The fields you want to sort by must be indexed.
            
            For example, to sort the results by the 'path' field::
            
                searcher.search(q, sortedby = "path")
                
            To sort the results by the 'path' field and then the 'category'
            field::
                
                searcher.search(q, sortedby = ("path", "category"))
                
            To use a sorting object::
            
                searcher.search(q, sortedby = scoring.FieldSorter("path", key=mykeyfn))
            
            Using a string or tuple simply instantiates a
            :class:`whoosh.scoring.FieldSorter` or
            :class:`whoosh.scoring.MultiFieldSorter` object for you. To get a
            custom sort order, instantiate your own ``FieldSorter`` with a
            ``key`` argument, or write a custom :class:`whoosh.scoring.Sorter`
            class.
            
            FieldSorter and MultiFieldSorter cache the document order, using 4
            bytes times the number of documents in the index, and taking time
            to cache. To increase performance, instantiate your own sorter and
            re-use it (but remember you need to recreate it if the index
            changes).
        
        :param reverse: if ``sortedby`` is not None, this reverses the
            direction of the sort.
        :param minscore: the minimum score to include in the results. Only
            used when scoring (``sortedby`` is None).
        :raises ValueError: if ``sortedby`` is not None and is not a string,
            list, tuple or ``Sorter``.
        :rtype: :class:`Results`
        """

        ixreader = self.ixreader

        t = now()
        if sortedby is not None:
            if isinstance(sortedby, basestring):
                sorter = scoring.FieldSorter(sortedby)
            elif isinstance(sortedby, (list, tuple)):
                sorter = scoring.MultiFieldSorter([scoring.FieldSorter(fn) for fn in sortedby])
            elif isinstance(sortedby, Sorter):
                sorter = sortedby
            else:
                # Tuple operand: the value is always formatted as one item.
                raise ValueError("sortedby argument must be a string, list, or Sorter (%r)" % (sortedby,))

            # Copy the ordering into a list once: it feeds BitVector, len()
            # and a slice, which a lazy iterable could not survive.
            scored_list = list(sorter.order(self, query.docs(self), reverse=reverse))
            scores = None
            # Built from the complete ordering, before it is cut to `limit`.
            docvector = BitVector(ixreader.doc_count_all(), source=scored_list)
            if len(scored_list) > limit:
                scored_list = scored_list[:limit]
        else:
            # Sort by scores
            topdocs = TopDocs(limit, ixreader.doc_count_all())
            final = self.weighting.final
            topdocs.add_all(
                ((docnum, final(self, docnum, score)) for docnum, score in query.doc_scores(self)), minscore
            )

            best = topdocs.best()
            if best:
                # topdocs.best() returns a list like
                # [(docnum, score), (docnum, score), ... ]
                # This unpacks that into two sequences: docnums and scores.
                # Reuse `best` instead of calling topdocs.best() again.
                scored_list, scores = zip(*best)
            else:
                scored_list = []
                scores = []

            docvector = topdocs.docs
        t = now() - t

        return Results(self, query, scored_list, docvector, runtime=t, scores=scores)
Example #8
0
    def search(self,
               query,
               limit=5000,
               sortedby=None,
               reverse=False,
               minscore=0.0001):
        """Runs the query represented by the ``query`` object and returns a
        Results object.
        
        :param query: a :class:`whoosh.query.Query` object.
        :param limit: the maximum number of documents to score. If you're only
            interested in the top N documents, you can set limit=N to limit the
            scoring for a faster search. When sorting, the ordered list is
            truncated to this length.
        :param sortedby: if this parameter is not None, the results are sorted
            instead of scored. If this value is a string, the results are
            sorted by the field named in the string. If this value is a list or
            tuple, it is assumed to be a sequence of strings and the results
            are sorted by the fieldnames in the sequence. Otherwise 'sortedby'
            should be a scoring.Sorter object.
            
            The fields you want to sort by must be indexed.
            
            For example, to sort the results by the 'path' field::
            
                searcher.search(q, sortedby = "path")
                
            To sort the results by the 'path' field and then the 'category'
            field::
                
                searcher.search(q, sortedby = ("path", "category"))
                
            To use a sorting object::
            
                searcher.search(q, sortedby = scoring.FieldSorter("path", key=mykeyfn))
            
            Using a string or tuple simply instantiates a
            :class:`whoosh.scoring.FieldSorter` or
            :class:`whoosh.scoring.MultiFieldSorter` object for you. To get a
            custom sort order, instantiate your own ``FieldSorter`` with a
            ``key`` argument, or write a custom :class:`whoosh.scoring.Sorter`
            class.
            
            FieldSorter and MultiFieldSorter cache the document order, using 4
            bytes times the number of documents in the index, and taking time
            to cache. To increase performance, instantiate your own sorter and
            re-use it (but remember you need to recreate it if the index
            changes).
        
        :param reverse: if ``sortedby`` is not None, this reverses the
            direction of the sort.
        :param minscore: the minimum score to include in the results. Only
            used when scoring (``sortedby`` is None).
        :raises ValueError: if ``sortedby`` is not None and is not a string,
            list, tuple or ``Sorter``.
        :rtype: :class:`Results`
        """

        ixreader = self.ixreader

        t = now()
        if sortedby is not None:
            if isinstance(sortedby, basestring):
                sorter = scoring.FieldSorter(sortedby)
            elif isinstance(sortedby, (list, tuple)):
                sorter = scoring.MultiFieldSorter(
                    [scoring.FieldSorter(fn) for fn in sortedby])
            elif isinstance(sortedby, Sorter):
                sorter = sortedby
            else:
                # Tuple operand: the value is always formatted as one item.
                raise ValueError(
                    "sortedby argument must be a string, list, or Sorter (%r)"
                    % (sortedby,))

            # Copy the ordering into a list once: it feeds BitVector, len()
            # and a slice, which a lazy iterable could not survive.
            scored_list = list(
                sorter.order(self, query.docs(self), reverse=reverse))
            scores = None
            # Built from the complete ordering, before it is cut to `limit`.
            docvector = BitVector(ixreader.doc_count_all(), source=scored_list)
            if len(scored_list) > limit:
                scored_list = scored_list[:limit]
        else:
            # Sort by scores
            topdocs = TopDocs(limit, ixreader.doc_count_all())
            final = self.weighting.final
            topdocs.add_all(((docnum, final(self, docnum, score))
                             for docnum, score in query.doc_scores(self)),
                            minscore)

            best = topdocs.best()
            if best:
                # topdocs.best() returns a list like
                # [(docnum, score), (docnum, score), ... ]
                # This unpacks that into two sequences: docnums and scores.
                # Reuse `best` instead of calling topdocs.best() again.
                scored_list, scores = zip(*best)
            else:
                scored_list = []
                scores = []

            docvector = topdocs.docs
        t = now() - t

        return Results(self,
                       query,
                       scored_list,
                       docvector,
                       runtime=t,
                       scores=scores)
Example #9
0
 def docnums(self, query):
     """Gathers every document number that ``query`` matches and returns
     them as a set.
     """
     
     found = set()
     # The query is given this searcher and supplies the document numbers.
     found.update(query.docs(self))
     return found
Example #10
0
    def search(self,
               query,
               limit=5000,
               weighting=None,
               sortedby=None,
               reverse=False):
        """Executes ``query`` against this searcher and wraps the outcome in
        a Results object.
        
        :query: a query.Query object describing what to search for. A query
            string can be turned into such an object with e.g.
            qparser.QueryParser.
        :limit: the maximum number of documents to score. Pass limit=N when
            only the top N documents matter, which keeps the scoring work
            down.
        :weighting: weighting object used for scoring; when None the
            searcher's own weighting is used.
        :sortedby: when not None, results are sorted rather than scored.
            A string names the single field to sort by. A list or tuple is
            taken as a sequence of field names. A callable is called with no
            arguments and its result is used as the sorter. Anything else is
            used directly as a scoring.Sorter object.
            
            The fields you want to sort by must be indexed.
            
            Sorting by the 'path' field::
            
                searcher.search(q, sortedby = "path")
                
            Sorting by 'path' and then by 'category'::
                
                searcher.search(q, sortedby = ("path", "category"))
                
            Using a sorting object::
            
                searcher.search(q, sortedby = scoring.NullSorter)
        
        :reverse: flips the sort direction; only meaningful when 'sortedby'
            is not None.
        """

        doc_reader = self.doc_reader

        began = time.time()
        if sortedby is None:
            # No sort requested: rank the matches by score.
            ranked = TopDocs(limit, doc_reader.doc_count_all())
            ranked.add_all(
                query.doc_scores(self, weighting=weighting or self.weighting))
            scored_list = ranked.best()
            docvector = ranked.docs
        else:
            # Turn whatever the caller supplied into a sorter object.
            if isinstance(sortedby, basestring):
                sorter = scoring.FieldSorter(sortedby)
            elif isinstance(sortedby, (list, tuple)):
                sorter = scoring.MultiFieldSorter(sortedby)
            elif callable(sortedby):
                sorter = sortedby()
            else:
                sorter = sortedby

            scored_list = sorter.order(self, query.docs(self), reverse=reverse)
            # The bit vector is built from the full ordering, before
            # truncation.
            docvector = BitVector(doc_reader.doc_count_all(),
                                  source=scored_list)
            if len(scored_list) > limit:
                scored_list = list(scored_list)[:limit]
        duration = time.time() - began

        return Results(self, query, scored_list, docvector, runtime=duration)