Ejemplo n.º 1
0
def test_regular_or():
    """Check the spans reported by the matcher of an ``Or`` of two terms.

    For every document the matcher visits, each reported span's ``start``
    is used to index the document's stored ``"text"`` value, and the item
    found there must be one of the two queried words.
    """
    index = get_index()
    with index.searcher() as searcher:
        query = Or([Term("text", "bravo"), Term("text", "alfa")])
        matcher = query.matcher(searcher)
        while matcher.is_active():
            # Stored "text" value of the document the matcher is on
            stored_text = searcher.stored_fields(matcher.id())["text"]
            for span in matcher.spans():
                word = stored_text[span.start]
                assert word == "bravo" or word == "alfa"
            matcher.next()
Ejemplo n.º 2
0
    def matcher(self, searcher, context=None):
        """Return a matcher for the documents matching any expanded term.

        One ``Term`` query is built for each value produced by
        ``self._btexts(reader)``; how they are combined depends on how many
        there are and on ``self.constantscore``.

        :param searcher: the searcher supplying the reader, the schema and
            the per-term matchers.
        :param context: passed through unchanged to the matcher of the
            single term or of the ``Or`` query; it is not used when a
            ``ListMatcher`` is built.
        :returns: a ``NullMatcher`` when there are no terms, the term's own
            matcher when there is exactly one, a ``ListMatcher`` when
            constant scoring is on or there are more than
            ``self.TOO_MANY_CLAUSES`` terms, otherwise the matcher of an
            ``Or`` of all the terms.
        """
        field = self.fieldname
        constant = self.constantscore

        reader = searcher.reader()
        terms = [Term(field, btext) for btext in self._btexts(reader)]
        if not terms:
            return matching.NullMatcher()

        if len(terms) == 1:
            # If there's only one term, just use it
            return terms[0].matcher(searcher, context)

        if not constant and len(terms) <= self.TOO_MANY_CLAUSES:
            # The default case: Or the terms together
            from whoosh.query import Or
            return Or(terms).matcher(searcher, context)

        # Constant scoring was requested, or there are so many clauses that
        # an Or search would take forever: trade memory for time, find all
        # the matching docs up front and serve them as one ListMatcher
        fmt = searcher.schema[field].format
        values_by_doc = defaultdict(list)
        weights_by_doc = defaultdict(float)
        for term in terms:
            term_matcher = term.matcher(searcher)
            while term_matcher.is_active():
                doc = term_matcher.id()
                values_by_doc[doc].append(term_matcher.value())
                if not constant:
                    weights_by_doc[doc] += term_matcher.weight()
                term_matcher.next()

        docnums = sorted(values_by_doc)
        # This is a list of lists of value strings -- ListMatcher will
        # actually do the work of combining multiple values if the user
        # asks for them
        values = [values_by_doc[doc] for doc in docnums]

        if constant:
            weight_args = {"all_weights": self.boost}
        else:
            weight_args = {
                "weights": [weights_by_doc[doc] for doc in docnums],
            }

        return matching.ListMatcher(docnums, values=values, format=fmt,
                                    **weight_args)
Ejemplo n.º 3
0
    def matcher(self, searcher, weighting=None):
        """Return a matcher for the documents matching any expanded word.

        One ``Term`` query is built for each value produced by
        ``self._words(reader)``; how they are combined depends on how many
        there are and on ``self.constantscore``.

        :param searcher: the searcher supplying the reader, the schema and
            the per-term matchers.
        :param weighting: passed through as the ``weighting`` keyword to the
            matcher of the single term or of the ``Or`` query; it is not
            used when a ``ListMatcher`` is built.
        :returns: a ``NullMatcher`` when there are no terms, the term's own
            matcher when there is exactly one, a ``ListMatcher`` when
            constant scoring is on or there are more than
            ``self.TOO_MANY_CLAUSES`` terms, otherwise the matcher of an
            ``Or`` of all the terms.
        """
        field = self.fieldname
        constant = self.constantscore
        reader = searcher.reader()
        terms = [Term(field, word) for word in self._words(reader)]
        if not terms:
            return matching.NullMatcher()

        if len(terms) == 1:
            # If there's only one term, just use it
            return terms[0].matcher(searcher, weighting=weighting)

        if not constant and len(terms) <= self.TOO_MANY_CLAUSES:
            # The default case: Or the terms together
            from whoosh.query import Or
            return Or(terms).matcher(searcher, weighting=weighting)

        # Constant scoring was requested, or there are so many clauses that
        # an Or search would take forever: trade memory for time, find all
        # the matching docs and serve them up as a single ListMatcher
        fmt = searcher.schema[field].format
        values_by_doc = defaultdict(list)
        weights_by_doc = defaultdict(float)
        for term in terms:
            term_matcher = term.matcher(searcher)
            while term_matcher.is_active():
                doc = term_matcher.id()
                values_by_doc[doc].append(term_matcher.value())
                if not constant:
                    weights_by_doc[doc] += term_matcher.weight()
                term_matcher.next()

        docnums = sorted(values_by_doc)
        # This is a list of lists of value strings -- ListMatcher will
        # actually do the work of combining multiple values if the user
        # asks for them
        values = [values_by_doc[doc] for doc in docnums]

        if constant:
            weight_args = {"all_weights": self.boost}
        else:
            weight_args = {
                "weights": [weights_by_doc[doc] for doc in docnums],
            }

        return matching.ListMatcher(docnums, values=values, format=fmt,
                                    **weight_args)