def test_union():
    """Check ``all_ids()`` of a UnionMatcher nested inside another one."""
    first = matching.ListMatcher([1, 2, 3, 4, 5, 6, 7, 8])
    second = matching.ListMatcher([2, 4, 8, 10, 20, 30])
    third = matching.ListMatcher([10, 100, 200])

    # The right-hand child of the outer union is itself a union.
    inner = matching.UnionMatcher(second, third)
    union = matching.UnionMatcher(first, inner)

    expected = [1, 2, 3, 4, 5, 6, 7, 8, 10, 20, 30, 100, 200]
    assert expected == list(union.all_ids())
def test_listmatcher():
    """Exercise ListMatcher iteration, ``all_ids``, ``position`` and ``copy``.

    Four checks, each on a freshly built matcher over the same id list:
    step-by-step iteration with scores, ``all_ids()``, starting at
    ``position=3``, and copying after three ``next()`` calls.
    """
    ids = [1, 2, 5, 9, 10]

    def remaining_ids(m):
        # Step the matcher until it is inactive, collecting each id.
        collected = []
        while m.is_active():
            collected.append(m.id())
            m.next()
        return collected

    # Plain iteration, pairing every id with its score.
    lm = matching.ListMatcher(ids)
    pairs = []
    while lm.is_active():
        pairs.append((lm.id(), lm.score()))
        lm.next()
    assert pairs == [(1, 1.0), (2, 1.0), (5, 1.0), (9, 1.0), (10, 1.0)]

    lm = matching.ListMatcher(ids)
    assert list(lm.all_ids()) == ids

    # Starting at position 3 leaves only the last two ids.
    lm = matching.ListMatcher(ids, position=3)
    assert remaining_ids(lm) == [9, 10]

    # A copy taken after three steps must continue from the same place.
    # range() is used instead of the Python 2-only xrange(), so the test does
    # not depend on a compatibility shim being imported.
    lm = matching.ListMatcher(ids)
    for _ in range(3):
        lm.next()
    assert remaining_ids(lm.copy()) == [9, 10]
Beispiel #3
0
def test_replacements():
    """Check what ``replace(minquality)`` returns for several matcher types."""
    scorer = scoring.WeightScorer(0.25)
    left = matching.ListMatcher([1, 2, 3], [0.25, 0.25, 0.25], scorer=scorer)
    right = matching.ListMatcher([1, 2, 3], [0.25, 0.25, 0.25], scorer=scorer)
    union = matching.UnionMatcher(left, right)

    # A single list matcher asked for quality 0.5
    replaced_leaf = left.replace(0.5)
    assert_equal(replaced_leaf.__class__, matching.NullMatcherClass)

    # The union asked for quality 0.5, then 0.6
    replaced_union = union.replace(0.5)
    assert_equal(replaced_union.__class__, matching.IntersectionMatcher)
    replaced_union = union.replace(0.6)
    assert_equal(replaced_union.__class__, matching.NullMatcherClass)

    # A wrapper around the union: wrapper type and boost are expected to be
    # kept, while the wrapped child is expected to be an intersection.
    wrapped = matching.WrappingMatcher(union, boost=2.0).replace(0.5)
    assert_equal(wrapped.__class__, matching.WrappingMatcher)
    assert_equal(wrapped.boost, 2.0)
    assert_equal(wrapped.child.__class__, matching.IntersectionMatcher)

    # Three list matchers whose per-document weights equal their scorer weight
    ls1, ls2, ls3 = [
        matching.ListMatcher([1, 2, 3], [w, w, w],
                             scorer=scoring.WeightScorer(w))
        for w in (0.1, 0.2, 0.3)
    ]

    multi = matching.MultiMatcher([ls1, ls2, ls3], [0, 4, 8]).replace(0.25)
    assert_equal(multi.current, 2)

    dismax = matching.DisjunctionMaxMatcher(ls1, ls2).replace(0.15)
    assert dismax is ls2
def test_arrayunion():
    """Check ArrayUnionMatcher's id initially and after two ``skip_to`` calls."""
    tens = matching.ListMatcher(list(range(10, 101, 10)))
    hundreds = matching.ListMatcher(list(range(100, 601, 100)))
    aum = matching.ArrayUnionMatcher([tens, hundreds], 600, partsize=5)

    assert aum.id() == 10
    # (skip target, id expected afterwards)
    for target, expected in ((45, 50), (550, 600)):
        aum.skip_to(target)
        assert aum.id() == expected
def test_arrayunion2():
    """Check ArrayUnionMatcher over three lists, one shorter than ``partsize``.

    Once the union is built, the two-element sub-matcher is expected to be
    inactive already; the union must still be able to skip to id 50.
    """
    shortest, middle, longest = [
        matching.ListMatcher(ids)
        for ids in ([1, 2], [1, 2, 10, 20], [1, 5, 10, 50])
    ]
    aum = matching.ArrayUnionMatcher([shortest, middle, longest], 51,
                                     partsize=2)

    assert aum.id() == 1
    assert not shortest.is_active()

    aum.skip_to(50)
    assert aum.id() == 50
def test_union_scores():
    """Check the (id, score) pairs produced by nested UnionMatchers."""
    a = matching.ListMatcher([1, 2, 3])
    b = matching.ListMatcher([2, 4, 8])
    c = matching.ListMatcher([2, 3, 8])
    union = matching.UnionMatcher(a, matching.UnionMatcher(b, c))

    seen = []
    while True:
        if not union.is_active():
            break
        seen.append((union.id(), union.score()))
        union.next()

    # The expected score of each id equals the number of lists containing it.
    expected = [(1, 1.0), (2, 3.0), (3, 2.0), (4, 1.0), (8, 2.0)]
    assert expected == seen
def test_wrapper():
    """Check WrappingMatcher scores with ``boost=2.0`` and its ``all_ids``."""
    inner = matching.ListMatcher([1, 2, 5, 9, 10])
    boosted = matching.WrappingMatcher(inner, boost=2.0)

    pairs = []
    while True:
        if not boosted.is_active():
            break
        pairs.append((boosted.id(), boosted.score()))
        boosted.next()
    assert pairs == [(1, 2.0), (2, 2.0), (5, 2.0), (9, 2.0), (10, 2.0)]

    # all_ids() of the wrapper should give back the wrapped ids.
    ids = [1, 2, 5, 9, 10]
    boosted = matching.WrappingMatcher(matching.ListMatcher(ids), boost=2.0)
    assert list(boosted.all_ids()) == ids
def test_andnot():
    """Check AndNotMatcher iteration, ``all_ids`` and ``copy``."""

    def build():
        # Build a fresh AndNotMatcher over the same two id lists.
        positive = matching.ListMatcher([1, 4, 10, 20, 90])
        negative = matching.ListMatcher([0, 4, 20])
        return matching.AndNotMatcher(positive, negative)

    # Step-by-step iteration with scores
    anm = build()
    pairs = []
    while anm.is_active():
        pairs.append((anm.id(), anm.score()))
        anm.next()
    assert pairs == [(1, 1.0), (10, 1.0), (90, 1.0)]

    # The negative list covers a prefix of the positive list
    echo_lm = matching.ListMatcher([0, 1, 2, 3, 4])
    bravo_lm = matching.ListMatcher([0, 1])
    prefix_anm = matching.AndNotMatcher(echo_lm, bravo_lm)
    assert list(prefix_anm.all_ids()) == [2, 3, 4]

    assert list(build().all_ids()) == [1, 10, 90]

    # A copy taken after two steps should only have the last id left
    anm = build()
    anm.next()
    anm.next()
    clone = anm.copy()
    tail = []
    while clone.is_active():
        tail.append(clone.id())
        clone.next()
    assert tail == [90]
Beispiel #9
0
    def matcher(self, searcher, weighting=None):
        """Return a ListMatcher over the documents selected by the field name.

        * Field name ``None``, ``""`` or ``"*"``: every id from
          ``reader.all_doc_ids()``.
        * The reader supports caches and has a field cache for the field:
          every document whose ordinal in that cache is non-zero.
        * Otherwise: the union of the posting ids of every term in the
          field's lexicon, sorted.

        All documents are weighted with ``self.boost``. ``weighting`` is
        accepted but not used here.
        """
        fieldname = self.fieldname
        reader = searcher.reader()
        match_everything = fieldname in (None, "", "*")

        if match_everything:
            # Per the original note, all_doc_ids() accounts for deletions
            docnums = array("I", reader.all_doc_ids())
        elif (reader.supports_caches()
              and reader.fieldcache_available(fieldname)):
            # Fast path: the field cache tells us which documents have a
            # value for this field (ordinal 0 is skipped)
            cache = reader.fieldcache(self.fieldname)
            with_value = (num for num, ordinal in cache.ords()
                          if ordinal != 0)
            docnums = array("I", with_value)
        else:
            # Slow fallback (hacky, and slow on large indexes): gather the
            # document numbers of every term in the field into a set
            seen = set()
            for text in searcher.lexicon(fieldname):
                seen.update(searcher.postings(fieldname, text).all_ids())
            docnums = sorted(seen)

        return matching.ListMatcher(docnums, all_weights=self.boost)
Beispiel #10
0
 def matcher(self, searcher, weighting=None):
     """Return the child's matches as a ListMatcher weighted by ``self.score``.

     A child matcher that is a ``NullMatcherClass`` instance is returned
     unchanged. ``weighting`` is accepted but not used here.
     """
     child = self.child.matcher(searcher)
     if not isinstance(child, matching.NullMatcherClass):
         # Read every matching id up front and give them all the same weight
         docnums = array("I", child.all_ids())
         return matching.ListMatcher(docnums, all_weights=self.score,
                                     term=child.term())
     return child
def test_inverse():
    """Check that InverseMatcher with limit 15 yields the ids not listed."""
    source = matching.ListMatcher([1, 5, 10, 11, 13])
    inverse = matching.InverseMatcher(source, 15)

    found = []
    while True:
        if not inverse.is_active():
            break
        found.append(inverse.id())
        inverse.next()

    assert found == [0, 2, 3, 4, 6, 7, 8, 9, 12, 14]
def test_filter():
    """Check that FilterMatcher keeps only ids present in the given set."""

    def make_source():
        # Fresh matcher over ids 2..9 for each check
        return matching.ListMatcher(list(range(2, 10)))

    # Every allowed id is in the source
    filtered = matching.FilterMatcher(make_source(), frozenset([3, 9]))
    assert list(filtered.all_ids()) == [3, 9]

    # Some allowed ids (1 and 13) are not in the source
    allowed = frozenset([1, 5, 9, 13])
    filtered = matching.FilterMatcher(make_source(), allowed)
    assert list(filtered.all_ids()) == [5, 9]
def test_random_andnot():
    """Compare AndNotMatcher with a brute-force difference on random input.

    For ``testcount`` rounds, a random sample of ``0..rangesize-1`` is used
    as the negative list; the matcher's ``all_ids()`` must equal the ids of
    the full range that are not in that sample.
    """
    testcount = 100
    rangesize = 100

    rng = list(range(rangesize))

    # range() instead of the Python 2-only xrange(): this function already
    # uses range() above, and the loop count is small.
    for _ in range(testcount):
        negs = sorted(sample(rng, randint(0, rangesize - 1)))
        negset = frozenset(negs)
        # Reference answer computed without any matcher
        expected = [n for n in rng if n not in negset]

        pos = matching.ListMatcher(rng)
        neg = matching.ListMatcher(negs)

        anm = matching.AndNotMatcher(pos, neg)
        assert list(anm.all_ids()) == expected
def test_listmatcher_skip_to_quality_identical_scores():
    """Check ``skip_to_quality(0.3)`` when every posting scores 1.0.

    The test expects no posting to be skipped.
    """
    ids = [1, 2, 5, 9, 10]
    expected = [(1, 1.0), (2, 1.0), (5, 1.0), (9, 1.0), (10, 1.0)]

    lm = matching.ListMatcher(ids, scorer=WeightScorer(1.0))
    lm.skip_to_quality(0.3)

    pairs = []
    while True:
        if not lm.is_active():
            break
        pairs.append((lm.id(), lm.score()))
        lm.next()
    assert pairs == expected
def test_inverse_skip():
    """Check the ids an InverseMatcher yields after ``skip_to(8)``."""
    source = matching.ListMatcher([1, 5, 10, 11, 13])
    inverse = matching.InverseMatcher(source, 15)
    inverse.skip_to(8)

    found = []
    while True:
        if not inverse.is_active():
            break
        found.append(inverse.id())
        inverse.next()

    assert found == [8, 9, 12, 14]
def test_exclude():
    """Check FilterMatcher with ``exclude=True``: ``all_ids`` and ``copy``."""

    def build():
        # Fresh excluding matcher: ids 2 and 9 are filtered out
        source = matching.ListMatcher([1, 2, 5, 9, 10])
        return matching.FilterMatcher(source, frozenset([2, 9]),
                                      exclude=True)

    # The all_ids() check is performed twice, each on a new matcher
    for _ in (0, 1):
        em = build()
        assert list(em.all_ids()) == [1, 5, 10]

    # A copy taken after two steps should only have the last id left
    em = build()
    em.next()
    em.next()
    clone = em.copy()
    tail = []
    while clone.is_active():
        tail.append(clone.id())
        clone.next()
    assert tail == [10]
Beispiel #17
0
    def matcher(self, searcher, context=None):
        """Return the child's matches as a ListMatcher weighted by ``self.score``.

        The child's matcher is returned unchanged when
        ``context.needs_current`` is true or when it is a
        ``NullMatcherClass`` instance. A falsy ``context`` is replaced with a
        default ``SearchContext()``.
        """
        from whoosh.searching import SearchContext

        if not context:
            context = SearchContext()

        child = self.child.matcher(searcher, context)
        if context.needs_current:
            return child
        if isinstance(child, matching.NullMatcherClass):
            return child

        # Read every matching id up front and give them all the same weight
        docnums = array("I", child.all_ids())
        return matching.ListMatcher(docnums, all_weights=self.score,
                                    term=child.term())
def test_empty_andnot():
    """Check AndNotMatcher when one or both sides are NullMatchers."""
    # Both sides empty: inactive, with no ids
    both_empty = matching.AndNotMatcher(matching.NullMatcher(),
                                        matching.NullMatcher())
    assert not both_empty.is_active()
    assert not list(both_empty.all_ids())

    # Only the negative side empty: every positive id is expected back
    positive = matching.ListMatcher([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
    negative = matching.NullMatcher()
    neg_empty = matching.AndNotMatcher(positive, negative)
    assert list(neg_empty.all_ids()) == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
def test_simple_intersection():
    """Check IntersectionMatcher iteration, ``all_ids`` and ``copy``."""

    def build():
        # Build a fresh IntersectionMatcher over the same two id lists.
        left = matching.ListMatcher([1, 4, 10, 20, 90])
        right = matching.ListMatcher([0, 4, 20])
        return matching.IntersectionMatcher(left, right)

    # Step-by-step iteration with scores
    im = build()
    pairs = []
    while im.is_active():
        pairs.append((im.id(), im.score()))
        im.next()
    assert pairs == [(4, 2.0), (20, 2.0)]

    assert list(build().all_ids()) == [4, 20]

    # A copy taken after two steps should have nothing left
    im = build()
    im.next()
    im.next()
    clone = im.copy()
    tail = []
    while clone.is_active():
        tail.append(clone.id())
        clone.next()
    assert not tail
def test_require():
    """Check RequireMatcher iteration, ``all_ids`` and ``copy``."""

    def build():
        # Build a fresh RequireMatcher over the same two id lists.
        scored = matching.ListMatcher([1, 4, 10, 20, 90])
        required = matching.ListMatcher([0, 4, 20])
        return matching.RequireMatcher(scored, required)

    # Step-by-step iteration with scores
    rm = build()
    pairs = []
    while rm.is_active():
        pairs.append((rm.id(), rm.score()))
        rm.next()
    assert pairs == [(4, 1.0), (20, 1.0)]

    assert list(build().all_ids()) == [4, 20]

    # A copy taken after two steps should have nothing left
    rm = build()
    rm.next()
    rm.next()
    clone = rm.copy()
    tail = []
    while clone.is_active():
        tail.append(clone.id())
        clone.next()
    assert not tail
def test_andmaybe():
    """Check AndMaybeMatcher iteration, ``all_ids`` and ``copy``."""

    def build():
        # Build a fresh AndMaybeMatcher over the same two id lists.
        required = matching.ListMatcher([1, 4, 10, 20, 90])
        optional = matching.ListMatcher([0, 4, 20])
        return matching.AndMaybeMatcher(required, optional)

    # Step-by-step iteration with scores
    amm = build()
    pairs = []
    while amm.is_active():
        pairs.append((amm.id(), amm.score()))
        amm.next()
    assert pairs == [(1, 1.0), (4, 2.0), (10, 1.0), (20, 2.0), (90, 1.0)]

    assert list(build().all_ids()) == [1, 4, 10, 20, 90]

    # A copy taken after two steps should continue from the third id
    amm = build()
    amm.next()
    amm.next()
    clone = amm.copy()
    tail = []
    while clone.is_active():
        tail.append(clone.id())
        clone.next()
    assert tail == [10, 20, 90]
def test_simple_union():
    """Check UnionMatcher iteration, ``all_ids`` and ``copy``."""

    def build():
        # Build a fresh UnionMatcher over the same two id lists.
        left = matching.ListMatcher([1, 4, 10, 20, 90])
        right = matching.ListMatcher([0, 4, 20])
        return matching.UnionMatcher(left, right)

    # Step-by-step iteration with scores
    um = build()
    pairs = []
    while um.is_active():
        pairs.append((um.id(), um.score()))
        um.next()
    expected = [(0, 1.0), (1, 1.0), (4, 2.0), (10, 1.0), (20, 2.0),
                (90, 1.0)]
    assert pairs == expected

    assert list(build().all_ids()) == [0, 1, 4, 10, 20, 90]

    # A copy taken after two steps should continue from the third id
    um = build()
    um.next()
    um.next()
    clone = um.copy()
    tail = []
    while clone.is_active():
        tail.append(clone.id())
        clone.next()
    assert tail == [4, 10, 20, 90]
Beispiel #23
0
def create_matchers():
    """Build and return a MultiMatcher over a fixed tree of matchers.

    Six 1000-element ListMatchers (constant per-list weight, each with its
    own WeightScorer) are paired into three UnionMatchers; the third union
    is wrapped in an InverseMatcher with limit 15. The MultiMatcher combines
    the first two unions and the inverse with offsets 0, 9 and 18.
    """
    size = 1000

    id_lists = [
        list(range(size)),
        [i + 1 for i in range(size)],
        [i * 2 + i % 5 for i in range(size)],
        [i * i for i in range(size)],
        [1001 - i for i in range(size)],
        [i * 3 // 2 for i in range(size)],
    ]
    # One constant weight per list, repeated for every posting
    weight_lists = [[w] * size for w in (0.1, 0.2, 0.3, 0.4, 0.5, 0.6)]
    scorers = [scoring.WeightScorer(w)
               for w in (0.15, 0.25, 0.35, 0.45, 0.55, 0.65)]

    ls1, ls2, ls3, ls4, ls5, ls6 = [
        matching.ListMatcher(ids, weights, scorer)
        for ids, weights, scorer in zip(id_lists, weight_lists, scorers)
    ]

    um1 = matching.UnionMatcher(ls1, ls2)
    um2 = matching.UnionMatcher(ls3, ls4)
    um3 = matching.UnionMatcher(ls5, ls6)
    inv = matching.InverseMatcher(um3, 15)

    return matching.MultiMatcher([um1, um2, inv], [0, 9, 18])
Beispiel #24
0
    def matcher(self, searcher, weighting=None):
        """Return a matcher for the terms this query expands to.

        ``self._words(reader)`` supplies the words; each one becomes a
        ``Term`` on ``self.fieldname``. Then:

        * no terms: a ``NullMatcher``;
        * exactly one term: that term's own matcher;
        * ``self.constantscore`` is set, or there are more than
          ``self.TOO_MANY_CLAUSES`` terms: every term's postings are read
          eagerly and served from a single ``ListMatcher``;
        * otherwise: the matcher of an ``Or`` over all the terms.
        """
        fieldname = self.fieldname
        constantscore = self.constantscore
        reader = searcher.reader()
        terms = [Term(fieldname, word) for word in self._words(reader)]

        if not terms:
            return matching.NullMatcher()

        if len(terms) == 1:
            # A single term can be used directly
            return terms[0].matcher(searcher, weighting=weighting)

        if not (constantscore or len(terms) > self.TOO_MANY_CLAUSES):
            # The default case: Or the terms together
            from whoosh.query import Or
            return Or(terms).matcher(searcher, weighting=weighting)

        # Too many clauses for an Or search (or a constant score was asked
        # for): trade memory for time by collecting all matching documents
        # now and serving them from a ListMatcher
        fmt = searcher.schema[fieldname].format
        values_by_doc = defaultdict(list)
        weight_by_doc = defaultdict(float)
        for term in terms:
            m = term.matcher(searcher)
            while m.is_active():
                docnum = m.id()
                values_by_doc[docnum].append(m.value())
                if not constantscore:
                    # Weights are only summed when they will be used
                    weight_by_doc[docnum] += m.weight()
                m.next()

        docnums = sorted(values_by_doc)
        # A list of lists of value strings, one inner list per document;
        # ListMatcher combines multiple values if the user asks for them
        values = [values_by_doc[docnum] for docnum in docnums]

        if constantscore:
            return matching.ListMatcher(docnums, values=values, format=fmt,
                                        all_weights=self.boost)

        weights = [weight_by_doc[docnum] for docnum in docnums]
        return matching.ListMatcher(docnums, values=values, format=fmt,
                                    weights=weights)
def test_random_union():
    """Compare a tree of UnionMatchers with a set union on random input.

    Each round builds between 2 and 10 ListMatchers, each over a sorted
    random sample of 2 to 10 values from ``0..99``. They are combined with
    ``make_binary_tree`` and the resulting ``all_ids()`` must equal the
    sorted union of all the samples.
    """
    testcount = 100
    rangelimits = (2, 10)
    clauselimits = (2, 10)

    vals = list(range(100))

    # range() instead of the Python 2-only xrange(): this function already
    # uses range() above, and the loop counts are small.
    for _ in range(testcount):
        expected_ids = set()
        matchers = []
        for _clause in range(randint(*clauselimits)):
            nums = sample(vals, randint(*rangelimits))
            expected_ids.update(nums)
            matchers.append(matching.ListMatcher(sorted(nums)))

        um = make_binary_tree(matching.UnionMatcher, matchers)
        assert list(um.all_ids()) == sorted(expected_ids)
Beispiel #26
0
    def matcher(self, searcher, context=None):
        """Return a ListMatcher over the documents selected by the field name.

        * Field name ``None``, ``""`` or ``"*"``: every id from
          ``reader.all_doc_ids()``.
        * Otherwise: the union of the posting ids of every term in the
          field's lexicon, sorted.

        All documents are weighted with ``self.boost``. ``context`` is
        accepted but not used here.
        """
        fieldname = self.fieldname
        reader = searcher.reader()
        match_everything = fieldname in (None, "", "*")

        if match_everything:
            # Per the original note, all_doc_ids() accounts for deletions
            docnums = array("I", reader.all_doc_ids())
        else:
            # Slow fallback (hacky, and slow on large indexes): gather the
            # document numbers of every term in the field into a set
            seen = set()
            for text in searcher.lexicon(fieldname):
                seen.update(searcher.postings(fieldname, text).all_ids())
            docnums = sorted(seen)

        return matching.ListMatcher(docnums, all_weights=self.boost)