Example #1
def test_nocachefield_segments():
    schema = fields.Schema(a=fields.ID(stored=True))
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    w.add_document(a=u("bravo"))
    w.add_document(a=u("echo"))
    w.add_document(a=u("juliet"))
    w.commit()
    w = ix.writer()
    w.add_document(a=u("kilo"))
    w.add_document(a=u("foxtrot"))
    w.add_document(a=u("charlie"))
    w.commit(merge=False)
    w = ix.writer()
    w.delete_by_term("a", u("echo"))
    w.add_document(a=u("alfa"))
    w.add_document(a=u("india"))
    w.add_document(a=u("delta"))
    w.commit(merge=False)

    with ix.searcher() as s:
        q = query.TermRange("a", u("bravo"), u("k"))
        facet = sorting.FieldFacet("a", reverse=True)

        r = s.search(q, sortedby=facet)
        assert [hit["a"] for hit in r] == [
            "juliet", "india", "foxtrot", "delta", "charlie", "bravo"
        ]

        mq = query.Or(
            [query.Term("a", u("bravo")),
             query.Term("a", u("delta"))])
        anq = query.AndNot(q, mq)
        r = s.search(anq, sortedby=facet)
        assert [hit["a"]
                for hit in r] == ["juliet", "india", "foxtrot", "charlie"]

        mq = query.Or(
            [query.Term("a", u("bravo")),
             query.Term("a", u("delta"))])
        r = s.search(q, mask=mq, sortedby=facet)
        assert [hit["a"]
                for hit in r] == ["juliet", "india", "foxtrot", "charlie"]

        fq = query.Or([
            query.Term("a", u("alfa")),
            query.Term("a", u("charlie")),
            query.Term("a", u("echo")),
            query.Term("a", u("india")),
        ])
        r = s.search(query.Every(), filter=fq, sortedby=facet)
        assert [hit["a"] for hit in r] == ["india", "charlie", "alfa"]

        nq = query.Not(
            query.Or([query.Term("a", u("alfa")),
                      query.Term("a", u("india"))]))
        r = s.search(query.Every(), filter=nq, sortedby=facet)
        assert [hit["a"] for hit in r] == [
            "kilo", "juliet", "foxtrot", "delta", "charlie", "bravo"
        ]
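The three exclusion mechanisms exercised above are interchangeable here: composing with AndNot, passing a mask, and filtering on a Not query all drop the documents matched by mq from the results of q. A condensed sketch of the equivalence (not additional test code):

    r1 = s.search(query.AndNot(q, mq), sortedby=facet)      # query-level
    r2 = s.search(q, mask=mq, sortedby=facet)               # search-time mask
    r3 = s.search(q, filter=query.Not(mq), sortedby=facet)  # inverted filter
    # each yields ["juliet", "india", "foxtrot", "charlie"] for this index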
Example #2
def test_or():
    _run_query(query.Or([query.Term("value", u("red")),
                         query.Term("name", u("yellow"))]),
               [u("A"), u("D"), u("E")])
    # Missing
    _run_query(query.Or([query.Term("value", u("ochre")),
                         query.Term("name", u("glonk"))]),
               [])
    _run_query(query.Or([]), [])
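The _run_query helper is defined elsewhere in the test module; a plausible reconstruction, assuming an index whose documents store single-letter "id" values (only the helper's name and call shape come from the snippet above):

    def _run_query(q, result):
        # Hypothetical sketch: search the module's shared index and compare
        # the stored ids of the hits against the expected list.
        with ix.searcher() as s:
            assert sorted([d["id"] for d in s.search(q)]) == sorted(result)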
Example #3
def _Toplevel(self, node, fieldname):
    queries = [self._eval(s, fieldname) for s in node]
    reqds = [q[0] for q in queries if isinstance(q, tuple)]
    if reqds:
        nots = [q for q in queries if isinstance(q, query.Not)]
        opts = [q for q in queries
                if not isinstance(q, query.Not) and not isinstance(q, tuple)]
        return query.AndMaybe([query.And(reqds + nots), query.Or(opts)])
    else:
        return query.Or(queries)
Example #4
def finalize(self):
    self._subqueries = []
    if self.allowed:
        self.a = query.Or(self.allowed)
    else:
        self.a = query.NullQuery
    if self.denied:
        self.b = query.Or(self.denied)
    else:
        self.b = query.NullQuery
    self._subqueries = (self.a, self.b)
Example #5
def parse(self, input):
    """Parses the input string and returns a Query object/tree.

    This method may return None if the input string does not result in any
    valid queries. It may also raise a variety of exceptions if the input
    string is malformed.

    :param input: the unicode string to parse.
    """

    required = []
    optional = []
    gramsize = max(self.minchars, min(self.maxchars, len(input)))
    if gramsize > len(input):
        return None

    discardspaces = self.discardspaces
    for t in self.analyzerclass(gramsize)(input):
        gram = t.text
        if " " in gram:
            if not discardspaces:
                optional.append(gram)
        else:
            required.append(gram)

    if required:
        fieldname = self.fieldname
        andquery = query.And([query.Term(fieldname, g) for g in required])
        if optional:
            orquery = query.Or([query.Term(fieldname, g) for g in optional])
            return query.AndMaybe([andquery, orquery])
        else:
            return andquery
    else:
        return None
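The combinator doing the work here is AndMaybe: documents must match all of the space-free grams, while grams containing spaces (kept when discardspaces is False) only contribute to scoring. A minimal sketch of the query shape this method returns, with hypothetical grams and field name:

    required = ["foo", "bar"]   # space-free grams: mandatory
    optional = ["o b"]          # space-containing grams: score-only
    andquery = query.And([query.Term("content", g) for g in required])
    orquery = query.Or([query.Term("content", g) for g in optional])
    q = query.AndMaybe([andquery, orquery])  # same call form as above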
Example #6
def test_lengths():
    schema = fields.Schema(id=fields.STORED, text=fields.TEXT)
    ix = RamStorage().create_index(schema)

    w = ix.writer()
    w.add_document(id=1, text=u("alfa bravo charlie delta echo"))
    w.add_document(id=2, text=u("bravo charlie delta echo foxtrot"))
    w.add_document(id=3, text=u("charlie needle echo foxtrot golf"))
    w.add_document(id=4, text=u("delta echo foxtrot golf hotel"))
    w.add_document(id=5, text=u("echo needle needle hotel india"))
    w.add_document(id=6, text=u("foxtrot golf hotel india juliet"))
    w.add_document(id=7, text=u("golf needle india juliet kilo"))
    w.add_document(id=8, text=u("hotel india juliet needle lima"))
    w.commit()

    with ix.searcher() as s:
        q = query.Or([
            query.Term("text", u("needle")),
            query.Term("text", u("charlie"))
        ])
        r = s.search(q, limit=2)
        assert not r.has_exact_length()
        assert r.estimated_length() == 7
        assert r.estimated_min_length() == 3
        assert r.scored_length() == 2
        assert len(r) == 6
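Why those numbers hold for this index: "charlie" occurs in documents 1-3 and "needle" in documents 3, 5, 7 and 8, so the Or query's doc-frequency upper bound is 3 + 4 = 7, while the true union is only 6 because document 3 matches both terms. A hedged annotation (the exact formula behind estimated_min_length is left to Whoosh's internals):

    # charlie -> docs {1, 2, 3}      (df = 3)
    # needle  -> docs {3, 5, 7, 8}   (df = 4)
    # estimated_length() == 7        upper bound: sum of doc frequencies
    # len(r)             == 6        exact count, forced by len()
    # scored_length()    == 2        only the limit=2 hits were scored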
Example #7
def test_no_parents():
    schema = fields.Schema(id=fields.STORED,
                           kind=fields.ID,
                           name=fields.ID(stored=True))
    k = u("alfa")
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(id=0, kind=k, name=u("one"))
        w.add_document(id=1, kind=k, name=u("two"))
        w.add_document(id=2, kind=k, name=u("three"))
        w.add_document(id=3, kind=k, name=u("four"))
        w.add_document(id=4, kind=k, name=u("one"))
        w.add_document(id=5, kind=k, name=u("two"))
        w.add_document(id=6, kind=k, name=u("three"))
        w.add_document(id=7, kind=k, name=u("four"))
        w.add_document(id=8, kind=k, name=u("one"))
        w.add_document(id=9, kind=k, name=u("two"))
        w.add_document(id=10, kind=k, name=u("three"))
        w.add_document(id=11, kind=k, name=u("four"))

    with ix.searcher() as s:
        pq = query.Term("kind", "bravo")
        cq = query.Or([query.Term("name", "two"), query.Term("name", "four")])
        q = query.NestedParent(pq, cq)
        r = s.search(q)
        assert r.is_empty()
Example #8
def test_everything_is_a_parent():
    schema = fields.Schema(id=fields.STORED,
                           kind=fields.ID,
                           name=fields.ID(stored=True))
    k = u("alfa")
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(id=0, kind=k, name=u("one"))
        w.add_document(id=1, kind=k, name=u("two"))
        w.add_document(id=2, kind=k, name=u("three"))
        w.add_document(id=3, kind=k, name=u("four"))
        w.add_document(id=4, kind=k, name=u("one"))
        w.add_document(id=5, kind=k, name=u("two"))
        w.add_document(id=6, kind=k, name=u("three"))
        w.add_document(id=7, kind=k, name=u("four"))
        w.add_document(id=8, kind=k, name=u("one"))
        w.add_document(id=9, kind=k, name=u("two"))
        w.add_document(id=10, kind=k, name=u("three"))
        w.add_document(id=11, kind=k, name=u("four"))

    with ix.searcher() as s:
        pq = query.Term("kind", k)
        cq = query.Or([query.Term("name", "two"), query.Term("name", "four")])
        q = query.NestedParent(pq, cq)
        r = s.search(q)
        assert [hit["id"] for hit in r] == [1, 3, 5, 7, 9, 11]
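NestedParent(parents, children) matches the child query and returns each hit's nearest preceding parent, which assumes "block" indexing: a parent document is added immediately before its children in the same writer. In this test every document matches the parent query, so each matching child is effectively its own parent. A minimal sketch of the usual block layout (hypothetical field values):

    with ix.writer() as w:
        w.add_document(kind=u("album"), name=u("one"))   # parent first
        w.add_document(kind=u("track"), name=u("two"))   # then its children
        w.add_document(kind=u("track"), name=u("four"))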
Example #9
    def update_document(self, **fields):
        """Adds or replaces a document. At least one of the fields for which you
        supply values must be marked as 'unique' in the index's schema.
        
        The keyword arguments map field names to the values to index/store.
        
        For fields that are both indexed and stored, you can specify an alternate
        value to store using a keyword argument in the form "_stored_<fieldname>".
        For example, if you have a field named "title" and you want to index the
        text "a b c" but store the text "e f g", use keyword arguments like this::
        
            update_document(title=u"a b c", _stored_title=u"e f g")
        """

        # Check which of the supplied fields are unique
        unique_fields = [
            name for name, field in self.index.schema.fields()
            if name in fields and field.unique
        ]
        if not unique_fields:
            raise IndexingError("None of the fields in %r are unique" %
                                fields.keys())

        # Delete documents in which the supplied unique fields match
        from whoosh import query
        delquery = query.Or(
            [query.Term(name, fields[name]) for name in unique_fields])
        delquery = delquery.normalize()
        self.delete_by_query(delquery)

        # Add the given fields
        self.add_document(**fields)
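A typical call, assuming a hypothetical schema in which "path" is declared unique:

    # schema = fields.Schema(path=fields.ID(unique=True, stored=True),
    #                        content=fields.TEXT)
    w = ix.writer()
    w.update_document(path=u("/a/b"), content=u("replacement text"))
    w.commit()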
Example #10
    def suggest(self, text, number=3, usescores=False):
        """Returns a list of suggested alternative spellings of 'text'. You must
        add words to the dictionary (using add_field, add_words, and/or add_scored_words)
        before you can use this.
        
        :param text: The word to check.
        :param number: The maximum number of suggestions to return.
        :param usescores: Use the per-word score to influence the suggestions.
        :rtype: list
        """

        grams = defaultdict(list)
        for size in xrange(self.mingram, self.maxgram + 1):
            key = "gram%s" % size
            nga = analysis.NgramAnalyzer(size)
            for t in nga(text):
                grams[key].append(t.text)

        queries = []
        for size in xrange(self.mingram, min(self.maxgram + 1, len(text))):
            key = "gram%s" % size
            gramlist = grams[key]
            queries.append(
                query.Term("start%s" % size,
                           gramlist[0],
                           boost=self.booststart))
            queries.append(
                query.Term("end%s" % size, gramlist[-1], boost=self.boostend))
            for gram in gramlist:
                queries.append(query.Term(key, gram))

        q = query.Or(queries)
        ix = self.index()

        s = searching.Searcher(ix)
        try:
            results = s.search(q)

            length = len(results)
            if len(results) > number * 2:
                length = len(results) // 2
            fieldlist = results[:length]

            suggestions = [(fs["word"], fs["score"]) for fs in fieldlist
                           if fs["word"] != text]

            if usescores:
                def keyfn(a):
                    return 0 - (1 / distance(text, a[0])) * a[1]
            else:
                def keyfn(a):
                    return distance(text, a[0])

            suggestions.sort(key=keyfn)
        finally:
            s.close()

        return [word for word, _ in suggestions[:number]]
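Hypothetical usage, assuming the checker was populated with add_words beforehand, as the docstring requires:

    # checker = SpellChecker(storage)
    # checker.add_words([u("render"), u("rendering"), u("renders")])
    # checker.suggest(u("renderng"), number=2)
    # -> something like [u("rendering"), u("render")]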
Example #11
def test_filter():
    schema = fields.Schema(id=fields.STORED, path=fields.ID, text=fields.TEXT)
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    w.add_document(id=1, path=u("/a/1"), text=u("alfa bravo charlie"))
    w.add_document(id=2, path=u("/b/1"), text=u("bravo charlie delta"))
    w.add_document(id=3, path=u("/c/1"), text=u("charlie delta echo"))
    w.commit(merge=False)
    w = ix.writer()
    w.add_document(id=4, path=u("/a/2"), text=u("delta echo alfa"))
    w.add_document(id=5, path=u("/b/2"), text=u("echo alfa bravo"))
    w.add_document(id=6, path=u("/c/2"), text=u("alfa bravo charlie"))
    w.commit(merge=False)
    w = ix.writer()
    w.add_document(id=7, path=u("/a/3"), text=u("bravo charlie delta"))
    w.add_document(id=8, path=u("/b/3"), text=u("charlie delta echo"))
    w.add_document(id=9, path=u("/c/3"), text=u("delta echo alfa"))
    w.commit(merge=False)

    with ix.searcher() as s:
        fq = query.Or([query.Prefix("path", "/a"),
                       query.Prefix("path", "/b")])
        r = s.search(query.Term("text", "alfa"), filter=fq)
        assert_equal([d["id"] for d in r], [1, 4, 5])

        r = s.search(query.Term("text", "bravo"), filter=fq)
        assert_equal([d["id"] for d in r], [1, 2, 5, 7])
Example #12
def test_fieldboost():
    schema = fields.Schema(id=fields.STORED, a=fields.TEXT, b=fields.TEXT)
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    w.add_document(id=0, a=u("alfa bravo charlie"), b=u("echo foxtrot india"))
    w.add_document(id=1, a=u("delta bravo charlie"), b=u("alfa alfa alfa"))
    w.add_document(id=2, a=u("alfa alfa alfa"), b=u("echo foxtrot india"))
    w.add_document(id=3, a=u("alfa sierra romeo"), b=u("alfa tango echo"))
    w.add_document(id=4, a=u("bravo charlie delta"), b=u("alfa foxtrot india"))
    w.add_document(id=5, a=u("alfa alfa echo"), b=u("tango tango tango"))
    w.add_document(id=6, a=u("alfa bravo echo"), b=u("alfa alfa tango"))
    w.commit()

    def field_booster(fieldname, factor=2.0):
        "Returns a function which will boost the given field in a query tree"
        def booster_fn(obj):
            if obj.is_leaf() and obj.field() == fieldname:
                obj = copy.deepcopy(obj)
                obj.boost *= factor
                return obj
            else:
                return obj
        return booster_fn

    with ix.searcher() as s:
        q = query.Or([query.Term("a", u("alfa")),
                      query.Term("b", u("alfa"))])
        q = q.accept(field_booster("a", 100.0))
        assert_equal(text_type(q), text_type("(a:alfa^100.0 OR b:alfa)"))
        r = s.search(q)
        assert_equal([hit["id"] for hit in r], [2, 5, 6, 3, 0, 1, 4])
Example #13
def test_boost_phrase():
    schema = fields.Schema(title=fields.TEXT(field_boost=5.0, stored=True),
                           text=fields.TEXT)
    ix = RamStorage().create_index(schema)
    domain = u("alfa bravo charlie delta").split()
    w = ix.writer()
    for ls in permutations(domain):
        t = u(" ").join(ls)
        w.add_document(title=t, text=t)
    w.commit()

    q = query.Or([query.Term("title", u("alfa")),
                  query.Term("title", u("bravo")),
                  query.Phrase("text", [u("bravo"), u("charlie"), u("delta")])
                  ])

    def boost_phrases(q):
        if isinstance(q, query.Phrase):
            q.boost *= 1000.0
            return q
        else:
            return q.apply(boost_phrases)
    q = boost_phrases(q)

    with ix.searcher() as s:
        r = s.search(q, limit=None)
        for hit in r:
            if "bravo charlie delta" in hit["title"]:
                assert hit.score > 100.0
Example #14
def test_deleteall():
    schema = fields.Schema(text=fields.TEXT)
    with TempIndex(schema, "deleteall") as ix:
        w = ix.writer()
        domain = u("alfa bravo charlie delta echo").split()
        for i, ls in enumerate(permutations(domain)):
            w.add_document(text=u(" ").join(ls))
            if not i % 10:
                w.commit()
                w = ix.writer()
        w.commit()

        # This is just a test, don't use this method to delete all docs IRL!
        doccount = ix.doc_count_all()
        w = ix.writer()
        for docnum in xrange(doccount):
            w.delete_document(docnum)
        w.commit()

        with ix.searcher() as s:
            r = s.search(
                query.Or([
                    query.Term("text", u("alfa")),
                    query.Term("text", u("bravo"))
                ]))
            assert len(r) == 0

        ix.optimize()
        assert ix.doc_count_all() == 0

        with ix.reader() as r:
            assert list(r) == []
Example #15
    def parse(self, input):
        required = []
        optional = []
        gramsize = max(self.minchars, min(self.maxchars, len(input)))
        if gramsize > len(input):
            return None

        discardspaces = self.discardspaces
        for t in self.analyzerclass(gramsize)(input):
            gram = t.text
            if " " in gram:
                if not discardspaces:
                    optional.append(gram)
            else:
                required.append(gram)

        if required:
            fieldname = self.fieldname
            andquery = query.And([query.Term(fieldname, g) for g in required])
            if optional:
                orquery = query.Or(
                    [query.Term(fieldname, g) for g in optional])
                return query.AndMaybe([andquery, orquery])
            else:
                return andquery
        else:
            return None
Example #16
    def suggestions_and_scores(self, text, weighting=None):
        if weighting is None:
            weighting = scoring.TF_IDF()

        grams = defaultdict(list)
        for size in xrange(self.mingram, self.maxgram + 1):
            key = "gram%s" % size
            nga = analysis.NgramAnalyzer(size)
            for t in nga(text):
                grams[key].append(t.text)

        queries = []
        for size in xrange(self.mingram, min(self.maxgram + 1, len(text))):
            key = "gram%s" % size
            gramlist = grams[key]
            queries.append(
                query.Term("start%s" % size,
                           gramlist[0],
                           boost=self.booststart))
            queries.append(
                query.Term("end%s" % size, gramlist[-1], boost=self.boostend))
            for gram in gramlist:
                queries.append(query.Term(key, gram))

        q = query.Or(queries)
        ix = self.index()
        s = ix.searcher(weighting=weighting)
        try:
            result = s.search(q, limit=None)
            return [(fs["word"], fs["score"], result.score(i))
                    for i, fs in enumerate(result) if fs["word"] != text]
        finally:
            s.close()
Example #17
def test_can_parse_keyword_resolved(self):
    parsed_query = self.parser.parse("$resolved")
    self.assertEqual(
        parsed_query,
        query.Or([
            query.Term('status', 'resolved'),
            query.Term('status', 'closed')
        ]))
Example #18
    def GET(self):
        search_term = self.request.get_param("s")

        all_tags = r.table(rm.Recipe.table)\
            .concat_map(lambda doc: doc["tags"])\
            .distinct()\
            .coerce_to('array').run()

        self.view.data = {"tags": all_tags, "recipes": None}

        if search_term:
            if "recipe:" in search_term:
                parts = search_term.split(" ")
                for part in parts:
                    if "recipe:" in part:
                        recipe = rm.Recipe.find(part[7:])

                        if recipe is not None:
                            return Redirect("/recipes/{}".format(part[7:]))

            search_term = search_term.replace("tag:", "tags:")

            searcher = RecipeSearcher()

            if self.session.id:
                allow = q.Or([
                    q.And([
                        q.Term("user", self.session.id),
                        q.Term("deleted", False),
                        q.Term("reported", False)
                    ]),
                    q.And([
                        q.Term("public", True),
                        q.Term("deleted", False),
                        q.Term("reported", False)
                    ])
                ])

            else:
                allow = q.And([
                    q.Term("public", True),
                    q.Term("deleted", False),
                    q.Term("reported", False)
                ])

            ids = searcher.search(search_term, collection=True, allow=allow)
            if ids is not None:
                ids.fetch()

                page = Paginate(ids,
                                self.request,
                                "title",
                                sort_direction_default="desc")
                self.view.data = {"recipes": page}

            self.view.template = "public/recipes/search/results"

        return self.view
Example #19
def test_can_parse_meta_keywords_that_resolve_to_meta_keywords(self):
    parsed_query = self.parser.parse("$unresolved")
    self.assertEqual(
        parsed_query,
        query.Not(
            query.Or([
                query.Term('status', 'resolved'),
                query.Term('status', 'closed')
            ])))
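The parser under test belongs to the host project, so its internals are not shown; a hedged sketch of the expansion table this test and Example #17 imply, with hypothetical names throughout:

    RESOLVED = query.Or([query.Term('status', 'resolved'),
                         query.Term('status', 'closed')])
    META_KEYWORDS = {
        "$resolved": RESOLVED,               # Example #17
        "$unresolved": query.Not(RESOLVED),  # this test
    }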
Example #20
def related(self, kitab, vrr, nodeIdNum):
    dn, kt = self.keyterms(kitab, vrr, nodeIdNum)
    if not dn:
        return None
    for t, r in kt:
        print("term=", t, " @ rank=", r)
    q = query.Or([query.Term("content", t) for (t, r) in kt])
    results = self.indexer.searcher().search(q, limit=10)
    for i, fields in enumerate(results):
        if results.docnum(i) != dn:
            print(fields['kitab'], "\t\t", str(fields['nodeIdNum']),
                  "\t\t", fields['title'])
Example #21
def parse(filt):
    if filt.query_type == Filter.Q_APPROX:
        mp = qparser.MultifieldParser(filt.get_fields(), schema=schema)
        return mp.parse(unicode(filt.query_string))
    elif filt.query_type == Filter.Q_EXACT:
        s = cls.get_index().searcher()
        qs = filt.query_string
        f = lambda d: qs in [d.get(field) for field in filt.get_fields()]
        ids = [unicode(d['id']) for d in filter(f, s.documents())]
        return query.Or([query.Term('id', iden) for iden in ids])
Example #22
    def suggestions_and_scores(self, text, weighting=None):
        """Returns a list of possible alternative spellings of 'text', as
        ('word', score, weight) triples, where 'word' is the suggested
        word, 'score' is the score that was assigned to the word using
        :meth:`SpellChecker.add_field` or :meth:`SpellChecker.add_scored_words`,
        and 'weight' is the score the word received in the search for the
        original word's ngrams.
        
        You must add words to the dictionary (using add_field, add_words,
        and/or add_scored_words) before you can use this.
        
        This is a lower-level method, in case an expert user needs access to
        the raw scores, for example to implement a custom suggestion ranking
        algorithm. Most people will want to call :meth:`~SpellChecker.suggest`
        instead, which simply returns the top N valued words.
        
        :param text: The word to check.
        :rtype: list
        """

        if weighting is None:
            weighting = TF_IDF()

        grams = defaultdict(list)
        for size in xrange(self.mingram, self.maxgram + 1):
            key = "gram%s" % size
            nga = analysis.NgramAnalyzer(size)
            for t in nga(text):
                grams[key].append(t.text)

        queries = []
        for size in xrange(self.mingram, min(self.maxgram + 1, len(text))):
            key = "gram%s" % size
            gramlist = grams[key]
            queries.append(
                query.Term("start%s" % size,
                           gramlist[0],
                           boost=self.booststart))
            queries.append(
                query.Term("end%s" % size, gramlist[-1], boost=self.boostend))
            for gram in gramlist:
                queries.append(query.Term(key, gram))

        q = query.Or(queries)
        ix = self.index()
        s = ix.searcher(weighting=weighting)
        try:
            result = s.search(q)
            return [(fs["word"], fs["score"], result.score(i))
                    for i, fs in enumerate(result) if fs["word"] != text]
        finally:
            s.close()
Example #23
def get_filter(self, querydict):
    """
    Generates a Whoosh query filter reflecting which facets are currently selected.
    Takes `querydict` - a MultiDict with current HTTP GET params.
    """
    terms = []
    for field in self.get_fields():
        # user-provided values concerning a given field
        values = querydict.getlist('filter_' + field)
        if values:
            subterms = [query.Term(field, val) for val in values]
            terms.append(query.Or(subterms))
    return query.And(terms)
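A hypothetical call, assuming a werkzeug-style MultiDict and facets named "color" and "size":

    # querydict = MultiDict([("filter_color", "red"),
    #                        ("filter_color", "blue"),
    #                        ("filter_size", "small")])
    # get_filter(querydict) ->
    #     And([Or([Term("color", "red"), Term("color", "blue")]),
    #          Or([Term("size", "small")])])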
Example #24
    def test_can_parse_complex_query(self):
        parsed_query = self.parser.parse("content:test $ticket $unresolved")

        self.assertEqual(
            parsed_query,
            query.And([
                query.Term('content', 'test'),
                query.Term('type', 'ticket'),
                query.Not(
                    query.Or([
                        query.Term('status', 'resolved'),
                        query.Term('status', 'closed')
                    ]))
            ]))
Example #25
def test_or_nots2():
    # Issue #286
    schema = fields.Schema(a=fields.KEYWORD(stored=True),
                           b=fields.KEYWORD(stored=True))
    st = RamStorage()
    ix = st.create_index(schema)
    with ix.writer() as w:
        w.add_document(b=u("bravo"))

    with ix.searcher() as s:
        q = query.Or([query.Term("a", "alfa"),
                      query.Not(query.Term("b", "alfa"))
                      ])
        r = s.search(q)
        assert len(r) == 1
Example #26
    def search(self, backend, start, stop, score_field=None):
        # TODO: Handle MatchAll nested inside other search query classes.
        if isinstance(self.query, MatchAll):
            return self.queryset[start:stop]

        config = backend.get_config()
        queryset = self.queryset

        models = get_descendant_models(queryset.model)
        search_kwargs = {
            'filter': wquery.Or([wquery.Term(DJANGO_CT, get_model_ct(m))
                                 for m in models]),
            'limit': None,
        }

        searcher = backend.index.searcher()
        results = searcher.search(
            backend.parser.parse(self.build_whoosh_query(config=config)),
            **search_kwargs)
        # Results are returned in order of relevance; an OrderedDict preserves that order
        score_map = OrderedDict([(r['django_id'], r.score) for r in results])
        searcher.close()

        django_id_ls = score_map.keys()
        if not django_id_ls:
            return queryset.none()

        # Retrieve the results from the db, but preserve the order by score
        preserved_order = Case(
            *[When(pk=pk, then=pos) for pos, pk in enumerate(django_id_ls)])
        queryset = queryset.filter(
            pk__in=django_id_ls).order_by(preserved_order)

        # support search on specific fields
        if self.fields:
            q = self.build_database_filter()
            queryset = queryset.filter(q)

        queryset = queryset.distinct()[start:stop]

        # Add score annotations if required
        if score_field:
            for obj in queryset:
                setattr(obj, score_field, score_map.get(str(obj.pk)))

        return queryset
Example #27
    def _query(self):
        more_like_doc_id = int(self.query_params['more_like_id'])
        content = Document.objects.get(id=more_like_doc_id).content

        docnum = self.searcher.document_number(id=more_like_doc_id)
        kts = self.searcher.key_terms_from_text('content',
                                                content,
                                                numterms=20,
                                                model=classify.Bo1Model,
                                                normalize=False)
        q = query.Or([
            query.Term('content', word, boost=weight) for word, weight in kts
        ])
        mask = {docnum}

        return q, mask
Example #28
def query_page(ix, page, querystring, more_like_doc_id, more_like_doc_content):
    searcher = ix.searcher()
    try:
        if querystring:
            qp = MultifieldParser(
                ["content", "title", "correspondent", "tag", "type"],
                ix.schema)
            qp.add_plugin(DateParserPlugin())
            str_q = qp.parse(querystring)
            corrected = searcher.correct_query(str_q, querystring)
        else:
            str_q = None
            corrected = None

        if more_like_doc_id:
            docnum = searcher.document_number(id=more_like_doc_id)
            kts = searcher.key_terms_from_text('content',
                                               more_like_doc_content,
                                               numterms=20,
                                               model=classify.Bo1Model,
                                               normalize=False)
            more_like_q = query.Or([
                query.Term('content', word, boost=weight)
                for word, weight in kts
            ])
            result_page = searcher.search_page(more_like_q,
                                               page,
                                               filter=str_q,
                                               mask={docnum})
        elif str_q:
            result_page = searcher.search_page(str_q, page)
        else:
            raise ValueError(
                "Either querystring or more_like_doc_id is required.")

        result_page.results.fragmenter = highlight.ContextFragmenter(
            surround=50)
        result_page.results.formatter = JsonFormatter()

        if corrected and corrected.query != str_q:
            corrected_query = corrected.string
        else:
            corrected_query = None

        yield result_page, corrected_query
    finally:
        searcher.close()
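query_page is written as a generator so the searcher stays open while the caller consumes the page; hypothetical usage (field names assumed from the parser setup above):

    for result_page, corrected_query in query_page(ix, 1, "invoice", None, None):
        titles = [hit["title"] for hit in result_page]  # assumes title is stored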
Example #29
def test_contains():
    schema = fields.Schema(text=fields.TEXT)
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    w.add_document(text=u("alfa sierra tango"))
    w.add_document(text=u("bravo charlie delta"))
    w.add_document(text=u("charlie delta echo"))
    w.add_document(text=u("delta echo foxtrot"))
    w.commit()

    q = query.Or([query.Term("text", "bravo"), query.Term("text", "charlie")])
    r = ix.searcher().search(q, terms=True)
    for hit in r:
        assert not hit.contains_term("text", "alfa")
        assert (hit.contains_term("text", "bravo")
                or hit.contains_term("text", "charlie"))
        assert not hit.contains_term("text", "foxtrot")
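contains_term only reports anything useful because the search above was run with terms=True, which tells Whoosh to record which query terms matched each document; a brief contrast (hedged sketch):

    r = ix.searcher().search(q)               # default: matched terms not recorded
    r = ix.searcher().search(q, terms=True)   # per-hit matched terms available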
Example #30
def test_reverse_collapse():
    from whoosh import sorting

    schema = fields.Schema(title=fields.TEXT(stored=True),
                           content=fields.TEXT,
                           path=fields.ID(stored=True),
                           tags=fields.KEYWORD,
                           order=fields.NUMERIC(stored=True))

    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(title=u"First document",
                       content=u"This is my document!",
                       path=u"/a",
                       tags=u"first",
                       order=20.0)
        w.add_document(title=u"Second document",
                       content=u"This is the second example.",
                       path=u"/b",
                       tags=u"second",
                       order=12.0)
        w.add_document(title=u"Third document",
                       content=u"Examples are many.",
                       path=u"/c",
                       tags=u"third",
                       order=15.0)
        w.add_document(title=u"Thirdish document",
                       content=u"Examples are too many.",
                       path=u"/d",
                       tags=u"third",
                       order=25.0)

    with ix.searcher() as s:
        q = query.Every('content')
        r = s.search(q)
        assert [hit["path"] for hit in r] == ["/a", "/b", "/c", "/d"]

        q = query.Or([
            query.Term("title", "document"),
            query.Term("content", "document"),
            query.Term("tags", "document")
        ])
        cf = sorting.FieldFacet("tags")
        of = sorting.FieldFacet("order", reverse=True)
        r = s.search(q, collapse=cf, collapse_order=of, terms=True)
        assert [hit["path"] for hit in r] == ["/a", "/b", "/d"]