def test_reverse_collapse():
    from whoosh import sorting

    schema = fields.Schema(title=fields.TEXT(stored=True),
                           content=fields.TEXT,
                           path=fields.ID(stored=True),
                           tags=fields.KEYWORD,
                           order=fields.NUMERIC(stored=True))

    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(title=u"First document", content=u"This is my document!",
                       path=u"/a", tags=u"first", order=20.0)
        w.add_document(title=u"Second document",
                       content=u"This is the second example.",
                       path=u"/b", tags=u"second", order=12.0)
        w.add_document(title=u"Third document", content=u"Examples are many.",
                       path=u"/c", tags=u"third", order=15.0)
        w.add_document(title=u"Thirdish document",
                       content=u"Examples are too many.",
                       path=u"/d", tags=u"third", order=25.0)

    with ix.searcher() as s:
        q = query.Every('content')
        r = s.search(q)
        assert [hit["path"] for hit in r] == ["/a", "/b", "/c", "/d"]

        q = query.Or([query.Term("title", "document"),
                      query.Term("content", "document"),
                      query.Term("tags", "document")])
        cf = sorting.FieldFacet("tags")
        of = sorting.FieldFacet("order", reverse=True)
        r = s.search(q, collapse=cf, collapse_order=of, terms=True)
        assert [hit["path"] for hit in r] == ["/a", "/b", "/d"]
def test_contains():
    schema = fields.Schema(text=fields.TEXT)
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    w.add_document(text=u("alfa sierra tango"))
    w.add_document(text=u("bravo charlie delta"))
    w.add_document(text=u("charlie delta echo"))
    w.add_document(text=u("delta echo foxtrot"))
    w.commit()

    q = query.Or([query.Term("text", "bravo"), query.Term("text", "charlie")])
    r = ix.searcher().search(q, terms=True)
    for hit in r:
        assert not hit.contains_term("text", "alfa")
        assert (hit.contains_term("text", "bravo")
                or hit.contains_term("text", "charlie"))
        assert not hit.contains_term("text", "foxtrot")
def parse_query(self, fieldname, qstring, boost=1.0):
    from whoosh import query

    terms = [query.Term(fieldname, g)
             for g in self.process_text(qstring, mode='query')]
    cls = query.Or if self.queryor else query.And
    return cls(terms, boost=boost)
def lookup(self, source_language, target_language, text, user, project,
           use_shared):
    langfilter = query.And([
        query.Term('source_language', source_language),
        query.Term('target_language', target_language),
        self.get_filter(user, project, use_shared, True),
    ])
    text_query = self.parser.parse(text)
    matches = self.searcher.search(text_query, filter=langfilter, limit=20000)
    for match in matches:
        similarity = self.comparer.similarity(text, match['source'])
        if similarity < 30:
            continue
        yield (match['source'], match['target'], similarity,
               match['category'], match['origin'])
def get_attachments_from_dms(community):
    svc = current_app.services['indexing']
    filters = wq.And([
        wq.Term('community_id', community.id),
        wq.Term('object_type', Document.entity_type)
    ])
    sortedby = whoosh.sorting.FieldFacet('created_at', reverse=True)
    documents = svc.search(u'', filter=filters, sortedby=sortedby, limit=50)

    attachments = []
    for doc in documents:
        url = url_for(doc)
        attachment = Attachment(url, doc['name'], doc['owner_name'],
                                doc['created_at'], doc.get('content_length'),
                                doc.get('content_type', u''))
        attachments.append(attachment)
    return attachments
def lookup(self, source_language, target_language, text):
    langfilter = query.And([
        query.Term('source_language', source_language),
        query.Term('target_language', target_language),
    ])
    self.open_searcher()
    text_query = self.parser.parse(text)
    matches = self.searcher.search(
        text_query, filter=langfilter, limit=20000
    )
    for match in matches:
        similarity = self.comparer.similarity(text, match['source'])
        if similarity < 30:
            continue
        yield (
            match['source'], match['target'], similarity, match['origin']
        )
def test_can_apply_filter_and_facet(self):
    self.whoosh_backend.add_doc(dict(id="1", type="ticket"))
    self.whoosh_backend.add_doc(dict(id="2", type="wiki"))
    result = self.whoosh_backend.query(query.Every(),
                                       filter=query.Term("type", "ticket"),
                                       facets=["type"])
    self.print_result(result)
    self.assertEqual(1, result.hits)
    self.assertEqual("ticket", result.docs[0]["type"])
def test_unstored():
    schema = fields.Schema(text=fields.TEXT, tags=fields.KEYWORD)
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    w.add_document(text=u("alfa bravo charlie"), tags=u("delta echo"))
    w.commit()

    hit = ix.searcher().search(query.Term("text", "bravo"))[0]
    assert_raises(KeyError, hit.highlights, "tags")
def test_memory_codec():
    from whoosh.codec import memory
    from whoosh.searching import Searcher

    ana = analysis.StemmingAnalyzer()
    schema = fields.Schema(a=fields.TEXT(vector=True),
                           b=fields.STORED,
                           c=fields.NUMERIC(stored=True, sortable=True),
                           d=fields.TEXT(analyzer=ana, spelling=True))

    codec = memory.MemoryCodec()
    with codec.writer(schema) as w:
        w.add_document(a=u("alfa bravo charlie"), b="hello", c=100,
                       d=u("quelling whining echoing"))
        w.add_document(a=u("bravo charlie delta"), b=1000, c=200,
                       d=u("rolling timing yelling"))
        w.add_document(a=u("charlie delta echo"), b=5.5, c=300,
                       d=u("using opening pulling"))
        w.add_document(a=u("delta echo foxtrot"), b=True, c=-100,
                       d=u("aching selling dipping"))
        w.add_document(a=u("echo foxtrot india"), b=None, c=-200,
                       d=u("filling going hopping"))

    reader = codec.reader(schema)
    s = Searcher(reader)

    assert ("a", "delta") in reader
    q = query.Term("a", "delta")
    r = s.search(q)
    assert len(r) == 3
    assert [hit["b"] for hit in r] == [1000, 5.5, True]

    assert (" ".join(s.field_terms("a"))
            == "alfa bravo charlie delta echo foxtrot india")

    cfield = schema["c"]
    c_sortables = cfield.sortable_terms(reader, "c")
    c_values = [cfield.from_bytes(t) for t in c_sortables]
    assert c_values == [-200, -100, 100, 200, 300]

    assert reader.has_column("c")
    c_values = list(reader.column_reader("c"))
    assert c_values == [100, 200, 300, -100, -200]

    assert s.has_vector(2, "a")
    v = s.vector(2, "a")
    assert " ".join(v.all_ids()) == "charlie delta echo"
def test_sequence():
    schema = fields.Schema(id=fields.STORED, text=fields.TEXT)
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(id=0, text=u("alfa bravo charlie delta echo"))
        w.add_document(id=1, text=u("bravo charlie delta echo alfa"))
        w.add_document(id=2, text=u("charlie delta echo bravo"))
        w.add_document(id=3, text=u("delta echo charlie"))
        w.add_document(id=4, text=u("echo delta"))

    with ix.searcher() as s:
        seq = query.Sequence([query.Term("text", u("echo")),
                              query.Term("text", u("alfa"))])
        q = query.And([query.Term("text", "bravo"), seq])
        r = s.search(q, limit=4)
        assert len(r) == 1
        assert r[0]["id"] == 1
def test_nested_skip():
    schema = fields.Schema(
        id=fields.ID(unique=True, stored=True),
        name=fields.TEXT(stored=True),
        name_ngrams=fields.NGRAMWORDS(minsize=4, field_boost=1.2),
        type=fields.TEXT,
    )

    domain = [(u"book_1", u"The Dark Knight Returns", u"book"),
              (u"chapter_1", u"The Dark Knight Returns", u"chapter"),
              (u"chapter_2", u"The Dark Knight Triumphant", u"chapter"),
              (u"chapter_3", u"Hunt the Dark Knight", u"chapter"),
              (u"chapter_4", u"The Dark Knight Falls", u"chapter")]

    with TempIndex(schema) as ix:
        with ix.writer() as w:
            for id, name, typ in domain:
                w.add_document(id=id, name=name, name_ngrams=name, type=typ)

        with ix.searcher() as s:
            all_parents = query.Term("type", "book")
            wanted_parents = query.Term("name", "dark")
            children_of_wanted_parents = query.NestedChildren(all_parents,
                                                              wanted_parents)

            r1 = s.search(children_of_wanted_parents)
            assert r1.scored_length() == 4
            assert [hit["id"] for hit in r1] == ["chapter_1", "chapter_2",
                                                 "chapter_3", "chapter_4"]

            wanted_children = query.And([query.Term("type", "chapter"),
                                         query.Term("name", "hunt")])

            r2 = s.search(wanted_children)
            assert r2.scored_length() == 1
            assert [hit["id"] for hit in r2] == ["chapter_3"]

            complex_query = query.And([children_of_wanted_parents,
                                       wanted_children])

            r3 = s.search(complex_query)
            assert r3.scored_length() == 1
            assert [hit["id"] for hit in r3] == ["chapter_3"]
def test_missing():
    schema = fields.Schema(kind=fields.ID,
                           name=fields.KEYWORD(scorable=True, stored=True))
    ix = RamStorage().create_index(schema)

    with ix.writer() as w:
        with w.group():
            w.add_document(kind=u("class"), name=u("Index"))
            w.add_document(kind=u("method"), name=u("add document"))
            w.add_document(kind=u("method"), name=u("add reader"))
            w.add_document(kind=u("method"), name=u("close"))
        with w.group():
            w.add_document(kind=u("class"), name=u("Accumulator"))
            w.add_document(kind=u("method"), name=u("add"))
            w.add_document(kind=u("method"), name=u("get result"))
        with w.group():
            w.add_document(kind=u("class"), name=u("Calculator"))
            w.add_document(kind=u("method"), name=u("add"))
            w.add_document(kind=u("method"), name=u("add all"))
            w.add_document(kind=u("method"), name=u("add some"))
            w.add_document(kind=u("method"), name=u("multiply"))
            w.add_document(kind=u("method"), name=u("close"))
        with w.group():
            w.add_document(kind=u("class"), name=u("Deleter"))
            w.add_document(kind=u("method"), name=u("add"))
            w.add_document(kind=u("method"), name=u("delete"))

    with ix.searcher() as s:
        q = query.NestedParent(query.Term("kind", "class"),
                               query.Term("name", "add"))
        r = s.search(q)
        assert [hit["name"] for hit in r] == ["Calculator", "Index",
                                              "Accumulator", "Deleter"]

    with ix.writer() as w:
        w.delete_by_term("name", "Accumulator")
        w.delete_by_term("name", "Calculator")

    with ix.searcher() as s:
        pq = query.Term("kind", "class")
        assert len(list(pq.docs(s))) == 2
        q = query.NestedParent(pq, query.Term("name", "add"))
        r = s.search(q)
        assert [hit["name"] for hit in r] == ["Index", "Deleter"]
def document_numbers(self, **kw):
    """Returns a generator of the document numbers for documents matching
    the given keyword arguments, where the keyword keys are field names
    and the values are terms that must appear in the field.

    >>> docnums = list(searcher.document_numbers(emailto=u"*****@*****.**"))
    """

    q = query.And([query.Term(k, v) for k, v in kw.iteritems()])
    return q.docs(self)
def get_filter(user, project, use_shared, use_file):
    """Create query to filter categories based on selection."""
    # Always include file imported memory
    if use_file:
        category_filter = [query.Term('category', CATEGORY_FILE)]
    else:
        category_filter = []
    # Per user memory
    if user:
        category_filter.append(
            query.Term('category', CATEGORY_USER_OFFSET + user.id)
        )
    # Private project memory
    if project:
        category_filter.append(
            query.Term('category', CATEGORY_PRIVATE_OFFSET + project.id)
        )
    # Shared memory
    if use_shared:
        category_filter.append(query.Term('category', CATEGORY_SHARED))
    return query.Or(category_filter)
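# Illustrative sketch (not from the source): the Or filter built by
# get_filter() can be combined with language terms into a single And filter,
# in the style of the lookup() methods above. The searcher, parser, language
# codes and limit used here are assumptions for the example only.
def example_filtered_lookup(searcher, parser, user, project):
    langfilter = query.And([
        query.Term('source_language', 'en'),
        query.Term('target_language', 'cs'),
        get_filter(user, project, use_shared=True, use_file=True),
    ])
    # Only documents matching both the text query and the filter are returned.
    return searcher.search(parser.parse(u"hello"), filter=langfilter, limit=20)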
def test_current_terms():
    domain = u("alfa bravo charlie delta").split()
    schema = fields.Schema(text=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    for ls in permutations(domain, 3):
        w.add_document(text=" ".join(ls), _stored_text=ls)
    w.commit()

    with ix.searcher() as s:
        q = query.And([query.Term("text", "alfa"),
                       query.Term("text", "charlie")])
        m = q.matcher(s)
        while m.is_active():
            assert sorted(m.matching_terms()) == [("text", b("alfa")),
                                                  ("text", b("charlie"))]
            m.next()
def test_lengths2():
    schema = fields.Schema(text=fields.TEXT(stored=True))
    ix = RamStorage().create_index(schema)
    count = 0
    for _ in xrange(3):
        w = ix.writer()
        for ls in permutations(u("alfa bravo charlie").split()):
            if "bravo" in ls and "charlie" in ls:
                count += 1
            w.add_document(text=u(" ").join(ls))
        w.commit(merge=False)

    with ix.searcher() as s:
        q = query.Or([query.Term("text", u("bravo")),
                      query.Term("text", u("charlie"))])
        r = s.search(q, limit=None)
        assert len(r) == count

        r = s.search(q, limit=3)
        assert len(r) == count
def test_or_nots1():
    # Issue #285
    schema = fields.Schema(a=fields.KEYWORD(stored=True),
                           b=fields.KEYWORD(stored=True))
    st = RamStorage()
    ix = st.create_index(schema)
    with ix.writer() as w:
        w.add_document(a=u("alfa"), b=u("charlie"))

    with ix.searcher() as s:
        q = query.And([
            query.Term("a", "alfa"),
            query.Or([
                query.Not(query.Term("b", "bravo")),
                query.Not(query.Term("b", "charlie"))
            ])
        ])
        r = s.search(q)
        assert len(r) == 1
def test_indexing():
    ix = make_index()
    with ix.searcher() as s:
        q = query.Term("text", "format")
        r = s.search(q)
        assert_equal(len(r), 2)
        assert_equal(r[0]["id"], "format")
        assert_equal(r[0]["subs"], 100)
        assert_equal(r[1]["id"], "vector")
        assert_equal(r[1]["subs"], 23)
def highlighted(self):
    engine = SearchEngine(self.indexpath)
    engine.open_index()
    searcher, _queryparser = engine.find()
    results = searcher.search(query.Term('content', self.q), limit=10)
    url = []
    content = []
    for hit in results:
        url.append(hit["url"])
        content.append(hit.highlights("content"))
    return (url, content)
def search(self, keyword, notebook_id=None):
    with self.index.searcher() as searcher:
        query_parser = MultifieldParser(
            ["title", "snippet"], schema=self.index.schema).parse(keyword)
        notebook_filter = (query.Term("notebook_id", notebook_id)
                           if notebook_id else None)
        results = searcher.search(query_parser, filter=notebook_filter,
                                  limit=None)
        return [res['note_id'] for res in results]
def eval_get_ranked_set_baseline(self, basefile):
    # Step 1: Read the saved keyterms for a subset of articles
    # (created by analyze_baseline_queries)
    g = Graph()
    g.parse(self.generic_path("keyterms", "analyzed", ".n3"), format="n3")

    articles = {}
    for (s, p, o) in g:
        if not str(s) in articles:
            articles[str(s)] = []
        articles[str(s)].append(str(o))

    # Step 2: Open the large whoosh index containing the text of
    # all cases. Then, create a query for each article based on
    # the keyterms.
    connector = query.Or
    indexdir = os.path.sep.join([self.config.datadir, 'ecj', 'index'])
    storage = FileStorage(indexdir)
    idx = storage.open_index()
    searcher = idx.searcher(weighting=scoring.BM25F())

    res = {}
    # for article in sorted(articles.keys()):
    for article in self._articles(basefile):
        terms = articles[article]
        rankedset = []
        # parser = qparser.QueryParser("content", idx.schema)
        # q = parser.parse(connector.join(terms))
        q = query.And([
            # query.Term("articles", article),
            connector([query.Term("content", x) for x in terms])
        ])
        # print q
        # self.log.debug("Article %s: %s", article, " or ".join(terms))
        results = searcher.search(q, limit=None)
        resultidx = 0
        # self.log.info("Keyterms for result: %r" %
        #               results.key_terms("content", docs=10, numterms=10))
        for result in results:
            reslbl = "%s (%s)" % (result['basefile'],
                                  results.score(resultidx))
            rankedset.append([result['basefile'], reslbl])
            # self.log.debug(u"\t%s: %2.2d" % (result['title'],
            #                                  results.score(resultidx)))
            resultidx += 1
        self.log.info(
            "Created baseline ranked set for %s: Top result %s (of %s)"
            % (article.split("/")[-1], rankedset[0][0], len(rankedset)))
        # return just a list of URIs, no scoring information. But the
        # full URI isn't available in the whoosh db, so we recreate it.
        res[article] = ["http://lagen.nu/ext/celex/%s" % x[0]
                        for x in rankedset]
    return res
def test_sorted_extend():
    from whoosh import sorting

    schema = fields.Schema(title=fields.TEXT(stored=True),
                           keywords=fields.TEXT,
                           num=fields.NUMERIC(stored=True, sortable=True))
    domain = u"alfa bravo charlie delta echo foxtrot golf hotel india".split()
    keys = u"juliet kilo lima november oskar papa quebec romeo".split()

    combined = 0
    tcount = 0
    kcount = 0
    with TempIndex(schema) as ix:
        with ix.writer() as w:
            for i, words in enumerate(permutations(domain, 3)):
                key = keys[i % (len(domain) - 1)]
                if "bravo" in words:
                    tcount += 1
                if key == "kilo":
                    kcount += 1
                if "bravo" in words or key == "kilo":
                    combined += 1
                w.add_document(title=u" ".join(words), keywords=key, num=i)

        with ix.searcher() as s:
            facet = sorting.MultiFacet([
                sorting.FieldFacet("num", reverse=True),
                sorting.ScoreFacet()
            ])
            r1 = s.search(query.Term("title", "bravo"), limit=None,
                          sortedby=facet)
            r2 = s.search(query.Term("keywords", "kilo"), limit=None,
                          sortedby=facet)
            assert len(r1) == tcount
            assert len(r2) == kcount

            r1.extend(r2)
            assert len(r1) == combined
def test_multireader():
    sc = fields.Schema(id=fields.ID(stored=True), content=fields.TEXT)
    st = RamStorage()
    ix = st.create_index(sc)
    w = ix.writer()
    w.add_document(id=u("alfa"), content=u("alfa"))
    w.add_document(id=u("bravo"), content=u("bravo"))
    w.add_document(id=u("charlie"), content=u("charlie"))
    w.add_document(id=u("delta"), content=u("delta"))
    w.add_document(id=u("echo"), content=u("echo"))
    w.add_document(id=u("foxtrot"), content=u("foxtrot"))
    w.add_document(id=u("golf"), content=u("golf"))
    w.add_document(id=u("hotel"), content=u("hotel"))
    w.add_document(id=u("india"), content=u("india"))
    w.commit()

    with ix.searcher() as s:
        r = s.search(query.Term("content", u("bravo")))
        assert_equal(len(r), 1)
        assert_equal(r[0]["id"], "bravo")

    w = ix.writer()
    w.add_document(id=u("juliet"), content=u("juliet"))
    w.add_document(id=u("kilo"), content=u("kilo"))
    w.add_document(id=u("lima"), content=u("lima"))
    w.add_document(id=u("mike"), content=u("mike"))
    w.add_document(id=u("november"), content=u("november"))
    w.add_document(id=u("oscar"), content=u("oscar"))
    w.add_document(id=u("papa"), content=u("papa"))
    w.add_document(id=u("quebec"), content=u("quebec"))
    w.add_document(id=u("romeo"), content=u("romeo"))
    w.commit()
    assert_equal(len(ix._segments()), 2)

    #r = ix.reader()
    #assert r.__class__.__name__, "MultiReader")
    #pr = r.postings("content", u("bravo"))

    with ix.searcher() as s:
        r = s.search(query.Term("content", u("bravo")))
        assert_equal(len(r), 1)
        assert_equal(r[0]["id"], "bravo")
def related(self, kitab, vrr, nodeIdNum):
    dn, kt = self.keyterms(kitab, vrr, nodeIdNum)
    if not dn:
        return None
    for t, r in kt:
        print "term=", t, " @ rank=", r
    q = query.Or([query.Term("content", t) for (t, r) in kt])
    results = self.indexer.searcher().search(q, limit=10)
    for i, fields in enumerate(results):
        if results.docnum(i) != dn:
            print fields['kitab'], "\t\t", str(fields['nodeIdNum']), \
                "\t\t", fields['title']
def parse_query(self, fieldname, qstring, boost=1.0):
    from whoosh import query

    text = None
    if qstring in self.falses:
        text = self.strings[0]
    elif qstring in self.trues:
        text = self.strings[1]

    if text is None:
        return query.NullQuery
    return query.Term(fieldname, text, boost=boost)
def query_a(r, list_a, e1_type, e2_type, index):
    idx = open_dir(index)
    entity1 = "<" + e1_type + ">" + r.e1 + "</" + e1_type + ">"
    entity2 = "<" + e2_type + ">" + r.e2 + "</" + e2_type + ">"
    t1 = query.Term("sentence", entity1)
    t2 = query.Term("sentence", r.patterns)
    t3 = query.Term("sentence", entity2)
    q1 = spans.SpanNear2([t1, t2, t3], slop=5, ordered=True)
    q2 = spans.SpanNear2([t1, t3], slop=5, ordered=True)
    with idx.searcher() as searcher:
        entities_r = searcher.search(q1)
        entities = searcher.search(q2)
        # TODO: apply stemming or normalization to the word used in the query
        if len(entities) > 0:
            pmi = float(len(entities_r)) / float(len(entities))
            # TODO: what is the best threshold value?
            if pmi >= 0.5:
                #print entity1, '\t', r.patterns, '\t', entity2, pmi
                list_a.append(r)
def parse_query(self, fieldname, qstring, boost=1.0):
    from whoosh import query
    from whoosh.support.times import is_ambiguous

    at = self._parse_datestring(qstring)
    if is_ambiguous(at):
        startnum = datetime_to_long(at.floor())
        endnum = datetime_to_long(at.ceil())
        return query.NumericRange(fieldname, startnum, endnum)
    else:
        return query.Term(fieldname, self.to_text(at), boost=boost)
def search(self, type, pattern, start=0, rows=10, default_field='text'):
    assert type in ('idea', 'user'), "Type %r not supported" % type
    with self._index.searcher() as searcher:
        q = QueryParser(default_field,
                        schema=self._index.schema).parse(pattern)
        results = searcher.search(q, limit=max(start + rows, 1),
                                  filter=query.Term('type', type))
        return ([item['id'] for item in results[start:start + rows]],
                len(results))
def run(self):
    print(self.name + " starting")
    for _ in xrange(10):
        ix = st.open_index()
        s = ix.searcher()
        q = query.Term("content", random.choice(domain))
        s.search(q, limit=10)
        s.close()
        ix.close()
        time.sleep(0.1)
    print(self.name + " done")
def test_filter_results_count():
    schema = fields.Schema(id=fields.STORED,
                           django_ct=fields.ID(stored=True),
                           text=fields.TEXT)
    with TempIndex(schema) as ix:
        with ix.writer() as w:
            w.add_document(id=1, django_ct=u("app.model1"),
                           text=u("alfa bravo charlie"))
            w.add_document(id=2, django_ct=u("app.model1"),
                           text=u("alfa bravo delta"))
            w.add_document(id=3, django_ct=u("app.model2"),
                           text=u("alfa charlie echo"))

        with ix.searcher() as s:
            q = query.Term("django_ct", u("app.model1"))
            r1 = s.search(q, limit=None)
            assert len(r1) == 2

            q = query.Term("text", u("alfa"))
            r2 = s.search(q, filter=r1, limit=1)
            assert len(r2) == 2