Exemple #1
0
def test_whole_noterms():
    """Check WholeFragmenter output when the query has no terms in the field.

    One document is indexed with a stored ``text`` field and a ``tag``
    keyword.  Searching on ``text`` must uppercase the matched word;
    searching on ``tag`` must give an empty highlight for ``text`` by
    default, and the untouched text when ``minscore=0`` is passed.
    """
    plain_text = u("alfa bravo charlie delta echo foxtrot golf")

    schema = fields.Schema(text=fields.TEXT(stored=True), tag=fields.KEYWORD)
    index = RamStorage().create_index(schema)
    with index.writer() as writer:
        writer.add_document(text=plain_text, tag=u("foo"))

    with index.searcher() as searcher:
        # Case 1: the query term lives in the field being highlighted.
        results = searcher.search(query.Term("text", u("delta")))
        assert len(results) == 1

        results.fragmenter = highlight.WholeFragmenter()
        results.formatter = highlight.UppercaseFormatter()
        highlighted = results[0].highlights("text")
        assert highlighted == u("alfa bravo charlie DELTA echo foxtrot golf")

        # Case 2: the query term lives in another field ("tag").
        results = searcher.search(query.Term("tag", u("foo")))
        assert len(results) == 1
        results.fragmenter = highlight.WholeFragmenter()
        results.formatter = highlight.UppercaseFormatter()

        # With the default minimum score nothing is returned ...
        highlighted = results[0].highlights("text")
        assert highlighted == u("")

        # ... while minscore=0 yields the whole, unmodified text.
        highlighted = results[0].highlights("text", minscore=0)
        assert highlighted == plain_text
Exemple #2
0
def test_null_fragment():
    """Highlight the module-level ``_doc`` as one whole fragment.

    The terms "bravo" and "india" are expected to come back uppercased
    while the rest of the text is returned unchanged.
    """
    expected = "alfa BRAVO charlie delta echo foxtrot golf hotel INDIA juliet kilo lima"
    highlighted = highlight.highlight(
        _doc,
        frozenset(("bravo", "india")),
        analysis.StandardAnalyzer(),
        highlight.WholeFragmenter(),
        highlight.UppercaseFormatter(),
    )
    assert highlighted == expected
Exemple #3
0
def highlight_all(result, field):
    """Return the full stored value of ``field`` with query matches highlighted.

    A dedicated highlighter is built with a ``WholeFragmenter`` and the
    formatter already configured on the hit's parent results.  The
    highlighted string is wrapped in ``Markup``; when that wrapper is
    falsy (e.g. an empty highlight), the raw stored text is returned
    instead.

    :param result: a search hit; it is indexed by field name and must
        expose ``results.highlighter.formatter``.
    :param field: name of the stored field to highlight.
    """
    stored_text = result[field]
    whole_highlighter = highlight.Highlighter(
        fragmenter=highlight.WholeFragmenter(),
        formatter=result.results.highlighter.formatter,
    )
    marked_up = Markup(
        whole_highlighter.highlight_hit(result, field, text=stored_text))
    if marked_up:
        return marked_up
    # Nothing was highlighted: fall back to the plain stored value.
    return stored_text
Exemple #4
0
def test_workflow_easy():
    """Highlight hits through the results object's fragmenter/formatter.

    Five titles are indexed, the query "man" is parsed against the
    ``title`` field, and each of the two hits is highlighted via
    ``hit.highlights`` with the matched word uppercased.
    """
    documents = (
        ("1", "The man who wasn't there"),
        ("2", "The dog who barked at midnight"),
        ("3", "The invisible man"),
        ("4", "The girl with the dragon tattoo"),
        ("5", "The woman who disappeared"),
    )

    schema = fields.Schema(id=fields.ID(stored=True),
                           title=fields.TEXT(stored=True))
    index = RamStorage().create_index(schema)

    writer = index.writer()
    for doc_id, title in documents:
        writer.add_document(id=u(doc_id), title=u(title))
    writer.commit()

    with index.searcher() as searcher:
        # Turn the user's query string into a query object.
        user_query = qparser.QueryParser(
            "title", schema=index.schema).parse(u("man"))
        results = searcher.search(user_query, terms=True)
        assert_equal(len(results), 2)

        # Whole-field highlighting, with matches shown in upper case.
        results.fragmenter = highlight.WholeFragmenter()
        results.formatter = highlight.UppercaseFormatter()
        highlighted_titles = [hit.highlights("title") for hit in results]
        assert_equal(highlighted_titles, ["The invisible MAN",
                                          "The MAN who wasn't there"])
Exemple #5
0
def test_html_escape():
    """Check the HTML formatter's escaping around a highlighted match.

    The expected string has ``<`` and ``>`` replaced by entities, the
    double quotes left as-is, and the match wrapped in a ``<strong>``
    tag.
    """
    source_text = u('alfa <bravo "charlie"> delta')
    expected = 'alfa &lt;<strong class="match term0">bravo</strong> "charlie"&gt; delta'

    highlighted = highlight.highlight(
        source_text,
        frozenset(["bravo"]),
        analysis.StandardAnalyzer(),
        highlight.WholeFragmenter(),
        highlight.HtmlFormatter(),
    )
    assert highlighted == expected
Exemple #6
0
def find(query):
    """Search the ``transcript`` field of the module-level index ``ix``.

    The query string is parsed against the index schema, and the results
    are configured with a ``WholeFragmenter`` and the module-level
    formatter ``brf`` before being converted to dictionaries.

    :param query: the user's query string.
    :returns: a list of dicts with the keys ``url``, ``summary`` (the
        highlighted transcript), ``date`` (formatted as ``'%b %d %Y'``)
        and ``transcript`` (the stored transcript).
    """
    with ix.searcher() as searcher:
        parsed_query = QueryParser("transcript", ix.schema).parse(query)
        hits = searcher.search(parsed_query)
        hits.fragmenter = highlight.WholeFragmenter()
        hits.formatter = brf
        # Build the list inside the ``with`` block, while the searcher
        # is still open.
        return [
            {
                'url': hit['url'],
                'summary': hit.highlights('transcript'),
                'date': datetime.strftime(hit['date'], '%b %d %Y'),
                'transcript': hit['transcript'],
            }
            for hit in hits
        ]
Exemple #7
0
def test_workflow_manual():
    """Highlight hits by calling ``highlight.highlight`` directly.

    Unlike the results-based workflow, the query terms, analyzer,
    fragmenter and formatter are all gathered by hand and applied to
    each hit's stored title.
    """
    documents = (
        ("1", "The man who wasn't there"),
        ("2", "The dog who barked at midnight"),
        ("3", "The invisible man"),
        ("4", "The girl with the dragon tattoo"),
        ("5", "The woman who disappeared"),
    )

    schema = fields.Schema(id=fields.ID(stored=True),
                           title=fields.TEXT(stored=True))
    index = RamStorage().create_index(schema)

    writer = index.writer()
    for doc_id, title in documents:
        writer.add_document(id=u(doc_id), title=u(title))
    writer.commit()

    with index.searcher() as searcher:
        # Turn the user's query string into a query object.
        user_query = qparser.QueryParser(
            "title", schema=index.schema).parse(u("man"))

        # Keep only the query terms that belong to the "title" field.
        title_terms = []
        for fieldname, text in user_query.all_terms():
            if fieldname == "title":
                title_terms.append(text)

        # Run the search.
        results = searcher.search(user_query)
        assert_equal(len(results), 2)

        # Highlight with the analyzer the field itself uses
        # (schema[fieldname].analyzer). Don't do this for non-text
        # field types such as DATETIME.
        title_analyzer = schema["title"].analyzer

        # The whole title should be shown rather than extracted
        # fragments, hence WholeFragmenter.
        whole_fragmenter = highlight.WholeFragmenter()

        # Matched terms are simply rendered in upper case here.
        upper_formatter = highlight.UppercaseFormatter()

        highlighted_titles = [
            highlight.highlight(hit["title"], title_terms, title_analyzer,
                                whole_fragmenter, upper_formatter)
            for hit in results
        ]

        assert_equal(highlighted_titles,
                     ["The invisible MAN", "The MAN who wasn't there"])
def test_paged_highlights():
    """Check that hits taken from a results page can be highlighted.

    Six documents are indexed; the first page (length 3) for the term
    "alfa" is fetched, the fragmenter and formatter are set on the
    page's underlying results, and the first hit's highlight is checked.
    """
    texts = (
        "alfa bravo charlie delta echo foxtrot",
        "bravo charlie delta echo foxtrot golf",
        "charlie delta echo foxtrot golf hotel",
        "delta echo foxtrot golf hotel india",
        "echo foxtrot golf hotel india juliet",
        "foxtrot golf hotel india juliet kilo",
    )

    schema = fields.Schema(text=fields.TEXT(stored=True))
    index = RamStorage().create_index(schema)
    with index.writer() as writer:
        for text in texts:
            writer.add_document(text=u(text))

    with index.searcher() as searcher:
        alfa_query = query.Term("text", u("alfa"))
        first_page = searcher.search_page(alfa_query, 1, pagelen=3)

        # Highlight settings go on the page's underlying results object.
        first_page.results.fragmenter = highlight.WholeFragmenter()
        first_page.results.formatter = highlight.UppercaseFormatter()
        highlighted = first_page[0].highlights("text")
        assert highlighted == u("ALFA bravo charlie delta echo foxtrot")
Exemple #9
0
    def quote(bot, message, author):
        """
        Returns a quote. No argument returns a random quote.
        A text argument will search through the quote DB and return a random result.
        An argument of the form `id:69` will attempt to get the quote with the id of `69`.
        """
        # NOTE(review): the docstring above is kept verbatim in case it is
        # surfaced as user-facing help text — confirm before rewording.
        buf = MessageBuffer()
        results = []

        # Nothing indexed at all: report that and stop early.
        if bot.index.doc_count() == 0:
            buf.add("No quotes have been added.")
            return buf

        with bot.index.searcher() as searcher:
            if message == "":
                # No argument: look up a random id between 1 and the
                # document count, using the default search limit.
                field_name = "id"
                query_text = str(randint(1, bot.index.doc_count()))
                search_options = {}
            else:
                # Text argument: search the quote text with no result limit.
                field_name = "quote"
                query_text = message
                search_options = {"limit": None}

            parsed = QueryParser(field_name, bot.index.schema).parse(query_text)
            results = searcher.search(parsed, **search_options)

            if len(results) == 0:
                buf.add("No quote found.")
                return buf

            results.formatter = BoldFormatter()
            results.fragmenter = highlight.WholeFragmenter()
            picked = choice(results)
            # minscore=0 is passed so a text is requested even when no
            # term scores in the "quote" field (e.g. the id lookup above).
            quote_text = bot.santise_quote(
                picked.highlights("quote", minscore=0))
            buf.add(f"[{picked['id']}] {quote_text}")

            # Add the attribution line only when both fields are present.
            has_attribution = all(
                key in picked.keys() for key in ("submitter", "submitted"))
            if has_attribution:
                buf.add("")
                buf.add(
                    f"*Submitted by {picked['submitter']} on {picked['submitted']}*."
                )

        return buf
Exemple #10
0
            results.formatter = EscapeSeqFormatter()

            # first run to extract context from tex files
            results.fragmenter = highlight.ContextFragmenter(maxchars=50,
                                                             surround=20)
            task_hl = []
            solution_hl = []

            wrapper = textwrap.TextWrapper(initial_indent='\t',
                                           subsequent_indent='\t')

            for i, res in enumerate(results, start=1):
                task_hl.append(wrapper.fill(hf(res, "task")))
                solution_hl.append(wrapper.fill(hf(res, "solution")))

            results.fragmenter = highlight.WholeFragmenter(charlimit=300)

            print('-' * 60)
            print(
                'found \x1b[37;1m%u\x1b[37;0m matching entries for query:\x1b[92;2m %s\x1b[0m\n'
                % (len(results), args.q))
            for i, res in enumerate(results, start=1):
                datestr = ''
                if res["lastupdate"] is not None:
                    datestr = res["lastupdate"].strftime("%Y-%m-%d")

                print('\x1b[37;1m(%2u) \x1b[91;22m%s \x1b[0m%s:' %
                      (i, hf(res, "folder_name"), hf(res, "maintainer")))
                print('\t\x1b[36m%s\x1b[33m %s\x1b[0m' %
                      (hf(res, "language"), datestr))