Example #1
async def search(query_str, ctx):
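    # Open the existing on-disk index and extend the query parser with fuzzy-term,
    # greater-than/less-than range, and date-expression syntax.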
    ix = open_dir("indexdir")
    parser = QueryParser("content", ix.schema)
    parser.add_plugin(qparser.FuzzyTermPlugin())
    parser.add_plugin(GtLtPlugin())
    parser.add_plugin(DateParserPlugin())
    query = parser.parse(query_str)
    print(query)
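    # Score with Whoosh's PL2 model; fragments are whole sentences capped at 10000
    # chars (surround is a ContextFragmenter option and has no effect here), and
    # DiscordBoldFormatter is a project-specific formatter, presumably **-bolding matches.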
    with ix.searcher(weighting=scoring.PL2) as searcher:
        results = searcher.search(query, limit=5)
        results.fragmenter = highlight.SentenceFragmenter()
        results.fragmenter.surround = 50
        results.fragmenter.maxchars = 10000
        results.formatter = DiscordBoldFormatter()
        embed = discord.Embed(
            title="Results",
            color=discord.Color(0x3cd63d),
            description="From search: **{}**".format(query_str))
        for hit in results:
            # embed.add_field(name="[{}]({})".format(hit["title"], hit["url"]), value="{}".format(hit.highlights("content")))
            embed.add_field(name="\u200b",
                            value=f"[{hit['title']}]({hit['url']})\n"
                            f"{hit.highlights('content', minscore=0)}",
                            inline=False)
    await ctx.send(embed=embed)
Example #2
def test_sentence_fragment():
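    # Each sentence containing the term "sentence" becomes its own fragment; matches
    # are uppercased and the fragments are joined with "...".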
    text = u("This is the first sentence. This one doesn't have the word. " +
             "This sentence is the second. Third sentence here.")
    terms = ("sentence", )
    sa = analysis.StandardAnalyzer(stoplist=None)
    sf = highlight.SentenceFragmenter()
    uc = highlight.UppercaseFormatter()
    htext = highlight.highlight(text, terms, sa, sf, uc)
    assert htext == "This is the first SENTENCE...This SENTENCE is the second...Third SENTENCE here"
Example #3
def _search_logs(self, request):
    querystr = bytes_to_str(request.args[b"q"][0])
    if b"page" in request.args:
        try:
            page = int(request.args[b"page"][0])
        except ValueError:
            page = -1
    else:
        page = 1
    if page < 1:
        log_data = "Invalid page number specified"
        request.write(
            str_to_bytes(
                search_page_template.format(log_data=log_data,
                                            title=self.title,
                                            header=header,
                                            footer=footer,
                                            channel=self.channel)))
        request.finish()
        return
    with self.ix.searcher() as searcher:
        query = QueryParser("content", self.ix.schema).parse(querystr)
        res_page = searcher.search_page(query,
                                        page,
                                        pagelen=self.pagelen,
                                        sortedby="date",
                                        reverse=True)
        # Fragment highlights on sentence boundaries; charlimit=None removes
        # the default limit on how much of the stored text is scanned.
        res_page.results.fragmenter = highlight.SentenceFragmenter(
            sentencechars=u".!?\u2026", charlimit=None)
        log_data = ""
        for hit in res_page:
            log_data += ("<ul><div><label><a href='{channel}?date="
                         "{date}'>{date}</a></label>".format(
                             channel=self.channel_link(),
                             date=hit["date"].strftime("%Y-%m-%d")) +
                         hit.highlights("content") + "</div></ul>")
        if not res_page.is_last_page():
            log_data += "<a href='?q={}&page={}'>Next</a>".format(
                querystr, page + 1)
        if not res_page:
            log_data = "No logs found containing: {}".format(
                htmlescape(querystr))
    request.write(
        str_to_bytes(
            search_page_template.format(log_data=log_data,
                                        title=self.title,
                                        header=header,
                                        footer=footer,
                                        channel=self.channel_link())))
    request.finish()
Example #4
def test_highlight_ngrams():
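    # NGRAMWORDS indexes character n-grams, so only the n-grams that actually matched
    # the query are uppercased, highlighting parts of words rather than whole tokens.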
    schema = fields.Schema(text=fields.NGRAMWORDS(stored=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(text=u("Multiplication and subtraction are good"))

    with ix.searcher() as s:
        qp = qparser.QueryParser("text", ix.schema)
        q = qp.parse(u("multiplication"))
        r = s.search(q)
        assert r.scored_length() == 1

        r.fragmenter = highlight.SentenceFragmenter()
        r.formatter = highlight.UppercaseFormatter()
        snippet = r[0].highlights("text")
        assert snippet == "MULTIPLICATIon and subtracTION are good"
Example #5
    def normalize_data(self, response):
        """
        Normalize the response adding pagination
        :param response: Response from elastic search  
        :return: data normalized
        """
        data = {'items': [], 'id_list': []}
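        # The results object's default fragmenter is a ContextFragmenter; surround is
        # roughly how many characters of context appear around each matched term.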
        response.results.fragmenter.surround = 80  #: summary length
        # page_result.results.fragmenter.maxchars = 300
        my_cf = highlight.SentenceFragmenter()
        # page_result.results.fragmenter = my_cf
        for result in response.results:
            # print result.title
            result_dict = dict(result)
            result_dict['summary'] = result.highlights("content", top=2)
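            # top=2: join the two best-scoring fragments into the summary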
            data['items'].append(result_dict)
            data['id_list'].append(int(result_dict['id']))

        data['total'] = response.total
        data['pages'] = response.pagecount
        data['page'] = response.pagenum
        return data
Example #6
def test_snippets():
    ana = analysis.StemmingAnalyzer()
    schema = fields.Schema(text=fields.TEXT(stored=True, analyzer=ana))
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    w.add_document(text=u(
        "Lay out the rough animation by creating the important poses where they occur on the timeline."
    ))
    w.add_document(text=u(
        "Set key frames on everything that's key-able. This is for control and predictability: you don't want to accidentally leave something un-keyed. This is also much faster than selecting the parameters to key."
    ))
    w.add_document(text=u(
        "Use constant (straight) or sometimes linear transitions between keyframes in the channel editor. This makes the character jump between poses."
    ))
    w.add_document(text=u(
        "Keying everything gives quick, immediate results. But it can become difficult to tweak the animation later, especially for complex characters."
    ))
    w.add_document(text=u(
        "Copy the current pose to create the next one: pose the character, key everything, then copy the keyframe in the playbar to another frame, and key everything at that frame."
    ))
    w.commit()

    target = [
        "Set KEY frames on everything that's KEY-able",
        "Copy the current pose to create the next one: pose the character, KEY everything, then copy the keyframe in the playbar to another frame, and KEY everything at that frame",
        "KEYING everything gives quick, immediate results"
    ]

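    # The stemming analyzer lets the query "key" match "keying"; SentenceFragmenter
    # returns each matching sentence whole, with the matched words uppercased.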
    with ix.searcher() as s:
        qp = qparser.QueryParser("text", ix.schema)
        q = qp.parse(u("key"))
        r = s.search(q, terms=True)
        r.fragmenter = highlight.SentenceFragmenter()
        r.formatter = highlight.UppercaseFormatter()

        assert sorted([hit.highlights("text", top=1)
                       for hit in r]) == sorted(target)