async def search(query_str, ctx):
    ix = open_dir("indexdir")
    parser = QueryParser("content", ix.schema)
    parser.add_plugin(qparser.FuzzyTermPlugin())
    parser.add_plugin(GtLtPlugin())
    parser.add_plugin(DateParserPlugin())
    query = parser.parse(query_str)
    print(query)
    with ix.searcher(weighting=scoring.PL2) as searcher:
        results = searcher.search(query, limit=5)
        results.fragmenter = highlight.SentenceFragmenter()
        results.fragmenter.surround = 50
        results.fragmenter.maxchars = 10000
        results.formatter = DiscordBoldFormatter()
        embed = discord.Embed(
            title="Results",
            color=discord.Color(0x3cd63d),
            description="From search: **{}**".format(query_str))
        for hit in results:
            # embed.add_field(name="[{}]({})".format(hit["title"], hit["url"]),
            #                 value="{}".format(hit.highlights("content")))
            embed.add_field(
                name="\u200b",
                value=f"[{hit['title']}]({hit['url']})\n"
                      f"{hit.highlights('content', minscore=0)}",
                inline=False)
        await ctx.send(embed=embed)
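DiscordBoldFormatter is referenced above but not defined in this snippet. A minimal sketch, assuming it follows Whoosh's custom-formatter pattern (subclass highlight.Formatter and override format_token) to wrap matched terms in Discord's ** bold markers:

from whoosh import highlight

class DiscordBoldFormatter(highlight.Formatter):
    """Wrap matched terms in ** so Discord renders them in bold."""

    def format_token(self, text, token, replace=False):
        # get_text() returns the token's (possibly replaced) source text
        tokentext = highlight.get_text(text, token, replace)
        return "**{}**".format(tokentext)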
def test_sentence_fragment():
    text = u("This is the first sentence. This one doesn't have the word. " +
             "This sentence is the second. Third sentence here.")
    terms = ("sentence",)
    sa = analysis.StandardAnalyzer(stoplist=None)
    sf = highlight.SentenceFragmenter()
    uc = highlight.UppercaseFormatter()
    htext = highlight.highlight(text, terms, sa, sf, uc)
    assert htext == "This is the first SENTENCE...This SENTENCE is the second...Third SENTENCE here"
def _search_logs(self, request):
    querystr = bytes_to_str(request.args[b"q"][0])
    if b"page" in request.args:
        try:
            page = int(request.args[b"page"][0])
        except ValueError:
            page = -1
    else:
        page = 1

    if page < 1:
        log_data = "Invalid page number specified"
        request.write(
            str_to_bytes(
                search_page_template.format(log_data=log_data,
                                            title=self.title,
                                            header=header,
                                            footer=footer,
                                            channel=self.channel)))
        request.finish()
        return

    with self.ix.searcher() as searcher:
        query = QueryParser("content", self.ix.schema).parse(querystr)
        res_page = searcher.search_page(query,
                                        page,
                                        pagelen=self.pagelen,
                                        sortedby="date",
                                        reverse=True)
        res_page.results.fragmenter = highlight.SentenceFragmenter(
            sentencechars=u".!?\u2026", charlimit=None)
        log_data = ""
        for hit in res_page:
            log_data += ("<ul><div><label><a href='{channel}?date="
                         "{date}'>{date}</a></label>".format(
                             channel=self.channel_link(),
                             date=hit["date"].strftime("%Y-%m-%d"))
                         + hit.highlights("content") + "</div></ul>")
        if not res_page.is_last_page():
            log_data += "<a href='?q={}&page={}'>Next</a>".format(
                querystr, page + 1)
        if not res_page:
            log_data = "No logs found containing: {}".format(
                htmlescape(querystr))
        request.write(
            str_to_bytes(
                search_page_template.format(log_data=log_data,
                                            title=self.title,
                                            header=header,
                                            footer=footer,
                                            channel=self.channel_link())))
        request.finish()
def test_highlight_ngrams():
    schema = fields.Schema(text=fields.NGRAMWORDS(stored=True))
    ix = RamStorage().create_index(schema)
    with ix.writer() as w:
        w.add_document(text=u("Multiplication and subtraction are good"))

    with ix.searcher() as s:
        qp = qparser.QueryParser("text", ix.schema)
        q = qp.parse(u("multiplication"))
        r = s.search(q)
        assert r.scored_length() == 1

        r.fragmenter = highlight.SentenceFragmenter()
        r.formatter = highlight.UppercaseFormatter()
        snippet = r[0].highlights("text")
        assert snippet == "MULTIPLICATIon and subtracTION are good"
def normalize_data(self, response):
    """
    Normalize the search response, adding pagination info.

    :param response: results page returned by the search
    :return: normalized data
    """
    data = {'items': [], 'id_list': []}
    response.results.fragmenter.surround = 80  #: summary length
    # page_result.results.fragmenter.maxchars = 300
    my_cf = highlight.SentenceFragmenter()
    # page_result.results.fragmenter = my_cf
    for result in response.results:
        # print result.title
        result_dict = dict(result)
        result_dict['summary'] = result.highlights("content", top=2)
        data['items'].append(result_dict)
        data['id_list'].append(int(result_dict['id']))
    data['total'] = response.total
    data['pages'] = response.pagecount
    data['page'] = response.pagenum
    return data
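The method above constructs a SentenceFragmenter (my_cf) but the line that would attach it to the results is commented out, so the default fragmenter is what actually produces the summaries. A minimal sketch of a caller that wires the fragmenter in before normalizing; search_and_normalize, ix, query, and page are hypothetical names, not part of the original code:

from whoosh import highlight

def search_and_normalize(self, ix, query, page, pagelen=10):
    # Hypothetical caller: run a paged search, attach the SentenceFragmenter to
    # the underlying Results, and normalize while the searcher is still open
    # (hit.highlights() reads stored fields through the open searcher).
    with ix.searcher() as searcher:
        response = searcher.search_page(query, page, pagelen=pagelen)
        response.results.fragmenter = highlight.SentenceFragmenter(maxchars=300)
        return self.normalize_data(response)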
def test_snippets():
    ana = analysis.StemmingAnalyzer()
    schema = fields.Schema(text=fields.TEXT(stored=True, analyzer=ana))
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    w.add_document(text=u(
        "Lay out the rough animation by creating the important poses where they occur on the timeline."
    ))
    w.add_document(text=u(
        "Set key frames on everything that's key-able. This is for control and predictability: you don't want to accidentally leave something un-keyed. This is also much faster than selecting the parameters to key."
    ))
    w.add_document(text=u(
        "Use constant (straight) or sometimes linear transitions between keyframes in the channel editor. This makes the character jump between poses."
    ))
    w.add_document(text=u(
        "Keying everything gives quick, immediate results. But it can become difficult to tweak the animation later, especially for complex characters."
    ))
    w.add_document(text=u(
        "Copy the current pose to create the next one: pose the character, key everything, then copy the keyframe in the playbar to another frame, and key everything at that frame."
    ))
    w.commit()

    target = [
        "Set KEY frames on everything that's KEY-able",
        "Copy the current pose to create the next one: pose the character, KEY everything, then copy the keyframe in the playbar to another frame, and KEY everything at that frame",
        "KEYING everything gives quick, immediate results"
    ]

    with ix.searcher() as s:
        qp = qparser.QueryParser("text", ix.schema)
        q = qp.parse(u("key"))
        r = s.search(q, terms=True)
        r.fragmenter = highlight.SentenceFragmenter()
        r.formatter = highlight.UppercaseFormatter()
        assert sorted([hit.highlights("text", top=1) for hit in r]) == sorted(target)