def search(self, query_list, fields=None):
    with self.ix.searcher() as searcher:
        # Lowercase every term except the boolean operators.
        query_list2 = []
        for qq in query_list:
            if qq == 'AND' or qq == 'OR':
                query_list2.append(qq)
            else:
                query_list2.append(qq.lower())
        query_string = " ".join(query_list2)

        query = None
        est = pytz.timezone('America/New_York')
        if ":" in query_string:
            # If the user DOES specify a field, this list determines which
            # fields are searched with the free terms (those with no field).
            fields = ['title', 'content', 'owner_name', 'owner_email', 'github_user']
            parser = MultifieldParser(fields, schema=self.ix.schema)
            # basedate anchors relative dates ("yesterday") to now, US Eastern.
            parser.add_plugin(DateParserPlugin(free=True, basedate=datetime.now(est)))
            parser.add_plugin(GtLtPlugin())
            try:
                query = parser.parse(query_string)
            except Exception:
                # The DateParser plugin chokes on unquoted field values,
                # so quote each value and retry.
                query_string2 = re.sub(r':(\w+)', r":'\g<1>'", query_string)
                try:
                    query = parser.parse(query_string2)
                except Exception:
                    print("parsing query %s failed" % (query_string))
                    print("parsing query %s also failed" % (query_string2))
                    query = parser.parse('')
        else:
            # If the user does not specify a field,
            # these are the fields that are actually searched.
            fields = ['url', 'title', 'content', 'owner_name', 'owner_email', 'github_user']
            parser = MultifieldParser(fields, schema=self.ix.schema)
            parser.add_plugin(DateParserPlugin(free=True, basedate=datetime.now(est)))
            parser.add_plugin(GtLtPlugin())
            try:
                query = parser.parse(query_string)
            except Exception:
                print("parsing query %s failed" % (query_string))
                query = parser.parse('')

        parsed_query = "%s" % query
        print("query: %s" % parsed_query)
        results = searcher.search(query, terms=False, scored=True, groupedby="kind")
        search_result = self.create_search_result(results)
        return parsed_query, search_result
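# A quick worked example of the quoting fallback above: the regex wraps each
# bare field value in single quotes so the date parser can re-parse it. The
# sample query string is hypothetical.
import re

s = "created:jan owner_name:alice"
print(re.sub(r':(\w+)', r":'\g<1>'", s))
# -> created:'jan' owner_name:'alice'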
async def search(query_str, ctx):
    ix = open_dir("indexdir")
    parser = QueryParser("content", ix.schema)
    parser.add_plugin(qparser.FuzzyTermPlugin())
    parser.add_plugin(GtLtPlugin())
    parser.add_plugin(DateParserPlugin())
    query = parser.parse(query_str)
    print(query)
    with ix.searcher(weighting=scoring.PL2) as searcher:
        results = searcher.search(query, limit=5)
        results.fragmenter = highlight.SentenceFragmenter()
        results.fragmenter.surround = 50
        results.fragmenter.maxchars = 10000
        results.formatter = DiscordBoldFormatter()
        embed = discord.Embed(
            title="Results",
            color=discord.Color(0x3cd63d),
            description="From search: **{}**".format(query_str))
        for hit in results:
            # embed.add_field(name="[{}]({})".format(hit["title"], hit["url"]),
            #                 value="{}".format(hit.highlights("content")))
            embed.add_field(name="\u200b",
                            value=f"[{hit['title']}]({hit['url']})\n"
                                  f"{hit.highlights('content', minscore=0)}",
                            inline=False)
        await ctx.send(embed=embed)
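# DiscordBoldFormatter is referenced above but not defined in this snippet.
# A minimal sketch of one plausible implementation, assuming it only wraps
# matched terms in Discord's **bold** markup (it follows the standard
# whoosh.highlight custom-formatter pattern):
from whoosh.highlight import Formatter, get_text

class DiscordBoldFormatter(Formatter):
    """Wrap each matched term in **...** so Discord renders it bold."""

    def format_token(self, text, token, replace=False):
        # get_text() pulls the matched token's text out of the field value.
        return "**{}**".format(get_text(text, token, replace))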
def searchFe(busqueda):
    ix = open_dir("index")
    searcher = ix.searcher()
    # Open-ended date range: everything from the given date (busqueda) onward.
    date = "{" + busqueda + " to]"
    parser = QueryParser("fecha", ix.schema)
    parser.add_plugin(DateParserPlugin(free=True))
    parser.add_plugin(GtLtPlugin())
    myquery = parser.parse(date)
    results = searcher.search(myquery)
    return results
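# A minimal sketch of the index searchFe() assumes: an "index" directory whose
# schema has a DATETIME field named "fecha". The "texto" field and the sample
# documents are hypothetical, added only so the example is runnable.
import os
from datetime import datetime

from whoosh.fields import Schema, DATETIME, TEXT
from whoosh.index import create_in

schema = Schema(fecha=DATETIME(stored=True), texto=TEXT(stored=True))
os.makedirs("index", exist_ok=True)
ix = create_in("index", schema)
writer = ix.writer()
writer.add_document(fecha=datetime(2020, 1, 15), texto=u"first entry")
writer.add_document(fecha=datetime(2021, 6, 1), texto=u"second entry")
writer.commit()

# Everything dated after 1 March 2020 (the "{... to]" open-ended range):
for hit in searchFe("1 march 2020"):
    print(hit)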
def __init__(self, fieldname):
    '''
    Constructor
    '''
    self.w_parser = SimpleParser(fieldname, None)
    self.w_parser.add_plugin(FieldsPlugin())
    self.w_parser.add_plugin(OperatorsPlugin())
    self.w_parser.add_plugin(PhrasePlugin())
    self.w_parser.add_plugin(SingleQuotePlugin())
    self.w_parser.add_plugin(GroupPlugin())
    self.w_parser.add_plugin(PrefixPlugin())
    self.w_parser.add_plugin(GtLtPlugin())
    self.w_parser.add_plugin(RangePlugin())
    self.query = None
    self.current_node_stack = []
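# A hedged usage sketch for the parser configured above. "QueryBuilder" is a
# hypothetical name for the enclosing class, and the field names in the query
# strings are assumptions; each line shows syntax one of the re-added plugins
# enables (SimpleParser strips most of these by default).
qb = QueryBuilder("content")
q1 = qb.w_parser.parse(u"price:>=10 AND price:<100")            # FieldsPlugin + GtLtPlugin + OperatorsPlugin
q2 = qb.w_parser.parse(u'"exact phrase" OR prefix*')            # PhrasePlugin + PrefixPlugin
q3 = qb.w_parser.parse(u"(alpha OR beta) date:[2020 to 2021]")  # GroupPlugin + RangePlugin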
def search_langs(repos, q, limit=1000, **kw):
    index_ = get_langs_index(repos)
    qp = QueryParser("ini", schema=index_.schema)
    qp.add_plugin(GtLtPlugin())
    q = '{0} {1}'.format(
        q, ' '.join('{0}:"{1}"'.format(k, v) for k, v in kw.items()))

    def highlight(res):
        hl = res.highlights('ini', top=1)
        if hl:
            for line in hl.split('\n'):
                if '[[' in line:
                    return line.strip()

    with index_.searcher() as searcher:
        results = searcher.search(qp.parse(q), limit=limit)
        results.formatter = BracketFormatter()
        return (
            len(results),
            [Languoid(r['id'], r.get('iso'), r['name'], r['level'], r['fname'], highlight(r))
             for r in results])
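# A hedged usage sketch: search_langs() appends each keyword argument as a
# fielded clause, so this call searches the free text "tongue" restricted to
# level "language". `repos` stands for whatever repository object
# get_langs_index() expects, the field values are hypothetical, and Languoid
# is assumed to expose its positional fields by name.
count, hits = search_langs(repos, "tongue", limit=10, level="language")
# effective query string: 'tongue level:"language"'
for languoid in hits:
    print(languoid.id, languoid.name)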
def __init__(self, location):
    """
    Create a new whoosh-backed store. The location given will be used to
    generate keys, and these keys will be combined to get/set instance config.

    Args:
        location (Location)
    """
    super().__init__(location, Serializer())
    config = self.config_env.get_store_config("whoosh")
    self.base_index_path = config["path"]
    self.schema = self.get_schema()
    self.index = self.get_index(self.schema)
    self.default_plugins = [
        FuzzyTermPlugin(),
        GtLtPlugin(),
        PhrasePlugin()
    ]
    self.default_pagenum = 1
    self.default_pagelen = 20
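# A hedged sketch of how the defaults initialised above might be consumed;
# `store` stands for an instance of this class, and the "name" field and the
# query string are assumptions.
from whoosh.qparser import QueryParser

parser = QueryParser("name", schema=store.schema)
for plugin in store.default_plugins:
    parser.add_plugin(plugin)

query = parser.parse(u"size:>1024 whoosh~2")  # GtLt comparison + fuzzy term
with store.index.searcher() as searcher:
    page = searcher.search_page(query, store.default_pagenum,
                                pagelen=store.default_pagelen)
    for hit in page:
        print(hit.fields())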
def search(self, terms, limit=100, time_slice=None, saveAs="search"):
    big_tables = {}
    for i in CATS:
        big_tables[i] = []

    f = open("./visualization/" + saveAs + "_results.html", "w+")
    master_str = "<!DOCTYPE html><html><style>hr {border: 4;width: 80%;}</style>" \
                 + "<title>Search Results [term(s): " + terms + "]</title><body><br>"

    ix = index.open_dir("twitter_index", indexname="TWTTR")
    # Note: searching through a writer holds the index write lock.
    w = ix.writer()
    qp = QueryParser("content", schema=w.schema)
    qp.add_plugin(DateParserPlugin())
    qp.add_plugin(GtLtPlugin())
    q = qp.parse(terms)
    print("search terms", q)

    list_IDs = []
    with w.searcher() as s:
        results = s.search(q, limit=limit)
        if time_slice is not None:
            # Keep only tweets posted between the start and end times
            # ("HH:MM" strings), wrapping around midnight if start > end.
            within = []
            start = int("".join(time_slice[0].split(":")))
            end = int("".join(time_slice[1].split(":")))
            if (0 <= start <= 2400) and (0 <= end <= 2400):
                for res in results:
                    time = res["posted"]
                    if time.minute < 10:
                        t = int(str(time.hour) + "0" + str(time.minute))
                    else:
                        t = int(str(time.hour) + str(time.minute))
                    if start < end and start <= t <= end:
                        within.append(res)
                    elif end < start and (start <= t or t <= end):
                        within.append(res)
                results = within
            else:
                print("Invalid time slice, no results returned.")
                results = []

        print("%d search results" % len(results))
        print("--" * 15)
        for res in results:
            list_IDs.append(int(res["tweet_id"]))
            self.to_nums(res["liwc"], big_tables)
            master_str += self.to_html(res, True)

    master_str += "</body></html>"
    try:
        f.write(master_str)
        f.close()
    except Exception:
        f.write("Unicode parsing error")
        f.close()

    res_str = "<!DOCTYPE html><html><title>LIWC statistics for term(s):" + terms + "</title><body><br>"
    res_str += "<table><tr>" + ("<th>Category </th><th>Average</th><th>Std Dev</th><th>Max </th><th>Min </th>" * 3) + "</tr>"
    count = 0
    for_later = {}
    for j in list(big_tables.keys()):
        vals = big_tables[j]
        # print(j, vals)
        outputs = []
        if len(vals) != 0:
            avg = sum(vals) / float(len(vals))
            outputs.append(round(avg, 4))
            var = [(i - avg) ** 2 for i in vals]
            std = math.sqrt(sum(var) / len(var))
            outputs.append(round(std, 4))
            outputs.append(round(max(vals), 4))
            outputs.append(round(min(vals), 4))
        else:
            outputs = ["NA", "NA", "NA", "NA"]
        if count % 3 == 0:
            res_str += "<tr>"
        res_str += "<td>" + str(j) + "</td><td>" + str(outputs[0]) + "</td><td>" + str(outputs[1])
        res_str += "</td><td>" + str(outputs[2]) + "</td><td>" + str(outputs[3]) + "</td>"
        count += 1
        if count % 3 == 0:
            res_str += "</tr>"
        for_later[j] = outputs
    res_str += "</table>"

    if big_tables["WC"] == []:
        big_tables = ""
        res_str = "<!DOCTYPE html><html><title>LIWC statistics for term(s): "
        res_str += terms + "</title><body><br>"
        res_str += "<p>No matches found </p></body></html>"

    t = open("./visualization/" + saveAs + "_averages.html", "w+")
    try:
        t.write(res_str)
        t.close()
    except Exception:
        t.write("Unicode Error")
        t.close()

    self.graph_Tweets(results, saveAs)
    return res_str, for_later, master_str, list_IDs
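# A worked example of the time-slice arithmetic above: "HH:MM" strings become
# integers (9:05 -> 905, 23:30 -> 2330), and a slice that wraps past midnight
# (start > end) keeps times on either side of the wrap. The sample times are
# hypothetical.
start = int("".join("22:00".split(":")))   # 2200
end = int("".join("01:30".split(":")))     # 130
for t in (2330, 45, 1200):
    keep = (start <= t or t <= end) if end < start else (start <= t <= end)
    print(t, keep)   # 2330 True, 45 True, 1200 False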