def on_search(self, request):
    if request.method == "GET":
        return self.render_template('search.html')
    elif request.method == "POST":
        search_string = request.form.get("keywords", None)
        if search_string:
            search_string_norm = normalize_keyword(search_string)
            words = search_string_norm.split()
            id_ = self.searcher.start_search(words)
            return redirect("/search/%s" % "+".join(words))
    return redirect("/")
def on_search(self, request):
    if request.method == "GET":
        return self.render_template('search.html')
    elif request.method == "POST":
        search_string = request.form.get("keywords", None)
        search_string = urllib.unquote_plus(search_string)
        if search_string:
            search_string_norm = normalize_keyword(search_string)
            words = search_string_norm.split()
            self.searcher.start_search(words)
            return redirect("/search/%s" % "+".join(words))
    return redirect("/")
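The second variant differs from the first only in the urllib.unquote_plus step applied to the posted keywords before normalizing them. A minimal sketch of the round-trip those handlers rely on (assuming Python 2's urllib, as in the code above; the sample value is illustrative):

    # Sketch (assumption: Python 2 urllib, matching the handlers above) of the
    # round-trip between the '+'-joined redirect URL and the decoded keyword list.
    import urllib

    posted = "python+web+server"              # hypothetical URL-quoted "keywords" value
    decoded = urllib.unquote_plus(posted)     # -> "python web server"
    words = decoded.split()                   # -> ["python", "web", "server"]

    # on_search redirects to the '+'-joined form that on_search_results later receives as `key`
    print "/search/%s" % "+".join(words)      # -> /search/python+web+server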
def on_search_results(self, request, key):
    search_string_norm = normalize_keyword(key)
    words = search_string_norm.split()

    start = int(request.args.get("start", 0))
    quantity = int(request.args.get("quantity", config.SEARCH_RESULTS))

    id_ = self.searcher.start_search(words)
    results = self.searcher.get_results(id_, start, quantity)

    CLEAN = re.compile("[(),]")

    # group by link, giving priority to the title of the original articles
    grouped_results = {}
    for link, title, ptje, original, texto in results:
        # remove 3 dirs from link and add the proper base url
        link = "%s/%s" % (ARTICLES_BASE_URL, to3dirs.from_path(link))

        # convert tokens to lower case
        tit_tokens = set(CLEAN.sub("", x.lower()) for x in title.split())

        if link in grouped_results:
            (tit, prv_ptje, tokens, txt) = grouped_results[link]
            tokens.update(tit_tokens)
            if original:
                # save the info of the original article
                tit = title
                txt = texto
            grouped_results[link] = (tit, prv_ptje + ptje, tokens, txt)
        else:
            grouped_results[link] = (title, ptje, tit_tokens, texto)

    # clean the tokens
    for link, (tit, ptje, tokens, texto) in grouped_results.iteritems():
        tit_tokens = set(CLEAN.sub("", x.lower()) for x in tit.split())
        tokens.difference_update(tit_tokens)

    # sort the results
    candidates = ((k,) + tuple(v) for k, v in grouped_results.iteritems())
    sorted_results = sorted(candidates, key=operator.itemgetter(2), reverse=True)

    return self.render_template('search.html',
                                search_words=words,
                                results=sorted_results,
                                start=start,
                                quantity=quantity)
def on_search_results(self, request, key):
    search_string_norm = urllib.unquote_plus(normalize_keyword(key))
    words = search_string_norm.split()

    start = int(request.args.get("start", 0))
    quantity = int(request.args.get("quantity", config.SEARCH_RESULTS))

    id_ = self.searcher.start_search(words)
    results = self.searcher.get_results(id_, start, quantity)

    CLEAN = re.compile("[(),]")

    # group by link, giving priority to the title of the original articles
    grouped_results = {}
    for link, title, ptje, original, texto in results:
        # remove 3 dirs from link and add the proper base url
        link = "%s/%s" % (ARTICLES_BASE_URL, to3dirs.from_path(link))

        # convert tokens to lower case
        tit_tokens = set(CLEAN.sub("", x.lower()) for x in title.split())

        if link in grouped_results:
            (tit, prv_ptje, tokens, txt) = grouped_results[link]
            tokens.update(tit_tokens)
            if original:
                # save the info of the original article
                tit = title
                txt = texto
            grouped_results[link] = (tit, prv_ptje + ptje, tokens, txt)
        else:
            grouped_results[link] = (title, ptje, tit_tokens, texto)

    # clean the tokens
    for link, (tit, ptje, tokens, texto) in grouped_results.iteritems():
        tit_tokens = set(CLEAN.sub("", x.lower()) for x in tit.split())
        tokens.difference_update(tit_tokens)

    # sort the results
    candidates = ((k,) + tuple(v) for k, v in grouped_results.iteritems())
    sorted_results = sorted(candidates, key=operator.itemgetter(2), reverse=True)

    return self.render_template('search.html',
                                search_words=words,
                                results=sorted_results,
                                start=start,
                                quantity=quantity)
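Both versions of on_search_results share the same grouping pass: results that resolve to the same link are merged, their scores are added, and the original article's title and text win over redirects. A simplified, self-contained sketch of that pass (illustrative data, title-token bookkeeping omitted, Python 2 to match the code above):

    # Standalone sketch (illustrative data, not from the original project) of the
    # grouping step: rows with the same link are merged, scores are accumulated,
    # and the merged rows are sorted by total score.
    import operator

    # (link, title, score, is_original, text) -- the tuple shape the loop above unpacks
    results = [
        ("a/b/c/Foo", "Foo", 10, True, "original text"),
        ("a/b/c/Foo", "Foo (redirect)", 3, False, "redirect text"),
        ("x/y/z/Bar", "Bar", 7, True, "bar text"),
    ]

    grouped = {}
    for link, title, score, original, text in results:
        if link in grouped:
            tit, prev_score, txt = grouped[link]
            if original:          # the original article wins title and text
                tit, txt = title, text
            grouped[link] = (tit, prev_score + score, txt)
        else:
            grouped[link] = (title, score, text)

    candidates = ((k,) + v for k, v in grouped.iteritems())
    for row in sorted(candidates, key=operator.itemgetter(2), reverse=True):
        print row
    # ('a/b/c/Foo', 'Foo', 13, 'original text') comes first, then ('x/y/z/Bar', 'Bar', 7, 'bar text')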