Beispiel #1
0
 def on_search(self, request):
     """Handle the search page.

     GET renders the empty search form; POST reads the "keywords" field,
     normalizes it, kicks off the search and redirects to the results
     page.  Empty or missing keywords redirect back to the home page.
     """
     if request.method == "GET":
         return self.render_template('search.html')
     elif request.method == "POST":
         search_string = request.form.get("keywords", None)
         if search_string:
             search_string_norm = normalize_keyword(search_string)
             words = search_string_norm.split()
             # called only for its side effect of starting the search;
             # the returned search id was previously bound to an unused
             # local (`id_`) — the results page starts its own search
             self.searcher.start_search(words)
             return redirect("/search/%s" % "+".join(words))
         return redirect("/")
Beispiel #2
0
 def on_search(self, request):
     """Handle the search page.

     GET renders the empty search form; POST reads the "keywords" field,
     URL-unquotes and normalizes it, kicks off the search and redirects
     to the results page.  Empty or missing keywords redirect back to
     the home page.
     """
     if request.method == "GET":
         return self.render_template('search.html')
     elif request.method == "POST":
         search_string = request.form.get("keywords", None)
         if search_string:
             # unquote only after confirming the field was present:
             # urllib.unquote_plus(None) raises TypeError when the
             # "keywords" field is missing from the form
             search_string = urllib.unquote_plus(search_string)
             search_string_norm = normalize_keyword(search_string)
             words = search_string_norm.split()
             self.searcher.start_search(words)
             return redirect("/search/%s" % "+".join(words))
         return redirect("/")
Beispiel #3
0
    def on_search_results(self, request, key):
        """Render the paginated results page for the search in *key*.

        The key is normalized into search words, a search is started and a
        slice of results fetched.  Results pointing at the same article link
        are merged: scores are summed, title tokens accumulated, and the
        original article's title/text take precedence over redirects.
        Merged results are ordered by total score, best first.
        """
        words = normalize_keyword(key).split()
        start = int(request.args.get("start", 0))
        quantity = int(request.args.get("quantity", config.SEARCH_RESULTS))
        search_id = self.searcher.start_search(words)
        results = self.searcher.get_results(search_id, start, quantity)

        CLEAN = re.compile("[(),]")

        # merge results by link, giving priority to original articles
        grouped_results = {}
        for link, title, ptje, original, texto in results:
            # remove 3 dirs from link and add the proper base url
            full_link = "%s/%s" % (ARTICLES_BASE_URL, to3dirs.from_path(link))

            # lowercase the title words, stripping parens and commas
            title_words = set(CLEAN.sub("", w.lower()) for w in title.split())

            previous = grouped_results.get(full_link)
            if previous is None:
                grouped_results[full_link] = (title, ptje, title_words, texto)
            else:
                tit, prev_score, tokens, txt = previous
                tokens.update(title_words)
                if original:
                    # the original article wins the title and the text
                    tit, txt = title, texto
                grouped_results[full_link] = (tit, prev_score + ptje, tokens, txt)

        # drop from the extra tokens those already in the winning title
        for tit, _score, tokens, _txt in grouped_results.itervalues():
            tokens.difference_update(
                CLEAN.sub("", w.lower()) for w in tit.split())

        # flatten to (link, title, score, tokens, text) and sort by score
        flattened = [(link,) + data
                     for link, data in grouped_results.iteritems()]
        sorted_results = sorted(flattened, key=operator.itemgetter(2),
                                reverse=True)

        return self.render_template('search.html',
            search_words=words,
            results=sorted_results,
            start=start,
            quantity=quantity
        )
Beispiel #4
0
    def on_search_results(self, request, key):
        """Render the paginated results page for the search in *key*.

        The key is normalized, URL-unquoted and split into search words; a
        search is started and a slice of results fetched.  Results pointing
        at the same article link are merged: scores are summed, title tokens
        accumulated, and the original article's title/text take precedence
        over redirects.  Merged results are ordered by total score,
        best first.
        """
        words = urllib.unquote_plus(normalize_keyword(key)).split()
        start = int(request.args.get("start", 0))
        quantity = int(request.args.get("quantity", config.SEARCH_RESULTS))
        search_id = self.searcher.start_search(words)
        results = self.searcher.get_results(search_id, start, quantity)

        CLEAN = re.compile("[(),]")

        # merge results by link, giving priority to original articles
        grouped_results = {}
        for link, title, ptje, original, texto in results:
            # remove 3 dirs from link and add the proper base url
            full_link = "%s/%s" % (ARTICLES_BASE_URL, to3dirs.from_path(link))

            # lowercase the title words, stripping parens and commas
            title_words = set(CLEAN.sub("", w.lower()) for w in title.split())

            previous = grouped_results.get(full_link)
            if previous is None:
                grouped_results[full_link] = (title, ptje, title_words, texto)
            else:
                tit, prev_score, tokens, txt = previous
                tokens.update(title_words)
                if original:
                    # the original article wins the title and the text
                    tit, txt = title, texto
                grouped_results[full_link] = (tit, prev_score + ptje, tokens, txt)

        # drop from the extra tokens those already in the winning title
        for tit, _score, tokens, _txt in grouped_results.itervalues():
            tokens.difference_update(
                CLEAN.sub("", w.lower()) for w in tit.split())

        # flatten to (link, title, score, tokens, text) and sort by score
        flattened = [(link,) + data
                     for link, data in grouped_results.iteritems()]
        sorted_results = sorted(flattened,
                                key=operator.itemgetter(2),
                                reverse=True)

        return self.render_template('search.html',
                                    search_words=words,
                                    results=sorted_results,
                                    start=start,
                                    quantity=quantity)