Example #1
    def _search(self, search_string):
        """Really do the search."""
        search_string_norm = normalize_words(search_string)
        words = search_string_norm.split()
        results = list(self.index.search(words))

        # remove 3 dirs from link and add the proper base url
        for result in results:
            result.link = "wiki/{}".format(
                urllib.parse.quote(to3dirs.from_path(result.link), safe=()))

        return results
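One detail worth noting in Example #1: passing `safe=()` makes `urllib.parse.quote` escape every reserved character, including `/`, which the default `safe='/'` would leave untouched; that keeps a slash inside an article name from being read as a path separator. A quick standard-library illustration (the sample string is invented):

    import urllib.parse

    # default safe='/' leaves slashes alone, so path segments stay separate
    urllib.parse.quote("Buenos Aires/2")           # -> 'Buenos%20Aires/2'

    # safe=() escapes every reserved character, slash included
    urllib.parse.quote("Buenos Aires/2", safe=())  # -> 'Buenos%20Aires%2F2'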
Example #2
    def on_search_results(self, request, key):
        search_string_norm = normalize_keyword(key)
        words = search_string_norm.split()
        start = int(request.args.get("start", 0))
        quantity = int(request.args.get("quantity", config.SEARCH_RESULTS))
        id_ = self.searcher.start_search(words)
        results = self.searcher.get_results(id_, start, quantity)

        CLEAN = re.compile("[(),]")

        # group by link, giving priority to the title of the original articles
        grouped_results = {}
        for link, title, ptje, original, texto in results:
            # remove 3 dirs from link and add the proper base url
            link = "%s/%s" % (ARTICLES_BASE_URL, to3dirs.from_path(link))

            # convert tokens to lower case
            tit_tokens = set(CLEAN.sub("", x.lower()) for x in title.split())

            if link in grouped_results:
                (tit, prv_ptje, tokens, txt) = grouped_results[link]
                tokens.update(tit_tokens)
                if original:
                    # save the info of the original article
                    tit = title
                    txt = texto
                grouped_results[link] = (tit, prv_ptje + ptje, tokens, txt)
            else:
                grouped_results[link] = (title, ptje, tit_tokens, texto)

        # clean the tokens
        for link, (tit, ptje, tokens, texto) in grouped_results.iteritems():
            tit_tokens = set(CLEAN.sub("", x.lower()) for x in tit.split())
            tokens.difference_update(tit_tokens)

        # sort the results
        candidates = ((k, ) + tuple(v) for k, v in grouped_results.iteritems())
        sorted_results = sorted(candidates, key=operator.itemgetter(2),
                                reverse=True)

        return self.render_template('search.html',
            search_words=words,
            results=sorted_results,
            start=start,
            quantity=quantity
        )
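The grouping loop is the heart of this handler: rows that share a link are merged, their scores are summed, and the title and text of the `original` article win over redirects. The same idea as a self-contained Python 3 sketch (the sample rows are invented, and `grouped.items()` replaces the Python 2 `iteritems()` used above):

    import operator
    import re

    CLEAN = re.compile("[(),]")

    # invented rows in the searcher's (link, title, score, original, text) shape
    rows = [
        ("a/r/g/Argentina", "Argentina (pais)", 10, True, "original text"),
        ("a/r/g/Argentina", "Argentina", 4, False, "redirect text"),
        ("a/r/t/Arte", "Arte", 7, True, "some text"),
    ]

    grouped = {}
    for link, title, score, original, text in rows:
        tit_tokens = set(CLEAN.sub("", w.lower()) for w in title.split())
        if link in grouped:
            tit, prev_score, tokens, txt = grouped[link]
            tokens.update(tit_tokens)
            if original:  # the original article supplies title and text
                tit, txt = title, text
            grouped[link] = (tit, prev_score + score, tokens, txt)
        else:
            grouped[link] = (title, score, tit_tokens, text)

    # index 2 is the accumulated score once the link is prepended
    ordered = sorted(((k,) + v for k, v in grouped.items()),
                     key=operator.itemgetter(2), reverse=True)
    print([(link, score) for link, _, score, _, _ in ordered])
    # [('a/r/g/Argentina', 14), ('a/r/t/Arte', 7)]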
Example #3
    def on_search_results(self, request, key):
        search_string_norm = urllib.unquote_plus(normalize_keyword(key))
        words = search_string_norm.split()
        start = int(request.args.get("start", 0))
        quantity = int(request.args.get("quantity", config.SEARCH_RESULTS))
        id_ = self.searcher.start_search(words)
        results = self.searcher.get_results(id_, start, quantity)

        CLEAN = re.compile("[(),]")

        # group by link, giving priority to the title of the original articles
        grouped_results = {}
        for link, title, ptje, original, texto in results:
            # remove 3 dirs from link and add the proper base url
            link = "%s/%s" % (ARTICLES_BASE_URL, to3dirs.from_path(link))

            # convert tokens to lower case
            tit_tokens = set(CLEAN.sub("", x.lower()) for x in title.split())

            if link in grouped_results:
                (tit, prv_ptje, tokens, txt) = grouped_results[link]
                tokens.update(tit_tokens)
                if original:
                    # save the info of the original article
                    tit = title
                    txt = texto
                grouped_results[link] = (tit, prv_ptje + ptje, tokens, txt)
            else:
                grouped_results[link] = (title, ptje, tit_tokens, texto)

        # clean the tokens
        for link, (tit, ptje, tokens, texto) in grouped_results.iteritems():
            tit_tokens = set(CLEAN.sub("", x.lower()) for x in tit.split())
            tokens.difference_update(tit_tokens)

        # sort the results
        candidates = ((k, ) + tuple(v) for k, v in grouped_results.iteritems())
        sorted_results = sorted(candidates,
                                key=operator.itemgetter(2),
                                reverse=True)

        return self.render_template('search.html',
                                    search_words=words,
                                    results=sorted_results,
                                    start=start,
                                    quantity=quantity)
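Example #3 differs from Example #2 only in the `urllib.unquote_plus` call, which decodes a form-encoded query string before normalization; in Python 3 the same function lives in `urllib.parse`:

    from urllib.parse import unquote_plus  # Python 2 spelled this urllib.unquote_plus

    unquote_plus("buenos+aires%2C+argentina")  # -> 'buenos aires, argentina'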
Example #4
    def on_random(self, request):
        """Redirect to a random article."""
        idx_entry = self.index.get_random()
        link = "%s/%s" % (ARTICLES_BASE_URL, to3dirs.from_path(idx_entry.link))
        return redirect(urllib.parse.quote(link.encode("utf-8")))
Example #5
    def test_from_path(self):
        self.assertEqual(from_path(_to_complete_path(u"unnombre")), u"unnombre")
        self.assertEqual(from_path(_to_complete_path(u"/s")), u"/s")
        self.assertEqual(from_path(_to_complete_path(u"s/s/s/")), u"s/s/s/")
        self.assertEqual(from_path(_to_complete_path(u"s/s/s/SLASH")), u"s/s/s//")
Example #6
    def on_al_azar(self, request):
        """Redirect to a random article (Python 2 version of Example #4)."""
        link, tit = self.index.get_random()
        link = "%s/%s" % (ARTICLES_BASE_URL, to3dirs.from_path(link))
        return redirect(urllib.quote(link.encode("utf-8")))