Example #1
from whoosh.qparser import QueryParser


def get_one_document(idx, by_id=None):
    with idx.searcher() as search:
        q = QueryParser('uuid', idx.schema).parse(by_id)
        results = search.search(q)
        # find related documents that share keyword terms
        related = results[0].more_like_this('keywords', top=3, numterms=10)
        if related.is_empty():
            # if we can't find anything with related keywords, we want
            # to extend our search into the content with lots of possible
            # terms. the goal is to have 3 related articles...
            related = results[0].more_like_this('content', top=3, numterms=80)
        return clean_results(idx, results), clean_results(idx, related)
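
These examples never show the index itself; they all assume a Whoosh index whose schema has the fields they reference (uuid, title, keywords, summary, content, author, modified) plus a project-specific clean_results() helper. A minimal sketch of building such an index and calling get_one_document follows; the field types, options, and directory name are assumptions, not taken from the original project:

from whoosh.fields import Schema, ID, KEYWORD, TEXT, DATETIME
from whoosh.index import create_in

# Assumed schema: the field names come from the examples on this
# page, but the types and options are guesses.
schema = Schema(
    uuid=ID(stored=True, unique=True),
    title=TEXT(stored=True),
    keywords=KEYWORD(stored=True, commas=True, scorable=True),
    summary=TEXT(stored=True),
    content=TEXT(stored=True),
    author=TEXT(stored=True),
    modified=DATETIME(stored=True, sortable=True),
)

idx = create_in('indexdir', schema)  # 'indexdir' must already exist on disk

# ... after adding documents with idx.writer() ...
doc, related = get_one_document(idx, by_id='some-uuid')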
Example #2
from whoosh.qparser import MultifieldParser, OrGroup, WildcardPlugin
from whoosh.qparser.dateparse import DateParserPlugin
from whoosh.sorting import MultiFacet


def generic(idx, qs=None, q=None, limit=5, parser=None, page=1):
    if qs is None and q is None:
        raise ValueError('must provide either a query string (qs) or a Query object (q)')

    if parser is None:
        parser = MultifieldParser(
            ['title', 'keywords', 'summary', 'content', 'author'],
            idx.schema, group=OrGroup)

    # parse natural-language dates in query strings, and disable
    # wildcard syntax
    parser.add_plugin(DateParserPlugin())
    parser.remove_plugin_class(WildcardPlugin)

    with idx.searcher() as search:
        # generate the Query object
        if qs:
            query = parser.parse(qs)
        else:
            query = q

        # sort by score first, then newest-modified, then title
        facet = MultiFacet()
        facet.add_score()
        facet.add_field('modified', reverse=True)
        facet.add_field('title')

        results = search.search_page(query, pagenum=page, sortedby=facet, pagelen=limit)
        res = clean_results(idx, results, query)

        # pagination attributes from the `search_page` results
        res.page_number = results.pagenum   # current page number
        res.page_total = results.pagecount  # total pages in results
        res.offset = results.offset         # index of the first result on this page
        res.pagelen = results.pagelen       # max number of results per page

    return res
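
A usage sketch: with the DateParserPlugin active, date syntax in the query string is parsed against the schema's date fields, and the OrGroup makes terms match across any of the listed fields. The query strings and values below are illustrative, not from the original project:

from whoosh.query import Term

# page 2 of matches for "python", five per page, ranked by score,
# then most-recently-modified, then title; the date clause is
# handled by DateParserPlugin
res = generic(idx, qs='python modified:2012', page=2)
print(res.page_number, res.page_total)

# or pass a pre-built Query object instead of a string
res = generic(idx, q=Term('author', 'alice'), limit=10)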
Example #3
    def get(self, old_slug):
        # There are still cached links from Google hitting the
        # server that don't match the new routing scheme.
        #
        # To compensate for this, the "OldBlogHandler" parses the
        # previous slug format and runs a broad generic query with
        # the keywords (and title). Since old content is basically
        # being ported 1-to-1, if we have ANY search results, we
        # return that page via permanent redirect.
        pieces = list(map(str.strip, old_slug.split('-')))

        idx = self.meta.search_index

        with idx.searcher() as searcher:
            q = QueryParser('title', idx.schema).parse(' OR '.join(pieces))
            results = searcher.search(q, limit=1)
            results = clean_results(idx, results)

        if results.get('count', 0) > 0:
            # rebuild the url, and redirect PERMANENTLY
            # to the correct endpoint
            reslug = document_slug(results.results[0])
            self.redirect(reslug, permanent=True)
        else:
            # we couldn't find the old article for this slug in the
            # new data store, so we return some kind of "missing" page.
            self.write('That content appears to be missing.')
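
For illustration, an old-style slug is split on hyphens and rejoined into an OR query over the title field, so any one matching word is enough to produce a redirect target. A quick sketch of that transformation (the slug value is hypothetical):

from whoosh.qparser import QueryParser

old_slug = 'my-first-post'  # hypothetical old-style slug
pieces = list(map(str.strip, old_slug.split('-')))
q = QueryParser('title', idx.schema).parse(' OR '.join(pieces))
# q is roughly Or([Term('title', 'my'), Term('title', 'first'),
#                  Term('title', 'post')])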
Example #4
from whoosh.query import Every


def get_all_documents(idx, limit=None):
    # Every() matches all documents; limit=None returns everything
    with idx.searcher() as search:
        results = search.search(Every(), limit=limit)
        return clean_results(idx, results)
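
All four examples call a project-specific clean_results() helper that never appears on this page. Judging by how its return value is used (res.get('count', 0), res.results, assigning res.page_number), it copies stored fields out of each Hit while the searcher is still open and returns a dict-like object with attribute access. A purely hypothetical sketch along those lines:

class CleanedResults(dict):
    # dict with attribute access, matching how the examples use
    # both res.get('count', 0) and res.results / res.page_number
    __getattr__ = dict.__getitem__
    __setattr__ = dict.__setitem__


def clean_results(idx, results, query=None):
    # Hypothetical sketch -- the real helper isn't shown on this
    # page. Stored fields must be copied out while the searcher is
    # open, since Hit objects are unusable after it closes.
    hits = [hit.fields() for hit in results]
    cleaned = CleanedResults(count=len(hits), results=hits)
    if query is not None:
        cleaned['query'] = str(query)
    return cleaned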