Ejemplo n.º 1
0
    def run_search(cls, q, sort, solr_params, reverse, after, num):
        """Execute the query against Solr and return the matches.

        Returns a pysolr.Results whose ``docs`` hold only the
        'fullname' field of each matching document and whose ``hits``
        is the hit count reported by Solr.

        When `reverse` is true the 'asc'/'desc' parts of `sort` are
        exchanged via swap_strings before searching. When `after` is
        given, every hit is fetched (in at most two requests) and the
        list of fullnames is passed through
        get_after(docs, after._fullname, num); otherwise a single
        search limited to `num` rows is issued.
        """
        if reverse:
            sort = swap_strings(sort, 'asc', 'desc')

        g.log.debug("Searching q = %s; params = %s" % (q, repr(solr_params)))

        with SolrConnection() as conn:
            if not after:
                # simple case: a single request capped at `num` rows
                results = conn.search(q, sort, rows=num,
                                      other_params=solr_params)
                results.docs = [doc['fullname'] for doc in results.docs]
                return results

            # Paging case. The first request doubles as a probe for the
            # total hit count; its size is `num`, the default display
            # size, so that it is usually the only request needed.
            first_page = conn.search(q, sort, rows=num,
                                     other_params=solr_params)
            fetched = len(first_page.docs)
            total = first_page.hits

            have_everything = num >= total or total == fetched
            if have_everything:
                # the probe already returned every match
                results = first_page
            else:
                # fetch only the records the probe did not return, so
                # the first few are not transferred twice
                remainder = conn.search(q, sort,
                                        start=fetched,
                                        rows=total - fetched,
                                        other_params=solr_params)
                results = pysolr.Results(first_page.docs + remainder.docs,
                                         total)

            fullnames = [doc['fullname'] for doc in results.docs]
            results.docs = get_after(fullnames, after._fullname, num)
            return results
Ejemplo n.º 2
0
    def run_search_cached(q, sort, start, rows, other_params):
        """Run one Solr search and return a pysolr.Results of fullnames.

        The arguments are logged at debug level and then handed to
        SolrConnection.search unchanged. In the returned Results,
        ``docs`` lists the 'fullname' field of each document and
        ``hits`` is copied from the raw Solr response.
        """
        with SolrConnection() as conn:
            log_format = ("Searching q = %r; sort = %r,"
                          " start = %r, rows = %r,"
                          " params = %r")
            g.log.debug(log_format % (q, sort, start, rows, other_params))

            raw = conn.search(q, sort, start=start, rows=rows,
                              other_params=other_params)

        # callers only need the fullnames, so drop the rest of each doc
        fullnames = [doc['fullname'] for doc in raw.docs]
        return pysolr.Results(docs=fullnames, hits=raw.hits)
Ejemplo n.º 3
0
    def run(self, after=None, num=1000, reverse=False, _update=False):
        """Build the Solr query for this search object and execute it.

        The user's query (``self.q``) is combined with a list of extra
        restriction clauses derived from ``self.spam``, ``self.deleted``,
        ``self.timerange``, ``self.subreddits``, ``self.authors`` and
        ``self.types``; ``self.solr_params`` turns both into the final
        query and parameters, which are passed to ``self.run_search``.

        `after`, `num`, `reverse` and `_update` are forwarded to
        ``self.run_search`` unchanged.

        Returns ``pysolr.Results([], 0)`` when ``self.q`` is empty,
        otherwise whatever ``self.run_search`` returns.

        Raises SolrError when ``g.solr_url`` is not set.
        """
        if not self.q:
            return pysolr.Results([], 0)

        if not g.solr_url:
            raise SolrError("g.solr_url is not set")

        # there are two parts to our query: what the user typed
        # (parsed with Solr's DisMax parser), and what we are adding
        # to it. The latter is called the "boost" (and is parsed using
        # full Lucene syntax), and it can be added to via the `boost`
        # parameter
        boost = []

        if not self.spam:
            boost.append("-spam:true")
        if not self.deleted:
            boost.append("-deleted:true")

        if self.timerange:

            def time_to_searchstr(t):
                # datetime must be tested before date because datetime
                # is a subclass of date
                if isinstance(t, datetime):
                    return t.strftime('%Y-%m-%dT%H:%M:%S.000Z')
                if isinstance(t, date):
                    return t.strftime('%Y-%m-%dT00:00:00.000Z')
                # anything else (e.g. an already formatted string) is
                # used as given
                return t

            (fromtime, totime) = self.timerange
            boost.append("+date:[%s TO %s]" % (time_to_searchstr(fromtime),
                                               time_to_searchstr(totime)))

        if self.subreddits:

            def subreddit_to_searchstr(sr):
                # accepts a Subreddit object, a name, or a bare id
                if isinstance(sr, Subreddit):
                    return ('sr_id', '%d' % sr.id)
                elif isinstance(sr, str) or isinstance(sr, unicode):
                    return ('subreddit', sr)
                else:
                    return ('sr_id', '%d' % sr)

            s_subreddits = [subreddit_to_searchstr(sr)
                            for sr in tup(self.subreddits)]

            boost.append("+(%s)" % combine_searchterms(s_subreddits))

        if self.authors:

            def author_to_searchstr(a):
                # accepts an Account object, a name, or a bare id
                if isinstance(a, Account):
                    return ('author_id', '%d' % a.id)
                elif isinstance(a, str) or isinstance(a, unicode):
                    return ('author', a)
                else:
                    return ('author_id', '%d' % a)

            s_authors = [author_to_searchstr(a) for a in tup(self.authors)]

            boost.append('+(%s)^2' % combine_searchterms(s_authors))

        def type_to_searchstr(t):
            # A type may be given by name or as a class. unicode names
            # are accepted like str ones, matching the subreddit and
            # author helpers; previously a unicode name fell through to
            # t.__name__ and raised AttributeError.
            if isinstance(t, str) or isinstance(t, unicode):
                return ('type', t)
            else:
                return ('type', t.__name__.lower())

        s_types = [type_to_searchstr(t) for t in self.types]
        boost.append("+%s" % combine_searchterms(s_types))

        q, solr_params = self.solr_params(self.q, boost)

        return self.run_search(q,
                               self.sort,
                               solr_params,
                               reverse,
                               after,
                               num,
                               _update=_update)