def run_search(cls, q, sort, solr_params, reverse, after, num, _update=False):
    """Run the Solr query `q` and return a pysolr.Results whose `docs`
    is a list of fullname strings and whose `hits` is the total match
    count, i.e. pysolr.Results(docs=[fullname()], hits=int()).

    q           -- the Solr query string
    sort        -- Solr sort clause; direction is flipped when `reverse`
    solr_params -- extra parameters passed through to Solr
    reverse     -- if true, swap 'asc'/'desc' in `sort`
    after       -- a thing with a `_fullname`; results are paged to start
                   after it (requires fetching everything up to it)
    num         -- number of results the caller wants to display
    _update     -- accepted for interface compatibility with run(), which
                   passes it along; not used here (no cache to refresh)
    """
    if reverse:
        sort = swap_strings(sort, 'asc', 'desc')

    g.log.debug("Searching q = %s; params = %s" % (q, repr(solr_params)))

    with SolrConnection() as s:
        if after:
            # size of the pre-search to run in the case that we
            # need to search more than once. A bigger one can
            # reduce the number of searches that need to be run
            # twice, but if it's bigger than the default display
            # size, it could waste some transfer on results that
            # are never shown.
            PRESEARCH_SIZE = num

            # run a search and get back the number of hits, so
            # that we can re-run the search with that max_count.
            pre_search = s.search(q, sort, rows=PRESEARCH_SIZE,
                                  other_params=solr_params)

            if (PRESEARCH_SIZE >= pre_search.hits
                or pre_search.hits == len(pre_search.docs)):
                # don't run a second search if our pre-search
                # found all of the elements anyway
                search = pre_search
            else:
                # we have to run a second search, but we can limit
                # the duplicated transfer of the first few records
                # since we already have those from the pre_search
                second_search = s.search(q, sort,
                                         start=len(pre_search.docs),
                                         rows=pre_search.hits - len(pre_search.docs),
                                         other_params=solr_params)
                search = pysolr.Results(pre_search.docs + second_search.docs,
                                        pre_search.hits)

            # reduce each doc to its fullname, then page to the
            # window immediately after `after`
            search.docs = [i['fullname'] for i in search.docs]
            search.docs = get_after(search.docs, after._fullname, num)
        else:
            search = s.search(q, sort, rows=num,
                              other_params=solr_params)
            search.docs = [i['fullname'] for i in search.docs]

        return search
def run_search_cached(q, sort, start, rows, other_params):
    """Run a single Solr query and return a pysolr.Results containing
    only the fullnames of the matched docs (plus the total hit count),
    so the result is small enough to cache."""
    debug_fmt = ("Searching q = %r; sort = %r," +
                 " start = %r, rows = %r," +
                 " params = %r")
    with SolrConnection() as conn:
        g.log.debug(debug_fmt % (q, sort, start, rows, other_params))
        raw = conn.search(q, sort, start=start, rows=rows,
                          other_params=other_params)
        # extract out the fullname in the 'docs' field, since that's
        # all we care about; the hit count is carried over unchanged
        fullnames = [doc['fullname'] for doc in raw.docs]
        return pysolr.Results(docs=fullnames, hits=raw.hits)
def run(self, after=None, num=1000, reverse=False, _update=False):
    """Build the full Solr query for this search object and execute it.

    after   -- page the results to start after this item (by fullname)
    num     -- maximum number of results to return
    reverse -- reverse the sort direction
    _update -- passed through to run_search (presumably a cache-refresh
               flag; can't tell from here)

    Returns a pysolr.Results of fullnames; an empty Results when there
    is no query text.  Raises SolrError when g.solr_url is unset.
    """
    # nothing to search for -> empty result set, no Solr round-trip
    if not self.q:
        return pysolr.Results([], 0)
    if not g.solr_url:
        raise SolrError("g.solr_url is not set")

    # there are two parts to our query: what the user typed
    # (parsed with Solr's DisMax parser), and what we are adding
    # to it. The latter is called the "boost" (and is parsed using
    # full Lucene syntax), and it can be added to via the `boost`
    # parameter
    boost = []

    # by default, exclude spammed and deleted items
    if not self.spam:
        boost.append("-spam:true")
    if not self.deleted:
        boost.append("-deleted:true")

    if self.timerange:
        # normalise a datetime/date/str into Solr's ISO-8601 date syntax
        def time_to_searchstr(t):
            if isinstance(t, datetime):
                t = t.strftime('%Y-%m-%dT%H:%M:%S.000Z')
            elif isinstance(t, date):
                t = t.strftime('%Y-%m-%dT00:00:00.000Z')
            elif isinstance(t, str):
                # already a string; passed through unchanged
                t = t
            return t
        (fromtime, totime) = self.timerange
        fromtime = time_to_searchstr(fromtime)
        totime = time_to_searchstr(totime)
        # restrict matches to the requested date window
        boost.append("+date:[%s TO %s]" % (fromtime, totime))

    if self.subreddits:
        # accept a Subreddit object, a name string, or a raw id
        def subreddit_to_searchstr(sr):
            if isinstance(sr, Subreddit):
                return ('sr_id', '%d' % sr.id)
            elif isinstance(sr, str) or isinstance(sr, unicode):
                return ('subreddit', sr)
            else:
                return ('sr_id', '%d' % sr)
        s_subreddits = map(subreddit_to_searchstr, tup(self.subreddits))
        # require a match in at least one of the given subreddits
        boost.append("+(%s)" % combine_searchterms(s_subreddits))

    if self.authors:
        # accept an Account object, a name string, or a raw id
        def author_to_searchstr(a):
            if isinstance(a, Account):
                return ('author_id', '%d' % a.id)
            elif isinstance(a, str) or isinstance(a, unicode):
                return ('author', a)
            else:
                return ('author_id', '%d' % a)
        s_authors = map(author_to_searchstr, tup(self.authors))
        # author matches are required and weighted up (^2)
        boost.append('+(%s)^2' % combine_searchterms(s_authors))

    # restrict to the requested content types (class or type-name string)
    def type_to_searchstr(t):
        if isinstance(t, str):
            return ('type', t)
        else:
            return ('type', t.__name__.lower())
    s_types = map(type_to_searchstr, self.types)
    boost.append("+%s" % combine_searchterms(s_types))

    # combine the user query and boost terms into the final query +
    # parameter set, then execute
    q, solr_params = self.solr_params(self.q, boost)
    search = self.run_search(q, self.sort, solr_params,
                             reverse, after, num,
                             _update=_update)
    return search