Example #1
0
    def _run_cached(cls, query, bq, sort="relevance", rank_expressions=None,
                    faceting=None, start=0, num=1000, _update=False):
        '''Query the cloudsearch API and package the response as Results.

        The request is issued through basic_query() against cls.search_api.
        Search errors are logged and then re-raised to the caller. Any
        messages listed under the response's 'info' key are logged as
        warnings. The _update parameter is accepted (the original author
        intended it for later memoization) but is not used in this body.

        Returns a Results built from the list of hit ids, the total number
        of documents found, and a dict mapping each facet name to its list
        of constraints.

        Example response:

        {u'facets': {u'reddit': {u'constraints':
                                    [{u'count': 114, u'value': u'politics'},
                                     {u'count': 42, u'value': u'atheism'},
                                     {u'count': 27, u'value': u'wtf'},
                                     {u'count': 19, u'value': u'gaming'},
                                     {u'count': 12, u'value': u'bestof'},
                                     {u'count': 12, u'value': u'tf2'},
                                     {u'count': 11, u'value': u'AdviceAnimals'},
                                     {u'count': 9, u'value': u'todayilearned'},
                                     {u'count': 9, u'value': u'pics'},
                                     {u'count': 9, u'value': u'funny'}]}},
         u'hits': {u'found': 399,
                   u'hit': [{u'id': u't3_11111'},
                            {u'id': u't3_22222'},
                            {u'id': u't3_33333'},
                            {u'id': u't3_44444'},
                            ...
                            ],
                   u'start': 0},
         u'info': {u'cpu-time-ms': 10,
                   u'messages': [{u'code': u'CS-InvalidFieldOrRankAliasInRankParameter',
                                  u'message': u"Unable to create score object for rank '-hot'",
                                  u'severity': u'warning'}],
                   u'rid': u'<hash>',
                   u'time-ms': 9},
         u'match-expr': u"(label 'my query')",
         u'rank': u'-text_relevance'}

        '''
        try:
            response = basic_query(
                query=query,
                bq=bq,
                size=num,
                start=start,
                rank=sort,
                rank_expressions=rank_expressions,
                search_api=cls.search_api,
                faceting=faceting,
                record_stats=True,
            )
        except (SearchHTTPError, SearchError) as e:
            # Record the failure, then let the caller decide how to react.
            g.log.error("Search Error: %r", e)
            raise

        # Surface any non-fatal messages the search service attached.
        for message in response['info'].get('messages', []):
            g.log.warning("%(code)s (%(severity)s): %(message)s" % message)

        hit_info = response['hits']
        total_found = hit_info['found']
        doc_ids = [hit['id'] for hit in hit_info['hit']]

        # Flatten each facet in place: drop the {'constraints': [...]}
        # wrapper so the facet name maps straight to its constraint list.
        facets = response.get('facets', {})
        for name in list(facets):
            facets[name] = facets[name]['constraints']

        return Results(doc_ids, total_found, facets)
Example #2
0
    def run(self, after=None, reverse=False, num=1000, _update=False):
        '''Run the search and keep the selected window of docs on self.results.

        self.bq is reset to an empty unicode string before the search runs.
        The doc list from self._run() is handed to r2utils.get_after()
        together with after, num and reverse (presumably to select the docs
        following `after` -- confirm against r2utils). The hit count and
        facets are carried over unchanged.

        Returns the new Results, which is also stored on self.results.
        '''
        self.bq = u''
        raw = self._run(_update=_update)

        all_docs = raw.docs
        total_hits = raw.hits
        facets = raw._facets

        windowed_docs = r2utils.get_after(all_docs, after, num,
                                          reverse=reverse)

        self.results = Results(windowed_docs, total_hits, facets)
        return self.results
Example #3
0
    def _run(self, _update=False):
        '''Parse self.query state and run the search via _run_cached().

        If self._parse() raises InvalidQuery, an empty Results (no docs,
        zero hits, no facets) is returned instead of querying. When
        g.sqlprinting is set, this object is logged at info level first.
        self.bq is UTF-8 encoded before being passed along.
        '''
        try:
            self._parse()
        except InvalidQuery:
            # An unparseable query is reported as "nothing found".
            return Results([], 0, {})

        if g.sqlprinting:
            g.log.info("%s", self)

        query = self.q
        encoded_bq = self.bq.encode('utf-8')
        return self._run_cached(
            query,
            encoded_bq,
            self.sort,
            self.rank_expressions,
            self.faceting,
            start=self.start,
            num=self.num,
            _update=_update,
        )
Example #4
0
    def _run_cached(cls,
                    query,
                    bq,
                    sort="score",
                    faceting=None,
                    start=0,
                    num=1000,
                    _update=False):
        '''Query the solr HOST and package the response as Results.

        A falsy query short-circuits to an empty Results without issuing a
        request. Otherwise the request goes through basic_query() against
        cls.search_api; search errors are logged and re-raised. The _update
        parameter is accepted (intended for later memoization) but is not
        used in this body.

        Facets are only built when there is at least one hit and faceting
        was requested. Each entry of response['facet_counts']['facet_fields']
        is read as a flat list of alternating value, count items and turned
        into a list of {'value': ..., 'count': ...} dicts. The response is
        not modified. A trailing value without a matching count is ignored.

        Returns a Results built from the doc ids, the number of documents
        found, and the facets dict.

        Example result set:
        {
            u'responseHeader':{
                u'status':0,
                u'QTime':2,
                u'params':{
                    u'sort':u'activity desc',
                    u'defType':u'edismax',
                    u'q':u'coffee',
                    u'start':u'0',
                    u'wt':u'json',
                    u'size':u'1000'
                }
            },
            u'response':{
                u'start':0,
                u'numFound':1,
                u'docs':[
                    {
                        u'_version_':1496564637825499136,
                        u'type_id':5,
                        u'reddit':u'coffee',
                        u'fullname':u't5_3',
                        u'author':u'grandpa',
                        u'url':u'http://hamsandwich.com/sideoffries/?attachment_id=44',
                        u'num_comments':0,
                        u'downs':1,
                        u'title':u'013',
                        u'site':u"[u'reddit.com',u'hamsandwich.reddit.com']",
                        u'author_s':u'grandpa',
                        u'over18':False,
                        u'timestamp':1427180669,
                        u'sr_id':2,
                        u'author_fullname':u't2_1',
                        u'is_self':False,
                        u'subreddit':u'coffee',
                        u'ups':0,
                        u'id':u't5_3'
                    }
                ]
            }
        }
        '''
        if not query:
            return Results([], 0, {})
        try:
            response = basic_query(query=query,
                                   bq=bq,
                                   size=num,
                                   start=start,
                                   rank=sort,
                                   search_api=cls.search_api,
                                   faceting=faceting,
                                   record_stats=True)
        except (SearchHTTPError, SearchError) as e:
            g.log.error("Search Error: %r", e)
            raise

        hits = response['response']['numFound']
        docs = [doc['id'] for doc in response['response']['docs']]
        facets = {}
        if hits and faceting:
            facet_fields = response['facet_counts'].get('facet_fields', {})
            for field, flat in facet_fields.items():
                # Pair even-indexed values with odd-indexed counts. Slicing
                # avoids the quadratic cost of repeated pop(0) and leaves
                # the response's own lists untouched.
                facets[field] = [dict(value=value, count=count)
                                 for value, count in zip(flat[::2],
                                                         flat[1::2])]

        results = Results(docs, hits, facets)
        return results
Example #5
0
 def run(self, _update=False):
     '''Run the search and store a fresh Results on self.results.

     The docs, hit count and facets returned by self._run() are copied
     into a new Results object, which is stored and returned.
     '''
     raw = self._run(_update=_update)
     docs, hits, facets = raw.docs, raw.hits, raw._facets
     self.results = Results(docs, hits, facets)
     return self.results