Esempio n. 1
0
    def __getitem__(self, key):
        """Return the bills selected by an integer index or a slice.

        ``key`` is turned into a ``[start, stop)`` window.  When
        ``self.es_search`` is truthy the window is sent to Elasticsearch as
        ``from``/``size`` and the returned ``_id`` values are then looked up
        in ``db.bills``; otherwise ``self.mongo_query`` is run directly with
        ``skip``/``limit``.  In both cases the value returned is the sorted
        result of ``db.bills.find`` (also for an integer key), restricted to
        ``self.fields``.

        Raises:
            KeyError: if a slice with a step is given.
            TypeError: if ``key`` is neither an ``int`` nor a ``slice``.
        """
        if isinstance(key, slice):
            # Reject unsupported slices before len(self) gets evaluated.
            if key.step:
                raise KeyError('step of %s is not permitted' % key.step)
            # NOTE: ``or`` treats a start/stop of 0 the same as None, so a
            # stop of 0 means "up to len(self)".
            start = key.start or 0
            stop = key.stop or len(self)
        elif isinstance(key, int):
            # Negative indices are passed through unchanged.
            start = key
            stop = key + 1
        else:
            # Without this branch ``stop`` stayed unbound and the method
            # failed further down with an UnboundLocalError.
            raise TypeError('indices must be integers or slices, not %s'
                            % type(key).__name__)

        window = stop - start

        if self.es_search:
            # Shallow copy so the paging keys are not written back onto
            # self.es_search.
            search = dict(self.es_search)
            search['sort'] = [{self.sort: 'desc'}, 'bill_id']
            search['from'] = start
            search['size'] = window
            es_result = elasticsearch.search(search,
                                             index='billy',
                                             doc_type='bills')
            matched_ids = [hit['_id'] for hit in es_result['hits']['hits']]
            # The ids already represent the requested page, so no
            # skip/limit here; the sort is re-applied on the Mongo side.
            return db.bills.find(
                {'_id': {'$in': matched_ids}}, fields=self.fields
            ).sort([
                (self.sort, pymongo.DESCENDING),
                ('bill_id', pymongo.ASCENDING),
            ])

        return db.bills.find(self.mongo_query, fields=self.fields).sort([
            (self.sort, pymongo.DESCENDING)
        ]).skip(start).limit(window)
Esempio n. 2
0
    def __getitem__(self, key):
        """Fetch a page of bills addressed by an index or a slice.

        An integer ``key`` selects the one-element window
        ``[key, key + 1)``; a slice selects ``[start, stop)`` with a missing
        (or zero) start defaulting to 0 and a missing (or zero) stop
        defaulting to ``len(self)``.

        If ``self.es_search`` is truthy, Elasticsearch is asked for that
        window and the ``_id`` values it returns are fetched from
        ``db.bills``.  Otherwise ``self.mongo_query`` is executed with
        ``skip``/``limit``.  The return value is always the sorted result of
        ``db.bills.find`` limited to ``self.fields``.

        Raises:
            KeyError: if the slice carries a step.
            TypeError: if ``key`` is not an ``int`` or a ``slice``.
        """
        if isinstance(key, slice):
            if key.step:
                # Checked first so that len(self) is not evaluated for a
                # slice that is going to be rejected anyway.
                raise KeyError('step of %s is not permitted' % key.step)
            start = key.start or 0
            stop = key.stop or len(self)
        elif isinstance(key, int):
            start, stop = key, key + 1
        else:
            # Any other key type used to leave ``stop`` unbound and crash
            # later with an UnboundLocalError; fail early and clearly.
            raise TypeError('indices must be integers or slices, not %s'
                            % type(key).__name__)

        size = stop - start

        if not self.es_search:
            cursor = db.bills.find(self.mongo_query, fields=self.fields)
            cursor = cursor.sort([(self.sort, pymongo.DESCENDING)])
            return cursor.skip(start).limit(size)

        # dict() copies the top level, so the paging keys below do not end
        # up on self.es_search.
        search = dict(self.es_search)
        search['sort'] = [{self.sort: 'desc'}, 'bill_id']
        search['from'] = start
        search['size'] = size
        es_result = elasticsearch.search(search,
                                         index='billy', doc_type='bills')
        ids = [r['_id'] for r in es_result['hits']['hits']]
        _mongo_query = {'_id': {'$in': ids}}
        return db.bills.find(_mongo_query, fields=self.fields).sort(
            [(self.sort, pymongo.DESCENDING),
             ('bill_id', pymongo.ASCENDING)])
Esempio n. 3
0
    def search(query=None, abbr=None, chamber=None, subjects=None,
               bill_id=None, bill_id__in=None, search_window=None,
               updated_since=None, sponsor_id=None, bill_fields=None,
               status=None, type_=None, session=None):
        """Translate search criteria into a ``db.bills.find`` call.

        Every argument is optional; the supplied ones are combined into a
        single MongoDB filter.

        Args:
            query: free-text search string.  With
                ``settings.ENABLE_ELASTICSEARCH`` set, a query containing a
                digit is first tried as a bill id (via ``fix_bill_id``); if
                that finds nothing, or the query has no digit, a full-text
                search over the ``text`` and ``title`` fields is run.
                Without Elasticsearch the query is used as a
                case-insensitive regex on ``title``.
            abbr: value stored under ``settings.LEVEL_FIELD``.
            chamber: value for the ``chamber`` field.
            subjects: value for the ``subjects`` field.  On the
                Elasticsearch path it is indexed with ``['$all']``.
            bill_id: value for the ``bill_id`` field.
            bill_id__in: accepted but not used by this function.
            search_window: ``'session'``, ``'term'``, ``'all'``,
                ``'session:<name>'`` or ``'term:<name>'``.
            updated_since: date string handed to ``parse_param_dt``; bills
                with ``updated_at`` greater than or equal to it are kept.
            sponsor_id: value for ``sponsors.leg_id``.
            bill_fields: passed to ``db.bills.find`` as ``fields``.
            status: dict of extra conditions merged into the filter, e.g.
                ``{'action_dates.signed': {'$ne': None}}``.
            type_: value for the ``type`` field.
            session: value for the ``session`` field; overrides a session
                set through ``search_window``.

        Returns:
            The result of ``db.bills.find``.

        Raises:
            ValueError: for an unrecognised ``search_window`` or when
                ``parse_param_dt`` rejects ``updated_since``.
        """
        # NOTE(review): ``bill_id__in`` is never read below -- presumably it
        # was meant to become a ``{'$in': ...}`` condition on bill_id;
        # confirm against callers before wiring it in.
        _filter = {}
        for key, value in [(settings.LEVEL_FIELD, abbr),
                           ('chamber', chamber),
                           ('subjects', subjects),
                           ('bill_id', bill_id)]:
            if value is not None:
                _filter[key] = value

        if search_window:
            if search_window == 'session':
                _filter['_current_session'] = True
            elif search_window == 'term':
                _filter['_current_term'] = True
            elif search_window.startswith('session:'):
                # maxsplit=1 keeps everything after the prefix, even when
                # the value itself contains 'session:'.
                _filter['session'] = search_window.split('session:', 1)[1]
            elif search_window.startswith('term:'):
                _filter['_term'] = search_window.split('term:', 1)[1]
            elif search_window == 'all':
                pass
            else:
                raise ValueError('invalid search_window. valid choices are '
                                 ' "term", "session", "all"')

        if updated_since:
            try:
                _filter['updated_at'] = {'$gte': parse_param_dt(updated_since)}
            except ValueError:
                raise ValueError('invalid updated_since parameter. '
                                 'please supply date in YYYY-MM-DD format')

        if sponsor_id:
            _filter['sponsors.leg_id'] = sponsor_id

        if status:
            # Status is slightly different: it's a dict like--
            # {'action_dates.signed': {'$ne': None}}
            _filter.update(status)

        if type_:
            _filter['type'] = type_

        if session:
            _filter['session'] = session

        # without elasticsearch a query is just a title regex
        if not (query and settings.ENABLE_ELASTICSEARCH):
            if query:
                _filter['title'] = {'$regex': query, '$options': 'i'}
            return db.bills.find(_filter, fields=bill_fields)

        # block spammers, possibly move to a BANNED_SEARCH_LIST setting
        if '<a href' in query:
            return db.bills.find({settings.LEVEL_FIELD: None})

        # if the query contains a digit, try it as a bill id first
        if re.search(r'\d', query):
            _id_filter = dict(_filter)
            fixed_id = fix_bill_id(query).upper()

            # an entirely numeric query is matched as a regex
            if not re.search(r'\D', query):
                _id_filter['bill_id'] = {'$regex': fixed_id}
            else:
                _id_filter['bill_id'] = fixed_id

            # only short-circuit when the id lookup found something
            result = db.bills.find(_id_filter, fields=bill_fields)
            if result.count():
                return result

        es_query = {"query_string": {"fields": ["text", "title"],
                                     "default_operator": "AND",
                                     "query": query}}
        search = pyes.Search(es_query, fields=[])

        # move the terms elasticsearch can filter on out of the mongo query
        es_terms = []
        for field in (settings.LEVEL_FIELD, 'session', 'chamber'):
            if field in _filter:
                es_terms.append(pyes.TermFilter(field, _filter.pop(field)))
        if 'subjects' in _filter:
            es_terms.append(pyes.TermFilter(
                'subjects', _filter.pop('subjects')['$all']))
        if 'sponsors.leg_id' in _filter:
            es_terms.append(pyes.TermFilter(
                'sponsors', _filter.pop('sponsors.leg_id')))

        if es_terms:
            search.filter = pyes.ANDFilter(es_terms)

        # page size is a guess, could use tweaks
        es_result = elasticsearch.search(search, search_type='scan',
                                         scroll='3m', size=250)
        doc_ids = [r.get_id() for r in es_result]
        _filter['versions.doc_id'] = {'$in': doc_ids}

        return db.bills.find(_filter, fields=bill_fields)
Esempio n. 4
0
    def search(query=None,
               abbr=None,
               chamber=None,
               subjects=None,
               bill_id=None,
               bill_id__in=None,
               search_window=None,
               updated_since=None,
               sponsor_id=None,
               bill_fields=None,
               status=None,
               type_=None,
               session=None):
        """Build a MongoDB filter from the arguments and query ``db.bills``.

        All arguments are optional.

        Args:
            query: free-text search string.  When
                ``settings.ENABLE_ELASTICSEARCH`` is set, a query containing
                a digit is first looked up as a bill id (normalised with
                ``fix_bill_id``); if that matches nothing, or there is no
                digit, a full-text search over ``text`` and ``title`` is
                run.  Otherwise the query becomes a case-insensitive regex
                on ``title``.
            abbr: value stored under ``settings.LEVEL_FIELD``.
            chamber: value for the ``chamber`` field.
            subjects: value for the ``subjects`` field; indexed with
                ``['$all']`` on the Elasticsearch path.
            bill_id: value for the ``bill_id`` field.
            bill_id__in: accepted but not used by this function.
            search_window: ``'session'``, ``'term'``, ``'all'``,
                ``'session:<name>'`` or ``'term:<name>'``.
            updated_since: date string handed to ``parse_param_dt``.
            sponsor_id: value for ``sponsors.leg_id``.
            bill_fields: passed to ``db.bills.find`` as its second argument.
            status: dict of extra conditions merged into the filter, e.g.
                ``{'action_dates.signed': {'$ne': None}}``.
            type_: value for the ``type`` field.
            session: value for the ``session`` field; overrides a session
                set through ``search_window``.

        Returns:
            The result of ``db.bills.find``.

        Raises:
            ValueError: for an unrecognised ``search_window`` or when
                ``parse_param_dt`` rejects ``updated_since``.
        """
        # NOTE(review): ``bill_id__in`` is never read below -- presumably it
        # should translate to a ``{'$in': ...}`` condition; confirm with
        # callers.
        _filter = {}
        for key, value in [
            (settings.LEVEL_FIELD, abbr),
            ('chamber', chamber),
            ('subjects', subjects),
            ('bill_id', bill_id),
        ]:
            if value is not None:
                _filter[key] = value

        if search_window:
            if search_window == 'session':
                _filter['_current_session'] = True
            elif search_window == 'term':
                _filter['_current_term'] = True
            elif search_window.startswith('session:'):
                # maxsplit=1: keep the whole remainder after the prefix
                _filter['session'] = search_window.split('session:', 1)[1]
            elif search_window.startswith('term:'):
                _filter['_term'] = search_window.split('term:', 1)[1]
            elif search_window == 'all':
                pass
            else:
                raise ValueError('invalid search_window. valid choices are '
                                 ' "term", "session", "all"')
        if updated_since:
            try:
                _filter['updated_at'] = {'$gte': parse_param_dt(updated_since)}
            except ValueError:
                raise ValueError('invalid updated_since parameter. '
                                 'please supply date in YYYY-MM-DD format')
        if sponsor_id:
            _filter['sponsors.leg_id'] = sponsor_id

        if status:
            # Status is slightly different: it's a dict like--
            # {'action_dates.signed': {'$ne': None}}
            _filter.update(status)

        if type_:
            _filter['type'] = type_

        if session:
            _filter['session'] = session

        # process full-text query
        if query and settings.ENABLE_ELASTICSEARCH:
            # block spammers, possibly move to a BANNED_SEARCH_LIST setting
            if '<a href' in query:
                return db.bills.find({settings.LEVEL_FIELD: None})

            # a query containing a digit is tried as a bill id first
            if re.search(r'\d', query):
                _id_filter = dict(_filter)
                _id_filter['bill_id'] = fix_bill_id(query).upper()
                # honour the requested projection here as well
                result = db.bills.find(_id_filter, bill_fields)
                # A cursor object is always truthy, so ``if result:`` never
                # fell through to the full-text search; count the matches
                # instead.
                if result.count():
                    return result

            es_query = {
                "query_string": {
                    "fields": ["text", "title"],
                    "default_operator": "AND",
                    "query": query
                }
            }
            search = pyes.Search(es_query, fields=[])

            # take terms from mongo query
            es_terms = []
            for field in (settings.LEVEL_FIELD, 'session', 'chamber'):
                if field in _filter:
                    es_terms.append(
                        pyes.TermFilter(field, _filter.pop(field)))
            if 'subjects' in _filter:
                es_terms.append(
                    pyes.TermFilter('subjects',
                                    _filter.pop('subjects')['$all']))
            if 'sponsors.leg_id' in _filter:
                es_terms.append(
                    pyes.TermFilter('sponsors',
                                    _filter.pop('sponsors.leg_id')))

            # add terms
            if es_terms:
                search.filter = pyes.ANDFilter(es_terms)

            # page size is a guess, could use tweaks
            es_result = elasticsearch.search(search,
                                             search_type='scan',
                                             scroll='3m',
                                             size=250)
            doc_ids = [r.get_id() for r in es_result]
            _filter['versions.doc_id'] = {'$in': doc_ids}
        elif query:
            _filter['title'] = {'$regex': query, '$options': 'i'}

        # return query
        return db.bills.find(_filter, bill_fields)