Beispiel #1
0
    def read(self, request, _id=None, events=[]):
        """Return events for an API request.

        Resolution order:

        1. If ``events`` is non-empty it is returned unchanged.
        2. If ``_id`` is given, the single matching document from
           ``db.events`` is returned (``None`` when nothing matches).
        3. Otherwise a list query is built from the GET parameters
           ``settings.LEVEL_FIELD``, ``type``, ``dtstart`` and ``dtend``.

        ``settings.LEVEL_FIELD`` and ``type`` accept a single value or a
        comma-separated list (translated to ``$in``).  When ``dtstart`` is
        absent the window starts seven days before now.

        Returns a list of at most 1000 event documents sorted by ``when``
        ascending, or the ``rc.BAD_REQUEST`` response when ``dtstart`` or
        ``dtend`` cannot be parsed.
        """
        if events:
            return events

        # Test the ``_id`` argument, not the ``id`` builtin: the builtin is
        # always truthy, which would make the list query below unreachable.
        if _id:
            return db.events.find_one({'_id': _id})

        spec = {}

        for key in (settings.LEVEL_FIELD, 'type'):
            value = request.GET.get(key)
            if not value:
                continue

            split = value.split(',')

            if len(split) == 1:
                spec[key] = value
            else:
                spec[key] = {'$in': split}

        invalid_date = False

        # Collect both bounds in a local dict so a bad ``dtstart`` cannot
        # leave ``spec['when']`` missing when ``dtend`` is processed.
        when = {}

        if 'dtstart' in request.GET:
            try:
                when['$gte'] = parse_param_dt(request.GET['dtstart'])
            except ValueError:
                invalid_date = True
        else:
            # By default, go back 7 days
            now = datetime.datetime.now()
            when['$gte'] = now - datetime.timedelta(7)

        if 'dtend' in request.GET:
            try:
                when['$lte'] = parse_param_dt(request.GET['dtend'])
            except ValueError:
                invalid_date = True

        if invalid_date:
            # NOTE(review): the message names ``updated_since`` although the
            # parameters parsed here are ``dtstart``/``dtend``; text is left
            # unchanged in case clients match on it -- confirm and reword.
            resp = rc.BAD_REQUEST
            resp.write(": invalid updated_since parameter."
                       " Please supply a date in YYYY-MM-DD format.")
            return resp

        spec['when'] = when

        cursor = db.events.find(spec, fields=_build_field_list(request))
        return list(cursor.sort('when', pymongo.ASCENDING).limit(1000))
Beispiel #2
0
    def read(self, request, id=None, events=[]):
        """Return events for an API request.

        Resolution order:

        1. If ``events`` is non-empty it is returned unchanged.
        2. If ``id`` is given, the single matching document from
           ``db.events`` is returned (``None`` when nothing matches).
        3. Otherwise a list query is built from the GET parameters
           ``settings.LEVEL_FIELD``, ``type``, ``dtstart`` and ``dtend``.

        ``settings.LEVEL_FIELD`` and ``type`` accept a single value or a
        comma-separated list (translated to ``$in``).  When ``dtstart`` is
        absent the window starts seven days before now.

        Returns a list of at most 1000 event documents sorted by ``when``
        ascending, or the ``rc.BAD_REQUEST`` response when ``dtstart`` or
        ``dtend`` cannot be parsed.
        """
        if events:
            return events

        # ``id`` here is the parameter (it shadows the builtin on purpose;
        # the name is part of the handler's interface).
        if id:
            return db.events.find_one({'_id': id})

        spec = {}

        for key in (settings.LEVEL_FIELD, 'type'):
            value = request.GET.get(key)
            if not value:
                continue

            split = value.split(',')

            if len(split) == 1:
                spec[key] = value
            else:
                spec[key] = {'$in': split}

        invalid_date = False

        # Collect both bounds in a local dict so a bad ``dtstart`` cannot
        # leave ``spec['when']`` missing (KeyError) when ``dtend`` is handled.
        when = {}

        if 'dtstart' in request.GET:
            try:
                when['$gte'] = parse_param_dt(request.GET['dtstart'])
            except ValueError:
                invalid_date = True
        else:
            # By default, go back 7 days
            now = datetime.datetime.now()
            when['$gte'] = now - datetime.timedelta(7)

        if 'dtend' in request.GET:
            try:
                when['$lte'] = parse_param_dt(request.GET['dtend'])
            except ValueError:
                invalid_date = True

        if invalid_date:
            # NOTE(review): the message names ``updated_since`` although the
            # parameters parsed here are ``dtstart``/``dtend``; text is left
            # unchanged in case clients match on it -- confirm and reword.
            resp = rc.BAD_REQUEST
            resp.write("invalid updated_since parameter."
                       " Please supply a date in YYYY-MM-DD format.")
            return resp

        spec['when'] = when

        cursor = db.events.find(spec, fields=_build_field_list(request))
        return list(cursor.sort('when', pymongo.ASCENDING).limit(1000))
Beispiel #3
0
    def search(query=None, abbr=None, chamber=None, subjects=None,
               bill_id=None, bill_id__in=None, search_window=None,
               updated_since=None, sponsor_id=None, bill_fields=None,
               status=None, type_=None, session=None):
        """Search bills and return the cursor from ``db.bills.find``.

        A MongoDB filter is assembled from the keyword arguments.  When
        ``query`` is given and ``settings.ENABLE_ELASTICSEARCH`` is true:

        * a query containing ``<a href`` returns a lookup for documents
          whose ``settings.LEVEL_FIELD`` is ``None``;
        * a query containing a digit is first tried as a bill id lookup
          and that cursor is returned if it counts any documents;
        * otherwise a full-text search is run through ``elasticsearch``
          and the resulting ids constrain ``versions.doc_id``.

        When Elasticsearch is disabled, ``query`` becomes a case-insensitive
        regex on ``title``.  ``bill_id__in`` is accepted but not used here.

        Raises ``ValueError`` for an unrecognised ``search_window`` or an
        unparseable ``updated_since``.
        """
        _filter = {}
        if abbr is not None:
            _filter[settings.LEVEL_FIELD] = abbr
        if chamber is not None:
            _filter['chamber'] = chamber
        if subjects is not None:
            _filter['subjects'] = subjects
        if bill_id is not None:
            _filter['bill_id'] = bill_id

        # restrict to a session/term window
        if search_window and search_window != 'all':
            if search_window == 'session':
                _filter['_current_session'] = True
            elif search_window == 'term':
                _filter['_current_term'] = True
            elif search_window.startswith('session:'):
                _filter['session'] = search_window.split('session:')[1]
            elif search_window.startswith('term:'):
                _filter['_term'] = search_window.split('term:')[1]
            else:
                raise ValueError('invalid search_window. valid choices are '
                                 ' "term", "session", "all"')

        if updated_since:
            try:
                since = parse_param_dt(updated_since)
            except ValueError:
                raise ValueError('invalid updated_since parameter. '
                                 'please supply date in YYYY-MM-DD format')
            _filter['updated_at'] = {'$gte': since}

        if sponsor_id:
            _filter['sponsors.leg_id'] = sponsor_id

        if status:
            # status arrives as a ready-made filter fragment, e.g.
            # {'action_dates.signed': {'$ne': None}}
            _filter.update(**status)

        if type_:
            _filter['type'] = type_

        if session:
            _filter['session'] = session

        if query:
            if settings.ENABLE_ELASTICSEARCH:
                # block spammers, possibly move to a BANNED_SEARCH_LIST
                # setting
                if '<a href' in query:
                    return db.bills.find({settings.LEVEL_FIELD: None})

                # a query with digits may be a bill id: try that first
                if re.search(r'\d+', query):
                    id_lookup = dict(_filter)
                    wanted_id = fix_bill_id(query).upper()
                    if re.search(r'\D', query) is None:
                        # purely numeric: match it as a regex
                        id_lookup['bill_id'] = {'$regex': wanted_id}
                    else:
                        id_lookup['bill_id'] = wanted_id

                    id_matches = db.bills.find(id_lookup, fields=bill_fields)
                    if id_matches.count():
                        return id_matches

                es_query = {"query_string": {"fields": ["text", "title"],
                                             "default_operator": "AND",
                                             "query": query}}
                es_search = pyes.Search(es_query, fields=[])

                # move the terms Elasticsearch can handle out of the mongo
                # filter: (mongo key, elasticsearch field)
                transfers = (
                    (settings.LEVEL_FIELD, settings.LEVEL_FIELD),
                    ('session', 'session'),
                    ('chamber', 'chamber'),
                    ('subjects', 'subjects'),
                    ('sponsors.leg_id', 'sponsors'),
                )
                es_terms = []
                for mongo_key, es_field in transfers:
                    if mongo_key not in _filter:
                        continue
                    term_value = _filter.pop(mongo_key)
                    if mongo_key == 'subjects':
                        term_value = term_value['$all']
                    es_terms.append(pyes.TermFilter(es_field, term_value))

                if es_terms:
                    es_search.filter = pyes.ANDFilter(es_terms)

                # page size is a guess, could use tweaks
                hits = elasticsearch.search(es_search, search_type='scan',
                                            scroll='3m', size=250)
                _filter['versions.doc_id'] = {
                    '$in': [hit.get_id() for hit in hits]
                }
            else:
                _filter['title'] = {'$regex': query, '$options': 'i'}

        return db.bills.find(_filter, fields=bill_fields)
Beispiel #4
0
    def search(query=None,
               abbr=None,
               chamber=None,
               subjects=None,
               bill_id=None,
               search_window=None,
               updated_since=None,
               last_action_since=None,
               sponsor_id=None,
               status=None,
               type_=None,
               session=None,
               bill_fields=None,
               sort=None,
               limit=None):
        """Build a bill search and wrap it in ``BillSearchResults``.

        Every constraint is routed either to a list of Elasticsearch terms
        or to a MongoDB filter.  Elasticsearch is used only when ``query``
        is given, ``settings.ENABLE_ELASTICSEARCH`` is true and the query
        contains no digit; a query with digits is treated as a bill id and
        sent to MongoDB instead.

        ``status`` is a list of action-date names; ``sort`` is mapped onto
        an ``action_dates.*`` field unless it is ``updated_at`` or
        ``created_at``.  ``limit`` is accepted but not used here.

        Raises ``PermissionDenied`` when ``query`` contains ``<a href`` and
        ``ValueError`` for an unrecognised ``search_window`` or (on the
        MongoDB path) an unparseable date parameter.
        """
        mongo_filter = {}
        es_terms = []
        numeric_query = False
        use_elasticsearch = False

        if status is None:
            status = []

        if query:
            use_elasticsearch = settings.ENABLE_ELASTICSEARCH

            # spammers get a 400
            if '<a href' in query:
                raise PermissionDenied('html detected')

            # a query with digits is treated as a bill id and sent to mongo
            #   (TODO: maybe this should be an $or)
            if re.search(r'\d+', query):
                wanted_id = fix_bill_id(query).upper()
                if re.search(r'\D', query) is None:
                    # digits only: match as a regex
                    mongo_filter['bill_id'] = {'$regex': wanted_id}
                else:
                    mongo_filter['bill_id'] = wanted_id
                numeric_query = True
                use_elasticsearch = False

        # jurisdiction
        if abbr:
            if use_elasticsearch:
                es_terms.append({'term': {'jurisdiction': abbr}})
            else:
                mongo_filter[settings.LEVEL_FIELD] = abbr

        # sponsor
        if sponsor_id:
            if use_elasticsearch:
                es_terms.append({'term': {'sponsor_ids': sponsor_id}})
            else:
                mongo_filter['sponsors.leg_id'] = sponsor_id

        # plain equality terms (chamber, bill_id, type, session)
        if not use_elasticsearch and isinstance(bill_id, list):
            bill_id = {'$in': bill_id}
        simple_args = {
            'chamber': chamber,
            'bill_id': bill_id,
            'type': type_,
            'session': session
        }
        if search_window and search_window != 'all':
            if search_window == 'session':
                simple_args['_current_session'] = True
            elif search_window == 'term':
                simple_args['_current_term'] = True
            elif search_window.startswith('session:'):
                simple_args['session'] = search_window.split('session:')[1]
            elif search_window.startswith('term:'):
                simple_args['_term'] = search_window.split('term:')[1]
            else:
                raise ValueError('invalid search_window. valid choices are '
                                 ' "term", "session", "all"')
        for name, wanted in simple_args.items():
            if wanted is None:
                continue
            if use_elasticsearch:
                es_terms.append({'term': {name: wanted}})
            else:
                mongo_filter[name] = wanted

        if subjects:
            if use_elasticsearch:
                es_terms.extend({'term': {'subjects': subject}}
                                for subject in subjects)
            else:
                mongo_filter['subjects'] = {'$all': filter(None, subjects)}

        if updated_since:
            if use_elasticsearch:
                es_terms.append(
                    {'range': {'updated_at': {'gte': updated_since}}})
            else:
                try:
                    updated_dt = parse_param_dt(updated_since)
                except ValueError:
                    raise ValueError('invalid updated_since parameter. '
                                     'please supply date in YYYY-MM-DD format')
                mongo_filter['updated_at'] = {'$gte': updated_dt}

        if last_action_since:
            if use_elasticsearch:
                es_terms.append(
                    {'range': {'action_dates.last': {
                        'gte': last_action_since}}})
            else:
                try:
                    last_action_dt = parse_param_dt(last_action_since)
                except ValueError:
                    raise ValueError('invalid last_action_since parameter. '
                                     'please supply date in YYYY-MM-DD format')
                mongo_filter['action_dates.last'] = {'$gte': last_action_dt}

        # Each status name becomes a clause such as
        # {'action_dates.signed': {'$ne': None}}
        status_spec = [{'action_dates.%s' % name: {'$ne': None}}
                       for name in status]
        if len(status_spec) == 1:
            status_spec = status_spec[0]
        elif len(status_spec) > 1:
            status_spec = {'$and': status_spec}

        if status_spec:
            if use_elasticsearch:
                for key in status:
                    es_terms.append({'exists': {'field': key}})
            else:
                mongo_filter.update(**status_spec)

        # normalise the sort key
        action_date_sorts = ('first', 'last', 'signed', 'passed_lower',
                             'passed_upper')
        if sort in action_date_sorts:
            sort = 'action_dates.' + sort
        elif sort not in ('updated_at', 'created_at'):
            sort = 'action_dates.last'

        if query:
            if use_elasticsearch:
                text_query = {
                    "query_string": {
                        "fields": ["text", "title"],
                        "default_operator": "AND",
                        "query": query
                    }
                }
                if es_terms:
                    es_body = {
                        'query': {
                            'filtered': {
                                'query': text_query,
                                'filter': {'and': es_terms}
                            }
                        }
                    }
                else:
                    es_body = {'query': text_query}
                es_body['fields'] = []
                return BillSearchResults(es_body, None, sort, bill_fields)

            if not numeric_query:
                mongo_filter['title'] = {'$regex': query, '$options': 'i'}

        return BillSearchResults(None, mongo_filter, sort, bill_fields)
Beispiel #5
0
    def search(query=None,
               abbr=None,
               chamber=None,
               subjects=None,
               bill_id=None,
               bill_id__in=None,
               search_window=None,
               updated_since=None,
               sponsor_id=None,
               bill_fields=None,
               status=None,
               type_=None,
               session=None):
        """Search bills and return the cursor from ``db.bills.find``.

        A MongoDB filter is assembled from the keyword arguments.  When
        ``query`` is given and ``settings.ENABLE_ELASTICSEARCH`` is true:

        * a query containing ``<a href`` returns a lookup for documents
          whose ``settings.LEVEL_FIELD`` is ``None``;
        * a query containing a digit is first tried as a bill id lookup and
          that cursor is returned only if it actually matches documents;
        * otherwise a full-text search is run through ``elasticsearch``
          and the resulting ids constrain ``versions.doc_id``.

        When Elasticsearch is disabled, ``query`` becomes a case-insensitive
        regex on ``title``.  ``bill_id__in`` is accepted but not used here.

        Raises ``ValueError`` for an unrecognised ``search_window`` or an
        unparseable ``updated_since``.
        """
        _filter = {}
        for key, value in [
            (settings.LEVEL_FIELD, abbr),
            ('chamber', chamber),
            ('subjects', subjects),
            ('bill_id', bill_id),
        ]:
            if value is not None:
                _filter[key] = value

        if search_window:
            if search_window == 'session':
                _filter['_current_session'] = True
            elif search_window == 'term':
                _filter['_current_term'] = True
            elif search_window.startswith('session:'):
                _filter['session'] = search_window.split('session:')[1]
            elif search_window.startswith('term:'):
                _filter['_term'] = search_window.split('term:')[1]
            elif search_window == 'all':
                pass
            else:
                raise ValueError('invalid search_window. valid choices are '
                                 ' "term", "session", "all"')
        if updated_since:
            try:
                _filter['updated_at'] = {'$gte': parse_param_dt(updated_since)}
            except ValueError:
                raise ValueError('invalid updated_since parameter. '
                                 'please supply date in YYYY-MM-DD format')
        if sponsor_id:
            _filter['sponsors.leg_id'] = sponsor_id

        if status:
            # Status is slightly different: it's a dict like--
            # {'action_dates.signed': {'$ne': None}}
            _filter.update(**status)

        if type_:
            _filter['type'] = type_

        if session:
            _filter['session'] = session

        # process full-text query
        if query and settings.ENABLE_ELASTICSEARCH:
            # block spammers, possibly move to a BANNED_SEARCH_LIST setting
            if '<a href' in query:
                return db.bills.find({settings.LEVEL_FIELD: None})

            # a query containing digits may be a bill id: try that first
            if re.findall(r'\d+', query):
                _id_filter = dict(_filter)
                _id_filter['bill_id'] = fix_bill_id(query).upper()
                # honour the caller's field selection, as the final query
                # below does
                result = db.bills.find(_id_filter, bill_fields)
                # A cursor object is truthy even when it matches nothing,
                # so ask for the count; otherwise the full-text search
                # below could never run for queries containing a digit.
                if result.count():
                    return result

            query = {
                "query_string": {
                    "fields": ["text", "title"],
                    "default_operator": "AND",
                    "query": query
                }
            }
            search = pyes.Search(query, fields=[])

            # take terms from mongo query
            es_terms = []
            if settings.LEVEL_FIELD in _filter:
                es_terms.append(
                    pyes.TermFilter(settings.LEVEL_FIELD,
                                    _filter.pop(settings.LEVEL_FIELD)))
            if 'session' in _filter:
                es_terms.append(
                    pyes.TermFilter('session', _filter.pop('session')))
            if 'chamber' in _filter:
                es_terms.append(
                    pyes.TermFilter('chamber', _filter.pop('chamber')))
            if 'subjects' in _filter:
                # subjects is expected to be a {'$all': [...]} fragment here
                es_terms.append(
                    pyes.TermFilter('subjects',
                                    _filter.pop('subjects')['$all']))
            if 'sponsors.leg_id' in _filter:
                es_terms.append(
                    pyes.TermFilter('sponsors',
                                    _filter.pop('sponsors.leg_id')))

            # add terms
            if es_terms:
                search.filter = pyes.ANDFilter(es_terms)

            # page size is a guess, could use tweaks
            es_result = elasticsearch.search(search,
                                             search_type='scan',
                                             scroll='3m',
                                             size=250)
            doc_ids = [r.get_id() for r in es_result]
            _filter['versions.doc_id'] = {'$in': doc_ids}
        elif query:
            _filter['title'] = {'$regex': query, '$options': 'i'}

        # return query
        return db.bills.find(_filter, bill_fields)
Beispiel #6
0
    def search(query=None, state=None, chamber=None, subjects=None,
               bill_id=None, bill_id__in=None, search_window=None,
               updated_since=None, sponsor_id=None, bill_fields=None):
        """Search bills and return the cursor from ``db.bills.find``.

        A MongoDB filter is assembled from ``state``, ``chamber``,
        ``subjects``, ``search_window``, ``updated_since`` and
        ``sponsor_id``.  When ``query`` is given, a full-text search is run
        through ``elasticsearch``; the state/session/chamber/subjects/
        sponsor constraints are moved onto that search and the ids it
        returns constrain ``versions.doc_id`` in the MongoDB filter.

        ``bill_id`` and ``bill_id__in`` are accepted but not used here.

        Raises ``ValueError`` for an unrecognised ``search_window`` or an
        unparseable ``updated_since``.
        """
        _filter = {}
        if state is not None:
            _filter['state'] = state
        if chamber is not None:
            _filter['chamber'] = chamber
        if subjects is not None:
            _filter['subjects'] = subjects

        # restrict to a session/term window
        if search_window and search_window != 'all':
            if search_window == 'session':
                _filter['_current_session'] = True
            elif search_window == 'term':
                _filter['_current_term'] = True
            elif search_window.startswith('session:'):
                _filter['session'] = search_window.split('session:')[1]
            elif search_window.startswith('term:'):
                _filter['_term'] = search_window.split('term:')[1]
            else:
                raise ValueError('invalid search_window. valid choices are '
                                 ' "term", "session", "all"')

        if updated_since:
            try:
                since = parse_param_dt(updated_since)
            except ValueError:
                raise ValueError('invalid updated_since parameter. '
                                 'please supply date in YYYY-MM-DD format')
            _filter['updated_at'] = {'$gte': since}

        if sponsor_id:
            _filter['sponsors.leg_id'] = sponsor_id

        if not query:
            return db.bills.find(_filter, bill_fields)

        # full-text search through elasticsearch
        es_query = {"query_string": {"fields": ["text", "title"],
                                     "default_operator": "AND",
                                     "query": query}}
        es_search = pyes.Search(es_query, fields=[])

        # move the terms elasticsearch can handle out of the mongo filter:
        # (mongo key, elasticsearch field)
        transfers = (
            ('state', 'state'),
            ('session', 'session'),
            ('chamber', 'chamber'),
            ('subjects', 'subjects'),
            ('sponsors.leg_id', 'sponsors'),
        )
        es_terms = []
        for mongo_key, es_field in transfers:
            if mongo_key not in _filter:
                continue
            term_value = _filter.pop(mongo_key)
            if mongo_key == 'subjects':
                term_value = term_value['$all']
            es_terms.append(pyes.TermFilter(es_field, term_value))

        if es_terms:
            es_search.filter = pyes.ANDFilter(es_terms)

        # page size is a guess, could use tweaks
        hits = elasticsearch.search(es_search, search_type='scan',
                                    scroll='3m', size=250)
        _filter['versions.doc_id'] = {'$in': [hit.get_id() for hit in hits]}

        return db.bills.find(_filter, bill_fields)
Beispiel #7
0
    def search(query=None, abbr=None, chamber=None, subjects=None,
               bill_id=None, search_window=None, updated_since=None,
               last_action_since=None, sponsor_id=None, status=None,
               type_=None, session=None, bill_fields=None,
               sort=None, limit=None):
        """Translate search arguments into a ``BillSearchResults`` object.

        Constraints go to Elasticsearch terms when the search is a
        full-text one (``query`` given, ``settings.ENABLE_ELASTICSEARCH``
        true, no digit in the query) and to a MongoDB filter otherwise.  A
        query containing digits is looked up as a bill id in MongoDB.

        ``status`` is a list of action-date names.  ``sort`` falls back to
        ``action_dates.last`` unless it names a known action date,
        ``updated_at`` or ``created_at``.  ``limit`` is accepted but not
        used here.

        Raises ``PermissionDenied`` when ``query`` contains ``<a href`` and
        ``ValueError`` for an unrecognised ``search_window`` or (on the
        MongoDB path) an unparseable date parameter.
        """
        es_terms = []
        mongo_filter = {}
        use_elasticsearch = False
        numeric_query = False

        if status is None:
            status = []

        if query:
            use_elasticsearch = settings.ENABLE_ELASTICSEARCH

            # spammers get a 400
            if '<a href' in query:
                raise PermissionDenied('html detected')

            # digits in the query: treat it as a bill id and hit mongo
            #   (TODO: maybe this should be an $or)
            has_digit = re.search(r'\d+', query) is not None
            if has_digit:
                only_digits = re.search(r'\D', query) is None
                bill_id_value = fix_bill_id(query).upper()
                if only_digits:
                    bill_id_value = {'$regex': bill_id_value}
                mongo_filter['bill_id'] = bill_id_value
                use_elasticsearch = False
                numeric_query = True

        # handle abbr
        if abbr:
            if use_elasticsearch:
                es_terms.append({'term': {'jurisdiction': abbr}})
            else:
                mongo_filter[settings.LEVEL_FIELD] = abbr

        # sponsor_id
        if sponsor_id:
            if use_elasticsearch:
                es_terms.append({'term': {'sponsor_ids': sponsor_id}})
            else:
                mongo_filter['sponsors.leg_id'] = sponsor_id

        # simple equality arguments (chamber, bill_id, type, session)
        if isinstance(bill_id, list) and not use_elasticsearch:
            bill_id = {'$in': bill_id}
        simple_args = {'chamber': chamber, 'bill_id': bill_id, 'type': type_,
                       'session': session}
        if search_window == 'session':
            simple_args['_current_session'] = True
        elif search_window == 'term':
            simple_args['_current_term'] = True
        elif search_window and search_window != 'all':
            if search_window.startswith('session:'):
                simple_args['session'] = search_window.split('session:')[1]
            elif search_window.startswith('term:'):
                simple_args['_term'] = search_window.split('term:')[1]
            else:
                raise ValueError('invalid search_window. valid choices are '
                                 ' "term", "session", "all"')
        for field, field_value in simple_args.items():
            if field_value is None:
                continue
            if use_elasticsearch:
                es_terms.append({'term': {field: field_value}})
            else:
                mongo_filter[field] = field_value

        if subjects:
            if use_elasticsearch:
                for subject in subjects:
                    es_terms.append({'term': {'subjects': subject}})
            else:
                mongo_filter['subjects'] = {'$all': filter(None, subjects)}

        if updated_since:
            if use_elasticsearch:
                es_terms.append(
                    {'range': {'updated_at': {'gte': updated_since}}})
            else:
                try:
                    parsed_updated = parse_param_dt(updated_since)
                except ValueError:
                    raise ValueError('invalid updated_since parameter. '
                                     'please supply date in YYYY-MM-DD format')
                mongo_filter['updated_at'] = {'$gte': parsed_updated}

        if last_action_since:
            if use_elasticsearch:
                es_terms.append({'range': {'action_dates.last':
                                           {'gte': last_action_since}}})
            else:
                try:
                    parsed_action = parse_param_dt(last_action_since)
                except ValueError:
                    raise ValueError('invalid last_action_since parameter. '
                                     'please supply date in YYYY-MM-DD format')
                mongo_filter['action_dates.last'] = {'$gte': parsed_action}

        # Status comes in as a list; each entry becomes a clause like
        # {'action_dates.signed': {'$ne': None}}
        clauses = [{'action_dates.%s' % entry: {'$ne': None}}
                   for entry in status]
        if len(clauses) > 1:
            status_spec = {'$and': clauses}
        elif len(clauses) == 1:
            status_spec = clauses[0]
        else:
            status_spec = clauses

        if status_spec:
            if use_elasticsearch:
                es_terms.extend({'exists': {'field': key}} for key in status)
            else:
                mongo_filter.update(**status_spec)

        # preprocess sort
        if sort in ('first', 'last', 'signed', 'passed_lower', 'passed_upper'):
            sort = 'action_dates.' + sort
        elif sort not in ('updated_at', 'created_at'):
            sort = 'action_dates.last'

        # full-text search goes to elasticsearch
        if query and use_elasticsearch:
            es_request = {'query': {"query_string": {
                "fields": ["text", "title"],
                "default_operator": "AND",
                "query": query}}}
            if es_terms:
                es_request['filter'] = {'and': es_terms}
                es_request = {'query': {'filtered': es_request}}
            es_request['fields'] = []
            return BillSearchResults(es_request, None, sort, bill_fields)

        # non-numeric query without elasticsearch: regex on the title
        if query and not numeric_query:
            mongo_filter['title'] = {'$regex': query, '$options': 'i'}

        return BillSearchResults(None, mongo_filter, sort, bill_fields)