def read(self, request, _id=None, events=[]):
    """Return events for an API request.

    Resolution order:

    1. A non-empty ``events`` argument is returned unchanged.
    2. A truthy ``_id`` returns the result of
       ``db.events.find_one({'_id': _id})``.
    3. Otherwise a filter is built from ``request.GET`` and at most 1000
       matching events are returned as a list, sorted ascending by
       ``'when'``.

    Query parameters read from ``request.GET``:

    * ``settings.LEVEL_FIELD`` and ``type`` -- a single value is matched
      exactly; a comma-separated value becomes a ``$in`` match.
    * ``dtstart`` -- lower bound on ``'when'``; when absent the lower bound
      defaults to seven days before ``datetime.datetime.now()``.
    * ``dtend`` -- optional upper bound on ``'when'``.

    If either date fails to parse (``parse_param_dt`` raises
    ``ValueError``), the ``rc.BAD_REQUEST`` response is returned with an
    explanatory message appended.
    """
    # NOTE: the ``events`` default is a shared list, but it is only read
    # here (never mutated), so it is left as-is to keep the signature stable.
    if events:
        return events

    # This previously tested the builtin ``id`` (always truthy), which made
    # every request fall into the single-document lookup.
    if _id:
        return db.events.find_one({'_id': _id})

    spec = {}

    for key in (settings.LEVEL_FIELD, 'type'):
        value = request.GET.get(key)
        if not value:
            continue

        split = value.split(',')
        if len(split) == 1:
            spec[key] = value
        else:
            spec[key] = {'$in': split}

    # Collect the date bounds separately so that a bad ``dtstart`` cannot
    # make the ``dtend`` branch fail with a KeyError on a missing 'when'.
    when = {}
    invalid_date = False

    if 'dtstart' in request.GET:
        try:
            when['$gte'] = parse_param_dt(request.GET['dtstart'])
        except ValueError:
            invalid_date = True
    else:
        # By default, go back 7 days
        now = datetime.datetime.now()
        when['$gte'] = now - datetime.timedelta(7)

    if 'dtend' in request.GET:
        try:
            when['$lte'] = parse_param_dt(request.GET['dtend'])
        except ValueError:
            invalid_date = True

    if invalid_date:
        # NOTE(review): the message names ``updated_since`` although the
        # parameters parsed here are ``dtstart``/``dtend``; text kept
        # unchanged for existing clients.
        resp = rc.BAD_REQUEST
        resp.write(": invalid updated_since parameter."
                   " Please supply a date in YYYY-MM-DD format.")
        return resp

    spec['when'] = when

    cursor = db.events.find(spec, fields=_build_field_list(request))
    return list(cursor.sort('when', pymongo.ASCENDING).limit(1000))
def read(self, request, id=None, events=[]):
    """Return events for an API request.

    Resolution order:

    1. A non-empty ``events`` argument is returned unchanged.
    2. A truthy ``id`` returns the result of
       ``db.events.find_one({'_id': id})``.
    3. Otherwise a filter is built from ``request.GET`` and at most 1000
       matching events are returned as a list, sorted ascending by
       ``'when'``.

    Query parameters read from ``request.GET``:

    * ``settings.LEVEL_FIELD`` and ``type`` -- a single value is matched
      exactly; a comma-separated value becomes a ``$in`` match.
    * ``dtstart`` -- lower bound on ``'when'``; when absent the lower bound
      defaults to seven days before ``datetime.datetime.now()``.
    * ``dtend`` -- optional upper bound on ``'when'``.

    If either date fails to parse (``parse_param_dt`` raises
    ``ValueError``), the ``rc.BAD_REQUEST`` response is returned with an
    explanatory message appended.
    """
    # NOTE: ``id`` shadows the builtin and ``events`` has a mutable default;
    # both are part of the public signature and ``events`` is only read, so
    # they are left untouched.
    if events:
        return events

    if id:
        return db.events.find_one({'_id': id})

    spec = {}

    for key in (settings.LEVEL_FIELD, 'type'):
        value = request.GET.get(key)
        if not value:
            continue

        split = value.split(',')
        if len(split) == 1:
            spec[key] = value
        else:
            spec[key] = {'$in': split}

    # Collect the date bounds separately so that a bad ``dtstart`` cannot
    # make the ``dtend`` branch fail with a KeyError on a missing 'when'.
    when = {}
    invalid_date = False

    if 'dtstart' in request.GET:
        try:
            when['$gte'] = parse_param_dt(request.GET['dtstart'])
        except ValueError:
            invalid_date = True
    else:
        # By default, go back 7 days
        now = datetime.datetime.now()
        when['$gte'] = now - datetime.timedelta(7)

    if 'dtend' in request.GET:
        try:
            when['$lte'] = parse_param_dt(request.GET['dtend'])
        except ValueError:
            invalid_date = True

    if invalid_date:
        # NOTE(review): the message names ``updated_since`` although the
        # parameters parsed here are ``dtstart``/``dtend``; text kept
        # unchanged for existing clients.
        resp = rc.BAD_REQUEST
        resp.write("invalid updated_since parameter."
                   " Please supply a date in YYYY-MM-DD format.")
        return resp

    spec['when'] = when

    cursor = db.events.find(spec, fields=_build_field_list(request))
    return list(cursor.sort('when', pymongo.ASCENDING).limit(1000))
def search(query=None, abbr=None, chamber=None, subjects=None,
           bill_id=None, bill_id__in=None, search_window=None,
           updated_since=None, sponsor_id=None, bill_fields=None,
           status=None, type_=None, session=None):
    """Query ``db.bills`` and return the resulting cursor.

    A Mongo spec is assembled from the keyword arguments.  When ``query`` is
    given and ``settings.ENABLE_ELASTICSEARCH`` is truthy, a query containing
    digits is first tried as a bill id; failing that, a full-text search is
    run through ``elasticsearch`` and the hits are fed back to Mongo via
    ``versions.doc_id``.  When ElasticSearch is disabled, ``query`` becomes a
    case-insensitive regex on ``title``.

    ``search_window`` accepts ``"session"``, ``"term"``, ``"session:<x>"``,
    ``"term:<x>"`` or ``"all"``.  ``status`` is expected to be a dict of
    Mongo conditions and is merged into the spec as-is.  ``bill_id__in`` is
    accepted but not used by this function.

    Raises:
        ValueError: for an unrecognised ``search_window`` or when
            ``parse_param_dt`` rejects ``updated_since``.
    """
    candidates = (
        (settings.LEVEL_FIELD, abbr),
        ('chamber', chamber),
        ('subjects', subjects),
        ('bill_id', bill_id),
    )
    spec = {field: given for field, given in candidates if given is not None}

    if search_window:
        if search_window == 'session':
            spec['_current_session'] = True
        elif search_window == 'term':
            spec['_current_term'] = True
        elif search_window.startswith('session:'):
            spec['session'] = search_window.split('session:')[1]
        elif search_window.startswith('term:'):
            spec['_term'] = search_window.split('term:')[1]
        elif search_window != 'all':
            raise ValueError('invalid search_window. valid choices are '
                             ' "term", "session", "all"')

    if updated_since:
        try:
            since = parse_param_dt(updated_since)
        except ValueError:
            raise ValueError('invalid updated_since parameter. '
                             'please supply date in YYYY-MM-DD format')
        spec['updated_at'] = {'$gte': since}

    if sponsor_id:
        spec['sponsors.leg_id'] = sponsor_id

    if status:
        # e.g. {'action_dates.signed': {'$ne': None}}
        spec.update(**status)

    if type_:
        spec['type'] = type_

    # An explicit session overrides one derived from search_window.
    if session:
        spec['session'] = session

    # No free-text query: plain Mongo lookup.
    if not query:
        return db.bills.find(spec, fields=bill_fields)

    # ElasticSearch disabled: fall back to a title regex.
    if not settings.ENABLE_ELASTICSEARCH:
        spec['title'] = {'$regex': query, '$options': 'i'}
        return db.bills.find(spec, fields=bill_fields)

    # Spam guard (could become a BANNED_SEARCH_LIST setting).
    if '<a href' in query:
        return db.bills.find({settings.LEVEL_FIELD: None})

    # A query containing digits is tried as a bill id before full-text.
    if re.findall(r'\d+', query):
        id_spec = dict(spec)
        if re.findall(r'\D', query):
            id_spec['bill_id'] = fix_bill_id(query).upper()
        else:
            # digits only: match as a regex
            id_spec['bill_id'] = {'$regex': fix_bill_id(query).upper()}

        by_id = db.bills.find(id_spec, fields=bill_fields)
        if by_id.count():
            return by_id

    es_query = {"query_string": {"fields": ["text", "title"],
                                 "default_operator": "AND",
                                 "query": query}}
    es_search = pyes.Search(es_query, fields=[])

    # Move these constraints out of the Mongo spec and into ES term
    # filters: (mongo key, ES field, value is wrapped in {'$all': ...}).
    transfers = (
        (settings.LEVEL_FIELD, settings.LEVEL_FIELD, False),
        ('session', 'session', False),
        ('chamber', 'chamber', False),
        ('subjects', 'subjects', True),
        ('sponsors.leg_id', 'sponsors', False),
    )
    term_filters = []
    for mongo_key, es_field, unwrap_all in transfers:
        if mongo_key not in spec:
            continue
        term_value = spec.pop(mongo_key)
        if unwrap_all:
            term_value = term_value['$all']
        term_filters.append(pyes.TermFilter(es_field, term_value))

    if term_filters:
        es_search.filter = pyes.ANDFilter(term_filters)

    # page size is a guess, could use tweaks
    hits = elasticsearch.search(es_search, search_type='scan',
                                scroll='3m', size=250)
    spec['versions.doc_id'] = {'$in': [hit.get_id() for hit in hits]}

    return db.bills.find(spec, fields=bill_fields)
def search(query=None, abbr=None, chamber=None, subjects=None, bill_id=None,
           search_window=None, updated_since=None, last_action_since=None,
           sponsor_id=None, status=None, type_=None, session=None,
           bill_fields=None, sort=None, limit=None):
    """Build a bill search and return it wrapped in ``BillSearchResults``.

    Each constraint is routed either to a list of ElasticSearch filter
    clauses or to a Mongo spec.  ElasticSearch is used only when ``query``
    is given, ``settings.ENABLE_ELASTICSEARCH`` is truthy and ``query``
    contains no digits; a query with digits is turned into a ``bill_id``
    match against Mongo instead.

    ``search_window`` accepts ``"session"``, ``"term"``, ``"session:<x>"``,
    ``"term:<x>"`` or ``"all"``.  ``status`` is an iterable of names, each
    turned into an ``action_dates.<name>`` not-null condition for Mongo.
    ``sort`` is normalised to an ``action_dates.*`` key, ``updated_at`` or
    ``created_at``, defaulting to ``action_dates.last``.  ``limit`` is
    accepted but not used by this function.

    Raises:
        PermissionDenied: when ``query`` contains ``'<a href'``.
        ValueError: for an unrecognised ``search_window`` or, on the Mongo
            path, when ``parse_param_dt`` rejects a date argument.
    """
    wanted_statuses = [] if status is None else status
    mongo_spec = {}
    es_clauses = []
    via_es = False
    id_lookup = False

    if query:
        via_es = settings.ENABLE_ELASTICSEARCH

        # queries carrying an HTML anchor are rejected outright
        if '<a href' in query:
            raise PermissionDenied('html detected')

        # a query containing digits becomes a Mongo bill_id match
        # (TODO: maybe this should be an $or)
        if re.findall(r'\d+', query):
            if re.findall(r'\D', query):
                mongo_spec['bill_id'] = fix_bill_id(query).upper()
            else:
                # digits only: match as a regex
                mongo_spec['bill_id'] = {
                    '$regex': fix_bill_id(query).upper()
                }
            via_es = False
            id_lookup = True

    if abbr:
        if via_es:
            es_clauses.append({'term': {'jurisdiction': abbr}})
        else:
            mongo_spec[settings.LEVEL_FIELD] = abbr

    if sponsor_id:
        if via_es:
            es_clauses.append({'term': {'sponsor_ids': sponsor_id}})
        else:
            mongo_spec['sponsors.leg_id'] = sponsor_id

    # plain equality arguments (chamber, bill_id, type, session)
    if isinstance(bill_id, list) and not via_es:
        bill_id = {'$in': bill_id}
    term_args = {
        'chamber': chamber,
        'bill_id': bill_id,
        'type': type_,
        'session': session
    }

    if search_window:
        if search_window == 'session':
            term_args['_current_session'] = True
        elif search_window == 'term':
            term_args['_current_term'] = True
        elif search_window.startswith('session:'):
            term_args['session'] = search_window.split('session:')[1]
        elif search_window.startswith('term:'):
            term_args['_term'] = search_window.split('term:')[1]
        elif search_window != 'all':
            raise ValueError('invalid search_window. valid choices are '
                             ' "term", "session", "all"')

    for field, given in term_args.items():
        if given is None:
            continue
        if via_es:
            es_clauses.append({'term': {field: given}})
        else:
            mongo_spec[field] = given

    if subjects:
        if via_es:
            es_clauses.extend({'term': {'subjects': subject}}
                              for subject in subjects)
        else:
            mongo_spec['subjects'] = {'$all': filter(None, subjects)}

    if updated_since:
        if via_es:
            es_clauses.append(
                {'range': {'updated_at': {'gte': updated_since}}})
        else:
            try:
                mongo_spec['updated_at'] = {
                    '$gte': parse_param_dt(updated_since)
                }
            except ValueError:
                raise ValueError('invalid updated_since parameter. '
                                 'please supply date in YYYY-MM-DD format')

    if last_action_since:
        if via_es:
            es_clauses.append(
                {'range': {'action_dates.last': {'gte': last_action_since}}})
        else:
            try:
                mongo_spec['action_dates.last'] = {
                    '$gte': parse_param_dt(last_action_since)
                }
            except ValueError:
                raise ValueError('invalid last_action_since parameter. '
                                 'please supply date in YYYY-MM-DD format')

    # each status name becomes {'action_dates.<name>': {'$ne': None}}
    status_clauses = [{'action_dates.%s' % name: {'$ne': None}}
                      for name in wanted_statuses]
    if status_clauses:
        if via_es:
            # NOTE(review): the ES field is the bare status name while the
            # Mongo key is 'action_dates.<name>' -- confirm against the
            # index mapping.
            es_clauses.extend({'exists': {'field': name}}
                              for name in wanted_statuses)
        elif len(status_clauses) == 1:
            mongo_spec.update(**status_clauses[0])
        else:
            mongo_spec.update(**{'$and': status_clauses})

    # normalise the sort key
    if sort in ('first', 'last', 'signed', 'passed_lower', 'passed_upper'):
        sort_key = 'action_dates.' + sort
    elif sort in ('updated_at', 'created_at'):
        sort_key = sort
    else:
        sort_key = 'action_dates.last'

    if query and via_es:
        es_search = {
            'query': {
                "query_string": {
                    "fields": ["text", "title"],
                    "default_operator": "AND",
                    "query": query
                }
            }
        }
        if es_clauses:
            es_search['filter'] = {'and': es_clauses}
            es_search = {'query': {'filtered': es_search}}
        es_search['fields'] = []
        return BillSearchResults(es_search, None, sort_key, bill_fields)

    if query and not id_lookup:
        mongo_spec['title'] = {'$regex': query, '$options': 'i'}

    return BillSearchResults(None, mongo_spec, sort_key, bill_fields)
def search(query=None, abbr=None, chamber=None, subjects=None,
           bill_id=None, bill_id__in=None, search_window=None,
           updated_since=None, sponsor_id=None, bill_fields=None,
           status=None, type_=None, session=None):
    """Query ``db.bills`` and return the resulting cursor.

    A Mongo spec is assembled from the keyword arguments.  When ``query`` is
    given and ``settings.ENABLE_ELASTICSEARCH`` is truthy, a query containing
    digits is first tried as an exact bill id; if that matches nothing, a
    full-text search is run through ``elasticsearch`` and the hits are fed
    back to Mongo via ``versions.doc_id``.  When ElasticSearch is disabled,
    ``query`` becomes a case-insensitive regex on ``title``.

    ``search_window`` accepts ``"session"``, ``"term"``, ``"session:<x>"``,
    ``"term:<x>"`` or ``"all"``.  ``status`` is expected to be a dict of
    Mongo conditions and is merged into the spec as-is.  ``bill_id__in`` is
    accepted but not used by this function.

    Raises:
        ValueError: for an unrecognised ``search_window`` or when
            ``parse_param_dt`` rejects ``updated_since``.
    """
    _filter = {}
    for key, value in [
        (settings.LEVEL_FIELD, abbr),
        ('chamber', chamber),
        ('subjects', subjects),
        ('bill_id', bill_id),
    ]:
        if value is not None:
            _filter[key] = value

    if search_window:
        if search_window == 'session':
            _filter['_current_session'] = True
        elif search_window == 'term':
            _filter['_current_term'] = True
        elif search_window.startswith('session:'):
            _filter['session'] = search_window.split('session:')[1]
        elif search_window.startswith('term:'):
            _filter['_term'] = search_window.split('term:')[1]
        elif search_window != 'all':
            raise ValueError('invalid search_window. valid choices are '
                             ' "term", "session", "all"')

    if updated_since:
        try:
            _filter['updated_at'] = {'$gte': parse_param_dt(updated_since)}
        except ValueError:
            raise ValueError('invalid updated_since parameter. '
                             'please supply date in YYYY-MM-DD format')

    if sponsor_id:
        _filter['sponsors.leg_id'] = sponsor_id

    if status:
        # Status is slightly different: it's a dict like--
        # {'action_dates.signed': {'$ne': None}}
        _filter.update(**status)

    if type_:
        _filter['type'] = type_

    # An explicit session overrides one derived from search_window.
    if session:
        _filter['session'] = session

    # process full-text query
    if query and settings.ENABLE_ELASTICSEARCH:
        # block spammers, possibly move to a BANNED_SEARCH_LIST setting
        if '<a href' in query:
            return db.bills.find({settings.LEVEL_FIELD: None})

        # A query containing digits is tried as a bill id first.
        if re.findall(r'\d+', query):
            _id_filter = dict(_filter)
            _id_filter['bill_id'] = fix_bill_id(query).upper()
            # Honour bill_fields here too, so both return paths yield
            # documents of the same shape.
            result = db.bills.find(_id_filter, bill_fields)
            # Ask the cursor how many documents matched: testing the cursor
            # object itself does not reflect whether it is empty, which
            # made the full-text search below unreachable for such queries.
            if result.count():
                return result

        query = {
            "query_string": {
                "fields": ["text", "title"],
                "default_operator": "AND",
                "query": query
            }
        }
        search = pyes.Search(query, fields=[])

        # Move these constraints out of the Mongo spec and into ES term
        # filters.
        es_terms = []
        if settings.LEVEL_FIELD in _filter:
            es_terms.append(
                pyes.TermFilter(settings.LEVEL_FIELD,
                                _filter.pop(settings.LEVEL_FIELD)))
        if 'session' in _filter:
            es_terms.append(
                pyes.TermFilter('session', _filter.pop('session')))
        if 'chamber' in _filter:
            es_terms.append(
                pyes.TermFilter('chamber', _filter.pop('chamber')))
        if 'subjects' in _filter:
            # assumes subjects arrives as {'$all': [...]} -- TODO confirm
            # against callers
            es_terms.append(
                pyes.TermFilter('subjects',
                                _filter.pop('subjects')['$all']))
        if 'sponsors.leg_id' in _filter:
            es_terms.append(
                pyes.TermFilter('sponsors',
                                _filter.pop('sponsors.leg_id')))

        # add terms
        if es_terms:
            search.filter = pyes.ANDFilter(es_terms)

        # page size is a guess, could use tweaks
        es_result = elasticsearch.search(search, search_type='scan',
                                         scroll='3m', size=250)
        doc_ids = [r.get_id() for r in es_result]
        _filter['versions.doc_id'] = {'$in': doc_ids}
    elif query:
        _filter['title'] = {'$regex': query, '$options': 'i'}

    # return query
    return db.bills.find(_filter, bill_fields)
def search(query=None, state=None, chamber=None, subjects=None,
           bill_id=None, bill_id__in=None, search_window=None,
           updated_since=None, sponsor_id=None, bill_fields=None):
    """Query ``db.bills`` and return the resulting cursor.

    A Mongo spec is assembled from ``state``, ``chamber``, ``subjects``,
    ``search_window``, ``updated_since`` and ``sponsor_id``.  When ``query``
    is given, a full-text search is run through ``elasticsearch``; selected
    constraints are moved from the Mongo spec into ES term filters and the
    hits are fed back to Mongo via ``versions.doc_id``.

    ``search_window`` accepts ``"session"``, ``"term"``, ``"session:<x>"``,
    ``"term:<x>"`` or ``"all"``.  ``bill_id`` and ``bill_id__in`` are
    accepted but not used by this function.

    Raises:
        ValueError: for an unrecognised ``search_window`` or when
            ``parse_param_dt`` rejects ``updated_since``.
    """
    candidates = (
        ('state', state),
        ('chamber', chamber),
        ('subjects', subjects),
    )
    spec = {field: given for field, given in candidates if given is not None}

    if search_window:
        if search_window == 'session':
            spec['_current_session'] = True
        elif search_window == 'term':
            spec['_current_term'] = True
        elif search_window.startswith('session:'):
            spec['session'] = search_window.split('session:')[1]
        elif search_window.startswith('term:'):
            spec['_term'] = search_window.split('term:')[1]
        elif search_window != 'all':
            raise ValueError('invalid search_window. valid choices are '
                             ' "term", "session", "all"')

    if updated_since:
        try:
            since = parse_param_dt(updated_since)
        except ValueError:
            raise ValueError('invalid updated_since parameter. '
                             'please supply date in YYYY-MM-DD format')
        spec['updated_at'] = {'$gte': since}

    if sponsor_id:
        spec['sponsors.leg_id'] = sponsor_id

    # No free-text query: plain Mongo lookup.
    if not query:
        return db.bills.find(spec, bill_fields)

    es_query = {"query_string": {"fields": ["text", "title"],
                                 "default_operator": "AND",
                                 "query": query}}
    es_search = pyes.Search(es_query, fields=[])

    # Move these constraints out of the Mongo spec and into ES term
    # filters: (mongo key, ES field, value is wrapped in {'$all': ...}).
    transfers = (
        ('state', 'state', False),
        ('session', 'session', False),
        ('chamber', 'chamber', False),
        ('subjects', 'subjects', True),
        ('sponsors.leg_id', 'sponsors', False),
    )
    term_filters = []
    for mongo_key, es_field, unwrap_all in transfers:
        if mongo_key not in spec:
            continue
        term_value = spec.pop(mongo_key)
        if unwrap_all:
            term_value = term_value['$all']
        term_filters.append(pyes.TermFilter(es_field, term_value))

    if term_filters:
        es_search.filter = pyes.ANDFilter(term_filters)

    # page size is a guess, could use tweaks
    hits = elasticsearch.search(es_search, search_type='scan',
                                scroll='3m', size=250)
    spec['versions.doc_id'] = {'$in': [hit.get_id() for hit in hits]}

    return db.bills.find(spec, bill_fields)
def search(query=None, abbr=None, chamber=None, subjects=None, bill_id=None,
           search_window=None, updated_since=None, last_action_since=None,
           sponsor_id=None, status=None, type_=None, session=None,
           bill_fields=None, sort=None, limit=None):
    """Translate search arguments into a ``BillSearchResults`` object.

    Constraints go either into ElasticSearch filter clauses or into a Mongo
    spec.  ElasticSearch is chosen only when ``query`` is given,
    ``settings.ENABLE_ELASTICSEARCH`` is truthy and ``query`` has no digits;
    a query with digits is matched against ``bill_id`` in Mongo instead.

    ``search_window`` accepts ``"session"``, ``"term"``, ``"session:<x>"``,
    ``"term:<x>"`` or ``"all"``.  ``status`` is an iterable of names, each
    turned into an ``action_dates.<name>`` not-null condition for Mongo.
    ``sort`` is normalised to an ``action_dates.*`` key, ``updated_at`` or
    ``created_at``, defaulting to ``action_dates.last``.  ``limit`` is
    accepted but not used by this function.

    Raises:
        PermissionDenied: when ``query`` contains ``'<a href'``.
        ValueError: for an unrecognised ``search_window`` or, on the Mongo
            path, when ``parse_param_dt`` rejects a date argument.
    """
    if status is None:
        status = []

    mongo = {}
    es = []
    es_enabled = False
    matched_as_id = False

    if query:
        es_enabled = settings.ENABLE_ELASTICSEARCH

        # queries carrying an HTML anchor are rejected outright
        if '<a href' in query:
            raise PermissionDenied('html detected')

        # a query containing digits becomes a Mongo bill_id match
        # (TODO: maybe this should be an $or)
        has_digits = re.findall(r'\d+', query)
        if has_digits:
            only_digits = not re.findall(r'\D', query)
            if only_digits:
                mongo['bill_id'] = {'$regex': fix_bill_id(query).upper()}
            else:
                mongo['bill_id'] = fix_bill_id(query).upper()
            es_enabled = False
            matched_as_id = True

    def _since(field, raw, error_message):
        # Add a "field >= raw" constraint to whichever backend is in use;
        # only the Mongo path parses (and may reject) the date.
        if not raw:
            return
        if es_enabled:
            es.append({'range': {field: {'gte': raw}}})
            return
        try:
            mongo[field] = {'$gte': parse_param_dt(raw)}
        except ValueError:
            raise ValueError(error_message)

    if abbr and es_enabled:
        es.append({'term': {'jurisdiction': abbr}})
    elif abbr:
        mongo[settings.LEVEL_FIELD] = abbr

    if sponsor_id and es_enabled:
        es.append({'term': {'sponsor_ids': sponsor_id}})
    elif sponsor_id:
        mongo['sponsors.leg_id'] = sponsor_id

    # plain equality arguments (chamber, bill_id, type, session)
    if isinstance(bill_id, list) and not es_enabled:
        bill_id = {'$in': bill_id}
    equalities = {'chamber': chamber, 'bill_id': bill_id, 'type': type_,
                  'session': session}

    if search_window:
        if search_window == 'session':
            equalities['_current_session'] = True
        elif search_window == 'term':
            equalities['_current_term'] = True
        elif search_window.startswith('session:'):
            equalities['session'] = search_window.split('session:')[1]
        elif search_window.startswith('term:'):
            equalities['_term'] = search_window.split('term:')[1]
        elif search_window != 'all':
            raise ValueError('invalid search_window. valid choices are '
                             ' "term", "session", "all"')

    for name, wanted in equalities.items():
        if wanted is None:
            continue
        if es_enabled:
            es.append({'term': {name: wanted}})
        else:
            mongo[name] = wanted

    if subjects and es_enabled:
        es.extend({'term': {'subjects': subject}} for subject in subjects)
    elif subjects:
        mongo['subjects'] = {'$all': filter(None, subjects)}

    _since('updated_at', updated_since,
           'invalid updated_since parameter. '
           'please supply date in YYYY-MM-DD format')
    _since('action_dates.last', last_action_since,
           'invalid last_action_since parameter. '
           'please supply date in YYYY-MM-DD format')

    # each status name becomes {'action_dates.<name>': {'$ne': None}}
    not_null = [{'action_dates.%s' % name: {'$ne': None}} for name in status]
    if not_null and es_enabled:
        # NOTE(review): the ES field is the bare status name while the Mongo
        # key is 'action_dates.<name>' -- confirm against the index mapping.
        for name in status:
            es.append({'exists': {'field': name}})
    elif len(not_null) == 1:
        mongo.update(**not_null[0])
    elif len(not_null) > 1:
        mongo.update(**{'$and': not_null})

    # normalise the sort key
    if sort in ('first', 'last', 'signed', 'passed_lower', 'passed_upper'):
        sort = 'action_dates.' + sort
    elif sort not in ('updated_at', 'created_at'):
        sort = 'action_dates.last'

    if query and es_enabled:
        text_query = {"query_string": {"fields": ["text", "title"],
                                       "default_operator": "AND",
                                       "query": query}}
        payload = {'query': text_query}
        if es:
            payload['filter'] = {'and': es}
            payload = {'query': {'filtered': payload}}
        payload['fields'] = []
        return BillSearchResults(payload, None, sort, bill_fields)

    if query and not matched_as_id:
        mongo['title'] = {'$regex': query, '$options': 'i'}

    return BillSearchResults(None, mongo, sort, bill_fields)