def main(self):
    """Alert on aggregated failed sshd logins seen in the last 15 minutes."""
    # Search window: the last quarter hour.
    window = dict(minutes=15)

    # pyes filters selecting failed sshd login events that carry a source IP.
    required = [
        pyes.TermFilter('_type', 'event'),
        pyes.TermFilter('eventsource', 'systemslogs'),
        pyes.ExistsFilter('details.sourceipaddress'),
        pyes.QueryFilter(pyes.MatchQuery('summary', 'failed', 'phrase')),
        pyes.TermFilter('program', 'sshd'),
        pyes.QueryFilter(
            pyes.MatchQuery('summary', 'login ldap_count_entries', 'boolean')),
    ]

    # Known-noisy source addresses to exclude.
    excluded = [
        pyes.QueryFilter(pyes.MatchQuery('summary', '10.22.8.128', 'phrase')),
        pyes.QueryFilter(pyes.MatchQuery('summary', '10.8.75.35', 'phrase')),
        pyes.QueryFilter(pyes.MatchQuery('summary', '208.118.237.', 'phrase')),
    ]

    self.filtersManual(window, must=required, must_not=excluded)
    # Aggregate by source IP, keeping at most 50 sample events per bucket...
    self.searchEventsAggreg('sourceipaddress', samplesLimit=50)
    # ...and alert on any bucket with 10 or more matching events.
    self.walkAggregations(threshold=10)
def main(self):
    """Alert on accepted-publickey ssh logins for a configured user and host set."""
    window = dict(minutes=30)

    # Load host/user filter settings from the alert's config file.
    self.config_file = './unauth_ssh_pyes.conf'
    self.config = None
    self.initConfiguration()

    required = [
        pyes.TermFilter('_type', 'event'),
        pyes.TermFilter('category', 'syslog'),
        pyes.TermFilter('details.program', 'sshd'),
        pyes.QueryFilter(
            pyes.QueryStringQuery('details.hostname: /{}/'.format(
                self.config.hostfilter))),
        pyes.QueryFilter(
            pyes.MatchQuery('summary', 'Accepted publickey {}'.format(
                self.config.user), operator='and')),
    ]

    # Exclude any hosts the configuration says to skip.
    excluded = [
        pyes.QueryFilter(pyes.MatchQuery('summary', skiphost))
        for skiphost in self.config.skiphosts
    ]

    self.filtersManual(window, must=required, must_not=excluded)
    self.searchEventsSimple()
    self.walkEvents()
def esLdapResults(begindateUTC=None, enddateUTC=None):
    """Return a JSON list of per-DN ldap success/failure login counts.

    Queries ES for ldap_invalid_credentials events in the window
    [begindateUTC, enddateUTC] (default: the last hour), then re-queries
    per distinguished name to tally successes vs failures.
    Returns None if the ES servers cannot be reached.
    """
    resultsList = list()
    if begindateUTC is None:
        begindateUTC = datetime.now() - timedelta(hours=1)
        begindateUTC = toUTC(begindateUTC)
    if enddateUTC is None:
        enddateUTC = datetime.now()
        enddateUTC = toUTC(enddateUTC)
    try:
        es = pyes.ES((list('{0}'.format(s) for s in options.esservers)))
        qDate = pyes.RangeQuery(qrange=pyes.ESRange(
            'utctimestamp', from_value=begindateUTC, to_value=enddateUTC))
        q = pyes.MatchAllQuery()
        q = pyes.FilteredQuery(q, qDate)
        q = pyes.FilteredQuery(q, pyes.TermFilter('tags', 'ldap'))
        q = pyes.FilteredQuery(q, pyes.TermFilter('details.result',
                                                  'ldap_invalid_credentials'))
        q2 = q.search()
        q2.facet.add_term_facet('details.result')
        q2.facet.add_term_facet('details.dn', size=20)
        results = es.search(q2, indices='events')

        # Generic directory components that show up as facet terms but are
        # not real account DNs.
        stoplist = ('o', 'mozilla', 'dc', 'com', 'mozilla.com',
                    'mozillafoundation.org', 'org')
        for dnterm in results.facets['details.dn'].terms:
            if dnterm['term'] in stoplist:
                continue
            failures = 0
            success = 0
            dn = dnterm['term']

            # Re-query scoped to this DN to count its result types.
            # Fix: the original reused 't' and 'results' for the inner
            # loop/search, shadowing the outer loop's bindings; distinct
            # names (dnterm/resultterm/dnresults) keep the outer iteration
            # state intact and readable.
            qt = pyes.MatchAllQuery()
            qt = pyes.FilteredQuery(qt, qDate)
            qt = pyes.FilteredQuery(qt, pyes.TermFilter('tags', 'ldap'))
            qt = pyes.FilteredQuery(qt, pyes.TermFilter('details.dn', dn))
            qt2 = qt.search()
            qt2.facet.add_term_facet('details.result')
            dnresults = es.search(qt2)
            for resultterm in dnresults.facets['details.result'].terms:
                if resultterm['term'] == 'ldap_success':
                    success = resultterm['count']
                if resultterm['term'] == 'ldap_invalid_credentials':
                    failures = resultterm['count']
            resultsList.append(dict(dn=dn,
                                    failures=failures,
                                    success=success,
                                    begin=begindateUTC.isoformat(),
                                    end=enddateUTC.isoformat()))
        return json.dumps(resultsList)
    except pyes.exceptions.NoServerAvailable:
        sys.stderr.write('Elastic Search server could not be reached, '
                         'check network connectivity\n')
def main(self):
    """Alert on every geomodel notice event from the last half hour."""
    search_window = dict(minutes=30)
    # Match geomodel notice events only.
    filters = [
        pyes.TermFilter('_type', 'event'),
        pyes.TermFilter('category', 'geomodelnotice'),
    ]
    self.filtersManual(search_window, must=filters, must_not=[])
    # One alert per matching event.
    self.searchEventsSimple()
    self.walkEvents()
def esBroXSSEvents():
    """Build a pyes query for un-alerted bro XSS log events from the last 30 minutes."""
    window_start = toUTC(datetime.now() - timedelta(minutes=30))
    window_end = toUTC(datetime.now())

    in_window = pyes.RangeQuery(qrange=pyes.ESRange(
        'utctimestamp', from_value=window_start, to_value=window_end))
    is_event = pyes.TermFilter('_type', 'event')
    is_broxss = pyes.TermFilter("category", "broxsslog")
    already_alerted = pyes.ExistsFilter('alerttimestamp')

    query = pyes.ConstantScoreQuery(pyes.MatchAllQuery())
    # Require a uri field and skip events that already produced an alert.
    query.filters.append(
        pyes.BoolFilter(
            must=[is_event, in_window, is_broxss, pyes.ExistsFilter('uri')],
            must_not=[already_alerted]))
    return query
def main(self):
    """Alert on ldap 'modify' changes mentioning groups (last 15 minutes)."""
    search_window = dict(minutes=15)
    filters = [
        pyes.TermFilter('category', 'ldapChange'),
        pyes.TermFilter('changetype', 'modify'),
        pyes.QueryFilter(pyes.MatchQuery("summary", "groups")),
    ]
    self.filtersManual(search_window, must=filters)
    self.searchEventsSimple()
    self.walkEvents()
def main(self):
    """Alert on fail2ban ban notices from the last 10 minutes."""
    search_window = dict(minutes=10)
    filters = [
        pyes.TermFilter('_type', 'event'),
        pyes.TermFilter('program', 'fail2ban'),
        pyes.QueryFilter(
            pyes.MatchQuery("summary", "has been banned", "phrase")),
    ]
    self.filtersManual(search_window, must=filters)
    self.searchEventsSimple()
    self.walkEvents()
def main(self):
    """Aggregate brointel indicator hits over the last 30 minutes and alert on busy indicators."""
    search_window = dict(minutes=30)
    filters = [
        pyes.TermFilter('_type', 'event'),
        pyes.TermFilter('category', 'brointel'),
        pyes.ExistsFilter('seenindicator'),
    ]
    self.filtersManual(search_window, must=filters)
    # Bucket by indicator, keeping up to 50 sample events per bucket.
    self.searchEventsAggregated('details.seenindicator', samplesLimit=50)
    # Alert whenever a bucket holds 5 or more events.
    self.walkAggregations(threshold=5)
def main(self):
    """Alert when the admin account sets pwdAccountLockedTime via an ldap change (last 15 minutes)."""
    search_window = dict(minutes=15)
    # Looking for pwdAccountLockedTime being replaced by the admin DN.
    filters = [
        pyes.TermFilter('category', 'ldapChange'),
        pyes.TermFilter("actor", "cn=admin,dc=mozilla"),
        pyes.QueryFilter(
            pyes.MatchQuery('changepairs',
                            'replace:pwdAccountLockedTime', 'phrase')),
    ]
    self.filtersManual(search_window, must=filters)
    self.searchEventsSimple()
    self.walkEvents()
def main(self):
    """Alert on bro SSH::Password_Guessing notices from the last 15 minutes."""
    search_window = dict(minutes=15)
    filters = [
        pyes.TermFilter('_type', 'bro'),
        pyes.TermFilter('eventsource', 'nsm'),
        pyes.TermFilter('category', 'bronotice'),
        pyes.ExistsFilter('details.sourceipaddress'),
        pyes.QueryFilter(
            pyes.MatchQuery('details.note',
                            'SSH::Password_Guessing', 'phrase')),
    ]
    self.filtersManual(search_window, must=filters)
    self.searchEventsSimple()
    self.walkEvents()
def main(self):
    """Alert on brointel hits sourced from the abuse.ch SSL blacklist (last 15 minutes)."""
    search_window = dict(minutes=15)
    filters = [
        pyes.TermFilter('_type', 'bro'),
        pyes.TermFilter('eventsource', 'nsm'),
        pyes.TermFilter('category', 'brointel'),
        pyes.TermFilter('details.sources', 'abuse.ch SSLBL'),
        pyes.ExistsFilter('details.sourceipaddress'),
    ]
    self.filtersManual(search_window, must=filters)
    self.searchEventsSimple()
    self.walkEvents()
def esCloudTrailSearch(es, begindateUTC=None, enddateUTC=None):
    """Return raw ES hits for un-alerted cloudtrail instance run/stop/start events.

    Defaults to a window ending now and starting 160 hours ago.
    Returns None if the ES servers cannot be reached.
    """
    if begindateUTC is None:
        begindateUTC = toUTC(datetime.now() - timedelta(hours=160))
    if enddateUTC is None:
        enddateUTC = toUTC(datetime.now())
    try:
        # Actions in the window that have not yet been given an alerttimestamp.
        in_window = pyes.RangeQuery(qrange=pyes.ESRange(
            'utctimestamp', from_value=begindateUTC, to_value=enddateUTC))
        is_cloudtrail = pyes.TermFilter('_type', 'cloudtrail')
        instance_actions = pyes.TermsFilter(
            'eventName', ['runinstances', 'stopinstances', 'startinstances'])
        already_alerted = pyes.ExistsFilter('alerttimestamp')
        results = es.search(
            pyes.ConstantScoreQuery(
                pyes.BoolFilter(
                    must=[is_cloudtrail, in_window, instance_actions],
                    must_not=[already_alerted])),
            indices='events')
        # Raw hits sidestep a pyes result-iteration bug.
        return results._search_raw()['hits']['hits']
    except pyes.exceptions.NoServerAvailable:
        logger.error(
            'Elastic Search server could not be reached, check network connectivity'
        )
def main(self):
    """Alert on squid-denied nubis forward-proxy requests from the last 15 minutes."""
    search_window = dict(minutes=15)
    # NOTE(review): requiring both the prod and non_prod tags in 'must'
    # means only events carrying BOTH tags can match — confirm intended.
    filters = [
        pyes.TermFilter('tags', 'nubis_events_non_prod'),
        pyes.TermFilter('tags', 'nubis_events_prod'),
        pyes.TermFilter('category', 'syslog'),
        pyes.TermFilter('details.__tag', 'ec2.forward.squid.access'),
        pyes.QueryFilter(
            pyes.MatchQuery('details.summary',
                            'is DENIED, because it matched', 'phrase')),
    ]
    self.filtersManual(search_window, must=filters)
    self.searchEventsSimple()
    self.walkEvents()
def esSearch(es, begindateUTC=None, enddateUTC=None):
    """Pull mozdefstats count summaries for the window and print categories
    whose relative standard deviation exceeds their threshold.

    es -- a pyes.ES connection
    begindateUTC/enddateUTC -- window bounds; default to the last
    options.aggregationminutes minutes ending now.
    """
    resultsList = list()
    if begindateUTC is None:
        begindateUTC = toUTC(datetime.now() - timedelta(minutes=options.aggregationminutes))
    if enddateUTC is None:
        enddateUTC = toUTC(datetime.now())
    try:
        # search for aggregated event stats summaries within the date range
        qDate = pyes.RangeQuery(qrange=pyes.ESRange('utctimestamp', from_value=begindateUTC, to_value=enddateUTC))
        q = pyes.ConstantScoreQuery(pyes.MatchAllQuery())
        q = pyes.FilteredQuery(q,pyes.BoolFilter(must=[qDate, pyes.TermFilter('_type', 'mozdefstats')]))
        results=es.search(query=q,size=100,indices=['events','events-previous'])
        # work from the raw response to avoid a pyes iteration bug
        rawresults = results._search_raw()
        # examine the results:
        # for each details.counts entry, append the count to a list
        # keyed by category name in the stats dict
        # NOTE(review): i.keys()[0]/i.values()[0] rely on Python 2 dict
        # methods returning lists — this breaks on Python 3.
        stats=dict()
        for r in rawresults['hits']['hits']:
            for i in r['_source']['details']['counts']:
                if i.keys()[0] not in stats.keys():
                    stats[i.keys()[0]]=list()
                stats[i.keys()[0]].append(i.values()[0])
        # make a dictionary of user-defined
        # aggregation threshold percentages
        aggregationthresholds = dict(zip(options.aggregations,
                                         options.aggregationthresholds))
        # for our running history of counts per category
        # do some simple stats math to see if we
        # should alert on anything
        for s in stats:
            alert = False
            smean=round(numpy.mean(stats[s]))
            sstd=round(numpy.std(stats[s]))
            # relative standard deviation as a percentage
            # NOTE(review): raises ZeroDivisionError if smean rounds to 0
            stat = round((sstd/smean)*100, 2)
            if s in aggregationthresholds.keys():
                if stat > aggregationthresholds[s]:
                    alert = True
            # fall back to the default threshold only when no
            # per-category threshold was configured
            elif stat > options.defaultthreshold:
                alert = True
            if alert:
                print('{0} {1}%: \n\t\t{2} \n\t\t{3} \n\t\t{4}'.format(
                    s,
                    stat,
                    stats[s],
                    smean,
                    sstd
                    )
                )
    except pyes.exceptions.NoServerAvailable:
        logger.error('Elastic Search server could not be reached, check network connectivity')
def main(self):
    """Aggregate brointel indicators seen on selected sensors in the last 2 minutes."""
    search_window = dict(minutes=2)
    filters = [
        pyes.TermFilter('_type', 'bro'),
        pyes.TermFilter('eventsource', 'nsm'),
        pyes.TermFilter('category', 'brointel'),
        pyes.ExistsFilter('seenindicator'),
        # Only events reported by these named sensors.
        pyes.QueryFilter(
            pyes.MatchQuery('hostname', 'sensor1 sensor2 sensor3', 'boolean')),
    ]
    self.filtersManual(search_window, must=filters)
    # Bucket by indicator, keeping up to 10 sample events per bucket.
    self.searchEventsAggregated('details.seenindicator', samplesLimit=10)
    # Alert when a bucket reaches 10 events.
    self.walkAggregations(threshold=10)
def main(self):
    """Aggregate failed sshd logins by source IP over the last 2 minutes."""
    search_window = dict(minutes=2)
    required = [
        pyes.TermFilter('_type', 'event'),
        pyes.QueryFilter(pyes.MatchQuery('summary', 'failed', 'phrase')),
        pyes.TermFilter('program', 'sshd'),
        pyes.QueryFilter(
            pyes.MatchQuery('summary',
                            'login invalid ldap_count_entries', 'boolean')),
    ]
    # Known-noisy internal sources to ignore.
    excluded = [
        pyes.QueryFilter(pyes.MatchQuery('summary', '10.22.75.203', 'phrase')),
        pyes.QueryFilter(pyes.MatchQuery('summary', '10.8.75.144', 'phrase')),
    ]
    self.filtersManual(search_window, must=required, must_not=excluded)
    # Bucket by source IP, keeping up to 10 sample events per bucket.
    self.searchEventsAggreg('sourceipaddress', samplesLimit=10)
    # Alert when a bucket reaches 10 events.
    self.walkAggregations(threshold=10)
def getESAlerts(es):
    """Fetch all alerts from the last 50 minutes as a raw ES response."""
    window_start = toUTC(datetime.now() - timedelta(minutes=50))
    window_end = toUTC(datetime.now())
    in_window = pyes.RangeQuery(qrange=pyes.ESRange(
        'utctimestamp', from_value=window_start, to_value=window_end))
    is_alert = pyes.TermFilter('_type', 'alert')
    query = pyes.ConstantScoreQuery(pyes.MatchAllQuery())
    query.filters.append(pyes.BoolFilter(must=[in_window, is_alert]))
    results = es.search(query, size=10000, indices='alerts')
    # Return the raw search result to avoid a pyes iteration bug.
    return results._search_raw()
def getFrontendStats(es):
    """Return raw hits for mozdef health events emitted in the last minute."""
    window_start = toUTC(datetime.now() - timedelta(minutes=1))
    window_end = toUTC(datetime.now())
    in_window = pyes.RangeQuery(qrange=pyes.ESRange(
        'utctimestamp', from_value=window_start, to_value=window_end))
    is_health = pyes.TermFilter('_type', 'mozdefhealth')
    is_mozdef = pyes.TermsFilter('category', ['mozdef'])
    response = es.search(pyes.ConstantScoreQuery(
        pyes.BoolFilter(must=[is_health, in_window, is_mozdef])),
        indices='events')
    return response._search_raw()['hits']['hits']
def searchForSSHKeys(es):
    """Raw ES search for sshd events mentioning matching/accepted public keys (last 5 minutes)."""
    window_start = toUTC(datetime.now() - timedelta(minutes=5))
    window_end = toUTC(datetime.now())
    in_window = pyes.RangeQuery(qrange=pyes.ESRange(
        'utctimestamp', from_value=window_start, to_value=window_end))
    is_event = pyes.TermFilter('_type', 'event')
    is_sshd = pyes.TermFilter("program", "sshd")
    key_summary = pyes.QueryFilter(
        pyes.MatchQuery("summary",
                        "found matching key accepted publickey", "boolean"))
    query = pyes.ConstantScoreQuery(pyes.MatchAllQuery())
    query.filters.append(
        pyes.BoolFilter(must=[is_event, in_window, is_sshd, key_summary]))
    results = es.search(query, size=10000, indices='events')
    # Return the raw search result to avoid a pyes iteration bug.
    return results._search_raw()
def query(self, sort='timestamp', start=0, size=20, severity=None,
          timestamp_from=None, timestamp_till=None):
    """Search the index, optionally narrowed by severity and a timestamp range."""
    conditions = []
    if severity is not None:
        conditions.append(pyes.TermFilter(field='severity', value=severity))
    if timestamp_from is not None:
        # Accept either a datetime or an already-formatted ISO string.
        if isinstance(timestamp_from, datetime.datetime):
            timestamp_from = timestamp_from.isoformat()
        conditions.append(
            pyes.RangeFilter(
                pyes.ESRangeOp('timestamp', 'gte', timestamp_from)))
    if timestamp_till is not None:
        if isinstance(timestamp_till, datetime.datetime):
            timestamp_till = timestamp_till.isoformat()
        conditions.append(
            pyes.RangeFilter(
                pyes.ESRangeOp('timestamp', 'lte', timestamp_till)))

    # AND all active conditions together; no filter when none were given.
    combined = pyes.ANDFilter(conditions) if conditions else None
    search = pyes.Search(
        query=pyes.MatchAllQuery(),
        filter=combined,
        start=start,
        size=size)
    return self.es.search(
        search, indices=[self.index], doc_types=[self.document_type])
def main(self):
    """Alert on cloudtrail instance run/stop/start events from the last hour."""
    search_window = dict(hours=1)
    filters = [
        pyes.TermFilter('_type', 'cloudtrail'),
        pyes.TermsFilter('eventName',
                         ['runinstances', 'stopinstances', 'startinstances']),
    ]
    self.filtersManual(search_window, must=filters)
    self.searchEventsSimple()
    self.walkEvents()
def esSearch(es, begindateUTC=None, enddateUTC=None):
    """Search ES for un-alerted brointel events in the window and group them
    by details.seenindicator.

    Returns a list of dicts (indicator, count, events=matching raw hits)
    ordered by descending count, or None if ES is unreachable.
    """
    resultsList = list()  # NOTE(review): unused; results flow through indicatorList
    if begindateUTC is None:
        begindateUTC = toUTC(datetime.now() - timedelta(minutes=60))
    if enddateUTC is None:
        enddateUTC = toUTC(datetime.now())
    try:
        # search for events within the date range that haven't already been
        # alerted (i.e. given an alerttimestamp)
        qDate = pyes.RangeQuery(qrange=pyes.ESRange(
            'utctimestamp', from_value=begindateUTC, to_value=enddateUTC))
        qType = pyes.TermFilter('_type', 'event')
        qEvents = pyes.TermsFilter('category', ['brointel'])
        qalerted = pyes.ExistsFilter('alerttimestamp')
        qdetails = pyes.ExistsFilter('details')
        qindicator = pyes.ExistsFilter('seenindicator')
        pyesresults = es.search(pyes.ConstantScoreQuery(
            pyes.BoolFilter(must=[qType, qDate, qEvents, qdetails, qindicator],
                            must_not=[qalerted])),
            size=1000)
        # uncomment for debugging to recreate alerts for events that already
        # have an alerttimestamp:
        # results=es.search(pyes.ConstantScoreQuery(pyes.BoolFilter(must=[qcloud,qDate,qEvents])))
        # logger.debug(results.count())

        # correlate any matches by the seenindicator field.
        # make a simple list of indicator values that can be
        # counted/summarized by Counter
        resultsIndicators = list()

        # bug in pyes: capture results as a raw list or it mutates after
        # first access. Copy the hits.hits list as our results, which is the
        # same shape the official elasticsearch library returns.
        results = pyesresults._search_raw()['hits']['hits']
        for r in results:
            resultsIndicators.append(r['_source']['details']['seenindicator'])

        # use the list of tuples ('indicator', count) to create a dict with
        # indicator, count and the matching es records, and collect those
        # dicts into a list to return.
        indicatorList = list()
        for i in Counter(resultsIndicators).most_common():
            idict = dict(indicator=i[0], count=i[1], events=[])
            for r in results:
                # ascii-encode before comparing; under Python 2 str/unicode
                # coercion this matches i[0] for ascii-only indicators
                if r['_source']['details']['seenindicator'].encode(
                        'ascii', 'ignore') == i[0]:
                    idict['events'].append(r)
            indicatorList.append(idict)
        return indicatorList
    except pyes.exceptions.NoServerAvailable:
        logger.error(
            'Elastic Search server could not be reached, check network connectivity'
        )
def esBroIntelEvents():
    """Build a pyes query for un-alerted brointel events from the last 30 minutes."""
    window_start = toUTC(datetime.now() - timedelta(minutes=30))
    window_end = toUTC(datetime.now())
    # Events in the window that have not yet been given an alerttimestamp.
    in_window = pyes.RangeQuery(qrange=pyes.ESRange(
        'utctimestamp', from_value=window_start, to_value=window_end))
    is_event = pyes.TermFilter('_type', 'event')
    is_brointel = pyes.TermsFilter('category', ['brointel'])
    already_alerted = pyes.ExistsFilter('alerttimestamp')
    query = pyes.ConstantScoreQuery(pyes.MatchAllQuery())
    query.filters.append(
        pyes.BoolFilter(
            must=[is_event, in_window, is_brointel,
                  pyes.ExistsFilter('seenindicator')],
            must_not=[already_alerted]))
    return query
def search(request):
    """Django view: full-text dictionary search with optional
    taal/sfeer/woordsoort filters, sorting and pagination."""
    q = request.GET.get('q', u'')
    # repair unbalanced quotes before handing the query to the parser
    q = balance_quotes(q)
    start = int(request.GET.get('start', 0))
    size = int(request.GET.get('size', 100))
    page = request.GET.get('page', 1)
    sort = request.GET.get('sort', 'trefwoord')
    filters, filtercheckboxes, filterurl = {}, {}, []
    for fname in ('taal', 'sfeer', 'woordsoort'):
        filterlist = request.GET.getlist(fname)
        if filterlist:
            # multiple values within one facet are OR'ed together
            filters[fname] = pyes.ORFilter(
                [pyes.TermFilter(fname, fl) for fl in filterlist])
            filtercheckboxes[fname] = filterlist
            for filterlistitem in filterlist:
                filterurl.append('%s=%s' % (fname, filterlistitem))
    if filters:
        # distinct facets are AND'ed together
        filters = pyes.ANDFilter(filters.values())
    # base url for pagination links, preserving the query and filters
    pageurl = '?q=%s' % urllib.quote_plus(q.encode('utf8'))
    if filterurl:
        pageurl += '&'
        pageurl += '&'.join(filterurl)
    pageurl_nosort = pageurl
    pageurl += '&sort=' + sort
    results = get_search_results(q, extra_filter=filters, sort=sort)
    pagi = Paginator(results, size)
    data = pagi.page(page)
    # show up to ten page links starting at the current page
    pagerange = pagi.page_range[int(page) - 1:min(int(page) + 9,
                                                  int(pagi.num_pages))]
    return direct_to_template(
        request, "search.html", {
            'results': results,
            'data': data,
            'paginator': pagi,
            'page': page,
            'q': q,
            'filters': filtercheckboxes,
            'start': start,
            'size': size,
            'pageurl': pageurl,
            'pageurl_nosort': pageurl_nosort,
            'pagerange': pagerange
        })
def esSearch(es, macassignments=None, begindateUTC=None, enddateUTC=None):
    '''
    Search ES for an event that ties a username to a mac address
    This example searches for junos wifi correlations on authentication success
    Expecting an event like: user: [email protected]; mac: 5c:f9:38:b1:de:cf; author reason: roamed session; ssid: ANSSID; AP 46/2\n

    macassignments: assumed to be a dict mapping a lowercase OUI prefix
    ('xx:xx:xx') to an owning entity -- TODO confirm against caller.
    Returns a dict keyed by 'username macaddress', or None if ES is
    unreachable.
    '''
    usermacre=re.compile(r'''user: (?P<username>.*?); mac: (?P<macaddress>.*?); ''',re.IGNORECASE)
    correlations={}   # dict of correlation records for the hits we find
    if begindateUTC is None:
        begindateUTC = toUTC(datetime.now() - timedelta(minutes=options.correlationminutes))
    if enddateUTC is None:
        enddateUTC = toUTC(datetime.now())
    try:
        # search for events within the date range
        qDate = pyes.RangeQuery(qrange=pyes.ESRange('utctimestamp', from_value=begindateUTC, to_value=enddateUTC))
        q=pyes.ConstantScoreQuery(pyes.MatchAllQuery())
        # successful authorizations only, ignoring 'last-resort' events
        q.filters.append(pyes.BoolFilter(must=[
            qDate,
            pyes.TermFilter("program","AUTHORIZATION-SUCCESS")
            ],
            must_not=[
                pyes.QueryFilter(
                    pyes.MatchQuery("summary","last-resort","phrase")
                )]))
        results = es.search(q, size=10000, indices=['events', 'events-previous'])
        # raw response avoids a pyes iteration bug
        rawresults=results._search_raw()

        for r in rawresults['hits']['hits']:
            fields = re.search(usermacre,r['_source']['summary'])
            if fields:
                # keep only the first event seen per (username, mac) pair
                if '{0} {1}'.format(fields.group('username'),fields.group('macaddress')) not in correlations.keys():
                    # map the OUI (first 8 chars of the mac) to an owning entity
                    if fields.group('macaddress')[0:8].lower() in macassignments.keys():
                        entity=macassignments[fields.group('macaddress')[0:8].lower()]
                    else:
                        entity='unknown'
                    correlations['{0} {1}'.format(fields.group('username'),fields.group('macaddress'))]=dict(
                        username=fields.group('username'),
                        macaddress=fields.group('macaddress'),
                        entity=entity,
                        utctimestamp=r['_source']['utctimestamp'])
        return correlations
    except pyes.exceptions.NoServerAvailable:
        logger.error('Elastic Search server could not be reached, check network connectivity')
def execute(self):
    """Run the configured search against elasticsearch and return the
    (optionally sorted) results."""
    conn = pyes.ES(self.elastic_hosts)

    queries = []
    filters = [pyes.TermFilter('account', self.account)]

    # Free-text conditions on arbitrary fields.
    for field, val in self.conditions:
        if val != '':
            queries.append(pyes.TextQuery(field, val))
    if self.type not in [None, '']:
        queries.append(pyes.TextQuery('type', self.type))
    if self.path not in [None, '']:
        if self.recursive:
            # Everything underneath the path.
            filters.append(pyes.PrefixFilter('path', '%s/' % self.path))
        else:
            # Direct children of the path only.
            queries.append(pyes.TermQuery('dir', self.path))
    if self.marker not in [None, '']:
        # Resume the listing after the marker name.
        filters.append(
            pyes.RangeFilter(pyes.ESRangeOp('name', 'gt', self.marker)))

    q = pyes.MatchAllQuery()
    if len(queries) > 0:
        q = pyes.BoolQuery(queries)
    q = pyes.FilteredQuery(q, pyes.ANDFilter(filters))

    self.logger.info("Running query: %s" % q.serialize())
    results = conn.search(q, self.search_index_name,
                          start=self.start, size=self.limit)

    if self.sort not in [None, '']:
        # Whitelisted sort fields, with an optional 'desc' per field.
        ordering = []
        for clause in self.sort.split(','):
            parts = clause.split(' ')
            direction = ""
            if len(parts) > 1 and parts[1].lower() == 'desc':
                direction = "-"
            if parts[0] in SORT_WHITELIST:
                ordering.append("{0}{1}".format(direction, parts[0]))
        if ordering:
            results.order_by(ordering)
    return results
def filtersFromKibanaDash(self, fp, date_timedelta):
    """
    Import filters from a kibana dashboard

    fp is the file path of the json file
    date_timedelta is a dict in timedelta format
    see https://docs.python.org/2/library/datetime.html#timedelta-objects
    """
    # Context manager guarantees the file is closed even if json.load raises.
    with open(fp) as f:
        data = json.load(f)
    must = []
    should = []
    must_not = []
    for filtid in data['services']['filter']['list'].keys():
        filt = data['services']['filter']['list'][filtid]
        if filt['active'] and 'query' in filt.keys():
            # Default: derive field/value from a "field:value" query string.
            value = filt['query'].split(':')[-1]
            fieldname = filt['query'].split(':')[0].split('.')[-1]
            if 'field' in filt.keys():
                # Explicit field/query pair.
                fieldname = filt['field']
                value = filt['query']
                if '\"' in value:
                    value = value.split('\"')[1]
                    pyesfilt = pyes.QueryFilter(
                        pyes.MatchQuery(fieldname, value, 'phrase'))
                else:
                    pyesfilt = pyes.TermFilter(fieldname, value)
            else:
                # _exists_:field
                if filt['query'].startswith('_exists_:'):
                    pyesfilt = pyes.ExistsFilter(value.split('.')[-1])
                # _missing_:field
                elif filt['query'].startswith('_missing_:'):
                    pyesfilt = pyes.filters.MissingFilter(
                        value.split('.')[-1])
                # field:"value" -> phrase match
                elif '\"' in value:
                    value = value.split('\"')[1]
                    pyesfilt = pyes.QueryFilter(
                        pyes.MatchQuery(fieldname, value, 'phrase'))
                # field:(value1 value2 value3) -> boolean match
                elif '(' in value and ')' in value:
                    value = value.split('(')[1]
                    # Fix: strip the closing paren. The original split on
                    # '(' a second time (a no-op after the first split),
                    # leaving a trailing ')' in the match value.
                    value = value.split(')')[0]
                    pyesfilt = pyes.QueryFilter(
                        pyes.MatchQuery(fieldname, value, "boolean"))
                # plain field:value -> term filter
                else:
                    pyesfilt = pyes.TermFilter(fieldname, value)

            # Route the filter by the dashboard's mandate.
            if filt['mandate'] == 'must':
                must.append(pyesfilt)
            elif filt['mandate'] == 'either':
                should.append(pyesfilt)
            elif filt['mandate'] == 'mustNot':
                must_not.append(pyesfilt)
    self.filtersManual(date_timedelta, must=must,
                       should=should, must_not=must_not)
def read(self, request):
    """Handle an API read of bills: combine mongo filters, an optional
    elasticsearch full-text pass, pagination and sorting.

    Returns a list of bill documents, or an rc.BAD_REQUEST response for
    invalid parameters or overly-broad queries.
    """
    # default projection of bill fields to return
    bill_fields = {
        'title': 1,
        'created_at': 1,
        'updated_at': 1,
        'bill_id': 1,
        'type': 1,
        'state': 1,
        'level': 1,
        'country': 1,
        'session': 1,
        'chamber': 1,
        'subjects': 1,
        '_type': 1,
        'id': 1
    }
    # replace with request's fields if they exist
    bill_fields = _build_field_list(request, bill_fields)

    # normal mongo search logic
    _filter = _build_mongo_filter(
        request, ('state', 'chamber', 'subjects', 'bill_id', 'bill_id__in'))

    # process search_window
    search_window = request.GET.get('search_window', '')
    if search_window:
        if search_window == 'session':
            _filter['_current_session'] = True
        elif search_window == 'term':
            _filter['_current_term'] = True
        elif search_window.startswith('session:'):
            _filter['session'] = search_window.split('session:')[1]
        elif search_window.startswith('term:'):
            _filter['_term'] = search_window.split('term:')[1]
        elif search_window == 'all':
            pass
        else:
            resp = rc.BAD_REQUEST
            resp.write(": invalid search_window. Valid choices are "
                       "'term', 'session' or 'all'")
            return resp

    # process updated_since
    since = request.GET.get('updated_since')
    if since:
        try:
            _filter['updated_at'] = {'$gte': parse_param_dt(since)}
        except ValueError:
            resp = rc.BAD_REQUEST
            resp.write(": invalid updated_since parameter."
                       " Please supply a date in YYYY-MM-DD format.")
            return resp

    # process sponsor_id
    sponsor_id = request.GET.get('sponsor_id')
    if sponsor_id:
        _filter['sponsors.leg_id'] = sponsor_id

    # process full-text query
    query = request.GET.get('q')
    if query:
        query = {
            "query_string": {
                "fields": ["text", "title"],
                "default_operator": "AND",
                "query": query
            }
        }
        search = pyes.Search(query, fields=[])

        # take terms from the mongo query: each term is popped out of the
        # mongo filter and applied on the elasticsearch side instead
        es_terms = []
        if 'state' in _filter:
            es_terms.append(pyes.TermFilter('state', _filter.pop('state')))
        if 'session' in _filter:
            es_terms.append(
                pyes.TermFilter('session', _filter.pop('session')))
        if 'chamber' in _filter:
            es_terms.append(
                pyes.TermFilter('chamber', _filter.pop('chamber')))
        if 'subjects' in _filter:
            es_terms.append(
                pyes.TermFilter('subjects', _filter.pop('subjects')['$all']))
        if 'sponsors.leg_id' in _filter:
            es_terms.append(
                pyes.TermFilter('sponsors', _filter.pop('sponsors.leg_id')))

        # add terms
        if es_terms:
            search.filter = pyes.ANDFilter(es_terms)

        # page size is a guess, could use tweaks
        es_result = elasticsearch.search(search, search_type='scan',
                                         scroll='3m', size=250)
        doc_ids = [r.get_id() for r in es_result]
        # narrow the mongo query to the documents elasticsearch matched
        _filter['versions.doc_id'] = {'$in': doc_ids}

    # start with base query
    query = db.bills.find(_filter, bill_fields)

    # pagination
    page = request.GET.get('page')
    per_page = request.GET.get('per_page')
    if page and not per_page:
        per_page = 50
    if per_page and not page:
        page = 1

    if page:
        page = int(page)
        per_page = int(per_page)
        query = query.limit(per_page).skip(per_page * (page - 1))
    else:
        # limit response size
        count = db.bills.find(_filter, bill_fields).count()
        if count > 5000:
            resp = rc.BAD_REQUEST
            resp.write(': request too large, try narrowing your search by '
                       'adding more filters.')
            return resp

    # sorting
    sort = request.GET.get('sort')
    if sort == 'updated_at':
        query = query.sort([('updated_at', -1)])
    elif sort == 'created_at':
        query = query.sort([('created_at', -1)])

    return list(query)
def search(query=None, abbr=None, chamber=None, subjects=None, bill_id=None,
           bill_id__in=None, search_window=None, updated_since=None,
           sponsor_id=None, bill_fields=None, status=None, type_=None,
           session=None):
    """Build and run a mongo bill query, optionally backed by an
    elasticsearch full-text pass when `query` is given.

    Returns a pymongo cursor over matching bills.
    Raises ValueError for an invalid search_window or updated_since.
    """
    _filter = {}
    # simple equality filters, skipped when the argument is None
    for key, value in [
        (settings.LEVEL_FIELD, abbr),
        ('chamber', chamber),
        ('subjects', subjects),
        ('bill_id', bill_id),
    ]:
        if value is not None:
            _filter[key] = value

    if search_window:
        if search_window == 'session':
            _filter['_current_session'] = True
        elif search_window == 'term':
            _filter['_current_term'] = True
        elif search_window.startswith('session:'):
            _filter['session'] = search_window.split('session:')[1]
        elif search_window.startswith('term:'):
            _filter['_term'] = search_window.split('term:')[1]
        elif search_window == 'all':
            pass
        else:
            raise ValueError('invalid search_window. valid choices are '
                             ' "term", "session", "all"')
    if updated_since:
        try:
            _filter['updated_at'] = {'$gte': parse_param_dt(updated_since)}
        except ValueError:
            raise ValueError('invalid updated_since parameter. '
                             'please supply date in YYYY-MM-DD format')
    if sponsor_id:
        _filter['sponsors.leg_id'] = sponsor_id

    if status:
        # Status is slightly different: it's a dict like--
        # {'action_dates.signed': {'$ne': None}}
        _filter.update(**status)

    if type_:
        _filter['type'] = type_

    if session:
        _filter['session'] = session

    # process full-text query
    if query and settings.ENABLE_ELASTICSEARCH:
        # block spammers, possibly move to a BANNED_SEARCH_LIST setting
        if '<a href' in query:
            # intentionally-empty result set
            return db.bills.find({settings.LEVEL_FIELD: None})

        # a query containing digits may be a bill_id; try that first
        if re.findall('\d+', query):
            _id_filter = dict(_filter)
            _id_filter['bill_id'] = fix_bill_id(query).upper()
            result = db.bills.find(_id_filter)
            if result:
                return result

        query = {
            "query_string": {
                "fields": ["text", "title"],
                "default_operator": "AND",
                "query": query
            }
        }
        search = pyes.Search(query, fields=[])

        # take terms from the mongo query: popped terms are applied on the
        # elasticsearch side instead
        es_terms = []
        if settings.LEVEL_FIELD in _filter:
            es_terms.append(
                pyes.TermFilter(settings.LEVEL_FIELD,
                                _filter.pop(settings.LEVEL_FIELD)))
        if 'session' in _filter:
            es_terms.append(
                pyes.TermFilter('session', _filter.pop('session')))
        if 'chamber' in _filter:
            es_terms.append(
                pyes.TermFilter('chamber', _filter.pop('chamber')))
        if 'subjects' in _filter:
            es_terms.append(
                pyes.TermFilter('subjects', _filter.pop('subjects')['$all']))
        if 'sponsors.leg_id' in _filter:
            es_terms.append(
                pyes.TermFilter('sponsors', _filter.pop('sponsors.leg_id')))

        # add terms
        if es_terms:
            search.filter = pyes.ANDFilter(es_terms)

        # page size is a guess, could use tweaks
        es_result = elasticsearch.search(search, search_type='scan',
                                         scroll='3m', size=250)
        doc_ids = [r.get_id() for r in es_result]
        # narrow the mongo query to the documents elasticsearch matched
        _filter['versions.doc_id'] = {'$in': doc_ids}
    elif query:
        # without elasticsearch, fall back to a case-insensitive title regex
        _filter['title'] = {'$regex': query, '$options': 'i'}

    # return query
    return db.bills.find(_filter, bill_fields)