def test_sort(self):
    """Sorting on a bare field name should emit a plain string entry."""
    query = ElasticQuery()
    query.sort('sort_field')
    expected = {'sort': ['sort_field']}
    assert_equal(self, query.dict(), expected)
def test_sort_order(self):
    """An explicit order wraps the field in an object carrying an 'order' key."""
    query = ElasticQuery()
    query.sort('sort_field', order='desc')
    expected = {'sort': [{'sort_field': {'order': 'desc'}}]}
    assert_equal(self, query.dict(), expected)
def test_query_aggregate_and_suggester(self):
    """Query, aggregation and suggester parts must all land in the output dict."""
    query = ElasticQuery()
    query.query(Query.match('field', 'query'))
    query.aggregate(Aggregate.max('agg_name', 'field'))
    query.suggest(Suggester.term('sugg_name', 'term text', 'term_field'))
    expected = {
        'query': {'match': {'field': {'query': 'query'}}},
        'aggregations': {'agg_name': {'max': {'field': 'field'}}},
        'suggest': {'sugg_name': {'text': 'term text', 'term': {'field': 'term_field'}}},
    }
    assert_equal(self, query.dict(), expected)
def test_just_query(self):
    """A lone query_string clause renders under the top-level 'query' key."""
    query = ElasticQuery()
    query.query(Query.query_string('this is a querystring'))
    expected = {'query': {'query_string': {'query': 'this is a querystring'}}}
    assert_equal(self, query.dict(), expected)
def test_aggregate(self):
    """A terms aggregation renders under 'aggregations' keyed by its name."""
    query = ElasticQuery()
    query.aggregate(Aggregate.terms('agg_name', 'field'))
    expected = {'aggregations': {'agg_name': {'terms': {'field': 'field'}}}}
    assert_equal(self, query.dict(), expected)
def test_suggester(self):
    """A term suggester renders under 'suggest' keyed by its name."""
    query = ElasticQuery()
    query.suggest(Suggester.term('sugg_name', 'term text', 'term_field'))
    expected = {
        'suggest': {'sugg_name': {'text': 'term text', 'term': {'field': 'term_field'}}},
    }
    assert_equal(self, query.dict(), expected)
def search(self, parms):
    """Search syslog documents filtered by device/location, time range,
    severity and message keyword.

    parms: dict that may contain 'location', 'device',
    'time' ({'from': ..., 'to': ...}), 'severityLevel' and 'keywordMessage'.
    Returns the formatted query results, or [] when no 'device' filter was
    requested (nothing to query).
    """
    device_controller = DeviceController(self.db, self.logger)
    # Default both lists so a missing 'location' key cannot trigger a
    # NameError in the intersection below (the original code only assigned
    # them inside their respective `if` branches).
    list_location_ip = []
    list_device_ip = []
    has_location = "location" in parms
    if has_location:
        list_location_ip = device_controller.get_device_list_by_locationid(
            parms["location"])
    if "device" in parms:
        list_device_ip = device_controller.get_device_list_by_hostname(
            parms["device"])
        # Intersection between the device search and the devices at the
        # location; with no location filter, use the device list as-is.
        if has_location:
            search_ip = list(set(list_location_ip) & set(list_device_ip))
        else:
            search_ip = list_device_ip
        print(search_ip)
        query_search = []
        if search_ip:
            query_search.append(Query.terms('host', search_ip))
        # .get() keeps optional filters optional instead of raising KeyError.
        if parms.get("time"):
            # Only the date part (before the first space) is used for the range.
            time_from = parms["time"]["from"].split(" ")[0]
            time_to = parms["time"]["to"].split(" ")[0]
            query_search.append(
                Query.range('@timestamp', gte=time_from, lte=time_to))
        if parms.get("severityLevel"):
            query_search.append(
                Query.terms('severity', parms["severityLevel"]))
        if parms.get("keywordMessage"):
            query_search.append(
                Query.terms('message', [parms["keywordMessage"]]))
        index = "syslog*"
        es = Elasticsearch(["http://192.168.100.249:9200"])
        q = ElasticQuery(es=es, index=index, doc_type='doc')
        q.size(1000)
        q.query(Query.bool(must=query_search))
        print(q.json(indent=4))
        return self.format_results(q.get())
    # No device filter -> no index to query.
    else:
        return []
def query_ec(self, str_query, q_fields, start_date=0, end_date=0, index='logs-*',
             doc_type='logs', hours=24, debug=False):
    """Run a Lucene query against Elasticsearch via msearch.

    str_query: Lucene query string.
    q_fields: stored fields to request.
    start_date/end_date: epoch-millisecond bounds; when either is 0 the
        window defaults to the last `hours` hours.
    Returns (search_arr, hits_data) on success, or None when the msearch
    call yields no usable response.
    Raises Exception when start_date > end_date.
    """
    if start_date > end_date:
        raise Exception('The start_date can\'t be greater than the end_date')
    if start_date == 0 or end_date == 0:
        # Default window: the last `hours` hours, expressed in epoch ms.
        dt_end_date = datetime.now().timestamp()
        dt_start_date = (datetime.now() - timedelta(hours=hours)).timestamp()
        start_date = int(dt_start_date) * 1000
        end_date = int(dt_end_date) * 1000
    elastic_qry = ElasticQuery(es=self.elastic_client, index=index,
                               doc_type=doc_type)
    elastic_qry.query(
        Query.bool(
            must=[Query.query_string(str_query),
                  Query.range('normalDate', gte=start_date, lte=end_date)]
        )
    )
    elastic_qry.aggregate(
        Aggregate.date_histogram('2', 'normalDate', '12h')
    )
    my_qry = elastic_qry.dict()
    my_qry['stored_fields'] = q_fields
    # msearch header: use the `index` parameter instead of the previously
    # hard-coded "logs-*" so a caller-supplied index is actually honoured.
    header_qry = {"index": [index], "ignore_unavailable": True}
    search_arr = [header_qry, my_qry]
    print('Elastic Query: ' + str(search_arr))
    print('------------------------------------------------------------------------------------')
    print('Lucene Query: ' + str_query)
    # msearch expects NDJSON: one JSON object per line.
    request = ''.join('%s \n' % json.dumps(each) for each in search_arr)
    resp = self.elastic_client.msearch(body=request)
    # Bug fix: the original used `and`, so a None resp slipped through and
    # crashed on resp['responses']; either condition alone means "no data".
    if resp is None or len(resp['responses']) <= 0:
        return None
    response = resp['responses'][0]
    hits_data = list()
    if response['hits']['total'] > 0:
        hits_data = list(response['hits']['hits'])
    return search_arr, hits_data
def run(start_time: datetime, end_time: datetime, window_size: timedelta):
    """Slide a window from start_time to end_time, feed each frame to two
    detectors, and save the accumulated divergences to disk."""
    query = ElasticQuery(server, index, username, password)
    pool = DetectorPool()
    src_dst = Detector(name='src_dst',
                       n_seeds=1,
                       n_bins=1024,
                       features=['src_addr', 'dst_addr'],
                       filt=None,
                       thresh=10,
                       flag_th=1)
    int_ext = Detector(name='int_ext',
                       n_seeds=1,
                       n_bins=1024,
                       features=['internal', 'external'],
                       filt=int_ext_filter,
                       thresh=10,
                       flag_th=1)
    pool.add_detector(src_dst)
    pool.add_detector(int_ext)

    src_dst_divs = []
    int_ext_divs = []
    cursor = start_time
    while cursor < end_time:
        frame = query.query_time(cursor, window_size)
        # The per-step results are not needed; only the divergences are kept.
        pool.run_next_timestep(frame)
        src_dst_divs.append(src_dst.get_divs())
        int_ext_divs.append(int_ext.get_divs())
        cursor += window_size

    # Flatten the per-window divergence arrays before saving.
    np.save('output/src_dst_divs_15_1024', np.concatenate(src_dst_divs))
    np.save('output/int_ext_divs_15_1024', np.concatenate(int_ext_divs))
def test_nested_aggregate(self):
    """A sub-aggregation nests under its parent's 'aggregations' key."""
    query = ElasticQuery()
    parent = Aggregate.terms('agg_name', 'field')
    parent.aggregate(Aggregate.sum('sub_agg_name', 'sub_field'))
    query.aggregate(parent)
    expected = {
        'aggregations': {
            'agg_name': {
                'terms': {'field': 'field'},
                'aggregations': {
                    'sub_agg_name': {'sum': {'field': 'sub_field'}},
                },
            },
        },
    }
    assert_equal(self, query.dict(), expected)
def run(start_time: datetime, end_time: datetime, window_size: timedelta):
    """Run four two-step detectors over sliding windows of data and pickle
    the detections, detection frames and per-detector divergences."""
    eq = ElasticQuery(server, index, username, password)
    pool = DetectorPool()
    detectors = [
        Detector(name='ext_4_sigma', n_seeds=8, n_bins=1024,
                 features=['external'], filt=int_ext_filter,
                 thresh=0.36, flag_th=6, detection_rule='two_step'),
        Detector(name='int_4_sigma', n_seeds=8, n_bins=1024,
                 features=['internal'], filt=int_ext_filter,
                 thresh=0.44, flag_th=6, detection_rule='two_step'),
        Detector(name='src_4_sigma', n_seeds=8, n_bins=1024,
                 features=['src_addr'], filt=None,
                 thresh=0.32, flag_th=6, detection_rule='two_step'),
        Detector(name='dst_4_sigma', n_seeds=8, n_bins=1024,
                 features=['dst_addr'], filt=None,
                 thresh=0.32, flag_th=6, detection_rule='two_step'),
    ]

    # Register every detector with the pool and prepare per-name result slots.
    name_list = []
    all_divs = {}
    for det in detectors:
        pool.add_detector(det)
        name_list.append(det.name)
        all_divs[det.name] = []

    detections = []
    detection_frames = []
    divs_detector = detectors[0]  # Only need the divs from one detector
    ext_divs = []

    # Main operation loop: pull a window of data, run all detectors, collect.
    cursor = start_time
    while cursor < end_time:
        frame = eq.query_time(cursor, window_size)
        cursor += window_size
        step_results = pool.run_next_timestep(frame)
        detections.append(step_results[0])
        detection_frames.append(step_results[1])
        logger.debug(' '.join([str(len(_)) for _ in step_results]))
        for det in detectors:
            all_divs[det.name].append(det.get_divs())
        ext_divs.append(divs_detector.get_divs())

    full_detections = pd.concat(detection_frames)
    window_size_fmt = int(window_size.total_seconds() / 60)
    pd.to_pickle(
        full_detections,
        'output/detection_frame_{}-{}_{}.pkl'.format(start_time.day,
                                                     start_time.month,
                                                     window_size_fmt))
    pd.to_pickle(
        detection_list_to_df(detections),
        'output/detections_{}-{}_{}.pkl'.format(start_time.day,
                                                start_time.month,
                                                window_size_fmt))
    with open(
            'output/ext_divs_{}-{}_{}.pkl'.format(start_time.day,
                                                  start_time.month,
                                                  window_size_fmt),
            'wb') as fp:
        pickle.dump(ext_divs, fp, protocol=pickle.HIGHEST_PROTOCOL)
    for det in detectors:
        with open(
                'output/divs_{}_{}-{}_{}.pkl'.format(det.name,
                                                     start_time.day,
                                                     start_time.month,
                                                     window_size_fmt),
                'wb') as fp:
            pickle.dump(all_divs[det.name], fp, protocol=pickle.HIGHEST_PROTOCOL)
def return_single_field_search(field, search):
    """Build an ElasticQuery searching `search` within a single `field`,
    aggregating on matching terms and sorting by score.

    NOTE(review): the query object is built but never executed or returned,
    so this function returns None — confirm whether a trailing
    `return q.get()` is missing.
    """
    query = ElasticQuery(es=Elasticsearch(), index=all_indexes, doc_type='')
    query.aggregate(Aggregate.terms(search, field))
    query.query(Query.query_string(search, field,
                                   default_operator='OR',
                                   analyze_wildcard=True))
    query.fields(field)
    ElasticQuery.sort(query, "_score", order="desc")
def test_misc(self):
    """size/from_/timeout/set/fields each map to their own top-level key."""
    query = ElasticQuery()
    query.size(10)
    query.from_(50)
    query.timeout('60s')
    query.set('key', 'value')
    query.fields(('one_field', 'two_field'))
    expected = {
        'size': 10,
        'from': 50,
        'timeout': '60s',
        'key': 'value',
        '_source': ['one_field', 'two_field'],
    }
    assert_equal(self, query.dict(), expected)
def test_get(self):
    """get() must raise ValueError until ES client, index and doc type are all set."""
    # No ES client at all.
    with self.assertRaises(ValueError):
        ElasticQuery().get()
    # ES client but no index.
    with self.assertRaises(ValueError):
        ElasticQuery(es=FakeElasticSearch()).get()
    # ES client and index but no doc type.
    with self.assertRaises(ValueError):
        ElasticQuery(es=FakeElasticSearch(), index='').get()
    # Fully configured: the fake backend's canned response comes through.
    query = ElasticQuery(es=FakeElasticSearch(), index='', doc_type='')
    self.assertEqual(query.get(), 'FakeElasticSearch')
def logicalSearch(self, query):
    """Parse a boolean query string of the form
    "MUST (idx.field=value) SHOULD (...) MUST_NOT (...)" and execute it.

    Returns the Elasticsearch result, or an HttpResponse describing a
    syntax error when no recognised clause is present.
    """
    size = 500
    must_fields = must_values = ""
    should_fields = should_values = ""
    mustnot_fields = mustnot_values = ""

    # Bug fix: the original first line re.sub'ed an undefined name
    # `searchquery` (this method's parameter is `query`), which raised
    # NameError on every call; that dead statement is removed.
    # Normalise whitespace and tag the boolean keywords with %...% markers so
    # return_elements() can locate them; the trailing " %" is an end marker.
    query = re.sub(r'\s+', ' ', query).replace(") (", ")(") \
        .replace("MUST(", "%MUST%(").replace("SHOULD(", "%SHOULD%(") \
        .replace("MUST_NOT(", "%MUST_NOT%(").replace("MUST (", "%MUST%(") \
        .replace("SHOULD (", "%SHOULD%(").replace("MUST_NOT (", "%MUST_NOT%(") + " %"

    # Extract fields/values for each clause that is actually present.
    result = None
    if query.find("%MUST%") != -1:
        result = self.return_elements(query, "MUST")
        must_fields = self.return_values(result, ".", "=")
        must_fields = must_fields[:len(self.remove_dupl(must_fields))]
        must_values = self.return_values(result, "=", ")")
    if query.find("%SHOULD%") != -1:
        result = self.return_elements(query, "SHOULD")
        should_fields = self.return_values(result, ".", "=")
        should_fields = should_fields[:len(self.remove_dupl(should_fields))]
        should_values = self.return_values(result, "=", ")")
    if query.find("%MUST_NOT%") != -1:
        result = self.return_elements(query, "MUST_NOT")
        mustnot_fields = self.return_values(result, ".", "=")
        mustnot_fields = mustnot_fields[:len(self.remove_dupl(mustnot_fields))]
        mustnot_values = self.return_values(result, "=", ")")

    # Bug fix: reject syntactically empty queries *before* touching `result`
    # (the original dereferenced it unconditionally and crashed with a
    # NameError when no clause matched, never reaching its error branch).
    if result is None:
        return HttpResponse('Server: Wrong query syntax!')

    # Elasticsearch connection initialization; indexes come from the last
    # parsed clause, matching the original behaviour.
    all_indexes = self.return_values(result, "(", ".")
    es = Elasticsearch(hosts=[{"host": self.host, "port": self.port}])
    q = ElasticQuery(es, index=self.remove_dupl(all_indexes), doc_type='')
    ElasticQuery.sort(q, "_score", order="desc")
    ElasticQuery.size(q, str(size))

    # Build only the bool clauses that are present; this replaces the
    # original 7-branch if/elif chain with equivalent kwargs construction.
    bool_clauses = {}
    if must_fields != "":
        bool_clauses['must'] = [self.compose_query(must_values, must_fields)]
    if should_fields != "":
        bool_clauses['should'] = [self.compose_query(should_values, should_fields)]
    if mustnot_fields != "":
        bool_clauses['must_not'] = [self.compose_query(mustnot_values, mustnot_fields)]
    q.query(Query.bool(**bool_clauses))
    return q.get()
def freeSearch(self, searchquery):
    """Parse a free-form query of the shape
    "TERMS [\in FIELDS] [\\filter TERMS [\in FIELDS]] [\index idx1, idx2]"
    and execute it against Elasticsearch.

    Returns the raw search result, or an HttpResponse describing a syntax
    error when the query shape is not recognised.
    """
    searchquery = re.sub(r'\s+', ' ', searchquery)
    # Elasticsearch connection initialization.
    es = Elasticsearch(hosts=[{"host": self.host, "port": self.port}])
    size = 500
    index = ""
    # An optional trailing "\index a, b" clause names the target indexes;
    # strip it from the query and normalise the list to comma-separated form.
    if searchquery.find("\index") != -1:
        index = searchquery.replace(", ", ",").replace(" ", ",")[searchquery.find("\index") + 7:]
        searchquery = searchquery[:searchquery.find("\index")]
    q = ElasticQuery(es, index=index, doc_type='')
    ElasticQuery.sort(q, "_score", order="desc")
    ElasticQuery.size(q, size)

    # Hoist the marker positions/counts used by the branch conditions; the
    # query string is not modified past this point, so these stay valid.
    in_count = searchquery.count("\in ")
    first_in = searchquery.find("\in")
    last_in = searchquery.rfind("\in")
    filter_pos = searchquery.find("\\filter")
    has_filter = searchquery.find("\\filter ") != -1

    # A valid query holds at most two '\in' and at most one '\filter'.
    if not (in_count <= 2 and searchquery.count("\\filter ") <= 1):
        return HttpResponse('Server: Wrong query syntax!')

    if in_count == 2 and has_filter:
        # "SELECT *** IN *** FILTER *** IN ***"
        q.query(Query.bool(
            must=[self.compose_query(searchquery[:first_in - 1],
                                     searchquery[first_in + 4:filter_pos])],
            must_not=[self.compose_query(searchquery[filter_pos + 8:last_in - 1],
                                         searchquery[last_in + 4:])]
        ))
    elif in_count == 1 and has_filter:
        # "SELECT *** IN *** FILTER ***"
        q.query(Query.bool(
            must=[self.compose_query(searchquery[:first_in - 1],
                                     searchquery[first_in + 4:filter_pos])],
            must_not=[self.compose_query(searchquery[filter_pos + 8:], "_all")]
        ))
    elif in_count == 1 and not has_filter and filter_pos == -1:
        # "SELECT *** IN ***"
        q.query(self.compose_query(searchquery[:first_in - 1],
                                   searchquery[first_in + 4:]))
    elif (in_count == 0 and searchquery.count("\in") == 0
            and not has_filter and filter_pos == -1):
        # "SELECT ***" — search every field.
        q.query(self.compose_query(searchquery, "_all"))
    else:
        return HttpResponse('Server: Wrong query syntax!')
    return q.get()
}, 'filter': { 'bool': { 'must': [{ 'range': { 'field_name1': { 'gte': 0, 'lt': 100 } } }], 'must_not': [], 'should': [] } }, 'sort': [] } } # Test queries query = ElasticQuery() query.must(Filter.range('field_name1', gte=0, lt=100)) query.aggregate('test_aggregate1', Aggregate.terms('field_name1')) query.aggregate('test_aggregate2', Aggregate.stats('field_name2')) test('Full query: range + terms agg + stats agg', query.structure, QUERIES['RANGE_AGGTERMS_AGGSTATS']) # If we're still here, we're done! print '[ElasticQuery] All tests complete!\n' sys.exit(0)