Example #1
0
    def test_sort(self):
        """A sort field given without an order is emitted as a bare string."""
        query = ElasticQuery()
        query.sort('sort_field')

        expected = {'sort': ['sort_field']}
        assert_equal(self, query.dict(), expected)
Example #2
0
    def test_sort_order(self):
        """A sort field given with an order is emitted as a nested mapping."""
        query = ElasticQuery()
        query.sort('sort_field', order='desc')

        expected_entry = {'sort_field': {'order': 'desc'}}
        assert_equal(self, query.dict(), {'sort': [expected_entry]})
Example #3
0
    def test_query_aggregate_and_suggester(self):
        """Query, aggregation and suggester can be combined in one body."""
        combined = ElasticQuery()
        combined.query(Query.match('field', 'query'))
        combined.aggregate(Aggregate.max('agg_name', 'field'))
        combined.suggest(
            Suggester.term('sugg_name', 'term text', 'term_field'))

        # Each section of the expected body is spelled out separately so a
        # failure points at the part that diverged.
        expected_query = {'match': {'field': {'query': 'query'}}}
        expected_aggs = {'agg_name': {'max': {'field': 'field'}}}
        expected_suggest = {
            'sugg_name': {
                'text': 'term text',
                'term': {'field': 'term_field'},
            },
        }

        assert_equal(self, combined.dict(), {
            'query': expected_query,
            'aggregations': expected_aggs,
            'suggest': expected_suggest,
        })
Example #4
0
    def test_just_query(self):
        """A lone query_string query yields a body with only ``query``."""
        query = ElasticQuery()
        query.query(Query.query_string('this is a querystring'))

        expected = {
            'query': {
                'query_string': {'query': 'this is a querystring'},
            },
        }
        assert_equal(self, query.dict(), expected)
Example #5
0
    def test_aggregate(self):
        """A single terms aggregation is placed under ``aggregations``."""
        query = ElasticQuery()
        query.aggregate(Aggregate.terms('agg_name', 'field'))

        expected = {
            'aggregations': {
                'agg_name': {'terms': {'field': 'field'}},
            },
        }
        assert_equal(self, query.dict(), expected)
Example #6
0
    def test_suggester(self):
        """A term suggester is placed under ``suggest`` keyed by its name."""
        query = ElasticQuery()
        query.suggest(Suggester.term('sugg_name', 'term text', 'term_field'))

        expected_suggester = {
            'text': 'term text',
            'term': {'field': 'term_field'},
        }
        assert_equal(self, query.dict(),
                     {'suggest': {'sugg_name': expected_suggester}})
Example #7
0
    def search(self, parms):
        """Search syslog entries for the devices selected by ``parms``.

        ``parms`` is a mapping that may contain:

        * ``"location"`` - passed to
          ``DeviceController.get_device_list_by_locationid``.
        * ``"device"`` - passed to
          ``DeviceController.get_device_list_by_hostname``.
        * ``"time"`` - mapping with ``"from"`` and ``"to"`` strings; only the
          part before the first space of each is used for the range.
        * ``"severityLevel"`` - value(s) for a terms query on ``severity``.
        * ``"keywordMessage"`` - single keyword for a terms query on
          ``message``.

        The host filter is the intersection of the IP lists of whichever of
        ``"location"`` / ``"device"`` are supplied.  Previously a missing key
        left its list unbound and raised ``NameError``.

        Returns:
            The output of ``self.format_results`` for the query, or ``[]``
            when no host matches (nothing to query).
        """
        device_controller = DeviceController(self.db, self.logger)

        # One candidate set per criterion that was actually supplied.
        candidate_sets = []
        if "location" in parms:
            candidate_sets.append(set(
                device_controller.get_device_list_by_locationid(
                    parms["location"])))
        if "device" in parms:
            candidate_sets.append(set(
                device_controller.get_device_list_by_hostname(
                    parms["device"])))

        # Intersection between device search and devices in location.
        if candidate_sets:
            search_ip = list(set.intersection(*candidate_sets))
        else:
            search_ip = []
        print(search_ip)

        # No hosts to query.
        if not search_ip:
            return []

        query_search = [Query.terms('host', search_ip)]

        # Optional filters: .get() so an absent key means "no filter"
        # instead of a KeyError.
        time_range = parms.get("time")
        if time_range:
            time_from = time_range["from"].split(" ")[0]
            time_to = time_range["to"].split(" ")[0]
            query_search.append(
                Query.range('@timestamp', gte=time_from, lte=time_to))

        severity = parms.get("severityLevel")
        if severity:
            query_search.append(Query.terms('severity', severity))

        keyword = parms.get("keywordMessage")
        if keyword:
            query_search.append(Query.terms('message', [keyword]))

        index = "syslog*"
        # NOTE(review): the Elasticsearch endpoint is hard-coded here.
        es = Elasticsearch(["http://192.168.100.249:9200"])
        q = ElasticQuery(es=es, index=index, doc_type='doc')

        q.size(1000)
        q.query(Query.bool(must=query_search))

        print(q.json(indent=4))
        return self.format_results(q.get())
    def query_ec(self, str_query, q_fields, start_date=0, end_date=0, index='logs-*', doc_type='logs',
                 hours=24, debug=False):
        """Run a Lucene query-string search restricted to a ``normalDate`` range.

        The request is sent through ``self.elastic_client.msearch`` as a
        header line followed by the query body.

        Args:
            str_query: Lucene query string given to ``Query.query_string``.
            q_fields: value stored under ``stored_fields`` in the body.
            start_date: lower bound for ``normalDate``.
            end_date: upper bound for ``normalDate``.  When either bound is
                0, both are replaced by epoch-millisecond values covering
                the last ``hours`` hours.
            index: index pattern used for the query and the msearch header.
            doc_type: document type handed to ``ElasticQuery``.
            hours: look-back window used when no explicit range is given.
            debug: accepted for interface compatibility; not used here.

        Returns:
            ``None`` when the client returns nothing or no responses,
            otherwise a tuple ``(search_arr, hits_data)`` where
            ``search_arr`` is ``[header, body]`` as sent and ``hits_data``
            is the list of hits from the first response.

        Raises:
            Exception: if ``start_date`` is greater than ``end_date``.
        """
        if start_date > end_date:
            raise Exception('The start_date can\'t be greater than the end_date')

        if start_date == 0 or end_date == 0:
            # Take "now" once so both bounds derive from the same instant.
            now = datetime.now()
            start_date = int((now - timedelta(hours=hours)).timestamp()) * 1000
            end_date = int(now.timestamp()) * 1000

        elastic_qry = ElasticQuery(es=self.elastic_client, index=index, doc_type=doc_type)
        elastic_qry.query(
            Query.bool(
                must=[Query.query_string(str_query),
                      Query.range('normalDate', gte=start_date, lte=end_date)]
            )
        )

        elastic_qry.aggregate(
            Aggregate.date_histogram('2', 'normalDate', '12h')
        )

        my_qry = elastic_qry.dict()
        my_qry['stored_fields'] = q_fields

        # The msearch header selects the target index, so it must honour the
        # ``index`` argument (it used to be hard-coded to "logs-*").
        header_qry = {"index": [index], "ignore_unavailable": True}
        search_arr = [header_qry, my_qry]

        print('Elastic Query: ' + str(search_arr))
        print('------------------------------------------------------------------------------------')
        print('Lucene Query: ' + str_query)

        # One JSON document per line, as msearch expects.
        request = ''.join('%s \n' % json.dumps(each) for each in search_arr)

        resp = self.elastic_client.msearch(body=request)

        # "or", not "and": an empty/None reply or an empty response list both
        # mean there is nothing to read.
        if not resp or not resp.get('responses'):
            return None

        response = resp['responses'][0]
        # Read the hits list directly: it is empty whenever the total is 0,
        # and this avoids comparing ``hits.total`` to an int when the server
        # reports it as an object.
        hits_data = list(response['hits']['hits'])

        return search_arr, hits_data
Example #9
0
def run(start_time: datetime, end_time: datetime, window_size: timedelta):
    """Step two detectors over a time range and save their divergences.

    Starting at ``start_time`` and advancing by ``window_size`` while the
    window start is before ``end_time``, each window is fetched with
    ``ElasticQuery.query_time`` and fed to the detector pool.  After every
    step the ``get_divs()`` output of each detector is collected; at the end
    the collected arrays are concatenated and written with ``np.save``.
    """
    window_start = start_time
    elastic = ElasticQuery(server, index, username, password)
    pool = DetectorPool()

    # Settings shared by both detectors.
    shared = dict(n_seeds=1, n_bins=1024, thresh=10, flag_th=1)
    src_dst = Detector(name='src_dst',
                       features=['src_addr', 'dst_addr'],
                       filt=None,
                       **shared)
    int_ext = Detector(name='int_ext',
                       features=['internal', 'external'],
                       filt=int_ext_filter,
                       **shared)

    for detector in (src_dst, int_ext):
        pool.add_detector(detector)

    src_dst_divs, int_ext_divs = [], []

    while window_start < end_time:
        # The detection results are not needed here, only the divergences.
        pool.run_next_timestep(elastic.query_time(window_start, window_size))

        src_dst_divs.append(src_dst.get_divs())
        int_ext_divs.append(int_ext.get_divs())

        window_start += window_size

    # Merge the per-window arrays into one array per detector.
    src_dst_divs = np.concatenate(src_dst_divs)
    int_ext_divs = np.concatenate(int_ext_divs)
    np.save('output/src_dst_divs_15_1024', src_dst_divs)
    np.save('output/int_ext_divs_15_1024', int_ext_divs)
Example #10
0
    def test_nested_aggregate(self):
        """A sub-aggregation is nested under its parent's ``aggregations``."""
        parent = Aggregate.terms('agg_name', 'field')
        nested = parent.aggregate(Aggregate.sum('sub_agg_name', 'sub_field'))

        query = ElasticQuery()
        query.aggregate(nested)

        expected_sub = {'sub_agg_name': {'sum': {'field': 'sub_field'}}}
        expected = {
            'aggregations': {
                'agg_name': {
                    'terms': {'field': 'field'},
                    'aggregations': expected_sub,
                },
            },
        }
        assert_equal(self, query.dict(), expected)
Example #11
0
def run(start_time: datetime, end_time: datetime, window_size: timedelta):
    """Run four 'two_step' detectors over a time range and pickle the output.

    Windows of ``window_size`` are fetched with ``ElasticQuery.query_time``
    from ``start_time`` while the window start is before ``end_time``.  For
    each window the detector pool is stepped, its first two results are
    collected, and ``get_divs()`` of every detector is recorded.  At the end
    the detection frames, the detections, the first detector's divergences
    and each detector's divergences are written under ``output/``; file
    names carry the start day, start month and window length in minutes.
    """
    cursor = start_time
    elastic = ElasticQuery(server, index, username, password)
    pool = DetectorPool()

    # Settings shared by every detector in this run.
    shared = dict(n_seeds=8,
                  n_bins=1024,
                  flag_th=6,
                  detection_rule='two_step')
    detectors = [
        Detector(name='ext_4_sigma',
                 features=['external'],
                 filt=int_ext_filter,
                 thresh=0.36,
                 **shared),
        Detector(name='int_4_sigma',
                 features=['internal'],
                 filt=int_ext_filter,
                 thresh=0.44,
                 **shared),
        Detector(name='src_4_sigma',
                 features=['src_addr'],
                 filt=None,
                 thresh=0.32,
                 **shared),
        Detector(name='dst_4_sigma',
                 features=['dst_addr'],
                 filt=None,
                 thresh=0.32,
                 **shared),
    ]

    name_list = []
    all_divs = {}

    # Register every detector with the pool and prepare its result list.
    for detector in detectors:
        pool.add_detector(detector)
        name_list.append(detector.name)
        all_divs[detector.name] = []

    detections, detection_frames = [], []

    # Divergences of the first detector are also kept in their own list.
    first_detector = detectors[0]
    ext_divs = []

    # Main loop: one iteration per time window.
    while cursor < end_time:
        frame = elastic.query_time(cursor, window_size)
        cursor += window_size

        results = pool.run_next_timestep(frame)

        detections.append(results[0])
        detection_frames.append(results[1])
        logger.debug(' '.join(str(len(part)) for part in results))

        for detector in detectors:
            all_divs[detector.name].append(detector.get_divs())

        ext_divs.append(first_detector.get_divs())

    full_detections = pd.concat(detection_frames)
    window_size_fmt = int(window_size.total_seconds() / 60)
    # Values interpolated into every output file name.
    stamp = (start_time.day, start_time.month, window_size_fmt)

    pd.to_pickle(full_detections,
                 'output/detection_frame_{}-{}_{}.pkl'.format(*stamp))
    pd.to_pickle(detection_list_to_df(detections),
                 'output/detections_{}-{}_{}.pkl'.format(*stamp))

    with open('output/ext_divs_{}-{}_{}.pkl'.format(*stamp), 'wb') as fp:
        pickle.dump(ext_divs, fp, protocol=pickle.HIGHEST_PROTOCOL)

    for detector in detectors:
        divs_path = 'output/divs_{}_{}-{}_{}.pkl'.format(detector.name, *stamp)
        with open(divs_path, 'wb') as fp:
            pickle.dump(all_divs[detector.name],
                        fp,
                        protocol=pickle.HIGHEST_PROTOCOL)
Example #12
0
 def return_single_field_search(field,search):
     """Build a scored single-field search for ``search`` on ``field``.

     Configures a terms aggregation, a query_string query, the selected
     field and a descending ``_score`` sort on a fresh ``ElasticQuery``.
     The query object is only configured here; nothing is executed or
     returned.
     """
     client = Elasticsearch()
     single_field_query = ElasticQuery(es=client, index=all_indexes, doc_type='')
     single_field_query.aggregate(Aggregate.terms(search, field))
     single_field_query.query(
         Query.query_string(search, field,
                            default_operator='OR',
                            analyze_wildcard=True))
     single_field_query.fields(field)
     single_field_query.sort("_score", order="desc")
Example #13
0
    def test_misc(self):
        """size/from_/timeout/set/fields each map to their body key."""
        query = ElasticQuery()
        query.size(10)
        query.from_(50)
        query.timeout('60s')
        query.set('key', 'value')
        query.fields(('one_field', 'two_field'))

        expected = {
            'size': 10,
            'from': 50,
            'timeout': '60s',
            'key': 'value',
            '_source': ['one_field', 'two_field'],
        }
        assert_equal(self, query.dict(), expected)
Example #14
0
    def test_get(self):
        """``get`` needs es, index and doc_type; otherwise ValueError."""
        # No ES client at all.
        without_es = ElasticQuery()
        self.assertRaises(ValueError, without_es.get)

        # Client but no index argument.
        without_index = ElasticQuery(es=FakeElasticSearch())
        self.assertRaises(ValueError, without_index.get)

        # Client and index but no doc_type argument.
        without_doc_type = ElasticQuery(es=FakeElasticSearch(), index='')
        self.assertRaises(ValueError, without_doc_type.get)

        # Everything supplied: the fake client's result is passed through.
        complete = ElasticQuery(es=FakeElasticSearch(), index='', doc_type='')
        self.assertEqual(complete.get(), 'FakeElasticSearch')
Example #15
0
    def logicalSearch(self,query):
        """Run a boolean search written as ``MUST(...) SHOULD(...) MUST_NOT(...)``.

        The query text is normalised (whitespace collapsed, each keyword
        wrapped in ``%`` markers), every clause that is present is parsed
        with ``self.return_elements`` / ``self.return_values``, and the
        clauses are combined into a single ``Query.bool``.

        Args:
            query: the raw query string.

        Returns:
            The result of ``ElasticQuery.get()``, or an ``HttpResponse``
            with a syntax-error message when no usable clause is found.
        """
        size = 500
        syntax_error = 'Server: Wrong query syntax!'

        # Collapse whitespace and wrap each keyword in % markers; a trailing
        # " %" terminates the last clause.
        query = (re.sub(r'\s+', ' ', query)
                 .replace(") (", ")(")
                 .replace("MUST(", "%MUST%(")
                 .replace("SHOULD(", "%SHOULD%(")
                 .replace("MUST_NOT(", "%MUST_NOT%(")
                 .replace("MUST (", "%MUST%(")
                 .replace("SHOULD (", "%SHOULD%(")
                 .replace("MUST_NOT (", "%MUST_NOT%(")
                 + " %")

        # Parse only the clauses that actually occur in the query.
        result = None
        parsed = []  # (bool keyword argument, values, fields)
        for keyword, clause in (("MUST", "must"),
                                ("SHOULD", "should"),
                                ("MUST_NOT", "must_not")):
            if "%" + keyword + "%" not in query:
                continue
            result = self.return_elements(query, keyword)
            fields = self.return_values(result, ".", "=")
            fields = fields[:len(self.remove_dupl(fields))]
            values = self.return_values(result, "=", ")")
            if fields != "":
                parsed.append((clause, values, fields))

        # Without any clause there is nothing to derive the indexes from;
        # this used to raise NameError on the unbound ``result``.
        if result is None:
            return HttpResponse(syntax_error)

        # Elasticsearch connection initialization.
        # NOTE(review): indexes are taken from the last parsed clause only,
        # as before - confirm that is intended.
        all_indexes = self.return_values(result, "(", ".")
        es = Elasticsearch(hosts = [{"host": self.host, "port": self.port}])
        q = ElasticQuery(es,index=self.remove_dupl(all_indexes),doc_type='')
        q.sort("_score", order="desc")
        q.size(str(size))

        if not parsed:
            return HttpResponse(syntax_error)

        # Pass Query.bool exactly the clauses that were supplied.
        bool_clauses = dict(
            (clause, [self.compose_query(values, fields)])
            for clause, values, fields in parsed
        )
        q.query(Query.bool(**bool_clauses))

        return q.get()
Example #16
0
    def freeSearch(self, searchquery):
        r"""Run a free-text search of the form ``text [\in fields] [\filter text [\in fields]] [\index names]``.

        Supported shapes:

        * ``text`` - search ``_all``.
        * ``text \in fields`` - search the given fields.
        * ``text \in fields \filter text`` - additionally exclude documents
          matching the filter text in ``_all``.
        * ``text \in fields \filter text \in fields`` - exclude documents
          matching the filter text in the given fields.

        An optional trailing ``\index a, b`` selects the indexes; the names
        are joined with commas.

        Args:
            searchquery: the raw query string.

        Returns:
            The result of ``ElasticQuery.get()``, or an ``HttpResponse``
            with a syntax-error message for any other shape.
        """
        syntax_error = 'Server: Wrong query syntax!'
        searchquery = re.sub(r'\s+', ' ', searchquery)
        # Elasticsearch connection initialization
        es = Elasticsearch(hosts = [{"host": self.host, "port": self.port}])
        size = 500
        index = ""

        # Find all indexes and remove them from the query.  Slice first and
        # normalise separators afterwards: the old code computed the offset
        # on the original string but sliced the already-shortened one, which
        # dropped characters whenever ", " appeared before the marker.
        index_pos = searchquery.find("\\index")
        if index_pos != -1:
            index_part = searchquery[index_pos + 7:].strip()
            index = index_part.replace(", ", ",").replace(" ", ",")
            searchquery = searchquery[:index_pos]

        q = ElasticQuery(es, index=index, doc_type='')
        q.sort("_score", order="desc")
        q.size(size)

        in_count = searchquery.count("\\in ")
        in_first = searchquery.find("\\in")
        in_last = searchquery.rfind("\\in")
        filter_pos = searchquery.find("\\filter")
        has_filter_clause = searchquery.find("\\filter ") != -1
        has_filter_marker = filter_pos != -1

        # Query must have at most 2 '\in' and at most 1 '\filter'.
        if in_count > 2 or searchquery.count("\\filter ") > 1:
            return HttpResponse(syntax_error)

        # "text \in fields \filter text \in fields"
        if in_count == 2 and has_filter_clause:
            q.query(Query.bool(
                must=[self.compose_query(
                    searchquery[:in_first - 1],
                    searchquery[in_first + 4:filter_pos])],
                must_not=[self.compose_query(
                    searchquery[filter_pos + 8:in_last - 1],
                    searchquery[in_last + 4:])]
            ))

        # "text \in fields \filter text"
        elif in_count == 1 and has_filter_clause:
            q.query(Query.bool(
                must=[self.compose_query(
                    searchquery[:in_first - 1],
                    searchquery[in_first + 4:filter_pos])],
                must_not=[self.compose_query(
                    searchquery[filter_pos + 8:], "_all")]
            ))

        # "text \in fields"
        elif in_count == 1 and not has_filter_marker:
            q.query(self.compose_query(
                searchquery[:in_first - 1], searchquery[in_first + 4:]))

        # "text"
        elif in_count == 0 and in_first == -1 and not has_filter_marker:
            q.query(self.compose_query(searchquery, "_all"))

        # Anything else is malformed.
        else:
            return HttpResponse(syntax_error)

        return q.get()
Example #17
0
        },
        'filter': {
            'bool': {
                'must': [{
                    'range': {
                        'field_name1': {
                            'gte': 0,
                            'lt': 100
                        }
                    }
                }],
                'must_not': [],
                'should': []
            }
        },
        'sort': []
    }
}

# Full-query check: one range filter plus a terms and a stats aggregation,
# compared against the expected structure stored in QUERIES.
query = ElasticQuery()

range_filter = Filter.range('field_name1', gte=0, lt=100)
query.must(range_filter)

terms_aggregate = Aggregate.terms('field_name1')
query.aggregate('test_aggregate1', terms_aggregate)

stats_aggregate = Aggregate.stats('field_name2')
query.aggregate('test_aggregate2', stats_aggregate)

test(
    'Full query: range + terms agg + stats agg',
    query.structure,
    QUERIES['RANGE_AGGTERMS_AGGSTATS'],
)

# Reaching this point means every check above passed.
print('[ElasticQuery] All tests complete!\n')
sys.exit(0)